Adding qemu as a submodule of KVMFORNFV

This patch adds qemu as a submodule of the kvmfornfv repo and makes use of the latest updated qemu for the execution of all test cases.

Change-Id: I1280af507a857675c7f81d30c95255635667bdd7
Signed-off-by: RajithaY <rajithax.yerrumsetty@intel.com>
author: RajithaY <rajithax.yerrumsetty@intel.com> 2017-04-25 03:31:15 -0700
committer: Rajitha Yerrumchetty <rajithax.yerrumsetty@intel.com> 2017-05-22 06:48:08 +0000
commit: bb756eebdac6fd24e8919e2c43f7d2c8c4091f59 (patch)
tree: ca11e03542edf2d8f631efeca5e1626d211107e3 /qemu/block
parent: a14b48d18a9ed03ec191cf16b162206998a895ce (diff)
62 files changed, 0 insertions, 55248 deletions
diff --git a/qemu/block/Makefile.objs b/qemu/block/Makefile.objs
deleted file mode 100644
index 44a541622..000000000
--- a/qemu/block/Makefile.objs
+++ /dev/null
@@ -1,46 +0,0 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
-block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
-block-obj-y += qed-check.o
-block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
-block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
-block-obj-y += block-backend.o snapshot.o qapi.o
-block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
-block-obj-$(CONFIG_POSIX) += raw-posix.o
-block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o io.o
-block-obj-y += throttle-groups.o
-
-block-obj-y += nbd.o nbd-client.o sheepdog.o
-block-obj-$(CONFIG_LIBISCSI) += iscsi.o
-block-obj-$(CONFIG_LIBNFS) += nfs.o
-block-obj-$(CONFIG_CURL) += curl.o
-block-obj-$(CONFIG_RBD) += rbd.o
-block-obj-$(CONFIG_GLUSTERFS) += gluster.o
-block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o dirty-bitmap.o
-block-obj-y += write-threshold.o
-
-block-obj-y += crypto.o
-
-common-obj-y += stream.o
-common-obj-y += commit.o
-common-obj-y += backup.o
-
-iscsi.o-cflags     := $(LIBISCSI_CFLAGS)
-iscsi.o-libs       := $(LIBISCSI_LIBS)
-curl.o-cflags      := $(CURL_CFLAGS)
-curl.o-libs        := $(CURL_LIBS)
-rbd.o-cflags       := $(RBD_CFLAGS)
-rbd.o-libs         := $(RBD_LIBS)
-gluster.o-cflags   := $(GLUSTERFS_CFLAGS)
-gluster.o-libs     := $(GLUSTERFS_LIBS)
-ssh.o-cflags       := $(LIBSSH2_CFLAGS)
-ssh.o-libs         := $(LIBSSH2_LIBS)
-archipelago.o-libs := $(ARCHIPELAGO_LIBS)
-block-obj-m        += dmg.o
-dmg.o-libs         := $(BZIP2_LIBS)
-qcow.o-libs        := -lz
-linux-aio.o-libs   := -laio
diff --git a/qemu/block/accounting.c b/qemu/block/accounting.c
deleted file mode 100644
index 3f457c4e7..000000000
--- a/qemu/block/accounting.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * QEMU System Emulator block accounting
- *
- * Copyright (c) 2011 Christoph Hellwig
- * Copyright (c) 2015 Igalia, S.L.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "block/accounting.h"
-#include "block/block_int.h"
-#include "qemu/timer.h"
-#include "sysemu/qtest.h"
-
-static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
-static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
-
-void block_acct_init(BlockAcctStats *stats, bool account_invalid,
-                     bool account_failed)
-{
-    stats->account_invalid = account_invalid;
-    stats->account_failed = account_failed;
-
-    if (qtest_enabled()) {
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-}
-
-void block_acct_cleanup(BlockAcctStats *stats)
-{
-    BlockAcctTimedStats *s, *next;
-    QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
-        g_free(s);
-    }
-}
-
-void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
-{
-    BlockAcctTimedStats *s;
-    unsigned i;
-
-    s = g_new0(BlockAcctTimedStats, 1);
-    s->interval_length = interval_length;
-    QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
-
-    for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
-        timed_average_init(&s->latency[i], clock_type,
-                           (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
-    }
-}
-
-BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
-                                              BlockAcctTimedStats *s)
-{
-    if (s == NULL) {
-        return QSLIST_FIRST(&stats->intervals);
-    } else {
-        return QSLIST_NEXT(s, entries);
-    }
-}
-
-void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
-                      int64_t bytes, enum BlockAcctType type)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    cookie->bytes = bytes;
-    cookie->start_time_ns = qemu_clock_get_ns(clock_type);
-    cookie->type = type;
-}
-
-void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    BlockAcctTimedStats *s;
-    int64_t time_ns = qemu_clock_get_ns(clock_type);
-    int64_t latency_ns = time_ns - cookie->start_time_ns;
-
-    if (qtest_enabled()) {
-        latency_ns = qtest_latency_ns;
-    }
-
-    assert(cookie->type < BLOCK_MAX_IOTYPE);
-
-    stats->nr_bytes[cookie->type] += cookie->bytes;
-    stats->nr_ops[cookie->type]++;
-    stats->total_time_ns[cookie->type] += latency_ns;
-    stats->last_access_time_ns = time_ns;
-
-    QSLIST_FOREACH(s, &stats->intervals, entries) {
-        timed_average_account(&s->latency[cookie->type], latency_ns);
-    }
-}
-
-void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
-{
-    assert(cookie->type < BLOCK_MAX_IOTYPE);
-
-    stats->failed_ops[cookie->type]++;
-
-    if (stats->account_failed) {
-        BlockAcctTimedStats *s;
-        int64_t time_ns = qemu_clock_get_ns(clock_type);
-        int64_t latency_ns = time_ns - cookie->start_time_ns;
-
-        if (qtest_enabled()) {
-            latency_ns = qtest_latency_ns;
-        }
-
-        stats->total_time_ns[cookie->type] += latency_ns;
-        stats->last_access_time_ns = time_ns;
-
-        QSLIST_FOREACH(s, &stats->intervals, entries) {
-            timed_average_account(&s->latency[cookie->type], latency_ns);
-        }
-    }
-}
-
-void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    /* block_acct_done() and block_acct_failed() update
-     * total_time_ns[], but this one does not. The reason is that
-     * invalid requests are accounted during their submission,
-     * therefore there's no actual I/O involved. */
-
-    stats->invalid_ops[type]++;
-
-    if (stats->account_invalid) {
-        stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
-    }
-}
-
-void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
-                      int num_requests)
-{
-    assert(type < BLOCK_MAX_IOTYPE);
-    stats->merged[type] += num_requests;
-}
-
-int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
-{
-    return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns;
-}
-
-double block_acct_queue_depth(BlockAcctTimedStats *stats,
-                              enum BlockAcctType type)
-{
-    uint64_t sum, elapsed;
-
-    assert(type < BLOCK_MAX_IOTYPE);
-
-    sum = timed_average_sum(&stats->latency[type], &elapsed);
-
-    return (double) sum / elapsed;
-}
diff --git a/qemu/block/archipelago.c b/qemu/block/archipelago.c
deleted file mode 100644
index b9f5e69d4..000000000
--- a/qemu/block/archipelago.c
+++ /dev/null
@@ -1,1084 +0,0 @@
-/*
- * QEMU Block driver for Archipelago
- *
- * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-/*
- * VM Image on Archipelago volume is specified like this:
- *
- * file.driver=archipelago,file.volume=<volumename>
- * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>]
- * [,file.segment=<segment_name>]]
- *
- * or
- *
- * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][:
- * segment=<segment_name>]]
- *
- * 'archipelago' is the protocol.
- *
- * 'mport' is the port number on which mapperd is listening. This is optional
- * and if not specified, QEMU will make Archipelago to use the default port.
- *
- * 'vport' is the port number on which vlmcd is listening. This is optional
- * and if not specified, QEMU will make Archipelago to use the default port.
- *
- * 'segment' is the name of the shared memory segment Archipelago stack
- * is using. This is optional and if not specified, QEMU will make Archipelago
- * to use the default value, 'archipelago'.
- *
- * Examples:
- *
- * file.driver=archipelago,file.volume=my_vm_volume
- * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123
- * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
- *  file.vport=1234
- * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123,
- *  file.vport=1234,file.segment=my_segment
- *
- * or
- *
- * file=archipelago:my_vm_volume
- * file=archipelago:my_vm_volume/mport=123
- * file=archipelago:my_vm_volume/mport=123:vport=1234
- * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu/cutils.h"
-#include "block/block_int.h"
-#include "qemu/error-report.h"
-#include "qemu/thread.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"
-#include "qapi/qmp/qjson.h"
-#include "qemu/atomic.h"
-
-#include <xseg/xseg.h>
-#include <xseg/protocol.h>
-
-#define MAX_REQUEST_SIZE    524288
-
-#define ARCHIPELAGO_OPT_VOLUME      "volume"
-#define ARCHIPELAGO_OPT_SEGMENT     "segment"
-#define ARCHIPELAGO_OPT_MPORT       "mport"
-#define ARCHIPELAGO_OPT_VPORT       "vport"
-#define ARCHIPELAGO_DFL_MPORT       1001
-#define ARCHIPELAGO_DFL_VPORT       501
-
-#define archipelagolog(fmt, ...) \
-    do {                         \
-        fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \
-    } while (0)
-
-typedef enum {
-    ARCHIP_OP_READ,
-    ARCHIP_OP_WRITE,
-    ARCHIP_OP_FLUSH,
-    ARCHIP_OP_VOLINFO,
-    ARCHIP_OP_TRUNCATE,
-} ARCHIPCmd;
-
-typedef struct ArchipelagoAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    struct BDRVArchipelagoState *s;
-    QEMUIOVector *qiov;
-    ARCHIPCmd cmd;
-    int status;
-    int64_t size;
-    int64_t ret;
-} ArchipelagoAIOCB;
-
-typedef struct BDRVArchipelagoState {
-    ArchipelagoAIOCB *event_acb;
-    char *volname;
-    char *segment_name;
-    uint64_t size;
-    /* Archipelago specific */
-    struct xseg *xseg;
-    struct xseg_port *port;
-    xport srcport;
-    xport sport;
-    xport mportno;
-    xport vportno;
-    QemuMutex archip_mutex;
-    QemuCond archip_cond;
-    bool is_signaled;
-    /* Request handler specific */
-    QemuThread request_th;
-    QemuCond request_cond;
-    QemuMutex request_mutex;
-    bool th_is_signaled;
-    bool stopping;
-} BDRVArchipelagoState;
-
-typedef struct ArchipelagoSegmentedRequest {
-    size_t count;
-    size_t total;
-    int ref;
-    int failed;
-} ArchipelagoSegmentedRequest;
-
-typedef struct AIORequestData {
-    const char *volname;
-    off_t offset;
-    size_t size;
-    uint64_t bufidx;
-    int ret;
-    int op;
-    ArchipelagoAIOCB *aio_cb;
-    ArchipelagoSegmentedRequest *segreq;
-} AIORequestData;
-
-static void qemu_archipelago_complete_aio(void *opaque);
-
-static void init_local_signal(struct xseg *xseg, xport sport, xport srcport)
-{
-    if (xseg && (sport != srcport)) {
-        xseg_init_local_signal(xseg, srcport);
-        sport = srcport;
-    }
-}
-
-static void archipelago_finish_aiocb(AIORequestData *reqdata)
-{
-    if (reqdata->aio_cb->ret != reqdata->segreq->total) {
-        reqdata->aio_cb->ret = -EIO;
-    } else if (reqdata->aio_cb->ret == reqdata->segreq->total) {
-        reqdata->aio_cb->ret = 0;
-    }
-    reqdata->aio_cb->bh = aio_bh_new(
-                        bdrv_get_aio_context(reqdata->aio_cb->common.bs),
-                        qemu_archipelago_complete_aio, reqdata
-                        );
-    qemu_bh_schedule(reqdata->aio_cb->bh);
-}
-
-static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port,
-                      struct xseg_request *expected_req)
-{
-    struct xseg_request *req;
-    xseg_prepare_wait(xseg, srcport);
-    void *psd = xseg_get_signal_desc(xseg, port);
-    while (1) {
-        req = xseg_receive(xseg, srcport, X_NONBLOCK);
-        if (req) {
-            if (req != expected_req) {
-                archipelagolog("Unknown received request\n");
-                xseg_put_request(xseg, req, srcport);
-            } else if (!(req->state & XS_SERVED)) {
-                return -1;
-            } else {
-                break;
-            }
-        }
-        xseg_wait_signal(xseg, psd, 100000UL);
-    }
-    xseg_cancel_wait(xseg, srcport);
-    return 0;
-}
-
-static void xseg_request_handler(void *state)
-{
-    BDRVArchipelagoState *s = (BDRVArchipelagoState *) state;
-    void *psd = xseg_get_signal_desc(s->xseg, s->port);
-    qemu_mutex_lock(&s->request_mutex);
-
-    while (!s->stopping) {
-        struct xseg_request *req;
-        void *data;
-        xseg_prepare_wait(s->xseg, s->srcport);
-        req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK);
-        if (req) {
-            AIORequestData *reqdata;
-            ArchipelagoSegmentedRequest *segreq;
-            xseg_get_req_data(s->xseg, req, (void **)&reqdata);
-
-            switch (reqdata->op) {
-            case ARCHIP_OP_READ:
-                data = xseg_get_data(s->xseg, req);
-                segreq = reqdata->segreq;
-                segreq->count += req->serviced;
-
-                qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx,
-                                    data,
-                                    req->serviced);
-
-                xseg_put_request(s->xseg, req, s->srcport);
-
-                if (atomic_fetch_dec(&segreq->ref) == 1) {
-                    if (!segreq->failed) {
-                        reqdata->aio_cb->ret = segreq->count;
-                        archipelago_finish_aiocb(reqdata);
-                        g_free(segreq);
-                    } else {
-                        g_free(segreq);
-                        g_free(reqdata);
-                    }
-                } else {
-                    g_free(reqdata);
-                }
-                break;
-            case ARCHIP_OP_WRITE:
-            case ARCHIP_OP_FLUSH:
-                segreq = reqdata->segreq;
-                segreq->count += req->serviced;
-                xseg_put_request(s->xseg, req, s->srcport);
-
-                if (atomic_fetch_dec(&segreq->ref) == 1) {
-                    if (!segreq->failed) {
-                        reqdata->aio_cb->ret = segreq->count;
-                        archipelago_finish_aiocb(reqdata);
-                        g_free(segreq);
-                    } else {
-                        g_free(segreq);
-                        g_free(reqdata);
-                    }
-                } else {
-                    g_free(reqdata);
-                }
-                break;
-            case ARCHIP_OP_VOLINFO:
-            case ARCHIP_OP_TRUNCATE:
-                s->is_signaled = true;
-                qemu_cond_signal(&s->archip_cond);
-                break;
-            }
-        } else {
-            xseg_wait_signal(s->xseg, psd, 100000UL);
-        }
-        xseg_cancel_wait(s->xseg, s->srcport);
-    }
-
-    s->th_is_signaled = true;
-    qemu_cond_signal(&s->request_cond);
-    qemu_mutex_unlock(&s->request_mutex);
-    qemu_thread_exit(NULL);
-}
-
-static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s)
-{
-    if (xseg_initialize()) {
-        archipelagolog("Cannot initialize XSEG\n");
-        goto err_exit;
-    }
-
-    s->xseg = xseg_join("posix", s->segment_name,
-                        "posixfd", NULL);
-    if (!s->xseg) {
-        archipelagolog("Cannot join XSEG shared memory segment\n");
-        goto err_exit;
-    }
-    s->port = xseg_bind_dynport(s->xseg);
-    s->srcport = s->port->portno;
-    init_local_signal(s->xseg, s->sport, s->srcport);
-    return 0;
-
-err_exit:
-    return -1;
-}
-
-static int qemu_archipelago_init(BDRVArchipelagoState *s)
-{
-    int ret;
-
-    ret = qemu_archipelago_xseg_init(s);
-    if (ret < 0) {
-        error_report("Cannot initialize XSEG. Aborting...");
-        goto err_exit;
-    }
-
-    qemu_cond_init(&s->archip_cond);
-    qemu_mutex_init(&s->archip_mutex);
-    qemu_cond_init(&s->request_cond);
-    qemu_mutex_init(&s->request_mutex);
-    s->th_is_signaled = false;
-    qemu_thread_create(&s->request_th, "xseg_io_th",
-                       (void *) xseg_request_handler,
-                       (void *) s, QEMU_THREAD_JOINABLE);
-
-err_exit:
-    return ret;
-}
-
-static void qemu_archipelago_complete_aio(void *opaque)
-{
-    AIORequestData *reqdata = (AIORequestData *) opaque;
-    ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb;
-
-    qemu_bh_delete(aio_cb->bh);
-    aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret);
-    aio_cb->status = 0;
-
-    qemu_aio_unref(aio_cb);
-    g_free(reqdata);
-}
-
-static void xseg_find_port(char *pstr, const char *needle, xport *aport)
-{
-    const char *a;
-    char *endptr = NULL;
-    unsigned long port;
-    if (strstart(pstr, needle, &a)) {
-        if (strlen(a) > 0) {
-            port = strtoul(a, &endptr, 10);
-            if (strlen(endptr)) {
-                *aport = -2;
-                return;
-            }
-            *aport = (xport) port;
-        }
-    }
-}
-
-static void xseg_find_segment(char *pstr, const char *needle,
-                              char **segment_name)
-{
-    const char *a;
-    if (strstart(pstr, needle, &a)) {
-        if (strlen(a) > 0) {
-            *segment_name = g_strdup(a);
-        }
-    }
-}
-
-static void parse_filename_opts(const char *filename, Error **errp,
-                                char **volume, char **segment_name,
-                                xport *mport, xport *vport)
-{
-    const char *start;
-    char *tokens[4], *ds;
-    int idx;
-    xport lmport = NoPort, lvport = NoPort;
-
-    strstart(filename, "archipelago:", &start);
-
-    ds = g_strdup(start);
-    tokens[0] = strtok(ds, "/");
-    tokens[1] = strtok(NULL, ":");
-    tokens[2] = strtok(NULL, ":");
-    tokens[3] = strtok(NULL, "\0");
-
-    if (!strlen(tokens[0])) {
-        error_setg(errp, "volume name must be specified first");
-        g_free(ds);
-        return;
-    }
-
-    for (idx = 1; idx < 4; idx++) {
-        if (tokens[idx] != NULL) {
-            if (strstart(tokens[idx], "mport=", NULL)) {
-                xseg_find_port(tokens[idx], "mport=", &lmport);
-            }
-            if (strstart(tokens[idx], "vport=", NULL)) {
-                xseg_find_port(tokens[idx], "vport=", &lvport);
-            }
-            if (strstart(tokens[idx], "segment=", NULL)) {
-                xseg_find_segment(tokens[idx], "segment=", segment_name);
-            }
-        }
-    }
-
-    if ((lmport == -2) || (lvport == -2)) {
-        error_setg(errp, "mport and/or vport must be set");
-        g_free(ds);
-        return;
-    }
-    *volume = g_strdup(tokens[0]);
-    *mport = lmport;
-    *vport = lvport;
-    g_free(ds);
-}
-
-static void archipelago_parse_filename(const char *filename, QDict *options,
-                                       Error **errp)
-{
-    const char *start;
-    char *volume = NULL, *segment_name = NULL;
-    xport mport = NoPort, vport = NoPort;
-
-    if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME)
-            || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT)
-            || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT)
-            || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) {
-        error_setg(errp, "volume/mport/vport/segment and a file name may not"
-                         " be specified at the same time");
-        return;
-    }
-
-    if (!strstart(filename, "archipelago:", &start)) {
-        error_setg(errp, "File name must start with 'archipelago:'");
-        return;
-    }
-
-    if (!strlen(start) || strstart(start, "/", NULL)) {
-        error_setg(errp, "volume name must be specified");
-        return;
-    }
-
-    parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport);
-
-    if (volume) {
-        qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume));
-        g_free(volume);
-    }
-    if (segment_name) {
-        qdict_put(options, ARCHIPELAGO_OPT_SEGMENT,
-                  qstring_from_str(segment_name));
-        g_free(segment_name);
-    }
-    if (mport != NoPort) {
-        qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport));
-    }
-    if (vport != NoPort) {
-        qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport));
-    }
-}
-
-static QemuOptsList archipelago_runtime_opts = {
-    .name = "archipelago",
-    .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head),
-    .desc = {
-        {
-            .name = ARCHIPELAGO_OPT_VOLUME,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of the volume image",
-        },
-        {
-            .name = ARCHIPELAGO_OPT_SEGMENT,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of the Archipelago shared memory segment",
-        },
-        {
-            .name = ARCHIPELAGO_OPT_MPORT,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Archipelago mapperd port number"
-        },
-        {
-            .name = ARCHIPELAGO_OPT_VPORT,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Archipelago vlmcd port number"
-
-        },
-        { /* end of list */ }
-    },
-};
-
-static int qemu_archipelago_open(BlockDriverState *bs,
-                                 QDict *options,
-                                 int bdrv_flags,
-                                 Error **errp)
-{
-    int ret = 0;
-    const char *volume, *segment_name;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    BDRVArchipelagoState *s = bs->opaque;
-
-    opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto err_exit;
-    }
-
-    s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT,
-                                     ARCHIPELAGO_DFL_MPORT);
-    s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT,
-                                     ARCHIPELAGO_DFL_VPORT);
-
-    segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT);
-    if (segment_name == NULL) {
-        s->segment_name = g_strdup("archipelago");
-    } else {
-        s->segment_name = g_strdup(segment_name);
-    }
-
-    volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME);
-    if (volume == NULL) {
-        error_setg(errp, "archipelago block driver requires the 'volume'"
-                   " option");
-        ret = -EINVAL;
-        goto err_exit;
-    }
-    s->volname = g_strdup(volume);
-
-    /* Initialize XSEG, join shared memory segment */
-    ret = qemu_archipelago_init(s);
-    if (ret < 0) {
-        error_setg(errp, "cannot initialize XSEG and join shared "
-                   "memory segment");
-        goto err_exit;
-    }
-
-    qemu_opts_del(opts);
-    return 0;
-
-err_exit:
-    g_free(s->volname);
-    g_free(s->segment_name);
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static void qemu_archipelago_close(BlockDriverState *bs)
-{
-    int r, targetlen;
-    char *target;
-    struct xseg_request *req;
-    BDRVArchipelagoState *s = bs->opaque;
-
-    s->stopping = true;
-
-    qemu_mutex_lock(&s->request_mutex);
-    while (!s->th_is_signaled) {
-        qemu_cond_wait(&s->request_cond,
-                       &s->request_mutex);
-    }
-    qemu_mutex_unlock(&s->request_mutex);
-    qemu_thread_join(&s->request_th);
-    qemu_cond_destroy(&s->request_cond);
-    qemu_mutex_destroy(&s->request_mutex);
-
-    qemu_cond_destroy(&s->archip_cond);
-    qemu_mutex_destroy(&s->archip_mutex);
-
-    targetlen = strlen(s->volname);
-    req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
-    if (!req) {
-        archipelagolog("Cannot get XSEG request\n");
-        goto err_exit;
-    }
-    r = xseg_prep_request(s->xseg, req, targetlen, 0);
-    if (r < 0) {
-        xseg_put_request(s->xseg, req, s->srcport);
-        archipelagolog("Cannot prepare XSEG close request\n");
-        goto err_exit;
-    }
-
-    target = xseg_get_target(s->xseg, req);
-    memcpy(target, s->volname, targetlen);
-    req->size = req->datalen;
-    req->offset = 0;
-    req->op = X_CLOSE;
-
-    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
-    if (p == NoPort) {
-        xseg_put_request(s->xseg, req, s->srcport);
-        archipelagolog("Cannot submit XSEG close request\n");
-        goto err_exit;
-    }
-
-    xseg_signal(s->xseg, p);
-    wait_reply(s->xseg, s->srcport, s->port, req);
-
-    xseg_put_request(s->xseg, req, s->srcport);
-
-err_exit:
-    g_free(s->volname);
-    g_free(s->segment_name);
-    xseg_quit_local_signal(s->xseg, s->srcport);
-    xseg_leave_dynport(s->xseg, s->port);
-    xseg_leave(s->xseg);
-}
-
-static int qemu_archipelago_create_volume(Error **errp, const char *volname,
-                                          char *segment_name,
-                                          uint64_t size, xport mportno,
-                                          xport vportno)
-{
-    int ret, targetlen;
-    struct xseg *xseg = NULL;
-    struct xseg_request *req;
-    struct xseg_request_clone *xclone;
-    struct xseg_port *port;
-    xport srcport = NoPort, sport = NoPort;
-    char *target;
-
-    /* Try default values if none has been set */
-    if (mportno == (xport) -1) {
-        mportno = ARCHIPELAGO_DFL_MPORT;
-    }
-
-    if (vportno == (xport) -1) {
-        vportno = ARCHIPELAGO_DFL_VPORT;
-    }
-
-    if (xseg_initialize()) {
-        error_setg(errp, "Cannot initialize XSEG");
-        return -1;
-    }
-
-    xseg = xseg_join("posix", segment_name,
-                     "posixfd", NULL);
-
-    if (!xseg) {
-        error_setg(errp, "Cannot join XSEG shared memory segment");
-        return -1;
-    }
-
-    port = xseg_bind_dynport(xseg);
-    srcport = port->portno;
-    init_local_signal(xseg, sport, srcport);
-
-    req = xseg_get_request(xseg, srcport, mportno, X_ALLOC);
-    if (!req) {
-        error_setg(errp, "Cannot get XSEG request");
-        return -1;
-    }
-
-    targetlen = strlen(volname);
-    ret = xseg_prep_request(xseg, req, targetlen,
-                            sizeof(struct xseg_request_clone));
-    if (ret < 0) {
-        error_setg(errp, "Cannot prepare XSEG request");
-        goto err_exit;
-    }
-
-    target = xseg_get_target(xseg, req);
-    if (!target) {
-        error_setg(errp, "Cannot get XSEG target.");
-        goto err_exit;
-    }
-    memcpy(target, volname, targetlen);
-    xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req);
-    memset(xclone->target, 0 , XSEG_MAX_TARGETLEN);
-    xclone->targetlen = 0;
-    xclone->size = size;
-    req->offset = 0;
-    req->size = req->datalen;
-    req->op = X_CLONE;
-
-    xport p = xseg_submit(xseg, req, srcport, X_ALLOC);
-    if (p == NoPort) {
-        error_setg(errp, "Could not submit XSEG request");
-        goto err_exit;
-    }
-    xseg_signal(xseg, p);
-
-    ret = wait_reply(xseg, srcport, port, req);
-    if (ret < 0) {
-        error_setg(errp, "wait_reply() error.");
-    }
-
-    xseg_put_request(xseg, req, srcport);
-    xseg_quit_local_signal(xseg, srcport);
-    xseg_leave_dynport(xseg, port);
-    xseg_leave(xseg);
-    return ret;
-
-err_exit:
-    xseg_put_request(xseg, req, srcport);
-    xseg_quit_local_signal(xseg, srcport);
-    xseg_leave_dynport(xseg, port);
-    xseg_leave(xseg);
-    return -1;
-}
-
-static int qemu_archipelago_create(const char *filename,
-                                   QemuOpts *options,
-                                   Error **errp)
-{
-    int ret = 0;
-    uint64_t total_size = 0;
-    char *volname = NULL, *segment_name = NULL;
-    const char *start;
-    xport mport = NoPort, vport = NoPort;
-
-    if (!strstart(filename, "archipelago:", &start)) {
-        error_setg(errp, "File name must start with 'archipelago:'");
-        return -1;
-    }
-
-    if (!strlen(start) || strstart(start, "/", NULL)) {
-        error_setg(errp, "volume name must be specified");
-        return -1;
-    }
-
-    parse_filename_opts(filename, errp, &volname, &segment_name, &mport,
-                        &vport);
-    total_size = ROUND_UP(qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    if (segment_name == NULL) {
-        segment_name = g_strdup("archipelago");
-    }
-
-    /* Create an Archipelago volume */
-    ret = qemu_archipelago_create_volume(errp, volname, segment_name,
-                                         total_size, mport,
-                                         vport);
-
-    g_free(volname);
-    g_free(segment_name);
-    return ret;
-}
-
-static const AIOCBInfo archipelago_aiocb_info = {
-    .aiocb_size = sizeof(ArchipelagoAIOCB),
-};
-
-static int archipelago_submit_request(BDRVArchipelagoState *s,
-                                        uint64_t bufidx,
-                                        size_t count,
-                                        off_t offset,
-                                        ArchipelagoAIOCB *aio_cb,
-                                        ArchipelagoSegmentedRequest *segreq,
-                                        int op)
-{
-    int ret, targetlen;
-    char *target;
-    void *data = NULL;
-    struct xseg_request *req;
-    AIORequestData *reqdata = g_new(AIORequestData, 1);
-
-    targetlen = strlen(s->volname);
-    req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC);
-    if (!req) {
-        archipelagolog("Cannot get XSEG request\n");
-        goto err_exit2;
-    }
-    ret = xseg_prep_request(s->xseg, req, targetlen, count);
-    if (ret < 0) {
-        archipelagolog("Cannot prepare XSEG request\n");
-        goto err_exit;
-    }
-    target = xseg_get_target(s->xseg, req);
-    if (!target) {
-        archipelagolog("Cannot get XSEG target\n");
-        goto err_exit;
-    }
-    memcpy(target, s->volname, targetlen);
-    req->size = count;
-    req->offset = offset;
-
-    switch (op) {
-    case ARCHIP_OP_READ:
-        req->op = X_READ;
-        break;
-    case ARCHIP_OP_WRITE:
-        req->op = X_WRITE;
-        break;
-    case ARCHIP_OP_FLUSH:
-        req->op = X_FLUSH;
-        break;
-    }
-    reqdata->volname = s->volname;
-    reqdata->offset = offset;
-    reqdata->size = count;
-    reqdata->bufidx = bufidx;
-    reqdata->aio_cb = aio_cb;
-    reqdata->segreq = segreq;
-    reqdata->op = op;
-
-    xseg_set_req_data(s->xseg, req, reqdata);
-    if (op == ARCHIP_OP_WRITE) {
-        data = xseg_get_data(s->xseg, req);
-        if (!data) {
-            archipelagolog("Cannot get XSEG data\n");
-            goto err_exit;
-        }
-        qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count);
-    }
-
-    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
-    if (p == NoPort) {
-        archipelagolog("Could not submit XSEG request\n");
-        goto err_exit;
-    }
-    xseg_signal(s->xseg, p);
-    return 0;
-
-err_exit:
-    g_free(reqdata);
-    xseg_put_request(s->xseg, req, s->srcport);
-    return -EIO;
-err_exit2:
-    g_free(reqdata);
-    return -EIO;
-}
-
-static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s,
-                                        size_t count,
-                                        off_t offset,
-                                        ArchipelagoAIOCB *aio_cb,
-                                        int op)
-{
-    int ret, segments_nr;
-    size_t pos = 0;
-    ArchipelagoSegmentedRequest *segreq;
-
-    segreq = g_new0(ArchipelagoSegmentedRequest, 1);
-
-    if (op == ARCHIP_OP_FLUSH) {
-        segments_nr = 1;
-    } else {
-        segments_nr = (int)(count / MAX_REQUEST_SIZE) + \
-                      ((count % MAX_REQUEST_SIZE) ? 1 : 0);
-    }
-    segreq->total = count;
-    atomic_mb_set(&segreq->ref, segments_nr);
-
-    while (segments_nr > 1) {
-        ret = archipelago_submit_request(s, pos,
-                                            MAX_REQUEST_SIZE,
-                                            offset + pos,
-                                            aio_cb, segreq, op);
-
-        if (ret < 0) {
-            goto err_exit;
-        }
-        count -= MAX_REQUEST_SIZE;
-        pos += MAX_REQUEST_SIZE;
-        segments_nr--;
-    }
-    ret = archipelago_submit_request(s, pos, count, offset + pos,
-                                     aio_cb, segreq, op);
-
-    if (ret < 0) {
-        goto err_exit;
-    }
-    return 0;
-
-err_exit:
-    segreq->failed = 1;
-    if (atomic_fetch_sub(&segreq->ref, segments_nr) == segments_nr) {
-        g_free(segreq);
-    }
-    return ret;
-}
-
-static BlockAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs,
-                                           int64_t sector_num,
-                                           QEMUIOVector *qiov,
-                                           int nb_sectors,
-                                           BlockCompletionFunc *cb,
-                                           void *opaque,
-                                           int op)
-{
-    ArchipelagoAIOCB *aio_cb;
-    BDRVArchipelagoState *s = bs->opaque;
-    int64_t size, off;
-    int ret;
-
-    aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque);
-    aio_cb->cmd = op;
-    aio_cb->qiov = qiov;
-
-    aio_cb->ret = 0;
-    aio_cb->s = s;
-    aio_cb->status = -EINPROGRESS;
-
-    off = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-    aio_cb->size = size;
-
-    ret = archipelago_aio_segmented_rw(s, size, off,
-                                       aio_cb, op);
-    if (ret < 0) {
-        goto err_exit;
-    }
-    return &aio_cb->common;
-
-err_exit:
-    error_report("qemu_archipelago_aio_rw(): I/O Error");
-    qemu_aio_unref(aio_cb);
-    return NULL;
-}
-
-static BlockAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
-                                   opaque, ARCHIP_OP_READ);
-}
-
-static BlockAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb,
-                                   opaque, ARCHIP_OP_WRITE);
-}
-
-static int64_t archipelago_volume_info(BDRVArchipelagoState *s)
-{
-    uint64_t size;
-    int ret, targetlen;
-    struct xseg_request *req;
-    struct xseg_reply_info *xinfo;
-    AIORequestData *reqdata = g_new(AIORequestData, 1);
-
-    const char *volname = s->volname;
-    targetlen = strlen(volname);
-    req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
-    if (!req) {
-        archipelagolog("Cannot get XSEG request\n");
-        goto err_exit2;
-    }
-    ret = xseg_prep_request(s->xseg, req, targetlen,
-                            sizeof(struct xseg_reply_info));
-    if (ret < 0) {
-        archipelagolog("Cannot prepare XSEG request\n");
-        goto err_exit;
-    }
-    char *target = xseg_get_target(s->xseg, req);
-    if (!target) {
-        archipelagolog("Cannot get XSEG target\n");
-        goto err_exit;
-    }
-    memcpy(target, volname, targetlen);
-    req->size = req->datalen;
-    req->offset = 0;
-    req->op = X_INFO;
-
-    reqdata->op = ARCHIP_OP_VOLINFO;
-    reqdata->volname = volname;
-    xseg_set_req_data(s->xseg, req, reqdata);
-
-    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
-    if (p == NoPort) {
-        archipelagolog("Cannot submit XSEG request\n");
-        goto err_exit;
-    }
-    xseg_signal(s->xseg, p);
-    qemu_mutex_lock(&s->archip_mutex);
-    while (!s->is_signaled) {
-        qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
-    }
-    s->is_signaled = false;
-    qemu_mutex_unlock(&s->archip_mutex);
-
-    xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req);
-    size = xinfo->size;
-    xseg_put_request(s->xseg, req, s->srcport);
-    g_free(reqdata);
-    s->size = size;
-    return size;
-
-err_exit:
-    xseg_put_request(s->xseg, req, s->srcport);
-err_exit2:
-    g_free(reqdata);
-    return -EIO;
-}
-
-static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
-{
-    int64_t ret;
-    BDRVArchipelagoState *s = bs->opaque;
-
-    ret = archipelago_volume_info(s);
-    return ret;
-}
-
-static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
-{
-    int ret, targetlen;
-    struct xseg_request *req;
-    BDRVArchipelagoState *s = bs->opaque;
-    AIORequestData *reqdata = g_new(AIORequestData, 1);
-
-    const char *volname = s->volname;
-    targetlen = strlen(volname);
-    req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC);
-    if (!req) {
-        archipelagolog("Cannot get XSEG request\n");
-        goto err_exit2;
-    }
-
-    ret = xseg_prep_request(s->xseg, req, targetlen, 0);
-    if (ret < 0) {
-        archipelagolog("Cannot prepare XSEG request\n");
-        goto err_exit;
-    }
-    char *target = xseg_get_target(s->xseg, req);
-    if (!target) {
-        archipelagolog("Cannot get XSEG target\n");
-        goto err_exit;
-    }
-    memcpy(target, volname, targetlen);
-    req->offset = offset;
-    req->op = X_TRUNCATE;
-
-    reqdata->op = ARCHIP_OP_TRUNCATE;
-    reqdata->volname = volname;
-
-    xseg_set_req_data(s->xseg, req, reqdata);
-
-    xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC);
-    if (p == NoPort) {
-        archipelagolog("Cannot submit XSEG request\n");
-        goto err_exit;
-    }
-
-    xseg_signal(s->xseg, p);
-    qemu_mutex_lock(&s->archip_mutex);
-    while (!s->is_signaled) {
-        qemu_cond_wait(&s->archip_cond, &s->archip_mutex);
-    }
-    s->is_signaled = false;
-    qemu_mutex_unlock(&s->archip_mutex);
-    xseg_put_request(s->xseg, req, s->srcport);
-    g_free(reqdata);
-    return 0;
-
-err_exit:
-    xseg_put_request(s->xseg, req, s->srcport);
-err_exit2:
-    g_free(reqdata);
-    return -EIO;
-}
-
-static QemuOptsList qemu_archipelago_create_opts = {
-    .name = "archipelago-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque,
-                                   ARCHIP_OP_FLUSH);
-}
-
-static BlockDriver bdrv_archipelago = {
-    .format_name         = "archipelago",
-    .protocol_name       = "archipelago",
-    .instance_size       = sizeof(BDRVArchipelagoState),
-    .bdrv_parse_filename = archipelago_parse_filename,
-    .bdrv_file_open      = qemu_archipelago_open,
-    .bdrv_close          = qemu_archipelago_close,
-    .bdrv_create         = qemu_archipelago_create,
-    .bdrv_getlength      = qemu_archipelago_getlength,
-    .bdrv_truncate       = qemu_archipelago_truncate,
-    .bdrv_aio_readv      = qemu_archipelago_aio_readv,
-    .bdrv_aio_writev     = qemu_archipelago_aio_writev,
-    .bdrv_aio_flush      = qemu_archipelago_aio_flush,
-    .bdrv_has_zero_init  = bdrv_has_zero_init_1,
-    .create_opts         = &qemu_archipelago_create_opts,
-};
-
-static void bdrv_archipelago_init(void)
-{
-    bdrv_register(&bdrv_archipelago);
-}
-
-block_init(bdrv_archipelago_init);
diff --git a/qemu/block/backup.c b/qemu/block/backup.c
deleted file mode 100644
index 491fd1406..000000000
--- a/qemu/block/backup.c
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * QEMU backup
- *
- * Copyright (C) 2013 Proxmox Server Solutions
- *
- * Authors:
- *  Dietmar Maurer (dietmar@proxmox.com)
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-
-#include "trace.h"
-#include "block/block.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
-#include "qemu/ratelimit.h"
-#include "qemu/cutils.h"
-#include "sysemu/block-backend.h"
-#include "qemu/bitmap.h"
-
-#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
-#define SLICE_TIME 100000000ULL /* ns */
-
-typedef struct CowRequest {
-    int64_t start;
-    int64_t end;
-    QLIST_ENTRY(CowRequest) list;
-    CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
-
-typedef struct BackupBlockJob {
-    BlockJob common;
-    BlockDriverState *target;
-    /* bitmap for sync=incremental */
-    BdrvDirtyBitmap *sync_bitmap;
-    MirrorSyncMode sync_mode;
-    RateLimit limit;
-    BlockdevOnError on_source_error;
-    BlockdevOnError on_target_error;
-    CoRwlock flush_rwlock;
-    uint64_t sectors_read;
-    unsigned long *done_bitmap;
-    int64_t cluster_size;
-    QLIST_HEAD(, CowRequest) inflight_reqs;
-} BackupBlockJob;
-
-/* Size of a cluster in sectors, instead of bytes. */
-static inline int64_t cluster_size_sectors(BackupBlockJob *job)
-{
-  return job->cluster_size / BDRV_SECTOR_SIZE;
-}
-
-/* See if in-flight requests overlap and wait for them to complete */
-static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
-                                                       int64_t start,
-                                                       int64_t end)
-{
-    CowRequest *req;
-    bool retry;
-
-    do {
-        retry = false;
-        QLIST_FOREACH(req, &job->inflight_reqs, list) {
-            if (end > req->start && start < req->end) {
-                qemu_co_queue_wait(&req->wait_queue);
-                retry = true;
-                break;
-            }
-        }
-    } while (retry);
-}
-
-/* Keep track of an in-flight request */
-static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
-                                     int64_t start, int64_t end)
-{
-    req->start = start;
-    req->end = end;
-    qemu_co_queue_init(&req->wait_queue);
-    QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
-}
-
-/* Forget about a completed request */
-static void cow_request_end(CowRequest *req)
-{
-    QLIST_REMOVE(req, list);
-    qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-static int coroutine_fn backup_do_cow(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      bool *error_is_read,
-                                      bool is_write_notifier)
-{
-    BackupBlockJob *job = (BackupBlockJob *)bs->job;
-    CowRequest cow_request;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    void *bounce_buffer = NULL;
-    int ret = 0;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
-    int64_t start, end;
-    int n;
-
-    qemu_co_rwlock_rdlock(&job->flush_rwlock);
-
-    start = sector_num / sectors_per_cluster;
-    end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
-
-    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
-
-    wait_for_overlapping_requests(job, start, end);
-    cow_request_begin(&cow_request, job, start, end);
-
-    for (; start < end; start++) {
-        if (test_bit(start, job->done_bitmap)) {
-            trace_backup_do_cow_skip(job, start);
-            continue; /* already copied */
-        }
-
-        trace_backup_do_cow_process(job, start);
-
-        n = MIN(sectors_per_cluster,
-                job->common.len / BDRV_SECTOR_SIZE -
-                start * sectors_per_cluster);
-
-        if (!bounce_buffer) {
-            bounce_buffer = qemu_blockalign(bs, job->cluster_size);
-        }
-        iov.iov_base = bounce_buffer;
-        iov.iov_len = n * BDRV_SECTOR_SIZE;
-        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-        if (is_write_notifier) {
-            ret = bdrv_co_readv_no_serialising(bs,
-                                           start * sectors_per_cluster,
-                                           n, &bounce_qiov);
-        } else {
-            ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
-                                &bounce_qiov);
-        }
-        if (ret < 0) {
-            trace_backup_do_cow_read_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = true;
-            }
-            goto out;
-        }
-
-        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
-            ret = bdrv_co_write_zeroes(job->target,
-                                       start * sectors_per_cluster,
-                                       n, BDRV_REQ_MAY_UNMAP);
-        } else {
-            ret = bdrv_co_writev(job->target,
-                                 start * sectors_per_cluster, n,
-                                 &bounce_qiov);
-        }
-        if (ret < 0) {
-            trace_backup_do_cow_write_fail(job, start, ret);
-            if (error_is_read) {
-                *error_is_read = false;
-            }
-            goto out;
-        }
-
-        set_bit(start, job->done_bitmap);
-
-        /* Publish progress, guest I/O counts as progress too.  Note that the
-         * offset field is an opaque progress value, it is not a disk offset.
-         */
-        job->sectors_read += n;
-        job->common.offset += n * BDRV_SECTOR_SIZE;
-    }
-
-out:
-    if (bounce_buffer) {
-        qemu_vfree(bounce_buffer);
-    }
-
-    cow_request_end(&cow_request);
-
-    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
-
-    qemu_co_rwlock_unlock(&job->flush_rwlock);
-
-    return ret;
-}
-
-static int coroutine_fn backup_before_write_notify(
-        NotifierWithReturn *notifier,
-        void *opaque)
-{
-    BdrvTrackedRequest *req = opaque;
-    int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
-    int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
-
-    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-
-    return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
-}
-
-static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
-    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
-}
-
-static void backup_iostatus_reset(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
-    if (s->target->blk) {
-        blk_iostatus_reset(s->target->blk);
-    }
-}
-
-static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
-{
-    BdrvDirtyBitmap *bm;
-    BlockDriverState *bs = job->common.bs;
-
-    if (ret < 0 || block_job_is_cancelled(&job->common)) {
-        /* Merge the successor back into the parent, delete nothing. */
-        bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
-        assert(bm);
-    } else {
-        /* Everything is fine, delete this bitmap and install the backup. */
-        bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
-        assert(bm);
-    }
-}
-
-static void backup_commit(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    if (s->sync_bitmap) {
-        backup_cleanup_sync_bitmap(s, 0);
-    }
-}
-
-static void backup_abort(BlockJob *job)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    if (s->sync_bitmap) {
-        backup_cleanup_sync_bitmap(s, -1);
-    }
-}
-
-static const BlockJobDriver backup_job_driver = {
-    .instance_size  = sizeof(BackupBlockJob),
-    .job_type       = BLOCK_JOB_TYPE_BACKUP,
-    .set_speed      = backup_set_speed,
-    .iostatus_reset = backup_iostatus_reset,
-    .commit         = backup_commit,
-    .abort          = backup_abort,
-};
-
-static BlockErrorAction backup_error_action(BackupBlockJob *job,
-                                            bool read, int error)
-{
-    if (read) {
-        return block_job_error_action(&job->common, job->common.bs,
-                                      job->on_source_error, true, error);
-    } else {
-        return block_job_error_action(&job->common, job->target,
-                                      job->on_target_error, false, error);
-    }
-}
-
-typedef struct {
-    int ret;
-} BackupCompleteData;
-
-static void backup_complete(BlockJob *job, void *opaque)
-{
-    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-    BackupCompleteData *data = opaque;
-
-    bdrv_unref(s->target);
-
-    block_job_completed(job, data->ret);
-    g_free(data);
-}
-
-static bool coroutine_fn yield_and_check(BackupBlockJob *job)
-{
-    if (block_job_is_cancelled(&job->common)) {
-        return true;
-    }
-
-    /* we need to yield so that bdrv_drain_all() returns.
-     * (without, VM does not reboot)
-     */
-    if (job->common.speed) {
-        uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
-                                                      job->sectors_read);
-        job->sectors_read = 0;
-        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
-    } else {
-        block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
-    }
-
-    if (block_job_is_cancelled(&job->common)) {
-        return true;
-    }
-
-    return false;
-}
-
-static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
-{
-    bool error_is_read;
-    int ret = 0;
-    int clusters_per_iter;
-    uint32_t granularity;
-    int64_t sector;
-    int64_t cluster;
-    int64_t end;
-    int64_t last_cluster = -1;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
-    BlockDriverState *bs = job->common.bs;
-    HBitmapIter hbi;
-
-    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
-    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
-    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
-
-    /* Find the next dirty sector(s) */
-    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
-        cluster = sector / sectors_per_cluster;
-
-        /* Fake progress updates for any clusters we skipped */
-        if (cluster != last_cluster + 1) {
-            job->common.offset += ((cluster - last_cluster - 1) *
-                                   job->cluster_size);
-        }
-
-        for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
-            do {
-                if (yield_and_check(job)) {
-                    return ret;
-                }
-                ret = backup_do_cow(bs, cluster * sectors_per_cluster,
-                                    sectors_per_cluster, &error_is_read,
-                                    false);
-                if ((ret < 0) &&
-                    backup_error_action(job, error_is_read, -ret) ==
-                    BLOCK_ERROR_ACTION_REPORT) {
-                    return ret;
-                }
-            } while (ret < 0);
-        }
-
-        /* If the bitmap granularity is smaller than the backup granularity,
-         * we need to advance the iterator pointer to the next cluster. */
-        if (granularity < job->cluster_size) {
-            bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
-        }
-
-        last_cluster = cluster - 1;
-    }
-
-    /* Play some final catchup with the progress meter */
-    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
-    if (last_cluster + 1 < end) {
-        job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
-    }
-
-    return ret;
-}
-
-static void coroutine_fn backup_run(void *opaque)
-{
-    BackupBlockJob *job = opaque;
-    BackupCompleteData *data;
-    BlockDriverState *bs = job->common.bs;
-    BlockDriverState *target = job->target;
-    BlockdevOnError on_target_error = job->on_target_error;
-    NotifierWithReturn before_write = {
-        .notify = backup_before_write_notify,
-    };
-    int64_t start, end;
-    int64_t sectors_per_cluster = cluster_size_sectors(job);
-    int ret = 0;
-
-    QLIST_INIT(&job->inflight_reqs);
-    qemu_co_rwlock_init(&job->flush_rwlock);
-
-    start = 0;
-    end = DIV_ROUND_UP(job->common.len, job->cluster_size);
-
-    job->done_bitmap = bitmap_new(end);
-
-    if (target->blk) {
-        blk_set_on_error(target->blk, on_target_error, on_target_error);
-        blk_iostatus_enable(target->blk);
-    }
-
-    bdrv_add_before_write_notifier(bs, &before_write);
-
-    if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
-        while (!block_job_is_cancelled(&job->common)) {
-            /* Yield until the job is cancelled.  We just let our before_write
-             * notify callback service CoW requests. */
-            job->common.busy = false;
-            qemu_coroutine_yield();
-            job->common.busy = true;
-        }
-    } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        ret = backup_run_incremental(job);
-    } else {
-        /* Both FULL and TOP SYNC_MODE's require copying.. */
-        for (; start < end; start++) {
-            bool error_is_read;
-            if (yield_and_check(job)) {
-                break;
-            }
-
-            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
-                int i, n;
-                int alloced = 0;
-
-                /* Check to see if these blocks are already in the
-                 * backing file. */
-
-                for (i = 0; i < sectors_per_cluster;) {
-                    /* bdrv_is_allocated() only returns true/false based
-                     * on the first set of sectors it comes across that
-                     * are are all in the same state.
-                     * For that reason we must verify each sector in the
-                     * backup cluster length.  We end up copying more than
-                     * needed but at some point that is always the case. */
-                    alloced =
-                        bdrv_is_allocated(bs,
-                                start * sectors_per_cluster + i,
-                                sectors_per_cluster - i, &n);
-                    i += n;
-
-                    if (alloced == 1 || n == 0) {
-                        break;
-                    }
-                }
-
-                /* If the above loop never found any sectors that are in
-                 * the topmost image, skip this backup. */
-                if (alloced == 0) {
-                    continue;
-                }
-            }
-            /* FULL sync mode we copy the whole drive. */
-            ret = backup_do_cow(bs, start * sectors_per_cluster,
-                                sectors_per_cluster, &error_is_read, false);
-            if (ret < 0) {
-                /* Depending on error action, fail now or retry cluster */
-                BlockErrorAction action =
-                    backup_error_action(job, error_is_read, -ret);
-                if (action == BLOCK_ERROR_ACTION_REPORT) {
-                    break;
-                } else {
-                    start--;
-                    continue;
-                }
-            }
-        }
-    }
-
-    notifier_with_return_remove(&before_write);
-
-    /* wait until pending backup_do_cow() calls have completed */
-    qemu_co_rwlock_wrlock(&job->flush_rwlock);
-    qemu_co_rwlock_unlock(&job->flush_rwlock);
-    g_free(job->done_bitmap);
-
-    if (target->blk) {
-        blk_iostatus_disable(target->blk);
-    }
-    bdrv_op_unblock_all(target, job->common.blocker);
-
-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&job->common, backup_complete, data);
-}
-
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
-                  int64_t speed, MirrorSyncMode sync_mode,
-                  BdrvDirtyBitmap *sync_bitmap,
-                  BlockdevOnError on_source_error,
-                  BlockdevOnError on_target_error,
-                  BlockCompletionFunc *cb, void *opaque,
-                  BlockJobTxn *txn, Error **errp)
-{
-    int64_t len;
-    BlockDriverInfo bdi;
-    int ret;
-
-    assert(bs);
-    assert(target);
-    assert(cb);
-
-    if (bs == target) {
-        error_setg(errp, "Source and target cannot be the same");
-        return;
-    }
-
-    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
-        return;
-    }
-
-    if (!bdrv_is_inserted(bs)) {
-        error_setg(errp, "Device is not inserted: %s",
-                   bdrv_get_device_name(bs));
-        return;
-    }
-
-    if (!bdrv_is_inserted(target)) {
-        error_setg(errp, "Device is not inserted: %s",
-                   bdrv_get_device_name(target));
-        return;
-    }
-
-    if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
-        return;
-    }
-
-    if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
-        return;
-    }
-
-    if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        if (!sync_bitmap) {
-            error_setg(errp, "must provide a valid bitmap name for "
-                             "\"incremental\" sync mode");
-            return;
-        }
-
-        /* Create a new bitmap, and freeze/disable this one. */
-        if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
-            return;
-        }
-    } else if (sync_bitmap) {
-        error_setg(errp,
-                   "a sync_bitmap was provided to backup_run, "
-                   "but received an incompatible sync_mode (%s)",
-                   MirrorSyncMode_lookup[sync_mode]);
-        return;
-    }
-
-    len = bdrv_getlength(bs);
-    if (len < 0) {
-        error_setg_errno(errp, -len, "unable to get length for '%s'",
-                         bdrv_get_device_name(bs));
-        goto error;
-    }
-
-    BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
-                                           cb, opaque, errp);
-    if (!job) {
-        goto error;
-    }
-
-    job->on_source_error = on_source_error;
-    job->on_target_error = on_target_error;
-    job->target = target;
-    job->sync_mode = sync_mode;
-    job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
-                       sync_bitmap : NULL;
-
-    /* If there is no backing file on the target, we cannot rely on COW if our
-     * backup cluster size is smaller than the target cluster size. Even for
-     * targets with a backing file, try to avoid COW if possible. */
-    ret = bdrv_get_info(job->target, &bdi);
-    if (ret < 0 && !target->backing) {
-        error_setg_errno(errp, -ret,
-            "Couldn't determine the cluster size of the target image, "
-            "which has no backing file");
-        error_append_hint(errp,
-            "Aborting, since this may create an unusable destination image\n");
-        goto error;
-    } else if (ret < 0 && target->backing) {
-        /* Not fatal; just trudge on ahead. */
-        job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
-    } else {
-        job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-    }
-
-    bdrv_op_block_all(target, job->common.blocker);
-    job->common.len = len;
-    job->common.co = qemu_coroutine_create(backup_run);
-    block_job_txn_add_job(txn, &job->common);
-    qemu_coroutine_enter(job->common.co, job);
-    return;
-
- error:
-    if (sync_bitmap) {
-        bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
-    }
-}
diff --git a/qemu/block/blkdebug.c b/qemu/block/blkdebug.c
deleted file mode 100644
index 20d25bda6..000000000
--- a/qemu/block/blkdebug.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * Block protocol for I/O error injection
- *
- * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/cutils.h"
-#include "qemu/config-file.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"
-#include "sysemu/qtest.h"
-
-typedef struct BDRVBlkdebugState {
-    int state;
-    int new_state;
-
-    QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
-    QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
-    QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
-} BDRVBlkdebugState;
-
-typedef struct BlkdebugAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    int ret;
-} BlkdebugAIOCB;
-
-typedef struct BlkdebugSuspendedReq {
-    Coroutine *co;
-    char *tag;
-    QLIST_ENTRY(BlkdebugSuspendedReq) next;
-} BlkdebugSuspendedReq;
-
-static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size    = sizeof(BlkdebugAIOCB),
-};
-
-enum {
-    ACTION_INJECT_ERROR,
-    ACTION_SET_STATE,
-    ACTION_SUSPEND,
-};
-
-typedef struct BlkdebugRule {
-    BlkdebugEvent event;
-    int action;
-    int state;
-    union {
-        struct {
-            int error;
-            int immediately;
-            int once;
-            int64_t sector;
-        } inject;
-        struct {
-            int new_state;
-        } set_state;
-        struct {
-            char *tag;
-        } suspend;
-    } options;
-    QLIST_ENTRY(BlkdebugRule) next;
-    QSIMPLEQ_ENTRY(BlkdebugRule) active_next;
-} BlkdebugRule;
-
-static QemuOptsList inject_error_opts = {
-    .name = "inject-error",
-    .head = QTAILQ_HEAD_INITIALIZER(inject_error_opts.head),
-    .desc = {
-        {
-            .name = "event",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "state",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "errno",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "sector",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "once",
-            .type = QEMU_OPT_BOOL,
-        },
-        {
-            .name = "immediately",
-            .type = QEMU_OPT_BOOL,
-        },
-        { /* end of list */ }
-    },
-};
-
-static QemuOptsList set_state_opts = {
-    .name = "set-state",
-    .head = QTAILQ_HEAD_INITIALIZER(set_state_opts.head),
-    .desc = {
-        {
-            .name = "event",
-            .type = QEMU_OPT_STRING,
-        },
-        {
-            .name = "state",
-            .type = QEMU_OPT_NUMBER,
-        },
-        {
-            .name = "new_state",
-            .type = QEMU_OPT_NUMBER,
-        },
-        { /* end of list */ }
-    },
-};
-
-static QemuOptsList *config_groups[] = {
-    &inject_error_opts,
-    &set_state_opts,
-    NULL
-};
-
-static int get_event_by_name(const char *name, BlkdebugEvent *event)
-{
-    int i;
-
-    for (i = 0; i < BLKDBG__MAX; i++) {
-        if (!strcmp(BlkdebugEvent_lookup[i], name)) {
-            *event = i;
-            return 0;
-        }
-    }
-
-    return -1;
-}
-
-struct add_rule_data {
-    BDRVBlkdebugState *s;
-    int action;
-};
-
-static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
-{
-    struct add_rule_data *d = opaque;
-    BDRVBlkdebugState *s = d->s;
-    const char* event_name;
-    BlkdebugEvent event;
-    struct BlkdebugRule *rule;
-
-    /* Find the right event for the rule */
-    event_name = qemu_opt_get(opts, "event");
-    if (!event_name) {
-        error_setg(errp, "Missing event name for rule");
-        return -1;
-    } else if (get_event_by_name(event_name, &event) < 0) {
-        error_setg(errp, "Invalid event name \"%s\"", event_name);
-        return -1;
-    }
-
-    /* Set attributes common for all actions */
-    rule = g_malloc0(sizeof(*rule));
-    *rule = (struct BlkdebugRule) {
-        .event  = event,
-        .action = d->action,
-        .state  = qemu_opt_get_number(opts, "state", 0),
-    };
-
-    /* Parse action-specific options */
-    switch (d->action) {
-    case ACTION_INJECT_ERROR:
-        rule->options.inject.error = qemu_opt_get_number(opts, "errno", EIO);
-        rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
-        rule->options.inject.immediately =
-            qemu_opt_get_bool(opts, "immediately", 0);
-        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
-        break;
-
-    case ACTION_SET_STATE:
-        rule->options.set_state.new_state =
-            qemu_opt_get_number(opts, "new_state", 0);
-        break;
-
-    case ACTION_SUSPEND:
-        rule->options.suspend.tag =
-            g_strdup(qemu_opt_get(opts, "tag"));
-        break;
-    };
-
-    /* Add the rule */
-    QLIST_INSERT_HEAD(&s->rules[event], rule, next);
-
-    return 0;
-}
-
-static void remove_rule(BlkdebugRule *rule)
-{
-    switch (rule->action) {
-    case ACTION_INJECT_ERROR:
-    case ACTION_SET_STATE:
-        break;
-    case ACTION_SUSPEND:
-        g_free(rule->options.suspend.tag);
-        break;
-    }
-
-    QLIST_REMOVE(rule, next);
-    g_free(rule);
-}
-
-static int read_config(BDRVBlkdebugState *s, const char *filename,
-                       QDict *options, Error **errp)
-{
-    FILE *f = NULL;
-    int ret;
-    struct add_rule_data d;
-    Error *local_err = NULL;
-
-    if (filename) {
-        f = fopen(filename, "r");
-        if (f == NULL) {
-            error_setg_errno(errp, errno, "Could not read blkdebug config file");
-            return -errno;
-        }
-
-        ret = qemu_config_parse(f, config_groups, filename);
-        if (ret < 0) {
-            error_setg(errp, "Could not parse blkdebug config file");
-            ret = -EINVAL;
-            goto fail;
-        }
-    }
-
-    qemu_config_parse_qdict(options, config_groups, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    d.s = s;
-    d.action = ACTION_INJECT_ERROR;
-    qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    d.action = ACTION_SET_STATE;
-    qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = 0;
-fail:
-    qemu_opts_reset(&inject_error_opts);
-    qemu_opts_reset(&set_state_opts);
-    if (f) {
-        fclose(f);
-    }
-    return ret;
-}
-
-/* Valid blkdebug filenames look like blkdebug:path/to/config:path/to/image */
-static void blkdebug_parse_filename(const char *filename, QDict *options,
-                                    Error **errp)
-{
-    const char *c;
-
-    /* Parse the blkdebug: prefix */
-    if (!strstart(filename, "blkdebug:", &filename)) {
-        /* There was no prefix; therefore, all options have to be already
-           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
-        return;
-    }
-
-    /* Parse config file path */
-    c = strchr(filename, ':');
-    if (c == NULL) {
-        error_setg(errp, "blkdebug requires both config file and image path");
-        return;
-    }
-
-    if (c != filename) {
-        QString *config_path;
-        config_path = qstring_from_substr(filename, 0, c - filename - 1);
-        qdict_put(options, "config", config_path);
-    }
-
-    /* TODO Allow multi-level nesting and set file.filename here */
-    filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "blkdebug",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "config",
-            .type = QEMU_OPT_STRING,
-            .help = "Path to the configuration file",
-        },
-        {
-            .name = "x-image",
-            .type = QEMU_OPT_STRING,
-            .help = "[internal use only, will be removed]",
-        },
-        {
-            .name = "align",
-            .type = QEMU_OPT_SIZE,
-            .help = "Required alignment in bytes",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *config;
-    uint64_t align;
-    int ret;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* Read rules from config file or command line options */
-    config = qemu_opt_get(opts, "config");
-    ret = read_config(s, config, options, errp);
-    if (ret) {
-        goto out;
-    }
-
-    /* Set initial state */
-    s->state = 1;
-
-    /* Open the image file */
-    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
-                               bs, &child_file, false, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto out;
-    }
-
-    /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
-    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
-        bs->request_alignment = align;
-    } else {
-        error_setg(errp, "Invalid alignment");
-        ret = -EINVAL;
-        goto fail_unref;
-    }
-
-    ret = 0;
-    goto out;
-
-fail_unref:
-    bdrv_unref_child(bs, bs->file);
-out:
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static void error_callback_bh(void *opaque)
-{
-    struct BlkdebugAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *inject_error(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    int error = rule->options.inject.error;
-    struct BlkdebugAIOCB *acb;
-    QEMUBH *bh;
-    bool immediately = rule->options.inject.immediately;
-
-    if (rule->options.inject.once) {
-        QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
-        remove_rule(rule);
-    }
-
-    if (immediately) {
-        return NULL;
-    }
-
-    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
-    acb->ret = -error;
-
-    bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
-    acb->bh = bh;
-    qemu_bh_schedule(bh);
-
-    return &acb->common;
-}
-
-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
-
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
-    }
-
-    return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
-                          cb, opaque);
-}
-
-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
-
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
-    }
-
-    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
-                           cb, opaque);
-}
-
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
-
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1) {
-            break;
-        }
-    }
-
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
-    }
-
-    return bdrv_aio_flush(bs->file->bs, cb, opaque);
-}
-
-
-static void blkdebug_close(BlockDriverState *bs)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule, *next;
-    int i;
-
-    for (i = 0; i < BLKDBG__MAX; i++) {
-        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
-            remove_rule(rule);
-        }
-    }
-}
-
-static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq r;
-
-    r = (BlkdebugSuspendedReq) {
-        .co         = qemu_coroutine_self(),
-        .tag        = g_strdup(rule->options.suspend.tag),
-    };
-
-    remove_rule(rule);
-    QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
-
-    if (!qtest_enabled()) {
-        printf("blkdebug: Suspended request '%s'\n", r.tag);
-    }
-    qemu_coroutine_yield();
-    if (!qtest_enabled()) {
-        printf("blkdebug: Resuming request '%s'\n", r.tag);
-    }
-
-    QLIST_REMOVE(&r, next);
-    g_free(r.tag);
-}
-
-static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-    bool injected)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-
-    /* Only process rules for the current state */
-    if (rule->state && rule->state != s->state) {
-        return injected;
-    }
-
-    /* Take the action */
-    switch (rule->action) {
-    case ACTION_INJECT_ERROR:
-        if (!injected) {
-            QSIMPLEQ_INIT(&s->active_rules);
-            injected = true;
-        }
-        QSIMPLEQ_INSERT_HEAD(&s->active_rules, rule, active_next);
-        break;
-
-    case ACTION_SET_STATE:
-        s->new_state = rule->options.set_state.new_state;
-        break;
-
-    case ACTION_SUSPEND:
-        suspend_request(bs, rule);
-        break;
-    }
-    return injected;
-}
-
-static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    struct BlkdebugRule *rule, *next;
-    bool injected;
-
-    assert((int)event >= 0 && event < BLKDBG__MAX);
-
-    injected = false;
-    s->new_state = s->state;
-    QLIST_FOREACH_SAFE(rule, &s->rules[event], next, next) {
-        injected = process_rule(bs, rule, injected);
-    }
-    s->state = s->new_state;
-}
-
-static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
-                                     const char *tag)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    struct BlkdebugRule *rule;
-    BlkdebugEvent blkdebug_event;
-
-    if (get_event_by_name(event, &blkdebug_event) < 0) {
-        return -ENOENT;
-    }
-
-
-    rule = g_malloc(sizeof(*rule));
-    *rule = (struct BlkdebugRule) {
-        .event  = blkdebug_event,
-        .action = ACTION_SUSPEND,
-        .state  = 0,
-        .options.suspend.tag = g_strdup(tag),
-    };
-
-    QLIST_INSERT_HEAD(&s->rules[blkdebug_event], rule, next);
-
-    return 0;
-}
-
-static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r, *next;
-
-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
-        if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
-            return 0;
-        }
-    }
-    return -ENOENT;
-}
-
-static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
-                                            const char *tag)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r, *r_next;
-    BlkdebugRule *rule, *next;
-    int i, ret = -ENOENT;
-
-    for (i = 0; i < BLKDBG__MAX; i++) {
-        QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
-            if (rule->action == ACTION_SUSPEND &&
-                !strcmp(rule->options.suspend.tag, tag)) {
-                remove_rule(rule);
-                ret = 0;
-            }
-        }
-    }
-    QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
-        if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
-            ret = 0;
-        }
-    }
-    return ret;
-}
-
-static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
-{
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugSuspendedReq *r;
-
-    QLIST_FOREACH(r, &s->suspended_reqs, next) {
-        if (!strcmp(r->tag, tag)) {
-            return true;
-        }
-    }
-    return false;
-}
-
-static int64_t blkdebug_getlength(BlockDriverState *bs)
-{
-    return bdrv_getlength(bs->file->bs);
-}
-
-static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
-{
-    return bdrv_truncate(bs->file->bs, offset);
-}
-
-static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    QDict *opts;
-    const QDictEntry *e;
-    bool force_json = false;
-
-    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
-        if (strcmp(qdict_entry_key(e), "config") &&
-            strcmp(qdict_entry_key(e), "x-image"))
-        {
-            force_json = true;
-            break;
-        }
-    }
-
-    if (force_json && !bs->file->bs->full_open_options) {
-        /* The config file cannot be recreated, so creating a plain filename
-         * is impossible */
-        return;
-    }
-
-    if (!force_json && bs->file->bs->exact_filename[0]) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s",
-                 qdict_get_try_str(options, "config") ?: "",
-                 bs->file->bs->exact_filename);
-    }
-
-    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
-
-    QINCREF(bs->file->bs->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
-
-    for (e = qdict_first(options); e; e = qdict_next(options, e)) {
-        if (strcmp(qdict_entry_key(e), "x-image")) {
-            qobject_incref(qdict_entry_value(e));
-            qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
-        }
-    }
-
-    bs->full_open_options = opts;
-}
-
-static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
-                                   BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static BlockDriver bdrv_blkdebug = {
-    .format_name            = "blkdebug",
-    .protocol_name          = "blkdebug",
-    .instance_size          = sizeof(BDRVBlkdebugState),
-
-    .bdrv_parse_filename    = blkdebug_parse_filename,
-    .bdrv_file_open         = blkdebug_open,
-    .bdrv_close             = blkdebug_close,
-    .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
-    .bdrv_getlength         = blkdebug_getlength,
-    .bdrv_truncate          = blkdebug_truncate,
-    .bdrv_refresh_filename  = blkdebug_refresh_filename,
-
-    .bdrv_aio_readv         = blkdebug_aio_readv,
-    .bdrv_aio_writev        = blkdebug_aio_writev,
-    .bdrv_aio_flush         = blkdebug_aio_flush,
-
-    .bdrv_debug_event           = blkdebug_debug_event,
-    .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,
-    .bdrv_debug_remove_breakpoint
-                                = blkdebug_debug_remove_breakpoint,
-    .bdrv_debug_resume          = blkdebug_debug_resume,
-    .bdrv_debug_is_suspended    = blkdebug_debug_is_suspended,
-};
-
-static void bdrv_blkdebug_init(void)
-{
-    bdrv_register(&bdrv_blkdebug);
-}
-
-block_init(bdrv_blkdebug_init);
diff --git a/qemu/block/blkreplay.c b/qemu/block/blkreplay.c
deleted file mode 100755
index 42f1813af..000000000
--- a/qemu/block/blkreplay.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * Block protocol for record/replay
- *
- * Copyright (c) 2010-2016 Institute for System Programming
- *                         of the Russian Academy of Sciences.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "sysemu/replay.h"
-#include "qapi/error.h"
-
-typedef struct Request {
-    Coroutine *co;
-    QEMUBH *bh;
-} Request;
-
-/* Next request id.
-   This counter is global, because requests from different
-   block devices should not get overlapping ids. */
-static uint64_t request_id;
-
-static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp)
-{
-    Error *local_err = NULL;
-    int ret;
-
-    /* Open the image file */
-    bs->file = bdrv_open_child(NULL, options, "image",
-                               bs, &child_file, false, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    ret = 0;
-fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
-    return ret;
-}
-
-static void blkreplay_close(BlockDriverState *bs)
-{
-}
-
-static int64_t blkreplay_getlength(BlockDriverState *bs)
-{
-    return bdrv_getlength(bs->file->bs);
-}
-
-/* This bh is used for synchronization of return from coroutines.
-   It continues yielded coroutine which then finishes its execution.
-   BH is called adjusted to some replay checkpoint, therefore
-   record and replay will always finish coroutines deterministically.
-*/
-static void blkreplay_bh_cb(void *opaque)
-{
-    Request *req = opaque;
-    qemu_coroutine_enter(req->co, NULL);
-    qemu_bh_delete(req->bh);
-    g_free(req);
-}
-
-static void block_request_create(uint64_t reqid, BlockDriverState *bs,
-                                 Coroutine *co)
-{
-    Request *req = g_new(Request, 1);
-    *req = (Request) {
-        .co = co,
-        .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
-    };
-    replay_block_event(req->bh, reqid);
-}
-
-static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    uint64_t reqid = request_id++;
-    int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
-    block_request_create(reqid, bs, qemu_coroutine_self());
-    qemu_coroutine_yield();
-
-    return ret;
-}
-
-static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    uint64_t reqid = request_id++;
-    int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
-    block_request_create(reqid, bs, qemu_coroutine_self());
-    qemu_coroutine_yield();
-
-    return ret;
-}
-
-static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    uint64_t reqid = request_id++;
-    int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
-    block_request_create(reqid, bs, qemu_coroutine_self());
-    qemu_coroutine_yield();
-
-    return ret;
-}
-
-static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
-{
-    uint64_t reqid = request_id++;
-    int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
-    block_request_create(reqid, bs, qemu_coroutine_self());
-    qemu_coroutine_yield();
-
-    return ret;
-}
-
-static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
-{
-    uint64_t reqid = request_id++;
-    int ret = bdrv_co_flush(bs->file->bs);
-    block_request_create(reqid, bs, qemu_coroutine_self());
-    qemu_coroutine_yield();
-
-    return ret;
-}
-
-static BlockDriver bdrv_blkreplay = {
-    .format_name            = "blkreplay",
-    .protocol_name          = "blkreplay",
-    .instance_size          = 0,
-
-    .bdrv_file_open         = blkreplay_open,
-    .bdrv_close             = blkreplay_close,
-    .bdrv_getlength         = blkreplay_getlength,
-
-    .bdrv_co_readv          = blkreplay_co_readv,
-    .bdrv_co_writev         = blkreplay_co_writev,
-
-    .bdrv_co_write_zeroes   = blkreplay_co_write_zeroes,
-    .bdrv_co_discard        = blkreplay_co_discard,
-    .bdrv_co_flush          = blkreplay_co_flush,
-};
-
-static void bdrv_blkreplay_init(void)
-{
-    bdrv_register(&bdrv_blkreplay);
-}
-
-block_init(bdrv_blkreplay_init);
diff --git a/qemu/block/blkverify.c b/qemu/block/blkverify.c
deleted file mode 100644
index 9414b7a84..000000000
--- a/qemu/block/blkverify.c
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Block protocol for block driver correctness testing
- *
- * Copyright (C) 2010 IBM, Corp.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
-#include "block/block_int.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qstring.h"
-#include "qemu/cutils.h"
-
-typedef struct {
-    BdrvChild *test_file;
-} BDRVBlkverifyState;
-
-typedef struct BlkverifyAIOCB BlkverifyAIOCB;
-struct BlkverifyAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-
-    /* Request metadata */
-    bool is_write;
-    int64_t sector_num;
-    int nb_sectors;
-
-    int ret;                    /* first completed request's result */
-    unsigned int done;          /* completion counter */
-
-    QEMUIOVector *qiov;         /* user I/O vector */
-    QEMUIOVector raw_qiov;      /* cloned I/O vector for raw file */
-    void *buf;                  /* buffer for raw file I/O */
-
-    void (*verify)(BlkverifyAIOCB *acb);
-};
-
-static const AIOCBInfo blkverify_aiocb_info = {
-    .aiocb_size         = sizeof(BlkverifyAIOCB),
-};
-
-static void GCC_FMT_ATTR(2, 3) blkverify_err(BlkverifyAIOCB *acb,
-                                             const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    fprintf(stderr, "blkverify: %s sector_num=%" PRId64 " nb_sectors=%d ",
-            acb->is_write ? "write" : "read", acb->sector_num,
-            acb->nb_sectors);
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n");
-    va_end(ap);
-    exit(1);
-}
-
-/* Valid blkverify filenames look like blkverify:path/to/raw_image:path/to/image */
-static void blkverify_parse_filename(const char *filename, QDict *options,
-                                     Error **errp)
-{
-    const char *c;
-    QString *raw_path;
-
-
-    /* Parse the blkverify: prefix */
-    if (!strstart(filename, "blkverify:", &filename)) {
-        /* There was no prefix; therefore, all options have to be already
-           present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
-        return;
-    }
-
-    /* Parse the raw image filename */
-    c = strchr(filename, ':');
-    if (c == NULL) {
-        error_setg(errp, "blkverify requires raw copy and original image path");
-        return;
-    }
-
-    /* TODO Implement option pass-through and set raw.filename here */
-    raw_path = qstring_from_substr(filename, 0, c - filename - 1);
-    qdict_put(options, "x-raw", raw_path);
-
-    /* TODO Allow multi-level nesting and set file.filename here */
-    filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "blkverify",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "x-raw",
-            .type = QEMU_OPT_STRING,
-            .help = "[internal use only, will be removed]",
-        },
-        {
-            .name = "x-image",
-            .type = QEMU_OPT_STRING,
-            .help = "[internal use only, will be removed]",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    int ret;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* Open the raw file */
-    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
-                               bs, &child_file, false, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    /* Open the test file */
-    s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
-                                   "test", bs, &child_format, false,
-                                   &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        error_propagate(errp, local_err);
-        goto fail;
-    }
-
-    ret = 0;
-fail:
-    if (ret < 0) {
-        bdrv_unref_child(bs, bs->file);
-    }
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static void blkverify_close(BlockDriverState *bs)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_unref_child(bs, s->test_file);
-    s->test_file = NULL;
-}
-
-static int64_t blkverify_getlength(BlockDriverState *bs)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    return bdrv_getlength(s->test_file->bs);
-}
-
-static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
-                                         int64_t sector_num, QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque)
-{
-    BlkverifyAIOCB *acb = qemu_aio_get(&blkverify_aiocb_info, bs, cb, opaque);
-
-    acb->bh = NULL;
-    acb->is_write = is_write;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-    acb->ret = -EINPROGRESS;
-    acb->done = 0;
-    acb->qiov = qiov;
-    acb->buf = NULL;
-    acb->verify = NULL;
-    return acb;
-}
-
-static void blkverify_aio_bh(void *opaque)
-{
-    BlkverifyAIOCB *acb = opaque;
-
-    qemu_bh_delete(acb->bh);
-    if (acb->buf) {
-        qemu_iovec_destroy(&acb->raw_qiov);
-        qemu_vfree(acb->buf);
-    }
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}
-
-static void blkverify_aio_cb(void *opaque, int ret)
-{
-    BlkverifyAIOCB *acb = opaque;
-
-    switch (++acb->done) {
-    case 1:
-        acb->ret = ret;
-        break;
-
-    case 2:
-        if (acb->ret != ret) {
-            blkverify_err(acb, "return value mismatch %d != %d", acb->ret, ret);
-        }
-
-        if (acb->verify) {
-            acb->verify(acb);
-        }
-
-        acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                             blkverify_aio_bh, acb);
-        qemu_bh_schedule(acb->bh);
-        break;
-    }
-}
-
-static void blkverify_verify_readv(BlkverifyAIOCB *acb)
-{
-    ssize_t offset = qemu_iovec_compare(acb->qiov, &acb->raw_qiov);
-    if (offset != -1) {
-        blkverify_err(acb, "contents mismatch in sector %" PRId64,
-                      acb->sector_num + (int64_t)(offset / BDRV_SECTOR_SIZE));
-    }
-}
-
-static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, false, sector_num, qiov,
-                                            nb_sectors, cb, opaque);
-
-    acb->verify = blkverify_verify_readv;
-    acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
-    qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
-    qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
-
-    bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
-                   blkverify_aio_cb, acb);
-    bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
-                   blkverify_aio_cb, acb);
-    return &acb->common;
-}
-
-static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-    BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
-                                            nb_sectors, cb, opaque);
-
-    bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
-                    blkverify_aio_cb, acb);
-    bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
-                    blkverify_aio_cb, acb);
-    return &acb->common;
-}
-
-static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    /* Only flush test file, the raw file is not important */
-    return bdrv_aio_flush(s->test_file->bs, cb, opaque);
-}
-
-static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
-                                                  BlockDriverState *candidate)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
-
-    if (perm) {
-        return true;
-    }
-
-    return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
-}
-
-/* Propagate AioContext changes to ->test_file */
-static void blkverify_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_detach_aio_context(s->test_file->bs);
-}
-
-static void blkverify_attach_aio_context(BlockDriverState *bs,
-                                         AioContext *new_context)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    bdrv_attach_aio_context(s->test_file->bs, new_context);
-}
-
-static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    BDRVBlkverifyState *s = bs->opaque;
-
-    /* bs->file->bs has already been refreshed */
-    bdrv_refresh_filename(s->test_file->bs);
-
-    if (bs->file->bs->full_open_options
-        && s->test_file->bs->full_open_options)
-    {
-        QDict *opts = qdict_new();
-        qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
-
-        QINCREF(bs->file->bs->full_open_options);
-        qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
-        QINCREF(s->test_file->bs->full_open_options);
-        qdict_put_obj(opts, "test",
-                      QOBJECT(s->test_file->bs->full_open_options));
-
-        bs->full_open_options = opts;
-    }
-
-    if (bs->file->bs->exact_filename[0]
-        && s->test_file->bs->exact_filename[0])
-    {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkverify:%s:%s",
-                 bs->file->bs->exact_filename,
-                 s->test_file->bs->exact_filename);
-    }
-}
-
-static BlockDriver bdrv_blkverify = {
-    .format_name                      = "blkverify",
-    .protocol_name                    = "blkverify",
-    .instance_size                    = sizeof(BDRVBlkverifyState),
-
-    .bdrv_parse_filename              = blkverify_parse_filename,
-    .bdrv_file_open                   = blkverify_open,
-    .bdrv_close                       = blkverify_close,
-    .bdrv_getlength                   = blkverify_getlength,
-    .bdrv_refresh_filename            = blkverify_refresh_filename,
-
-    .bdrv_aio_readv                   = blkverify_aio_readv,
-    .bdrv_aio_writev                  = blkverify_aio_writev,
-    .bdrv_aio_flush                   = blkverify_aio_flush,
-
-    .bdrv_attach_aio_context          = blkverify_attach_aio_context,
-    .bdrv_detach_aio_context          = blkverify_detach_aio_context,
-
-    .is_filter                        = true,
-    .bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
-};
-
-static void bdrv_blkverify_init(void)
-{
-    bdrv_register(&bdrv_blkverify);
-}
-
-block_init(bdrv_blkverify_init);
diff --git a/qemu/block/block-backend.c b/qemu/block/block-backend.c
deleted file mode 100644
index 16c9d5e0f..000000000
--- a/qemu/block/block-backend.c
+++ /dev/null
@@ -1,1635 +0,0 @@
-/*
- * QEMU Block backends
- *
- * Copyright (C) 2014 Red Hat, Inc.
- *
- * Authors:
- *  Markus Armbruster <armbru@redhat.com>,
- *
- * This work is licensed under the terms of the GNU LGPL, version 2.1
- * or later.  See the COPYING.LIB file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "sysemu/block-backend.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "block/throttle-groups.h"
-#include "sysemu/blockdev.h"
-#include "sysemu/sysemu.h"
-#include "qapi-event.h"
-#include "qemu/id.h"
-
-/* Number of coroutines to reserve per attached device model */
-#define COROUTINE_POOL_RESERVATION 64
-
-#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
-
-struct BlockBackend {
-    char *name;
-    int refcnt;
-    BdrvChild *root;
-    DriveInfo *legacy_dinfo;    /* null unless created by drive_new() */
-    QTAILQ_ENTRY(BlockBackend) link;         /* for block_backends */
-    QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
-
-    void *dev;                  /* attached device model, if any */
-    /* TODO change to DeviceState when all users are qdevified */
-    const BlockDevOps *dev_ops;
-    void *dev_opaque;
-
-    /* the block size for which the guest device expects atomicity */
-    int guest_block_size;
-
-    /* If the BDS tree is removed, some of its options are stored here (which
-     * can be used to restore those options in the new BDS on insert) */
-    BlockBackendRootState root_state;
-
-    bool enable_write_cache;
-
-    /* I/O stats (display with "info blockstats"). */
-    BlockAcctStats stats;
-
-    BlockdevOnError on_read_error, on_write_error;
-    bool iostatus_enabled;
-    BlockDeviceIoStatus iostatus;
-
-    bool allow_write_beyond_eof;
-
-    NotifierList remove_bs_notifiers, insert_bs_notifiers;
-};
-
-typedef struct BlockBackendAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    BlockBackend *blk;
-    int ret;
-} BlockBackendAIOCB;
-
-static const AIOCBInfo block_backend_aiocb_info = {
-    .get_aio_context = blk_aiocb_get_aio_context,
-    .aiocb_size = sizeof(BlockBackendAIOCB),
-};
-
-static void drive_info_del(DriveInfo *dinfo);
-
-/* All BlockBackends */
-static QTAILQ_HEAD(, BlockBackend) block_backends =
-    QTAILQ_HEAD_INITIALIZER(block_backends);
-
-/* All BlockBackends referenced by the monitor and which are iterated through by
- * blk_next() */
-static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
-    QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
-
-static void blk_root_inherit_options(int *child_flags, QDict *child_options,
-                                     int parent_flags, QDict *parent_options)
-{
-    /* We're not supposed to call this function for root nodes */
-    abort();
-}
-
-static const BdrvChildRole child_root = {
-    .inherit_options = blk_root_inherit_options,
-};
-
-/*
- * Create a new BlockBackend with a reference count of one.
- * Store an error through @errp on failure, unless it's null.
- * Return the new BlockBackend on success, null on failure.
- */
-BlockBackend *blk_new(Error **errp)
-{
-    BlockBackend *blk;
-
-    blk = g_new0(BlockBackend, 1);
-    blk->refcnt = 1;
-    notifier_list_init(&blk->remove_bs_notifiers);
-    notifier_list_init(&blk->insert_bs_notifiers);
-    QTAILQ_INSERT_TAIL(&block_backends, blk, link);
-    return blk;
-}
-
-/*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(Error **errp)
-{
-    BlockBackend *blk;
-    BlockDriverState *bs;
-
-    blk = blk_new(errp);
-    if (!blk) {
-        return NULL;
-    }
-
-    bs = bdrv_new_root();
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    bs->blk = blk;
-    return blk;
-}
-
-/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
- *
- * Just as with bdrv_open(), after having called this function the reference to
- * @options belongs to the block layer (even on failure).
- *
- * TODO: Remove @filename and @flags; it should be possible to specify a whole
- * BDS tree just by specifying the @options QDict (or @reference,
- * alternatively). At the time of adding this function, this is not possible,
- * though, so callers of this function have to be able to specify @filename and
- * @flags.
- */
-BlockBackend *blk_new_open(const char *filename, const char *reference,
-                           QDict *options, int flags, Error **errp)
-{
-    BlockBackend *blk;
-    int ret;
-
-    blk = blk_new_with_bs(errp);
-    if (!blk) {
-        QDECREF(options);
-        return NULL;
-    }
-
-    ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
-    if (ret < 0) {
-        blk_unref(blk);
-        return NULL;
-    }
-
-    blk_set_enable_write_cache(blk, true);
-
-    return blk;
-}
-
-static void blk_delete(BlockBackend *blk)
-{
-    assert(!blk->refcnt);
-    assert(!blk->name);
-    assert(!blk->dev);
-    if (blk->root) {
-        blk_remove_bs(blk);
-    }
-    assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
-    assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
-    if (blk->root_state.throttle_state) {
-        g_free(blk->root_state.throttle_group);
-        throttle_group_unref(blk->root_state.throttle_state);
-    }
-    QTAILQ_REMOVE(&block_backends, blk, link);
-    drive_info_del(blk->legacy_dinfo);
-    block_acct_cleanup(&blk->stats);
-    g_free(blk);
-}
-
-static void drive_info_del(DriveInfo *dinfo)
-{
-    if (!dinfo) {
-        return;
-    }
-    qemu_opts_del(dinfo->opts);
-    g_free(dinfo->serial);
-    g_free(dinfo);
-}
-
-int blk_get_refcnt(BlockBackend *blk)
-{
-    return blk ? blk->refcnt : 0;
-}
-
-/*
- * Increment @blk's reference count.
- * @blk must not be null.
- */
-void blk_ref(BlockBackend *blk)
-{
-    blk->refcnt++;
-}
-
-/*
- * Decrement @blk's reference count.
- * If this drops it to zero, destroy @blk.
- * For convenience, do nothing if @blk is null.
- */
-void blk_unref(BlockBackend *blk)
-{
-    if (blk) {
-        assert(blk->refcnt > 0);
-        if (!--blk->refcnt) {
-            blk_delete(blk);
-        }
-    }
-}
-
-/*
- * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
- * ones which are hidden (i.e. are not referenced by the monitor).
- */
-static BlockBackend *blk_all_next(BlockBackend *blk)
-{
-    return blk ? QTAILQ_NEXT(blk, link)
-               : QTAILQ_FIRST(&block_backends);
-}
-
-void blk_remove_all_bs(void)
-{
-    BlockBackend *blk = NULL;
-
-    while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *ctx = blk_get_aio_context(blk);
-
-        aio_context_acquire(ctx);
-        if (blk->root) {
-            blk_remove_bs(blk);
-        }
-        aio_context_release(ctx);
-    }
-}
-
-/*
- * Return the monitor-owned BlockBackend after @blk.
- * If @blk is null, return the first one.
- * Else, return @blk's next sibling, which may be null.
- *
- * To iterate over all BlockBackends, do
- * for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
- *     ...
- * }
- */
-BlockBackend *blk_next(BlockBackend *blk)
-{
-    return blk ? QTAILQ_NEXT(blk, monitor_link)
-               : QTAILQ_FIRST(&monitor_block_backends);
-}
-
-/*
- * Iterates over all BlockDriverStates which are attached to a BlockBackend.
- * This function is for use by bdrv_next().
- *
- * @bs must be NULL or a BDS that is attached to a BB.
- */
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
-{
-    BlockBackend *blk;
-
-    if (bs) {
-        assert(bs->blk);
-        blk = bs->blk;
-    } else {
-        blk = NULL;
-    }
-
-    do {
-        blk = blk_all_next(blk);
-    } while (blk && !blk->root);
-
-    return blk ? blk->root->bs : NULL;
-}
-
-/*
- * Add a BlockBackend into the list of backends referenced by the monitor, with
- * the given @name acting as the handle for the monitor.
- * Strictly for use by blockdev.c.
- *
- * @name must not be null or empty.
- *
- * Returns true on success and false on failure. In the latter case, an Error
- * object is returned through @errp.
- */
-bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
-{
-    assert(!blk->name);
-    assert(name && name[0]);
-
-    if (!id_wellformed(name)) {
-        error_setg(errp, "Invalid device name");
-        return false;
-    }
-    if (blk_by_name(name)) {
-        error_setg(errp, "Device with id '%s' already exists", name);
-        return false;
-    }
-    if (bdrv_find_node(name)) {
-        error_setg(errp,
-                   "Device name '%s' conflicts with an existing node name",
-                   name);
-        return false;
-    }
-
-    blk->name = g_strdup(name);
-    QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
-    return true;
-}
-
-/*
- * Remove a BlockBackend from the list of backends referenced by the monitor.
- * Strictly for use by blockdev.c.
- */
-void monitor_remove_blk(BlockBackend *blk)
-{
-    if (!blk->name) {
-        return;
-    }
-
-    QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
-    g_free(blk->name);
-    blk->name = NULL;
-}
-
-/*
- * Return @blk's name, a non-null string.
- * Returns an empty string iff @blk is not referenced by the monitor.
- */
-const char *blk_name(BlockBackend *blk)
-{
-    return blk->name ?: "";
-}
-
-/*
- * Return the BlockBackend with name @name if it exists, else null.
- * @name must not be null.
- */
-BlockBackend *blk_by_name(const char *name)
-{
-    BlockBackend *blk = NULL;
-
-    assert(name);
-    while ((blk = blk_next(blk)) != NULL) {
-        if (!strcmp(name, blk->name)) {
-            return blk;
-        }
-    }
-    return NULL;
-}
-
-/*
- * Return the BlockDriverState attached to @blk if any, else null.
- */
-BlockDriverState *blk_bs(BlockBackend *blk)
-{
-    return blk->root ? blk->root->bs : NULL;
-}
-
-/*
- * Return @blk's DriveInfo if any, else null.
- */
-DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
-{
-    return blk->legacy_dinfo;
-}
-
-/*
- * Set @blk's DriveInfo to @dinfo, and return it.
- * @blk must not have a DriveInfo set already.
- * No other BlockBackend may have the same DriveInfo set.
- */
-DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
-{
-    assert(!blk->legacy_dinfo);
-    return blk->legacy_dinfo = dinfo;
-}
-
-/*
- * Return the BlockBackend with DriveInfo @dinfo.
- * It must exist.
- */
-BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
-{
-    BlockBackend *blk = NULL;
-
-    while ((blk = blk_next(blk)) != NULL) {
-        if (blk->legacy_dinfo == dinfo) {
-            return blk;
-        }
-    }
-    abort();
-}
-
-/*
- * Disassociates the currently associated BlockDriverState from @blk.
- */
-void blk_remove_bs(BlockBackend *blk)
-{
-    assert(blk->root->bs->blk == blk);
-
-    notifier_list_notify(&blk->remove_bs_notifiers, blk);
-
-    blk_update_root_state(blk);
-
-    blk->root->bs->blk = NULL;
-    bdrv_root_unref_child(blk->root);
-    blk->root = NULL;
-}
-
-/*
- * Associates a new BlockDriverState with @blk.
- */
-void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
-{
-    assert(!blk->root && !bs->blk);
-    bdrv_ref(bs);
-    blk->root = bdrv_root_attach_child(bs, "root", &child_root);
-    bs->blk = blk;
-
-    notifier_list_notify(&blk->insert_bs_notifiers, blk);
-}
-
-/*
- * Attach device model @dev to @blk.
- * Return 0 on success, -EBUSY when a device model is attached already.
- */
-int blk_attach_dev(BlockBackend *blk, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-    if (blk->dev) {
-        return -EBUSY;
-    }
-    blk_ref(blk);
-    blk->dev = dev;
-    blk_iostatus_reset(blk);
-    return 0;
-}
-
-/*
- * Attach device model @dev to @blk.
- * @blk must not have a device model attached already.
- * TODO qdevified devices don't use this, remove when devices are qdevified
- */
-void blk_attach_dev_nofail(BlockBackend *blk, void *dev)
-{
-    if (blk_attach_dev(blk, dev) < 0) {
-        abort();
-    }
-}
-
-/*
- * Detach device model @dev from @blk.
- * @dev must be currently attached to @blk.
- */
-void blk_detach_dev(BlockBackend *blk, void *dev)
-/* TODO change to DeviceState *dev when all users are qdevified */
-{
-    assert(blk->dev == dev);
-    blk->dev = NULL;
-    blk->dev_ops = NULL;
-    blk->dev_opaque = NULL;
-    blk->guest_block_size = 512;
-    blk_unref(blk);
-}
-
-/*
- * Return the device model attached to @blk if any, else null.
- */
-void *blk_get_attached_dev(BlockBackend *blk)
-/* TODO change to return DeviceState * when all users are qdevified */
-{
-    return blk->dev;
-}
-
-/*
- * Set @blk's device model callbacks to @ops.
- * @opaque is the opaque argument to pass to the callbacks.
- * This is for use by device models.
- */
-void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
-                     void *opaque)
-{
-    blk->dev_ops = ops;
-    blk->dev_opaque = opaque;
-}
-
-/*
- * Notify @blk's attached device model of media change.
- * If @load is true, notify of media load.
- * Else, notify of media eject.
- * Also send DEVICE_TRAY_MOVED events as appropriate.
- */
-void blk_dev_change_media_cb(BlockBackend *blk, bool load)
-{
-    if (blk->dev_ops && blk->dev_ops->change_media_cb) {
-        bool tray_was_open, tray_is_open;
-
-        tray_was_open = blk_dev_is_tray_open(blk);
-        blk->dev_ops->change_media_cb(blk->dev_opaque, load);
-        tray_is_open = blk_dev_is_tray_open(blk);
-
-        if (tray_was_open != tray_is_open) {
-            qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
-                                              &error_abort);
-        }
-    }
-}
-
-/*
- * Does @blk's attached device model have removable media?
- * %true if no device model is attached.
- */
-bool blk_dev_has_removable_media(BlockBackend *blk)
-{
-    return !blk->dev || (blk->dev_ops && blk->dev_ops->change_media_cb);
-}
-
-/*
- * Does @blk's attached device model have a tray?
- */
-bool blk_dev_has_tray(BlockBackend *blk)
-{
-    return blk->dev_ops && blk->dev_ops->is_tray_open;
-}
-
-/*
- * Notify @blk's attached device model of a media eject request.
- * If @force is true, the medium is about to be yanked out forcefully.
- */
-void blk_dev_eject_request(BlockBackend *blk, bool force)
-{
-    if (blk->dev_ops && blk->dev_ops->eject_request_cb) {
-        blk->dev_ops->eject_request_cb(blk->dev_opaque, force);
-    }
-}
-
-/*
- * Does @blk's attached device model have a tray, and is it open?
- */
-bool blk_dev_is_tray_open(BlockBackend *blk)
-{
-    if (blk_dev_has_tray(blk)) {
-        return blk->dev_ops->is_tray_open(blk->dev_opaque);
-    }
-    return false;
-}
-
-/*
- * Does @blk's attached device model have the medium locked?
- * %false if the device model has no such lock.
- */
-bool blk_dev_is_medium_locked(BlockBackend *blk)
-{
-    if (blk->dev_ops && blk->dev_ops->is_medium_locked) {
-        return blk->dev_ops->is_medium_locked(blk->dev_opaque);
-    }
-    return false;
-}
-
-/*
- * Notify @blk's attached device model of a backend size change.
- */
-void blk_dev_resize_cb(BlockBackend *blk)
-{
-    if (blk->dev_ops && blk->dev_ops->resize_cb) {
-        blk->dev_ops->resize_cb(blk->dev_opaque);
-    }
-}
-
-void blk_iostatus_enable(BlockBackend *blk)
-{
-    blk->iostatus_enabled = true;
-    blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-}
-
-/* The I/O status is only enabled if the drive explicitly
- * enables it _and_ the VM is configured to stop on errors */
-bool blk_iostatus_is_enabled(const BlockBackend *blk)
-{
-    return (blk->iostatus_enabled &&
-           (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
-            blk->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
-            blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
-}
-
-BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
-{
-    return blk->iostatus;
-}
-
-void blk_iostatus_disable(BlockBackend *blk)
-{
-    blk->iostatus_enabled = false;
-}
-
-void blk_iostatus_reset(BlockBackend *blk)
-{
-    if (blk_iostatus_is_enabled(blk)) {
-        BlockDriverState *bs = blk_bs(blk);
-        blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-        if (bs && bs->job) {
-            block_job_iostatus_reset(bs->job);
-        }
-    }
-}
-
-void blk_iostatus_set_err(BlockBackend *blk, int error)
-{
-    assert(blk_iostatus_is_enabled(blk));
-    if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-        blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
-                                          BLOCK_DEVICE_IO_STATUS_FAILED;
-    }
-}
-
-void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
-{
-    blk->allow_write_beyond_eof = allow;
-}
-
-static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
-                                  size_t size)
-{
-    int64_t len;
-
-    if (size > INT_MAX) {
-        return -EIO;
-    }
-
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    if (offset < 0) {
-        return -EIO;
-    }
-
-    if (!blk->allow_write_beyond_eof) {
-        len = blk_getlength(blk);
-        if (len < 0) {
-            return len;
-        }
-
-        if (offset > len || len - offset < size) {
-            return -EIO;
-        }
-    }
-
-    return 0;
-}
-
-static int blk_check_request(BlockBackend *blk, int64_t sector_num,
-                             int nb_sectors)
-{
-    if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
-        return -EIO;
-    }
-
-    if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
-        return -EIO;
-    }
-
-    return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
-                                  nb_sectors * BDRV_SECTOR_SIZE);
-}
-
-static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
-                                      unsigned int bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
-{
-    int ret = blk_check_byte_request(blk, offset, bytes);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags);
-}
-
-static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
-                                      unsigned int bytes, QEMUIOVector *qiov,
-                                      BdrvRequestFlags flags)
-{
-    int ret;
-
-    ret = blk_check_byte_request(blk, offset, bytes);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (!blk->enable_write_cache) {
-        flags |= BDRV_REQ_FUA;
-    }
-
-    return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
-}
-
-typedef struct BlkRwCo {
-    BlockBackend *blk;
-    int64_t offset;
-    QEMUIOVector *qiov;
-    int ret;
-    BdrvRequestFlags flags;
-} BlkRwCo;
-
-static void blk_read_entry(void *opaque)
-{
-    BlkRwCo *rwco = opaque;
-
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
-                              rwco->qiov, rwco->flags);
-}
-
-static void blk_write_entry(void *opaque)
-{
-    BlkRwCo *rwco = opaque;
-
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
-                               rwco->qiov, rwco->flags);
-}
-
-static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
-                   int64_t bytes, CoroutineEntry co_entry,
-                   BdrvRequestFlags flags)
-{
-    AioContext *aio_context;
-    QEMUIOVector qiov;
-    struct iovec iov;
-    Coroutine *co;
-    BlkRwCo rwco;
-
-    iov = (struct iovec) {
-        .iov_base = buf,
-        .iov_len = bytes,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    rwco = (BlkRwCo) {
-        .blk    = blk,
-        .offset = offset,
-        .qiov   = &qiov,
-        .flags  = flags,
-        .ret    = NOT_DONE,
-    };
-
-    co = qemu_coroutine_create(co_entry);
-    qemu_coroutine_enter(co, &rwco);
-
-    aio_context = blk_get_aio_context(blk);
-    while (rwco.ret == NOT_DONE) {
-        aio_poll(aio_context, true);
-    }
-
-    return rwco.ret;
-}
-
-static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-                  int nb_sectors, CoroutineEntry co_entry,
-                  BdrvRequestFlags flags)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
-                   nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-             int nb_sectors)
-{
-    return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
-}
-
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
-                         int nb_sectors)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    bool enabled;
-    int ret;
-
-    ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    enabled = bs->io_limits_enabled;
-    bs->io_limits_enabled = false;
-    ret = blk_read(blk, sector_num, buf, nb_sectors);
-    bs->io_limits_enabled = enabled;
-    return ret;
-}
-
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
-              int nb_sectors)
-{
-    return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
-                  blk_write_entry, 0);
-}
-
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                     int nb_sectors, BdrvRequestFlags flags)
-{
-    return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
-                  flags | BDRV_REQ_ZERO_WRITE);
-}
-
-static void error_callback_bh(void *opaque)
-{
-    struct BlockBackendAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}
-
-BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque, int ret)
-{
-    struct BlockBackendAIOCB *acb;
-    QEMUBH *bh;
-
-    acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
-    acb->blk = blk;
-    acb->ret = ret;
-
-    bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
-    acb->bh = bh;
-    qemu_bh_schedule(bh);
-
-    return &acb->common;
-}
-
-typedef struct BlkAioEmAIOCB {
-    BlockAIOCB common;
-    BlkRwCo rwco;
-    int bytes;
-    bool has_returned;
-    QEMUBH* bh;
-} BlkAioEmAIOCB;
-
-static const AIOCBInfo blk_aio_em_aiocb_info = {
-    .aiocb_size         = sizeof(BlkAioEmAIOCB),
-};
-
-static void blk_aio_complete(BlkAioEmAIOCB *acb)
-{
-    if (acb->bh) {
-        assert(acb->has_returned);
-        qemu_bh_delete(acb->bh);
-    }
-    if (acb->has_returned) {
-        acb->common.cb(acb->common.opaque, acb->rwco.ret);
-        qemu_aio_unref(acb);
-    }
-}
-
-static void blk_aio_complete_bh(void *opaque)
-{
-    blk_aio_complete(opaque);
-}
-
-static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
-                                QEMUIOVector *qiov, CoroutineEntry co_entry,
-                                BdrvRequestFlags flags,
-                                BlockCompletionFunc *cb, void *opaque)
-{
-    BlkAioEmAIOCB *acb;
-    Coroutine *co;
-
-    acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
-    acb->rwco = (BlkRwCo) {
-        .blk    = blk,
-        .offset = offset,
-        .qiov   = qiov,
-        .flags  = flags,
-        .ret    = NOT_DONE,
-    };
-    acb->bytes = bytes;
-    acb->bh = NULL;
-    acb->has_returned = false;
-
-    co = qemu_coroutine_create(co_entry);
-    qemu_coroutine_enter(co, acb);
-
-    acb->has_returned = true;
-    if (acb->rwco.ret != NOT_DONE) {
-        acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
-        qemu_bh_schedule(acb->bh);
-    }
-
-    return &acb->common;
-}
-
-static void blk_aio_read_entry(void *opaque)
-{
-    BlkAioEmAIOCB *acb = opaque;
-    BlkRwCo *rwco = &acb->rwco;
-
-    assert(rwco->qiov->size == acb->bytes);
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
-                              rwco->qiov, rwco->flags);
-    blk_aio_complete(acb);
-}
-
-static void blk_aio_write_entry(void *opaque)
-{
-    BlkAioEmAIOCB *acb = opaque;
-    BlkRwCo *rwco = &acb->rwco;
-
-    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
-                               rwco->qiov, rwco->flags);
-    blk_aio_complete(acb);
-}
-
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                                 int nb_sectors, BdrvRequestFlags flags,
-                                 BlockCompletionFunc *cb, void *opaque)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
-    }
-
-    return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
-                        nb_sectors << BDRV_SECTOR_BITS, NULL,
-                        blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
-                        cb, opaque);
-}
-
-int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
-{
-    int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return count;
-}
-
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
-{
-    int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
-    if (ret < 0) {
-        return ret;
-    }
-    return count;
-}
-
-int64_t blk_getlength(BlockBackend *blk)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_getlength(blk_bs(blk));
-}
-
-void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
-{
-    if (!blk_bs(blk)) {
-        *nb_sectors_ptr = 0;
-    } else {
-        bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
-    }
-}
-
-int64_t blk_nb_sectors(BlockBackend *blk)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_nb_sectors(blk_bs(blk));
-}
-
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
-                          QEMUIOVector *iov, int nb_sectors,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
-    }
-
-    assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
-    return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
-                        blk_aio_read_entry, 0, cb, opaque);
-}
-
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
-                           QEMUIOVector *iov, int nb_sectors,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
-    }
-
-    assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
-    return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
-                        blk_aio_write_entry, 0, cb, opaque);
-}
-
-BlockAIOCB *blk_aio_flush(BlockBackend *blk,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    if (!blk_is_available(blk)) {
-        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
-    }
-
-    return bdrv_aio_flush(blk_bs(blk), cb, opaque);
-}
-
-BlockAIOCB *blk_aio_discard(BlockBackend *blk,
-                            int64_t sector_num, int nb_sectors,
-                            BlockCompletionFunc *cb, void *opaque)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return blk_abort_aio_request(blk, cb, opaque, ret);
-    }
-
-    return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
-}
-
-void blk_aio_cancel(BlockAIOCB *acb)
-{
-    bdrv_aio_cancel(acb);
-}
-
-void blk_aio_cancel_async(BlockAIOCB *acb)
-{
-    bdrv_aio_cancel_async(acb);
-}
-
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
-    int i, ret;
-
-    for (i = 0; i < num_reqs; i++) {
-        ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
-}
-
-int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_ioctl(blk_bs(blk), req, buf);
-}
-
-BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    if (!blk_is_available(blk)) {
-        return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
-    }
-
-    return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
-}
-
-int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
-}
-
-int blk_co_flush(BlockBackend *blk)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_co_flush(blk_bs(blk));
-}
-
-int blk_flush(BlockBackend *blk)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_flush(blk_bs(blk));
-}
-
-void blk_drain(BlockBackend *blk)
-{
-    if (blk_bs(blk)) {
-        bdrv_drain(blk_bs(blk));
-    }
-}
-
-void blk_drain_all(void)
-{
-    bdrv_drain_all();
-}
-
-void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
-                      BlockdevOnError on_write_error)
-{
-    blk->on_read_error = on_read_error;
-    blk->on_write_error = on_write_error;
-}
-
-BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
-{
-    return is_read ? blk->on_read_error : blk->on_write_error;
-}
-
-BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
-                                      int error)
-{
-    BlockdevOnError on_err = blk_get_on_error(blk, is_read);
-
-    switch (on_err) {
-    case BLOCKDEV_ON_ERROR_ENOSPC:
-        return (error == ENOSPC) ?
-               BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_STOP:
-        return BLOCK_ERROR_ACTION_STOP;
-    case BLOCKDEV_ON_ERROR_REPORT:
-        return BLOCK_ERROR_ACTION_REPORT;
-    case BLOCKDEV_ON_ERROR_IGNORE:
-        return BLOCK_ERROR_ACTION_IGNORE;
-    default:
-        abort();
-    }
-}
-
-static void send_qmp_error_event(BlockBackend *blk,
-                                 BlockErrorAction action,
-                                 bool is_read, int error)
-{
-    IoOperationType optype;
-
-    optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
-    qapi_event_send_block_io_error(blk_name(blk), optype, action,
-                                   blk_iostatus_is_enabled(blk),
-                                   error == ENOSPC, strerror(error),
-                                   &error_abort);
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void blk_error_action(BlockBackend *blk, BlockErrorAction action,
-                      bool is_read, int error)
-{
-    assert(error >= 0);
-
-    if (action == BLOCK_ERROR_ACTION_STOP) {
-        /* First set the iostatus, so that "info block" returns an iostatus
-         * that matches the events raised so far (an additional error iostatus
-         * is fine, but not a lost one).
-         */
-        blk_iostatus_set_err(blk, error);
-
-        /* Then raise the request to stop the VM and the event.
-         * qemu_system_vmstop_request_prepare has two effects.  First,
-         * it ensures that the STOP event always comes after the
-         * BLOCK_IO_ERROR event.  Second, it ensures that even if management
-         * can observe the STOP event and do a "cont" before the STOP
-         * event is issued, the VM will not stop.  In this case, vm_start()
-         * also ensures that the STOP/RESUME pair of events is emitted.
-         */
-        qemu_system_vmstop_request_prepare();
-        send_qmp_error_event(blk, action, is_read, error);
-        qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
-    } else {
-        send_qmp_error_event(blk, action, is_read, error);
-    }
-}
-
-int blk_is_read_only(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        return bdrv_is_read_only(bs);
-    } else {
-        return blk->root_state.read_only;
-    }
-}
-
-int blk_is_sg(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (!bs) {
-        return 0;
-    }
-
-    return bdrv_is_sg(bs);
-}
-
-int blk_enable_write_cache(BlockBackend *blk)
-{
-    return blk->enable_write_cache;
-}
-
-void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
-{
-    blk->enable_write_cache = wce;
-}
-
-void blk_invalidate_cache(BlockBackend *blk, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (!bs) {
-        error_setg(errp, "Device '%s' has no medium", blk->name);
-        return;
-    }
-
-    bdrv_invalidate_cache(bs, errp);
-}
-
-bool blk_is_inserted(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    return bs && bdrv_is_inserted(bs);
-}
-
-bool blk_is_available(BlockBackend *blk)
-{
-    return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
-}
-
-void blk_lock_medium(BlockBackend *blk, bool locked)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_lock_medium(bs, locked);
-    }
-}
-
-void blk_eject(BlockBackend *blk, bool eject_flag)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_eject(bs, eject_flag);
-    }
-}
-
-int blk_get_flags(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        return bdrv_get_flags(bs);
-    } else {
-        return blk->root_state.open_flags;
-    }
-}
-
-int blk_get_max_transfer_length(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        return bs->bl.max_transfer_length;
-    } else {
-        return 0;
-    }
-}
-
-int blk_get_max_iov(BlockBackend *blk)
-{
-    return blk->root->bs->bl.max_iov;
-}
-
-void blk_set_guest_block_size(BlockBackend *blk, int align)
-{
-    blk->guest_block_size = align;
-}
-
-void *blk_try_blockalign(BlockBackend *blk, size_t size)
-{
-    return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
-}
-
-void *blk_blockalign(BlockBackend *blk, size_t size)
-{
-    return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
-}
-
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (!bs) {
-        return false;
-    }
-
-    return bdrv_op_is_blocked(bs, op, errp);
-}
-
-void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_op_unblock(bs, op, reason);
-    }
-}
-
-void blk_op_block_all(BlockBackend *blk, Error *reason)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_op_block_all(bs, reason);
-    }
-}
-
-void blk_op_unblock_all(BlockBackend *blk, Error *reason)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_op_unblock_all(bs, reason);
-    }
-}
-
-AioContext *blk_get_aio_context(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        return bdrv_get_aio_context(bs);
-    } else {
-        return qemu_get_aio_context();
-    }
-}
-
-static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
-{
-    BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
-    return blk_get_aio_context(blk_acb->blk);
-}
-
-void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_set_aio_context(bs, new_context);
-    }
-}
-
-void blk_add_aio_context_notifier(BlockBackend *blk,
-        void (*attached_aio_context)(AioContext *new_context, void *opaque),
-        void (*detach_aio_context)(void *opaque), void *opaque)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_add_aio_context_notifier(bs, attached_aio_context,
-                                      detach_aio_context, opaque);
-    }
-}
-
-void blk_remove_aio_context_notifier(BlockBackend *blk,
-                                     void (*attached_aio_context)(AioContext *,
-                                                                  void *),
-                                     void (*detach_aio_context)(void *),
-                                     void *opaque)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_remove_aio_context_notifier(bs, attached_aio_context,
-                                         detach_aio_context, opaque);
-    }
-}
-
-void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
-{
-    notifier_list_add(&blk->remove_bs_notifiers, notify);
-}
-
-void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
-{
-    notifier_list_add(&blk->insert_bs_notifiers, notify);
-}
-
-void blk_io_plug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_io_plug(bs);
-    }
-}
-
-void blk_io_unplug(BlockBackend *blk)
-{
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        bdrv_io_unplug(bs);
-    }
-}
-
-BlockAcctStats *blk_get_stats(BlockBackend *blk)
-{
-    return &blk->stats;
-}
-
-void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
-                  BlockCompletionFunc *cb, void *opaque)
-{
-    return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
-}
-
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
-                                     int nb_sectors, BdrvRequestFlags flags)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
-                          nb_sectors << BDRV_SECTOR_BITS, NULL,
-                          flags | BDRV_REQ_ZERO_WRITE);
-}
-
-int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
-                         const uint8_t *buf, int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
-}
-
-int blk_truncate(BlockBackend *blk, int64_t offset)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_truncate(blk_bs(blk), offset);
-}
-
-int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
-{
-    int ret = blk_check_request(blk, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
-}
-
-int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
-                     int64_t pos, int size)
-{
-    int ret;
-
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (ret == size && !blk->enable_write_cache) {
-        ret = bdrv_flush(blk_bs(blk));
-    }
-
-    return ret < 0 ? ret : size;
-}
-
-int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
-}
-
-int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_probe_blocksizes(blk_bs(blk), bsz);
-}
-
-int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
-{
-    if (!blk_is_available(blk)) {
-        return -ENOMEDIUM;
-    }
-
-    return bdrv_probe_geometry(blk_bs(blk), geo);
-}
-
-/*
- * Updates the BlockBackendRootState object with data from the currently
- * attached BlockDriverState.
- */
-void blk_update_root_state(BlockBackend *blk)
-{
-    assert(blk->root);
-
-    blk->root_state.open_flags    = blk->root->bs->open_flags;
-    blk->root_state.read_only     = blk->root->bs->read_only;
-    blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
-
-    if (blk->root_state.throttle_group) {
-        g_free(blk->root_state.throttle_group);
-        throttle_group_unref(blk->root_state.throttle_state);
-    }
-    if (blk->root->bs->throttle_state) {
-        const char *name = throttle_group_get_name(blk->root->bs);
-        blk->root_state.throttle_group = g_strdup(name);
-        blk->root_state.throttle_state = throttle_group_incref(name);
-    } else {
-        blk->root_state.throttle_group = NULL;
-        blk->root_state.throttle_state = NULL;
-    }
-}
-
-/*
- * Applies the information in the root state to the given BlockDriverState. This
- * does not include the flags which have to be specified for bdrv_open(), use
- * blk_get_open_flags_from_root_state() to inquire them.
- */
-void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
-{
-    bs->detect_zeroes = blk->root_state.detect_zeroes;
-    if (blk->root_state.throttle_group) {
-        bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
-    }
-}
-
-/*
- * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
- * supposed to inherit the root state.
- */
-int blk_get_open_flags_from_root_state(BlockBackend *blk)
-{
-    int bs_flags;
-
-    bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
-    bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
-
-    return bs_flags;
-}
-
-BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
-{
-    return &blk->root_state;
-}
-
-int blk_commit_all(void)
-{
-    BlockBackend *blk = NULL;
-
-    while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *aio_context = blk_get_aio_context(blk);
-
-        aio_context_acquire(aio_context);
-        if (blk_is_inserted(blk) && blk->root->bs->backing) {
-            int ret = bdrv_commit(blk->root->bs);
-            if (ret < 0) {
-                aio_context_release(aio_context);
-                return ret;
-            }
-        }
-        aio_context_release(aio_context);
-    }
-    return 0;
-}
-
-int blk_flush_all(void)
-{
-    BlockBackend *blk = NULL;
-    int result = 0;
-
-    while ((blk = blk_all_next(blk)) != NULL) {
-        AioContext *aio_context = blk_get_aio_context(blk);
-        int ret;
-
-        aio_context_acquire(aio_context);
-        if (blk_is_inserted(blk)) {
-            ret = blk_flush(blk);
-            if (ret < 0 && !result) {
-                result = ret;
-            }
-        }
-        aio_context_release(aio_context);
-    }
-
-    return result;
-}
diff --git a/qemu/block/bochs.c b/qemu/block/bochs.c
deleted file mode 100644
index af8b7abdf..000000000
--- a/qemu/block/bochs.c
+++ /dev/null
@@ -1,279 +0,0 @@
-/*
- * Block driver for the various disk image formats used by Bochs
- * Currently only for "growing" type in read-only mode
- *
- * Copyright (c) 2005 Alex Beregszaszi
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-
-/**************************************************************/
-
-#define HEADER_MAGIC "Bochs Virtual HD Image"
-#define HEADER_VERSION 0x00020000
-#define HEADER_V1 0x00010000
-#define HEADER_SIZE 512
-
-#define REDOLOG_TYPE "Redolog"
-#define GROWING_TYPE "Growing"
-
-// not allocated: 0xffffffff
-
-// always little-endian
-struct bochs_header {
-    char magic[32];     /* "Bochs Virtual HD Image" */
-    char type[16];      /* "Redolog" */
-    char subtype[16];   /* "Undoable" / "Volatile" / "Growing" */
-    uint32_t version;
-    uint32_t header;    /* size of header */
-
-    uint32_t catalog;   /* num of entries */
-    uint32_t bitmap;    /* bitmap size */
-    uint32_t extent;    /* extent size */
-
-    union {
-        struct {
-            uint32_t reserved;  /* for ??? */
-            uint64_t disk;      /* disk size */
-            char padding[HEADER_SIZE - 64 - 20 - 12];
-        } QEMU_PACKED redolog;
-        struct {
-            uint64_t disk;      /* disk size */
-            char padding[HEADER_SIZE - 64 - 20 - 8];
-        } QEMU_PACKED redolog_v1;
-        char padding[HEADER_SIZE - 64 - 20];
-    } extra;
-} QEMU_PACKED;
-
-typedef struct BDRVBochsState {
-    CoMutex lock;
-    uint32_t *catalog_bitmap;
-    uint32_t catalog_size;
-
-    uint32_t data_offset;
-
-    uint32_t bitmap_blocks;
-    uint32_t extent_blocks;
-    uint32_t extent_size;
-} BDRVBochsState;
-
-static int bochs_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const struct bochs_header *bochs = (const void *)buf;
-
-    if (buf_size < HEADER_SIZE)
-	return 0;
-
-    if (!strcmp(bochs->magic, HEADER_MAGIC) &&
-	!strcmp(bochs->type, REDOLOG_TYPE) &&
-	!strcmp(bochs->subtype, GROWING_TYPE) &&
-	((le32_to_cpu(bochs->version) == HEADER_VERSION) ||
-	(le32_to_cpu(bochs->version) == HEADER_V1)))
-	return 100;
-
-    return 0;
-}
-
-static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    BDRVBochsState *s = bs->opaque;
-    uint32_t i;
-    struct bochs_header bochs;
-    int ret;
-
-    bs->read_only = 1; // no write support yet
-
-    ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (strcmp(bochs.magic, HEADER_MAGIC) ||
-        strcmp(bochs.type, REDOLOG_TYPE) ||
-        strcmp(bochs.subtype, GROWING_TYPE) ||
-	((le32_to_cpu(bochs.version) != HEADER_VERSION) &&
-	(le32_to_cpu(bochs.version) != HEADER_V1))) {
-        error_setg(errp, "Image not in Bochs format");
-        return -EINVAL;
-    }
-
-    if (le32_to_cpu(bochs.version) == HEADER_V1) {
-        bs->total_sectors = le64_to_cpu(bochs.extra.redolog_v1.disk) / 512;
-    } else {
-        bs->total_sectors = le64_to_cpu(bochs.extra.redolog.disk) / 512;
-    }
-
-    /* Limit to 1M entries to avoid unbounded allocation. This is what is
-     * needed for the largest image that bximage can create (~8 TB). */
-    s->catalog_size = le32_to_cpu(bochs.catalog);
-    if (s->catalog_size > 0x100000) {
-        error_setg(errp, "Catalog size is too large");
-        return -EFBIG;
-    }
-
-    s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size);
-    if (s->catalog_size && s->catalog_bitmap == NULL) {
-        error_setg(errp, "Could not allocate memory for catalog");
-        return -ENOMEM;
-    }
-
-    ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
-                     s->catalog_size * 4);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    for (i = 0; i < s->catalog_size; i++)
-	le32_to_cpus(&s->catalog_bitmap[i]);
-
-    s->data_offset = le32_to_cpu(bochs.header) + (s->catalog_size * 4);
-
-    s->bitmap_blocks = 1 + (le32_to_cpu(bochs.bitmap) - 1) / 512;
-    s->extent_blocks = 1 + (le32_to_cpu(bochs.extent) - 1) / 512;
-
-    s->extent_size = le32_to_cpu(bochs.extent);
-    if (s->extent_size < BDRV_SECTOR_SIZE) {
-        /* bximage actually never creates extents smaller than 4k */
-        error_setg(errp, "Extent size must be at least 512");
-        ret = -EINVAL;
-        goto fail;
-    } else if (!is_power_of_2(s->extent_size)) {
-        error_setg(errp, "Extent size %" PRIu32 " is not a power of two",
-                   s->extent_size);
-        ret = -EINVAL;
-        goto fail;
-    } else if (s->extent_size > 0x800000) {
-        error_setg(errp, "Extent size %" PRIu32 " is too large",
-                   s->extent_size);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    if (s->catalog_size < DIV_ROUND_UP(bs->total_sectors,
-                                       s->extent_size / BDRV_SECTOR_SIZE))
-    {
-        error_setg(errp, "Catalog size is too small for this disk size");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    qemu_co_mutex_init(&s->lock);
-    return 0;
-
-fail:
-    g_free(s->catalog_bitmap);
-    return ret;
-}
-
-static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
-{
-    BDRVBochsState *s = bs->opaque;
-    uint64_t offset = sector_num * 512;
-    uint64_t extent_index, extent_offset, bitmap_offset;
-    char bitmap_entry;
-    int ret;
-
-    // seek to sector
-    extent_index = offset / s->extent_size;
-    extent_offset = (offset % s->extent_size) / 512;
-
-    if (s->catalog_bitmap[extent_index] == 0xffffffff) {
-	return 0; /* not allocated */
-    }
-
-    bitmap_offset = s->data_offset +
-        (512 * (uint64_t) s->catalog_bitmap[extent_index] *
-        (s->extent_blocks + s->bitmap_blocks));
-
-    /* read in bitmap for current extent */
-    ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
-                     &bitmap_entry, 1);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (!((bitmap_entry >> (extent_offset % 8)) & 1)) {
-	return 0; /* not allocated */
-    }
-
-    return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
-}
-
-static int bochs_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    int ret;
-
-    while (nb_sectors > 0) {
-        int64_t block_offset = seek_to_sector(bs, sector_num);
-        if (block_offset < 0) {
-            return block_offset;
-        } else if (block_offset > 0) {
-            ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
-            if (ret < 0) {
-                return ret;
-            }
-        } else {
-            memset(buf, 0, 512);
-        }
-        nb_sectors--;
-        sector_num++;
-        buf += 512;
-    }
-    return 0;
-}
-
-static coroutine_fn int bochs_co_read(BlockDriverState *bs, int64_t sector_num,
-                                      uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVBochsState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = bochs_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-static void bochs_close(BlockDriverState *bs)
-{
-    BDRVBochsState *s = bs->opaque;
-    g_free(s->catalog_bitmap);
-}
-
-static BlockDriver bdrv_bochs = {
-    .format_name	= "bochs",
-    .instance_size	= sizeof(BDRVBochsState),
-    .bdrv_probe		= bochs_probe,
-    .bdrv_open		= bochs_open,
-    .bdrv_read          = bochs_co_read,
-    .bdrv_close		= bochs_close,
-};
-
-static void bdrv_bochs_init(void)
-{
-    bdrv_register(&bdrv_bochs);
-}
-
-block_init(bdrv_bochs_init);
diff --git a/qemu/block/cloop.c b/qemu/block/cloop.c
deleted file mode 100644
index a84f14019..000000000
--- a/qemu/block/cloop.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * QEMU Block driver for CLOOP images
- *
- * Copyright (c) 2004 Johannes E. Schindelin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include <zlib.h>
-
-/* Maximum compressed block size */
-#define MAX_BLOCK_SIZE (64 * 1024 * 1024)
-
-typedef struct BDRVCloopState {
-    CoMutex lock;
-    uint32_t block_size;
-    uint32_t n_blocks;
-    uint64_t *offsets;
-    uint32_t sectors_per_block;
-    uint32_t current_block;
-    uint8_t *compressed_block;
-    uint8_t *uncompressed_block;
-    z_stream zstream;
-} BDRVCloopState;
-
-static int cloop_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const char *magic_version_2_0 = "#!/bin/sh\n"
-        "#V2.0 Format\n"
-        "modprobe cloop file=$0 && mount -r -t iso9660 /dev/cloop $1\n";
-    int length = strlen(magic_version_2_0);
-    if (length > buf_size) {
-        length = buf_size;
-    }
-    if (!memcmp(magic_version_2_0, buf, length)) {
-        return 2;
-    }
-    return 0;
-}
-
-static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    BDRVCloopState *s = bs->opaque;
-    uint32_t offsets_size, max_compressed_block_size = 1, i;
-    int ret;
-
-    bs->read_only = 1;
-
-    /* read header */
-    ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
-    if (ret < 0) {
-        return ret;
-    }
-    s->block_size = be32_to_cpu(s->block_size);
-    if (s->block_size % 512) {
-        error_setg(errp, "block_size %" PRIu32 " must be a multiple of 512",
-                   s->block_size);
-        return -EINVAL;
-    }
-    if (s->block_size == 0) {
-        error_setg(errp, "block_size cannot be zero");
-        return -EINVAL;
-    }
-
-    /* cloop's create_compressed_fs.c warns about block sizes beyond 256 KB but
-     * we can accept more.  Prevent ridiculous values like 4 GB - 1 since we
-     * need a buffer this big.
-     */
-    if (s->block_size > MAX_BLOCK_SIZE) {
-        error_setg(errp, "block_size %" PRIu32 " must be %u MB or less",
-                   s->block_size,
-                   MAX_BLOCK_SIZE / (1024 * 1024));
-        return -EINVAL;
-    }
-
-    ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
-    if (ret < 0) {
-        return ret;
-    }
-    s->n_blocks = be32_to_cpu(s->n_blocks);
-
-    /* read offsets */
-    if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
-        /* Prevent integer overflow */
-        error_setg(errp, "n_blocks %" PRIu32 " must be %zu or less",
-                   s->n_blocks,
-                   (UINT32_MAX - 1) / sizeof(uint64_t));
-        return -EINVAL;
-    }
-    offsets_size = (s->n_blocks + 1) * sizeof(uint64_t);
-    if (offsets_size > 512 * 1024 * 1024) {
-        /* Prevent ridiculous offsets_size which causes memory allocation to
-         * fail or overflows bdrv_pread() size.  In practice the 512 MB
-         * offsets[] limit supports 16 TB images at 256 KB block size.
-         */
-        error_setg(errp, "image requires too many offsets, "
-                   "try increasing block size");
-        return -EINVAL;
-    }
-
-    s->offsets = g_try_malloc(offsets_size);
-    if (s->offsets == NULL) {
-        error_setg(errp, "Could not allocate offsets table");
-        return -ENOMEM;
-    }
-
-    ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    for (i = 0; i < s->n_blocks + 1; i++) {
-        uint64_t size;
-
-        s->offsets[i] = be64_to_cpu(s->offsets[i]);
-        if (i == 0) {
-            continue;
-        }
-
-        if (s->offsets[i] < s->offsets[i - 1]) {
-            error_setg(errp, "offsets not monotonically increasing at "
-                       "index %" PRIu32 ", image file is corrupt", i);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        size = s->offsets[i] - s->offsets[i - 1];
-
-        /* Compressed blocks should be smaller than the uncompressed block size
-         * but maybe compression performed poorly so the compressed block is
-         * actually bigger.  Clamp down on unrealistic values to prevent
-         * ridiculous s->compressed_block allocation.
-         */
-        if (size > 2 * MAX_BLOCK_SIZE) {
-            error_setg(errp, "invalid compressed block size at index %" PRIu32
-                       ", image file is corrupt", i);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        if (size > max_compressed_block_size) {
-            max_compressed_block_size = size;
-        }
-    }
-
-    /* initialize zlib engine */
-    s->compressed_block = g_try_malloc(max_compressed_block_size + 1);
-    if (s->compressed_block == NULL) {
-        error_setg(errp, "Could not allocate compressed_block");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    s->uncompressed_block = g_try_malloc(s->block_size);
-    if (s->uncompressed_block == NULL) {
-        error_setg(errp, "Could not allocate uncompressed_block");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    if (inflateInit(&s->zstream) != Z_OK) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    s->current_block = s->n_blocks;
-
-    s->sectors_per_block = s->block_size/512;
-    bs->total_sectors = s->n_blocks * s->sectors_per_block;
-    qemu_co_mutex_init(&s->lock);
-    return 0;
-
-fail:
-    g_free(s->offsets);
-    g_free(s->compressed_block);
-    g_free(s->uncompressed_block);
-    return ret;
-}
-
-static inline int cloop_read_block(BlockDriverState *bs, int block_num)
-{
-    BDRVCloopState *s = bs->opaque;
-
-    if (s->current_block != block_num) {
-        int ret;
-        uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
-
-        ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
-                         s->compressed_block, bytes);
-        if (ret != bytes) {
-            return -1;
-        }
-
-        s->zstream.next_in = s->compressed_block;
-        s->zstream.avail_in = bytes;
-        s->zstream.next_out = s->uncompressed_block;
-        s->zstream.avail_out = s->block_size;
-        ret = inflateReset(&s->zstream);
-        if (ret != Z_OK) {
-            return -1;
-        }
-        ret = inflate(&s->zstream, Z_FINISH);
-        if (ret != Z_STREAM_END || s->zstream.total_out != s->block_size) {
-            return -1;
-        }
-
-        s->current_block = block_num;
-    }
-    return 0;
-}
-
-static int cloop_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVCloopState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < nb_sectors; i++) {
-        uint32_t sector_offset_in_block =
-            ((sector_num + i) % s->sectors_per_block),
-            block_num = (sector_num + i) / s->sectors_per_block;
-        if (cloop_read_block(bs, block_num) != 0) {
-            return -1;
-        }
-        memcpy(buf + i * 512,
-            s->uncompressed_block + sector_offset_in_block * 512, 512);
-    }
-    return 0;
-}
-
-static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
-                                      uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVCloopState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = cloop_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-static void cloop_close(BlockDriverState *bs)
-{
-    BDRVCloopState *s = bs->opaque;
-    g_free(s->offsets);
-    g_free(s->compressed_block);
-    g_free(s->uncompressed_block);
-    inflateEnd(&s->zstream);
-}
-
-static BlockDriver bdrv_cloop = {
-    .format_name    = "cloop",
-    .instance_size  = sizeof(BDRVCloopState),
-    .bdrv_probe     = cloop_probe,
-    .bdrv_open      = cloop_open,
-    .bdrv_read      = cloop_co_read,
-    .bdrv_close     = cloop_close,
-};
-
-static void bdrv_cloop_init(void)
-{
-    bdrv_register(&bdrv_cloop);
-}
-
-block_init(bdrv_cloop_init);
diff --git a/qemu/block/commit.c b/qemu/block/commit.c
deleted file mode 100644
index cba0e8c1e..000000000
--- a/qemu/block/commit.c
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Live block commit
- *
- * Copyright Red Hat, Inc. 2012
- *
- * Authors:
- *  Jeff Cody   <jcody@redhat.com>
- *  Based on stream.c by Stefan Hajnoczi
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "trace.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
-#include "qemu/ratelimit.h"
-#include "sysemu/block-backend.h"
-
-enum {
-    /*
-     * Size of data buffer for populating the image file.  This should be large
-     * enough to process multiple clusters in a single call, so that populating
-     * contiguous regions of the image is efficient.
-     */
-    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
-};
-
-#define SLICE_TIME 100000000ULL /* ns */
-
-typedef struct CommitBlockJob {
-    BlockJob common;
-    RateLimit limit;
-    BlockDriverState *active;
-    BlockDriverState *top;
-    BlockDriverState *base;
-    BlockdevOnError on_error;
-    int base_flags;
-    int orig_overlay_flags;
-    char *backing_file_str;
-} CommitBlockJob;
-
-static int coroutine_fn commit_populate(BlockDriverState *bs,
-                                        BlockDriverState *base,
-                                        int64_t sector_num, int nb_sectors,
-                                        void *buf)
-{
-    int ret = 0;
-
-    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
-    if (ret) {
-        return ret;
-    }
-
-    ret = bdrv_write(base, sector_num, buf, nb_sectors);
-    if (ret) {
-        return ret;
-    }
-
-    return 0;
-}
-
-typedef struct {
-    int ret;
-} CommitCompleteData;
-
-static void commit_complete(BlockJob *job, void *opaque)
-{
-    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
-    CommitCompleteData *data = opaque;
-    BlockDriverState *active = s->active;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
-    BlockDriverState *overlay_bs;
-    int ret = data->ret;
-
-    if (!block_job_is_cancelled(&s->common) && ret == 0) {
-        /* success */
-        ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
-    }
-
-    /* restore base open flags here if appropriate (e.g., change the base back
-     * to r/o). These reopens do not need to be atomic, since we won't abort
-     * even on failure here */
-    if (s->base_flags != bdrv_get_flags(base)) {
-        bdrv_reopen(base, s->base_flags, NULL);
-    }
-    overlay_bs = bdrv_find_overlay(active, top);
-    if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
-        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
-    }
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, ret);
-    g_free(data);
-}
-
-static void coroutine_fn commit_run(void *opaque)
-{
-    CommitBlockJob *s = opaque;
-    CommitCompleteData *data;
-    BlockDriverState *top = s->top;
-    BlockDriverState *base = s->base;
-    int64_t sector_num, end;
-    int ret = 0;
-    int n = 0;
-    void *buf = NULL;
-    int bytes_written = 0;
-    int64_t base_len;
-
-    ret = s->common.len = bdrv_getlength(top);
-
-
-    if (s->common.len < 0) {
-        goto out;
-    }
-
-    ret = base_len = bdrv_getlength(base);
-    if (base_len < 0) {
-        goto out;
-    }
-
-    if (base_len < s->common.len) {
-        ret = bdrv_truncate(base, s->common.len);
-        if (ret) {
-            goto out;
-        }
-    }
-
-    end = s->common.len >> BDRV_SECTOR_BITS;
-    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
-
-    for (sector_num = 0; sector_num < end; sector_num += n) {
-        uint64_t delay_ns = 0;
-        bool copy;
-
-wait:
-        /* Note that even when no rate limit is applied we need to yield
-         * with no pending I/O here so that bdrv_drain_all() returns.
-         */
-        block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
-        if (block_job_is_cancelled(&s->common)) {
-            break;
-        }
-        /* Copy if allocated above the base */
-        ret = bdrv_is_allocated_above(top, base, sector_num,
-                                      COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
-                                      &n);
-        copy = (ret == 1);
-        trace_commit_one_iteration(s, sector_num, n, ret);
-        if (copy) {
-            if (s->common.speed) {
-                delay_ns = ratelimit_calculate_delay(&s->limit, n);
-                if (delay_ns > 0) {
-                    goto wait;
-                }
-            }
-            ret = commit_populate(top, base, sector_num, n, buf);
-            bytes_written += n * BDRV_SECTOR_SIZE;
-        }
-        if (ret < 0) {
-            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
-                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
-                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
-                goto out;
-            } else {
-                n = 0;
-                continue;
-            }
-        }
-        /* Publish progress */
-        s->common.offset += n * BDRV_SECTOR_SIZE;
-    }
-
-    ret = 0;
-
-out:
-    qemu_vfree(buf);
-
-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    block_job_defer_to_main_loop(&s->common, commit_complete, data);
-}
-
-static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
-
-    if (speed < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-        return;
-    }
-    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
-}
-
-static const BlockJobDriver commit_job_driver = {
-    .instance_size = sizeof(CommitBlockJob),
-    .job_type      = BLOCK_JOB_TYPE_COMMIT,
-    .set_speed     = commit_set_speed,
-};
-
-void commit_start(BlockDriverState *bs, BlockDriverState *base,
-                  BlockDriverState *top, int64_t speed,
-                  BlockdevOnError on_error, BlockCompletionFunc *cb,
-                  void *opaque, const char *backing_file_str, Error **errp)
-{
-    CommitBlockJob *s;
-    BlockReopenQueue *reopen_queue = NULL;
-    int orig_overlay_flags;
-    int orig_base_flags;
-    BlockDriverState *overlay_bs;
-    Error *local_err = NULL;
-
-    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, "Invalid parameter combination");
-        return;
-    }
-
-    assert(top != bs);
-    if (top == base) {
-        error_setg(errp, "Invalid files for merge: top and base are the same");
-        return;
-    }
-
-    overlay_bs = bdrv_find_overlay(bs, top);
-
-    if (overlay_bs == NULL) {
-        error_setg(errp, "Could not find overlay image for %s:", top->filename);
-        return;
-    }
-
-    orig_base_flags    = bdrv_get_flags(base);
-    orig_overlay_flags = bdrv_get_flags(overlay_bs);
-
-    /* convert base & overlay_bs to r/w, if necessary */
-    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
-        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
-                                         orig_overlay_flags | BDRV_O_RDWR);
-    }
-    if (!(orig_base_flags & BDRV_O_RDWR)) {
-        reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
-                                         orig_base_flags | BDRV_O_RDWR);
-    }
-    if (reopen_queue) {
-        bdrv_reopen_multiple(reopen_queue, &local_err);
-        if (local_err != NULL) {
-            error_propagate(errp, local_err);
-            return;
-        }
-    }
-
-
-    s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
-    if (!s) {
-        return;
-    }
-
-    s->base   = base;
-    s->top    = top;
-    s->active = bs;
-
-    s->base_flags          = orig_base_flags;
-    s->orig_overlay_flags  = orig_overlay_flags;
-
-    s->backing_file_str = g_strdup(backing_file_str);
-
-    s->on_error = on_error;
-    s->common.co = qemu_coroutine_create(commit_run);
-
-    trace_commit_start(bs, base, top, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co, s);
-}
diff --git a/qemu/block/crypto.c b/qemu/block/crypto.c
deleted file mode 100644
index 1903e84fb..000000000
--- a/qemu/block/crypto.c
+++ /dev/null
@@ -1,586 +0,0 @@
-/*
- * QEMU block full disk encryption
- *
- * Copyright (c) 2015-2016 Red Hat, Inc.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- *
- */
-
-#include "qemu/osdep.h"
-
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "crypto/block.h"
-#include "qapi/opts-visitor.h"
-#include "qapi-visit.h"
-#include "qapi/error.h"
-
-#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
-#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
-
-typedef struct BlockCrypto BlockCrypto;
-
-struct BlockCrypto {
-    QCryptoBlock *block;
-};
-
-
-static int block_crypto_probe_generic(QCryptoBlockFormat format,
-                                      const uint8_t *buf,
-                                      int buf_size,
-                                      const char *filename)
-{
-    if (qcrypto_block_has_format(format, buf, buf_size)) {
-        return 100;
-    } else {
-        return 0;
-    }
-}
-
-
-static ssize_t block_crypto_read_func(QCryptoBlock *block,
-                                      size_t offset,
-                                      uint8_t *buf,
-                                      size_t buflen,
-                                      Error **errp,
-                                      void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    ssize_t ret;
-
-    ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not read encryption header");
-        return ret;
-    }
-    return ret;
-}
-
-
-struct BlockCryptoCreateData {
-    const char *filename;
-    QemuOpts *opts;
-    BlockBackend *blk;
-    uint64_t size;
-};
-
-
-static ssize_t block_crypto_write_func(QCryptoBlock *block,
-                                       size_t offset,
-                                       const uint8_t *buf,
-                                       size_t buflen,
-                                       Error **errp,
-                                       void *opaque)
-{
-    struct BlockCryptoCreateData *data = opaque;
-    ssize_t ret;
-
-    ret = blk_pwrite(data->blk, offset, buf, buflen);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not write encryption header");
-        return ret;
-    }
-    return ret;
-}
-
-
-static ssize_t block_crypto_init_func(QCryptoBlock *block,
-                                      size_t headerlen,
-                                      Error **errp,
-                                      void *opaque)
-{
-    struct BlockCryptoCreateData *data = opaque;
-    int ret;
-
-    /* User provided size should reflect amount of space made
-     * available to the guest, so we must take account of that
-     * which will be used by the crypto header
-     */
-    data->size += headerlen;
-
-    qemu_opt_set_number(data->opts, BLOCK_OPT_SIZE, data->size, &error_abort);
-    ret = bdrv_create_file(data->filename, data->opts, errp);
-    if (ret < 0) {
-        return -1;
-    }
-
-    data->blk = blk_new_open(data->filename, NULL, NULL,
-                             BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
-    if (!data->blk) {
-        return -1;
-    }
-
-    return 0;
-}
-
-
-static QemuOptsList block_crypto_runtime_opts_luks = {
-    .name = "crypto",
-    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
-    .desc = {
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret that provides the encryption key",
-        },
-        { /* end of list */ }
-    },
-};
-
-
-static QemuOptsList block_crypto_create_opts_luks = {
-    .name = "crypto",
-    .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret that provides the encryption key",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption cipher algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption cipher mode",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of IV generator algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of IV generator hash algorithm",
-        },
-        {
-            .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
-            .type = QEMU_OPT_STRING,
-            .help = "Name of encryption hash algorithm",
-        },
-        { /* end of list */ }
-    },
-};
-
-
-static QCryptoBlockOpenOptions *
-block_crypto_open_opts_init(QCryptoBlockFormat format,
-                            QemuOpts *opts,
-                            Error **errp)
-{
-    OptsVisitor *ov;
-    QCryptoBlockOpenOptions *ret = NULL;
-    Error *local_err = NULL;
-    Error *end_err = NULL;
-
-    ret = g_new0(QCryptoBlockOpenOptions, 1);
-    ret->format = format;
-
-    ov = opts_visitor_new(opts);
-
-    visit_start_struct(opts_get_visitor(ov),
-                       NULL, NULL, 0, &local_err);
-    if (local_err) {
-        goto out;
-    }
-
-    switch (format) {
-    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
-        visit_type_QCryptoBlockOptionsLUKS_members(
-            opts_get_visitor(ov), &ret->u.luks, &local_err);
-        break;
-
-    default:
-        error_setg(&local_err, "Unsupported block format %d", format);
-        break;
-    }
-
-    visit_end_struct(opts_get_visitor(ov), &end_err);
-    error_propagate(&local_err, end_err);
-
- out:
-    if (local_err) {
-        error_propagate(errp, local_err);
-        qapi_free_QCryptoBlockOpenOptions(ret);
-        ret = NULL;
-    }
-    opts_visitor_cleanup(ov);
-    return ret;
-}
-
-
-static QCryptoBlockCreateOptions *
-block_crypto_create_opts_init(QCryptoBlockFormat format,
-                              QemuOpts *opts,
-                              Error **errp)
-{
-    OptsVisitor *ov;
-    QCryptoBlockCreateOptions *ret = NULL;
-    Error *local_err = NULL;
-    Error *end_err = NULL;
-
-    ret = g_new0(QCryptoBlockCreateOptions, 1);
-    ret->format = format;
-
-    ov = opts_visitor_new(opts);
-
-    visit_start_struct(opts_get_visitor(ov),
-                       NULL, NULL, 0, &local_err);
-    if (local_err) {
-        goto out;
-    }
-
-    switch (format) {
-    case Q_CRYPTO_BLOCK_FORMAT_LUKS:
-        visit_type_QCryptoBlockCreateOptionsLUKS_members(
-            opts_get_visitor(ov), &ret->u.luks, &local_err);
-        break;
-
-    default:
-        error_setg(&local_err, "Unsupported block format %d", format);
-        break;
-    }
-
-    visit_end_struct(opts_get_visitor(ov), &end_err);
-    error_propagate(&local_err, end_err);
-
- out:
-    if (local_err) {
-        error_propagate(errp, local_err);
-        qapi_free_QCryptoBlockCreateOptions(ret);
-        ret = NULL;
-    }
-    opts_visitor_cleanup(ov);
-    return ret;
-}
-
-
-static int block_crypto_open_generic(QCryptoBlockFormat format,
-                                     QemuOptsList *opts_spec,
-                                     BlockDriverState *bs,
-                                     QDict *options,
-                                     int flags,
-                                     Error **errp)
-{
-    BlockCrypto *crypto = bs->opaque;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    int ret = -EINVAL;
-    QCryptoBlockOpenOptions *open_opts = NULL;
-    unsigned int cflags = 0;
-
-    opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto cleanup;
-    }
-
-    open_opts = block_crypto_open_opts_init(format, opts, errp);
-    if (!open_opts) {
-        goto cleanup;
-    }
-
-    if (flags & BDRV_O_NO_IO) {
-        cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
-    }
-    crypto->block = qcrypto_block_open(open_opts,
-                                       block_crypto_read_func,
-                                       bs,
-                                       cflags,
-                                       errp);
-
-    if (!crypto->block) {
-        ret = -EIO;
-        goto cleanup;
-    }
-
-    bs->encrypted = 1;
-    bs->valid_key = 1;
-
-    ret = 0;
- cleanup:
-    qapi_free_QCryptoBlockOpenOptions(open_opts);
-    return ret;
-}
-
-
-static int block_crypto_create_generic(QCryptoBlockFormat format,
-                                       const char *filename,
-                                       QemuOpts *opts,
-                                       Error **errp)
-{
-    int ret = -EINVAL;
-    QCryptoBlockCreateOptions *create_opts = NULL;
-    QCryptoBlock *crypto = NULL;
-    struct BlockCryptoCreateData data = {
-        .size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                         BDRV_SECTOR_SIZE),
-        .opts = opts,
-        .filename = filename,
-    };
-
-    create_opts = block_crypto_create_opts_init(format, opts, errp);
-    if (!create_opts) {
-        return -1;
-    }
-
-    crypto = qcrypto_block_create(create_opts,
-                                  block_crypto_init_func,
-                                  block_crypto_write_func,
-                                  &data,
-                                  errp);
-
-    if (!crypto) {
-        ret = -EIO;
-        goto cleanup;
-    }
-
-    ret = 0;
- cleanup:
-    qcrypto_block_free(crypto);
-    blk_unref(data.blk);
-    qapi_free_QCryptoBlockCreateOptions(create_opts);
-    return ret;
-}
-
-static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BlockCrypto *crypto = bs->opaque;
-    size_t payload_offset =
-        qcrypto_block_get_payload_offset(crypto->block);
-
-    offset += payload_offset;
-
-    return bdrv_truncate(bs->file->bs, offset);
-}
-
-static void block_crypto_close(BlockDriverState *bs)
-{
-    BlockCrypto *crypto = bs->opaque;
-    qcrypto_block_free(crypto->block);
-}
-
-
-#define BLOCK_CRYPTO_MAX_SECTORS 32
-
-static coroutine_fn int
-block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
-                      int remaining_sectors, QEMUIOVector *qiov)
-{
-    BlockCrypto *crypto = bs->opaque;
-    int cur_nr_sectors; /* number of sectors in current iteration */
-    uint64_t bytes_done = 0;
-    uint8_t *cipher_data = NULL;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-    size_t payload_offset =
-        qcrypto_block_get_payload_offset(crypto->block) / 512;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    /* Bounce buffer so we have a linear mem region for
-     * entire sector. XXX optimize so we avoid bounce
-     * buffer in case that qiov->niov == 1
-     */
-    cipher_data =
-        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
-                                              qiov->size));
-    if (cipher_data == NULL) {
-        ret = -ENOMEM;
-        goto cleanup;
-    }
-
-    while (remaining_sectors) {
-        cur_nr_sectors = remaining_sectors;
-
-        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
-            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
-        }
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
-
-        ret = bdrv_co_readv(bs->file->bs,
-                            payload_offset + sector_num,
-                            cur_nr_sectors, &hd_qiov);
-        if (ret < 0) {
-            goto cleanup;
-        }
-
-        if (qcrypto_block_decrypt(crypto->block,
-                                  sector_num,
-                                  cipher_data, cur_nr_sectors * 512,
-                                  NULL) < 0) {
-            ret = -EIO;
-            goto cleanup;
-        }
-
-        qemu_iovec_from_buf(qiov, bytes_done,
-                            cipher_data, cur_nr_sectors * 512);
-
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
-    }
-
- cleanup:
-    qemu_iovec_destroy(&hd_qiov);
-    qemu_vfree(cipher_data);
-
-    return ret;
-}
-
-
-static coroutine_fn int
-block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
-                       int remaining_sectors, QEMUIOVector *qiov)
-{
-    BlockCrypto *crypto = bs->opaque;
-    int cur_nr_sectors; /* number of sectors in current iteration */
-    uint64_t bytes_done = 0;
-    uint8_t *cipher_data = NULL;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-    size_t payload_offset =
-        qcrypto_block_get_payload_offset(crypto->block) / 512;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    /* Bounce buffer so we have a linear mem region for
-     * entire sector. XXX optimize so we avoid bounce
-     * buffer in case that qiov->niov == 1
-     */
-    cipher_data =
-        qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
-                                              qiov->size));
-    if (cipher_data == NULL) {
-        ret = -ENOMEM;
-        goto cleanup;
-    }
-
-    while (remaining_sectors) {
-        cur_nr_sectors = remaining_sectors;
-
-        if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
-            cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
-        }
-
-        qemu_iovec_to_buf(qiov, bytes_done,
-                          cipher_data, cur_nr_sectors * 512);
-
-        if (qcrypto_block_encrypt(crypto->block,
-                                  sector_num,
-                                  cipher_data, cur_nr_sectors * 512,
-                                  NULL) < 0) {
-            ret = -EIO;
-            goto cleanup;
-        }
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
-
-        ret = bdrv_co_writev(bs->file->bs,
-                             payload_offset + sector_num,
-                             cur_nr_sectors, &hd_qiov);
-        if (ret < 0) {
-            goto cleanup;
-        }
-
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
-    }
-
- cleanup:
-    qemu_iovec_destroy(&hd_qiov);
-    qemu_vfree(cipher_data);
-
-    return ret;
-}
-
-
-static int64_t block_crypto_getlength(BlockDriverState *bs)
-{
-    BlockCrypto *crypto = bs->opaque;
-    int64_t len = bdrv_getlength(bs->file->bs);
-
-    ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
-
-    len -= offset;
-
-    return len;
-}
-
-
-static int block_crypto_probe_luks(const uint8_t *buf,
-                                   int buf_size,
-                                   const char *filename) {
-    return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
-                                      buf, buf_size, filename);
-}
-
-static int block_crypto_open_luks(BlockDriverState *bs,
-                                  QDict *options,
-                                  int flags,
-                                  Error **errp)
-{
-    return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
-                                     &block_crypto_runtime_opts_luks,
-                                     bs, options, flags, errp);
-}
-
-static int block_crypto_create_luks(const char *filename,
-                                    QemuOpts *opts,
-                                    Error **errp)
-{
-    return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
-                                       filename, opts, errp);
-}
-
-BlockDriver bdrv_crypto_luks = {
-    .format_name        = "luks",
-    .instance_size      = sizeof(BlockCrypto),
-    .bdrv_probe         = block_crypto_probe_luks,
-    .bdrv_open          = block_crypto_open_luks,
-    .bdrv_close         = block_crypto_close,
-    .bdrv_create        = block_crypto_create_luks,
-    .bdrv_truncate      = block_crypto_truncate,
-    .create_opts        = &block_crypto_create_opts_luks,
-
-    .bdrv_co_readv      = block_crypto_co_readv,
-    .bdrv_co_writev     = block_crypto_co_writev,
-    .bdrv_getlength     = block_crypto_getlength,
-};
-
-static void block_crypto_init(void)
-{
-    bdrv_register(&bdrv_crypto_luks);
-}
-
-block_init(block_crypto_init);
diff --git a/qemu/block/curl.c b/qemu/block/curl.c
deleted file mode 100644
index 5a8f8b623..000000000
--- a/qemu/block/curl.c
+++ /dev/null
@@ -1,896 +0,0 @@
-/*
- * QEMU Block driver for CURL images
- *
- * Copyright (c) 2009 Alexander Graf <agraf@suse.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "block/block_int.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qstring.h"
-#include "crypto/secret.h"
-#include <curl/curl.h>
-#include "qemu/cutils.h"
-
-// #define DEBUG_CURL
-// #define DEBUG_VERBOSE
-
-#ifdef DEBUG_CURL
-#define DPRINTF(fmt, ...) do { printf(fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) do { } while (0)
-#endif
-
-#if LIBCURL_VERSION_NUM >= 0x071000
-/* The multi interface timer callback was introduced in 7.16.0 */
-#define NEED_CURL_TIMER_CALLBACK
-#define HAVE_SOCKET_ACTION
-#endif
-
-#ifndef HAVE_SOCKET_ACTION
-/* If curl_multi_socket_action isn't available, define it statically here in
- * terms of curl_multi_socket. Note that ev_bitmask will be ignored, which is
- * less efficient but still safe. */
-static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
-                                            curl_socket_t sockfd,
-                                            int ev_bitmask,
-                                            int *running_handles)
-{
-    return curl_multi_socket(multi_handle, sockfd, running_handles);
-}
-#define curl_multi_socket_action __curl_multi_socket_action
-#endif
-
-#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
-                   CURLPROTO_FTP | CURLPROTO_FTPS | \
-                   CURLPROTO_TFTP)
-
-#define CURL_NUM_STATES 8
-#define CURL_NUM_ACB    8
-#define SECTOR_SIZE     512
-#define READ_AHEAD_DEFAULT (256 * 1024)
-#define CURL_TIMEOUT_DEFAULT 5
-#define CURL_TIMEOUT_MAX 10000
-
-#define FIND_RET_NONE   0
-#define FIND_RET_OK     1
-#define FIND_RET_WAIT   2
-
-#define CURL_BLOCK_OPT_URL       "url"
-#define CURL_BLOCK_OPT_READAHEAD "readahead"
-#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
-#define CURL_BLOCK_OPT_TIMEOUT "timeout"
-#define CURL_BLOCK_OPT_COOKIE    "cookie"
-#define CURL_BLOCK_OPT_USERNAME "username"
-#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
-#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
-#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
-
-struct BDRVCURLState;
-
-typedef struct CURLAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    QEMUIOVector *qiov;
-
-    int64_t sector_num;
-    int nb_sectors;
-
-    size_t start;
-    size_t end;
-} CURLAIOCB;
-
-typedef struct CURLState
-{
-    struct BDRVCURLState *s;
-    CURLAIOCB *acb[CURL_NUM_ACB];
-    CURL *curl;
-    curl_socket_t sock_fd;
-    char *orig_buf;
-    size_t buf_start;
-    size_t buf_off;
-    size_t buf_len;
-    char range[128];
-    char errmsg[CURL_ERROR_SIZE];
-    char in_use;
-} CURLState;
-
-typedef struct BDRVCURLState {
-    CURLM *multi;
-    QEMUTimer timer;
-    size_t len;
-    CURLState states[CURL_NUM_STATES];
-    char *url;
-    size_t readahead_size;
-    bool sslverify;
-    uint64_t timeout;
-    char *cookie;
-    bool accept_range;
-    AioContext *aio_context;
-    char *username;
-    char *password;
-    char *proxyusername;
-    char *proxypassword;
-} BDRVCURLState;
-
-static void curl_clean_state(CURLState *s);
-static void curl_multi_do(void *arg);
-static void curl_multi_read(void *arg);
-
-#ifdef NEED_CURL_TIMER_CALLBACK
-static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
-{
-    BDRVCURLState *s = opaque;
-
-    DPRINTF("CURL: timer callback timeout_ms %ld\n", timeout_ms);
-    if (timeout_ms == -1) {
-        timer_del(&s->timer);
-    } else {
-        int64_t timeout_ns = (int64_t)timeout_ms * 1000 * 1000;
-        timer_mod(&s->timer,
-                  qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ns);
-    }
-    return 0;
-}
-#endif
-
-static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
-                        void *userp, void *sp)
-{
-    BDRVCURLState *s;
-    CURLState *state = NULL;
-    curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
-    state->sock_fd = fd;
-    s = state->s;
-
-    DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
-    switch (action) {
-        case CURL_POLL_IN:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, NULL, state);
-            break;
-        case CURL_POLL_OUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, curl_multi_do, state);
-            break;
-        case CURL_POLL_INOUT:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               curl_multi_read, curl_multi_do, state);
-            break;
-        case CURL_POLL_REMOVE:
-            aio_set_fd_handler(s->aio_context, fd, false,
-                               NULL, NULL, NULL);
-            break;
-    }
-
-    return 0;
-}
-
-static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
-{
-    BDRVCURLState *s = opaque;
-    size_t realsize = size * nmemb;
-    const char *accept_line = "Accept-Ranges: bytes";
-
-    if (realsize >= strlen(accept_line)
-        && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) {
-        s->accept_range = true;
-    }
-
-    return realsize;
-}
-
-static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
-{
-    CURLState *s = ((CURLState*)opaque);
-    size_t realsize = size * nmemb;
-    int i;
-
-    DPRINTF("CURL: Just reading %zd bytes\n", realsize);
-
-    if (!s || !s->orig_buf)
-        return 0;
-
-    if (s->buf_off >= s->buf_len) {
-        /* buffer full, read nothing */
-        return 0;
-    }
-    realsize = MIN(realsize, s->buf_len - s->buf_off);
-    memcpy(s->orig_buf + s->buf_off, ptr, realsize);
-    s->buf_off += realsize;
-
-    for(i=0; i<CURL_NUM_ACB; i++) {
-        CURLAIOCB *acb = s->acb[i];
-
-        if (!acb)
-            continue;
-
-        if ((s->buf_off >= acb->end)) {
-            qemu_iovec_from_buf(acb->qiov, 0, s->orig_buf + acb->start,
-                                acb->end - acb->start);
-            acb->common.cb(acb->common.opaque, 0);
-            qemu_aio_unref(acb);
-            s->acb[i] = NULL;
-        }
-    }
-
-    return realsize;
-}
-
-static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
-                         CURLAIOCB *acb)
-{
-    int i;
-    size_t end = start + len;
-
-    for (i=0; i<CURL_NUM_STATES; i++) {
-        CURLState *state = &s->states[i];
-        size_t buf_end = (state->buf_start + state->buf_off);
-        size_t buf_fend = (state->buf_start + state->buf_len);
-
-        if (!state->orig_buf)
-            continue;
-        if (!state->buf_off)
-            continue;
-
-        // Does the existing buffer cover our section?
-        if ((start >= state->buf_start) &&
-            (start <= buf_end) &&
-            (end >= state->buf_start) &&
-            (end <= buf_end))
-        {
-            char *buf = state->orig_buf + (start - state->buf_start);
-
-            qemu_iovec_from_buf(acb->qiov, 0, buf, len);
-            acb->common.cb(acb->common.opaque, 0);
-
-            return FIND_RET_OK;
-        }
-
-        // Wait for unfinished chunks
-        if (state->in_use &&
-            (start >= state->buf_start) &&
-            (start <= buf_fend) &&
-            (end >= state->buf_start) &&
-            (end <= buf_fend))
-        {
-            int j;
-
-            acb->start = start - state->buf_start;
-            acb->end = acb->start + len;
-
-            for (j=0; j<CURL_NUM_ACB; j++) {
-                if (!state->acb[j]) {
-                    state->acb[j] = acb;
-                    return FIND_RET_WAIT;
-                }
-            }
-        }
-    }
-
-    return FIND_RET_NONE;
-}
-
-static void curl_multi_check_completion(BDRVCURLState *s)
-{
-    int msgs_in_queue;
-
-    /* Try to find done transfers, so we can free the easy
-     * handle again. */
-    for (;;) {
-        CURLMsg *msg;
-        msg = curl_multi_info_read(s->multi, &msgs_in_queue);
-
-        /* Quit when there are no more completions */
-        if (!msg)
-            break;
-
-        if (msg->msg == CURLMSG_DONE) {
-            CURLState *state = NULL;
-            curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE,
-                              (char **)&state);
-
-            /* ACBs for successful messages get completed in curl_read_cb */
-            if (msg->data.result != CURLE_OK) {
-                int i;
-                static int errcount = 100;
-
-                /* Don't lose the original error message from curl, since
-                 * it contains extra data.
-                 */
-                if (errcount > 0) {
-                    error_report("curl: %s", state->errmsg);
-                    if (--errcount == 0) {
-                        error_report("curl: further errors suppressed");
-                    }
-                }
-
-                for (i = 0; i < CURL_NUM_ACB; i++) {
-                    CURLAIOCB *acb = state->acb[i];
-
-                    if (acb == NULL) {
-                        continue;
-                    }
-
-                    acb->common.cb(acb->common.opaque, -EPROTO);
-                    qemu_aio_unref(acb);
-                    state->acb[i] = NULL;
-                }
-            }
-
-            curl_clean_state(state);
-            break;
-        }
-    }
-}
-
-static void curl_multi_do(void *arg)
-{
-    CURLState *s = (CURLState *)arg;
-    int running;
-    int r;
-
-    if (!s->s->multi) {
-        return;
-    }
-
-    do {
-        r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running);
-    } while(r == CURLM_CALL_MULTI_PERFORM);
-
-}
-
-static void curl_multi_read(void *arg)
-{
-    CURLState *s = (CURLState *)arg;
-
-    curl_multi_do(arg);
-    curl_multi_check_completion(s->s);
-}
-
-static void curl_multi_timeout_do(void *arg)
-{
-#ifdef NEED_CURL_TIMER_CALLBACK
-    BDRVCURLState *s = (BDRVCURLState *)arg;
-    int running;
-
-    if (!s->multi) {
-        return;
-    }
-
-    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
-
-    curl_multi_check_completion(s);
-#else
-    abort();
-#endif
-}
-
-static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
-{
-    CURLState *state = NULL;
-    int i, j;
-
-    do {
-        for (i=0; i<CURL_NUM_STATES; i++) {
-            for (j=0; j<CURL_NUM_ACB; j++)
-                if (s->states[i].acb[j])
-                    continue;
-            if (s->states[i].in_use)
-                continue;
-
-            state = &s->states[i];
-            state->in_use = 1;
-            break;
-        }
-        if (!state) {
-            aio_poll(bdrv_get_aio_context(bs), true);
-        }
-    } while(!state);
-
-    if (!state->curl) {
-        state->curl = curl_easy_init();
-        if (!state->curl) {
-            return NULL;
-        }
-        curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
-        curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
-                         (long) s->sslverify);
-        if (s->cookie) {
-            curl_easy_setopt(state->curl, CURLOPT_COOKIE, s->cookie);
-        }
-        curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, (long)s->timeout);
-        curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
-                         (void *)curl_read_cb);
-        curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
-        curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
-        curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
-        curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
-        curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
-        curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
-        curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
-
-        if (s->username) {
-            curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
-        }
-        if (s->password) {
-            curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
-        }
-        if (s->proxyusername) {
-            curl_easy_setopt(state->curl,
-                             CURLOPT_PROXYUSERNAME, s->proxyusername);
-        }
-        if (s->proxypassword) {
-            curl_easy_setopt(state->curl,
-                             CURLOPT_PROXYPASSWORD, s->proxypassword);
-        }
-
-        /* Restrict supported protocols to avoid security issues in the more
-         * obscure protocols.  For example, do not allow POP3/SMTP/IMAP see
-         * CVE-2013-0249.
-         *
-         * Restricting protocols is only supported from 7.19.4 upwards.
-         */
-#if LIBCURL_VERSION_NUM >= 0x071304
-        curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
-        curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
-#endif
-
-#ifdef DEBUG_VERBOSE
-        curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
-#endif
-    }
-
-    state->s = s;
-
-    return state;
-}
-
-static void curl_clean_state(CURLState *s)
-{
-    if (s->s->multi)
-        curl_multi_remove_handle(s->s->multi, s->curl);
-    s->in_use = 0;
-}
-
-static void curl_parse_filename(const char *filename, QDict *options,
-                                Error **errp)
-{
-    qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
-}
-
-static void curl_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVCURLState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < CURL_NUM_STATES; i++) {
-        if (s->states[i].in_use) {
-            curl_clean_state(&s->states[i]);
-        }
-        if (s->states[i].curl) {
-            curl_easy_cleanup(s->states[i].curl);
-            s->states[i].curl = NULL;
-        }
-        g_free(s->states[i].orig_buf);
-        s->states[i].orig_buf = NULL;
-    }
-    if (s->multi) {
-        curl_multi_cleanup(s->multi);
-        s->multi = NULL;
-    }
-
-    timer_del(&s->timer);
-}
-
-static void curl_attach_aio_context(BlockDriverState *bs,
-                                    AioContext *new_context)
-{
-    BDRVCURLState *s = bs->opaque;
-
-    aio_timer_init(new_context, &s->timer,
-                   QEMU_CLOCK_REALTIME, SCALE_NS,
-                   curl_multi_timeout_do, s);
-
-    assert(!s->multi);
-    s->multi = curl_multi_init();
-    s->aio_context = new_context;
-    curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
-    curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
-}
-
-static QemuOptsList runtime_opts = {
-    .name = "curl",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = CURL_BLOCK_OPT_URL,
-            .type = QEMU_OPT_STRING,
-            .help = "URL to open",
-        },
-        {
-            .name = CURL_BLOCK_OPT_READAHEAD,
-            .type = QEMU_OPT_SIZE,
-            .help = "Readahead size",
-        },
-        {
-            .name = CURL_BLOCK_OPT_SSLVERIFY,
-            .type = QEMU_OPT_BOOL,
-            .help = "Verify SSL certificate"
-        },
-        {
-            .name = CURL_BLOCK_OPT_TIMEOUT,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Curl timeout"
-        },
-        {
-            .name = CURL_BLOCK_OPT_COOKIE,
-            .type = QEMU_OPT_STRING,
-            .help = "Pass the cookie or list of cookies with each request"
-        },
-        {
-            .name = CURL_BLOCK_OPT_USERNAME,
-            .type = QEMU_OPT_STRING,
-            .help = "Username for HTTP auth"
-        },
-        {
-            .name = CURL_BLOCK_OPT_PASSWORD_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret used as password for HTTP auth",
-        },
-        {
-            .name = CURL_BLOCK_OPT_PROXY_USERNAME,
-            .type = QEMU_OPT_STRING,
-            .help = "Username for HTTP proxy auth"
-        },
-        {
-            .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret used as password for HTTP proxy auth",
-        },
-        { /* end of list */ }
-    },
-};
-
-
-static int curl_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVCURLState *s = bs->opaque;
-    CURLState *state = NULL;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *file;
-    const char *cookie;
-    double d;
-    const char *secretid;
-
-    static int inited = 0;
-
-    if (flags & BDRV_O_RDWR) {
-        error_setg(errp, "curl block device does not support writes");
-        return -EROFS;
-    }
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        goto out_noclean;
-    }
-
-    s->readahead_size = qemu_opt_get_size(opts, CURL_BLOCK_OPT_READAHEAD,
-                                          READ_AHEAD_DEFAULT);
-    if ((s->readahead_size & 0x1ff) != 0) {
-        error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
-                   s->readahead_size);
-        goto out_noclean;
-    }
-
-    s->timeout = qemu_opt_get_number(opts, CURL_BLOCK_OPT_TIMEOUT,
-                                     CURL_TIMEOUT_DEFAULT);
-    if (s->timeout > CURL_TIMEOUT_MAX) {
-        error_setg(errp, "timeout parameter is too large or negative");
-        goto out_noclean;
-    }
-
-    s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
-
-    cookie = qemu_opt_get(opts, CURL_BLOCK_OPT_COOKIE);
-    s->cookie = g_strdup(cookie);
-
-    file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
-    if (file == NULL) {
-        error_setg(errp, "curl block driver requires an 'url' option");
-        goto out_noclean;
-    }
-
-    s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
-    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
-
-    if (secretid) {
-        s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
-        if (!s->password) {
-            goto out_noclean;
-        }
-    }
-
-    s->proxyusername = g_strdup(
-        qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
-    secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
-    if (secretid) {
-        s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
-        if (!s->proxypassword) {
-            goto out_noclean;
-        }
-    }
-
-    if (!inited) {
-        curl_global_init(CURL_GLOBAL_ALL);
-        inited = 1;
-    }
-
-    DPRINTF("CURL: Opening %s\n", file);
-    s->aio_context = bdrv_get_aio_context(bs);
-    s->url = g_strdup(file);
-    state = curl_init_state(bs, s);
-    if (!state)
-        goto out_noclean;
-
-    // Get file size
-
-    s->accept_range = false;
-    curl_easy_setopt(state->curl, CURLOPT_NOBODY, 1);
-    curl_easy_setopt(state->curl, CURLOPT_HEADERFUNCTION,
-                     curl_header_cb);
-    curl_easy_setopt(state->curl, CURLOPT_HEADERDATA, s);
-    if (curl_easy_perform(state->curl))
-        goto out;
-    curl_easy_getinfo(state->curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &d);
-    if (d)
-        s->len = (size_t)d;
-    else if(!s->len)
-        goto out;
-    if ((!strncasecmp(s->url, "http://", strlen("http://"))
-        || !strncasecmp(s->url, "https://", strlen("https://")))
-        && !s->accept_range) {
-        pstrcpy(state->errmsg, CURL_ERROR_SIZE,
-                "Server does not support 'range' (byte ranges).");
-        goto out;
-    }
-    DPRINTF("CURL: Size = %zd\n", s->len);
-
-    curl_clean_state(state);
-    curl_easy_cleanup(state->curl);
-    state->curl = NULL;
-
-    curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
-    qemu_opts_del(opts);
-    return 0;
-
-out:
-    error_setg(errp, "CURL: Error opening file: %s", state->errmsg);
-    curl_easy_cleanup(state->curl);
-    state->curl = NULL;
-out_noclean:
-    g_free(s->cookie);
-    g_free(s->url);
-    qemu_opts_del(opts);
-    return -EINVAL;
-}
-
-static const AIOCBInfo curl_aiocb_info = {
-    .aiocb_size         = sizeof(CURLAIOCB),
-};
-
-
-static void curl_readv_bh_cb(void *p)
-{
-    CURLState *state;
-    int running;
-
-    CURLAIOCB *acb = p;
-    BDRVCURLState *s = acb->common.bs->opaque;
-
-    qemu_bh_delete(acb->bh);
-    acb->bh = NULL;
-
-    size_t start = acb->sector_num * SECTOR_SIZE;
-    size_t end;
-
-    // In case we have the requested data already (e.g. read-ahead),
-    // we can just call the callback and be done.
-    switch (curl_find_buf(s, start, acb->nb_sectors * SECTOR_SIZE, acb)) {
-        case FIND_RET_OK:
-            qemu_aio_unref(acb);
-            // fall through
-        case FIND_RET_WAIT:
-            return;
-        default:
-            break;
-    }
-
-    // No cache found, so let's start a new request
-    state = curl_init_state(acb->common.bs, s);
-    if (!state) {
-        acb->common.cb(acb->common.opaque, -EIO);
-        qemu_aio_unref(acb);
-        return;
-    }
-
-    acb->start = 0;
-    acb->end = (acb->nb_sectors * SECTOR_SIZE);
-
-    state->buf_off = 0;
-    g_free(state->orig_buf);
-    state->buf_start = start;
-    state->buf_len = acb->end + s->readahead_size;
-    end = MIN(start + state->buf_len, s->len) - 1;
-    state->orig_buf = g_try_malloc(state->buf_len);
-    if (state->buf_len && state->orig_buf == NULL) {
-        curl_clean_state(state);
-        acb->common.cb(acb->common.opaque, -ENOMEM);
-        qemu_aio_unref(acb);
-        return;
-    }
-    state->acb[0] = acb;
-
-    snprintf(state->range, 127, "%zd-%zd", start, end);
-    DPRINTF("CURL (AIO): Reading %d at %zd (%s)\n",
-            (acb->nb_sectors * SECTOR_SIZE), start, state->range);
-    curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
-
-    curl_multi_add_handle(s->multi, state->curl);
-
-    /* Tell curl it needs to kick things off */
-    curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
-}
-
-static BlockAIOCB *curl_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    CURLAIOCB *acb;
-
-    acb = qemu_aio_get(&curl_aiocb_info, bs, cb, opaque);
-
-    acb->qiov = qiov;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
-    qemu_bh_schedule(acb->bh);
-    return &acb->common;
-}
-
-static void curl_close(BlockDriverState *bs)
-{
-    BDRVCURLState *s = bs->opaque;
-
-    DPRINTF("CURL: Close\n");
-    curl_detach_aio_context(bs);
-
-    g_free(s->cookie);
-    g_free(s->url);
-}
-
-static int64_t curl_getlength(BlockDriverState *bs)
-{
-    BDRVCURLState *s = bs->opaque;
-    return s->len;
-}
-
-static BlockDriver bdrv_http = {
-    .format_name                = "http",
-    .protocol_name              = "http",
-
-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
-
-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
-};
-
-static BlockDriver bdrv_https = {
-    .format_name                = "https",
-    .protocol_name              = "https",
-
-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
-
-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
-};
-
-static BlockDriver bdrv_ftp = {
-    .format_name                = "ftp",
-    .protocol_name              = "ftp",
-
-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
-
-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
-};
-
-static BlockDriver bdrv_ftps = {
-    .format_name                = "ftps",
-    .protocol_name              = "ftps",
-
-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
-
-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
-};
-
-static BlockDriver bdrv_tftp = {
-    .format_name                = "tftp",
-    .protocol_name              = "tftp",
-
-    .instance_size              = sizeof(BDRVCURLState),
-    .bdrv_parse_filename        = curl_parse_filename,
-    .bdrv_file_open             = curl_open,
-    .bdrv_close                 = curl_close,
-    .bdrv_getlength             = curl_getlength,
-
-    .bdrv_aio_readv             = curl_aio_readv,
-
-    .bdrv_detach_aio_context    = curl_detach_aio_context,
-    .bdrv_attach_aio_context    = curl_attach_aio_context,
-};
-
-static void curl_block_init(void)
-{
-    bdrv_register(&bdrv_http);
-    bdrv_register(&bdrv_https);
-    bdrv_register(&bdrv_ftp);
-    bdrv_register(&bdrv_ftps);
-    bdrv_register(&bdrv_tftp);
-}
-
-block_init(curl_block_init);
diff --git a/qemu/block/dirty-bitmap.c b/qemu/block/dirty-bitmap.c
deleted file mode 100644
index 4902ca557..000000000
--- a/qemu/block/dirty-bitmap.c
+++ /dev/null
@@ -1,387 +0,0 @@
-/*
- * Block Dirty Bitmap
- *
- * Copyright (c) 2016 Red Hat. Inc
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "trace.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-
-/**
- * A BdrvDirtyBitmap can be in three possible states:
- * (1) successor is NULL and disabled is false: full r/w mode
- * (2) successor is NULL and disabled is true: read only mode ("disabled")
- * (3) successor is set: frozen mode.
- *     A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
- *     or enabled. A frozen bitmap can only abdicate() or reclaim().
- */
-struct BdrvDirtyBitmap {
-    HBitmap *bitmap;            /* Dirty sector bitmap implementation */
-    BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
-    char *name;                 /* Optional non-empty unique ID */
-    int64_t size;               /* Size of the bitmap (Number of sectors) */
-    bool disabled;              /* Bitmap is read-only */
-    QLIST_ENTRY(BdrvDirtyBitmap) list;
-};
-
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
-{
-    BdrvDirtyBitmap *bm;
-
-    assert(name);
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        if (bm->name && !strcmp(name, bm->name)) {
-            return bm;
-        }
-    }
-    return NULL;
-}
-
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    g_free(bitmap->name);
-    bitmap->name = NULL;
-}
-
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
-                                          uint32_t granularity,
-                                          const char *name,
-                                          Error **errp)
-{
-    int64_t bitmap_size;
-    BdrvDirtyBitmap *bitmap;
-    uint32_t sector_granularity;
-
-    assert((granularity & (granularity - 1)) == 0);
-
-    if (name && bdrv_find_dirty_bitmap(bs, name)) {
-        error_setg(errp, "Bitmap already exists: %s", name);
-        return NULL;
-    }
-    sector_granularity = granularity >> BDRV_SECTOR_BITS;
-    assert(sector_granularity);
-    bitmap_size = bdrv_nb_sectors(bs);
-    if (bitmap_size < 0) {
-        error_setg_errno(errp, -bitmap_size, "could not get length of device");
-        errno = -bitmap_size;
-        return NULL;
-    }
-    bitmap = g_new0(BdrvDirtyBitmap, 1);
-    bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
-    bitmap->size = bitmap_size;
-    bitmap->name = g_strdup(name);
-    bitmap->disabled = false;
-    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
-    return bitmap;
-}
-
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
-{
-    return bitmap->successor;
-}
-
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
-{
-    return !(bitmap->disabled || bitmap->successor);
-}
-
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
-{
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        return DIRTY_BITMAP_STATUS_FROZEN;
-    } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-        return DIRTY_BITMAP_STATUS_DISABLED;
-    } else {
-        return DIRTY_BITMAP_STATUS_ACTIVE;
-    }
-}
-
-/**
- * Create a successor bitmap destined to replace this bitmap after an operation.
- * Requires that the bitmap is not frozen and has no successor.
- */
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
-                                       BdrvDirtyBitmap *bitmap, Error **errp)
-{
-    uint64_t granularity;
-    BdrvDirtyBitmap *child;
-
-    if (bdrv_dirty_bitmap_frozen(bitmap)) {
-        error_setg(errp, "Cannot create a successor for a bitmap that is "
-                   "currently frozen");
-        return -1;
-    }
-    assert(!bitmap->successor);
-
-    /* Create an anonymous successor */
-    granularity = bdrv_dirty_bitmap_granularity(bitmap);
-    child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
-    if (!child) {
-        return -1;
-    }
-
-    /* Successor will be on or off based on our current state. */
-    child->disabled = bitmap->disabled;
-
-    /* Install the successor and freeze the parent */
-    bitmap->successor = child;
-    return 0;
-}
-
-/**
- * For a bitmap with a successor, yield our name to the successor,
- * delete the old bitmap, and return a handle to the new bitmap.
- */
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
-                                            BdrvDirtyBitmap *bitmap,
-                                            Error **errp)
-{
-    char *name;
-    BdrvDirtyBitmap *successor = bitmap->successor;
-
-    if (successor == NULL) {
-        error_setg(errp, "Cannot relinquish control if "
-                   "there's no successor present");
-        return NULL;
-    }
-
-    name = bitmap->name;
-    bitmap->name = NULL;
-    successor->name = name;
-    bitmap->successor = NULL;
-    bdrv_release_dirty_bitmap(bs, bitmap);
-
-    return successor;
-}
-
-/**
- * In cases of failure where we can no longer safely delete the parent,
- * we may wish to re-join the parent and child/successor.
- * The merged parent will be un-frozen, but not explicitly re-enabled.
- */
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
-                                           BdrvDirtyBitmap *parent,
-                                           Error **errp)
-{
-    BdrvDirtyBitmap *successor = parent->successor;
-
-    if (!successor) {
-        error_setg(errp, "Cannot reclaim a successor when none is present");
-        return NULL;
-    }
-
-    if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
-        error_setg(errp, "Merging of parent and successor bitmap failed");
-        return NULL;
-    }
-    bdrv_release_dirty_bitmap(bs, successor);
-    parent->successor = NULL;
-
-    return parent;
-}
-
-/**
- * Truncates _all_ bitmaps attached to a BDS.
- */
-void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bitmap;
-    uint64_t size = bdrv_nb_sectors(bs);
-
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        assert(!bdrv_dirty_bitmap_frozen(bitmap));
-        hbitmap_truncate(bitmap->bitmap, size);
-        bitmap->size = size;
-    }
-}
-
-static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
-                                                  BdrvDirtyBitmap *bitmap,
-                                                  bool only_named)
-{
-    BdrvDirtyBitmap *bm, *next;
-    QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
-        if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
-            assert(!bdrv_dirty_bitmap_frozen(bm));
-            QLIST_REMOVE(bm, list);
-            hbitmap_free(bm->bitmap);
-            g_free(bm->name);
-            g_free(bm);
-
-            if (bitmap) {
-                return;
-            }
-        }
-    }
-}
-
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
-}
-
-/**
- * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
- * There must not be any frozen bitmaps attached.
- */
-void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
-{
-    bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
-}
-
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = true;
-}
-
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
-{
-    assert(!bdrv_dirty_bitmap_frozen(bitmap));
-    bitmap->disabled = false;
-}
-
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
-{
-    BdrvDirtyBitmap *bm;
-    BlockDirtyInfoList *list = NULL;
-    BlockDirtyInfoList **plist = &list;
-
-    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
-        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
-        BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
-        info->count = bdrv_get_dirty_count(bm);
-        info->granularity = bdrv_dirty_bitmap_granularity(bm);
-        info->has_name = !!bm->name;
-        info->name = g_strdup(bm->name);
-        info->status = bdrv_dirty_bitmap_status(bm);
-        entry->value = info;
-        *plist = entry;
-        plist = &entry->next;
-    }
-
-    return list;
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
-                   int64_t sector)
-{
-    if (bitmap) {
-        return hbitmap_get(bitmap->bitmap, sector);
-    } else {
-        return 0;
-    }
-}
-
-/**
- * Chooses a default granularity based on the existing cluster size,
- * but clamped between [4K, 64K]. Defaults to 64K in the case that there
- * is no cluster size information available.
- */
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
-{
-    BlockDriverInfo bdi;
-    uint32_t granularity;
-
-    if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
-        granularity = MAX(4096, bdi.cluster_size);
-        granularity = MIN(65536, granularity);
-    } else {
-        granularity = 65536;
-    }
-
-    return granularity;
-}
-
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
-{
-    return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
-}
-
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
-{
-    hbitmap_iter_init(hbi, bitmap->bitmap, 0);
-}
-
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                           int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
-                             int64_t cur_sector, int nr_sectors)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
-}
-
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
-{
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    if (!out) {
-        hbitmap_reset_all(bitmap->bitmap);
-    } else {
-        HBitmap *backup = bitmap->bitmap;
-        bitmap->bitmap = hbitmap_alloc(bitmap->size,
-                                       hbitmap_granularity(backup));
-        *out = backup;
-    }
-}
-
-void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
-{
-    HBitmap *tmp = bitmap->bitmap;
-    assert(bdrv_dirty_bitmap_enabled(bitmap));
-    bitmap->bitmap = in;
-    hbitmap_free(tmp);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
-                    int nr_sectors)
-{
-    BdrvDirtyBitmap *bitmap;
-    QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
-        if (!bdrv_dirty_bitmap_enabled(bitmap)) {
-            continue;
-        }
-        hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
-    }
-}
-
-/**
- * Advance an HBitmapIter to an arbitrary offset.
- */
-void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
-{
-    assert(hbi->hb);
-    hbitmap_iter_init(hbi, hbi->hb, offset);
-}
-
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
-{
-    return hbitmap_count(bitmap->bitmap);
-}
diff --git a/qemu/block/dmg.c b/qemu/block/dmg.c
deleted file mode 100644
index a496eb7c9..000000000
--- a/qemu/block/dmg.c
+++ /dev/null
@@ -1,727 +0,0 @@
-/*
- * QEMU Block driver for DMG images
- *
- * Copyright (c) 2004 Johannes E. Schindelin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/bswap.h"
-#include "qemu/error-report.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#ifdef CONFIG_BZIP2
-#include <bzlib.h>
-#endif
-#include <glib.h>
-
-enum {
-    /* Limit chunk sizes to prevent unreasonable amounts of memory being used
-     * or truncating when converting to 32-bit types
-     */
-    DMG_LENGTHS_MAX = 64 * 1024 * 1024, /* 64 MB */
-    DMG_SECTORCOUNTS_MAX = DMG_LENGTHS_MAX / 512,
-};
-
-typedef struct BDRVDMGState {
-    CoMutex lock;
-    /* each chunk contains a certain number of sectors,
-     * offsets[i] is the offset in the .dmg file,
-     * lengths[i] is the length of the compressed chunk,
-     * sectors[i] is the sector beginning at offsets[i],
-     * sectorcounts[i] is the number of sectors in that chunk,
-     * the sectors array is ordered
-     * 0<=i<n_chunks */
-
-    uint32_t n_chunks;
-    uint32_t* types;
-    uint64_t* offsets;
-    uint64_t* lengths;
-    uint64_t* sectors;
-    uint64_t* sectorcounts;
-    uint32_t current_chunk;
-    uint8_t *compressed_chunk;
-    uint8_t *uncompressed_chunk;
-    z_stream zstream;
-#ifdef CONFIG_BZIP2
-    bz_stream bzstream;
-#endif
-} BDRVDMGState;
-
-static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    int len;
-
-    if (!filename) {
-        return 0;
-    }
-
-    len = strlen(filename);
-    if (len > 4 && !strcmp(filename + len - 4, ".dmg")) {
-        return 2;
-    }
-    return 0;
-}
-
-static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
-{
-    uint64_t buffer;
-    int ret;
-
-    ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
-    if (ret < 0) {
-        return ret;
-    }
-
-    *result = be64_to_cpu(buffer);
-    return 0;
-}
-
-static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
-{
-    uint32_t buffer;
-    int ret;
-
-    ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
-    if (ret < 0) {
-        return ret;
-    }
-
-    *result = be32_to_cpu(buffer);
-    return 0;
-}
-
-static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset)
-{
-    return be64_to_cpu(*(uint64_t *)&buffer[offset]);
-}
-
-static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset)
-{
-    return be32_to_cpu(*(uint32_t *)&buffer[offset]);
-}
-
-/* Increase max chunk sizes, if necessary.  This function is used to calculate
- * the buffer sizes needed for compressed/uncompressed chunk I/O.
- */
-static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
-                                  uint32_t *max_compressed_size,
-                                  uint32_t *max_sectors_per_chunk)
-{
-    uint32_t compressed_size = 0;
-    uint32_t uncompressed_sectors = 0;
-
-    switch (s->types[chunk]) {
-    case 0x80000005: /* zlib compressed */
-    case 0x80000006: /* bzip2 compressed */
-        compressed_size = s->lengths[chunk];
-        uncompressed_sectors = s->sectorcounts[chunk];
-        break;
-    case 1: /* copy */
-        uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
-        break;
-    case 2: /* zero */
-        /* as the all-zeroes block may be large, it is treated specially: the
-         * sector is not copied from a large buffer, a simple memset is used
-         * instead. Therefore uncompressed_sectors does not need to be set. */
-        break;
-    }
-
-    if (compressed_size > *max_compressed_size) {
-        *max_compressed_size = compressed_size;
-    }
-    if (uncompressed_sectors > *max_sectors_per_chunk) {
-        *max_sectors_per_chunk = uncompressed_sectors;
-    }
-}
-
-static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
-{
-    int64_t length;
-    int64_t offset = 0;
-    uint8_t buffer[515];
-    int i, ret;
-
-    /* bdrv_getlength returns a multiple of block size (512), rounded up. Since
-     * dmg images can have odd sizes, try to look for the "koly" magic which
-     * marks the begin of the UDIF trailer (512 bytes). This magic can be found
-     * in the last 511 bytes of the second-last sector or the first 4 bytes of
-     * the last sector (search space: 515 bytes) */
-    length = bdrv_getlength(file_bs);
-    if (length < 0) {
-        error_setg_errno(errp, -length,
-            "Failed to get file size while reading UDIF trailer");
-        return length;
-    } else if (length < 512) {
-        error_setg(errp, "dmg file must be at least 512 bytes long");
-        return -EINVAL;
-    }
-    if (length > 511 + 512) {
-        offset = length - 511 - 512;
-    }
-    length = length < 515 ? length : 515;
-    ret = bdrv_pread(file_bs, offset, buffer, length);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
-        return ret;
-    }
-    for (i = 0; i < length - 3; i++) {
-        if (buffer[i] == 'k' && buffer[i+1] == 'o' &&
-            buffer[i+2] == 'l' && buffer[i+3] == 'y') {
-            return offset + i;
-        }
-    }
-    error_setg(errp, "Could not locate UDIF trailer in dmg file");
-    return -EINVAL;
-}
-
-/* used when building the sector table */
-typedef struct DmgHeaderState {
-    /* used internally by dmg_read_mish_block to remember offsets of blocks
-     * across calls */
-    uint64_t data_fork_offset;
-    /* exported for dmg_open */
-    uint32_t max_compressed_size;
-    uint32_t max_sectors_per_chunk;
-} DmgHeaderState;
-
-static bool dmg_is_known_block_type(uint32_t entry_type)
-{
-    switch (entry_type) {
-    case 0x00000001:    /* uncompressed */
-    case 0x00000002:    /* zeroes */
-    case 0x80000005:    /* zlib */
-#ifdef CONFIG_BZIP2
-    case 0x80000006:    /* bzip2 */
-#endif
-        return true;
-    default:
-        return false;
-    }
-}
-
-static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
-                               uint8_t *buffer, uint32_t count)
-{
-    uint32_t type, i;
-    int ret;
-    size_t new_size;
-    uint32_t chunk_count;
-    int64_t offset = 0;
-    uint64_t data_offset;
-    uint64_t in_offset = ds->data_fork_offset;
-    uint64_t out_offset;
-
-    type = buff_read_uint32(buffer, offset);
-    /* skip data that is not a valid MISH block (invalid magic or too small) */
-    if (type != 0x6d697368 || count < 244) {
-        /* assume success for now */
-        return 0;
-    }
-
-    /* chunk offsets are relative to this sector number */
-    out_offset = buff_read_uint64(buffer, offset + 8);
-
-    /* location in data fork for (compressed) blob (in bytes) */
-    data_offset = buff_read_uint64(buffer, offset + 0x18);
-    in_offset += data_offset;
-
-    /* move to begin of chunk entries */
-    offset += 204;
-
-    chunk_count = (count - 204) / 40;
-    new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
-    s->types = g_realloc(s->types, new_size / 2);
-    s->offsets = g_realloc(s->offsets, new_size);
-    s->lengths = g_realloc(s->lengths, new_size);
-    s->sectors = g_realloc(s->sectors, new_size);
-    s->sectorcounts = g_realloc(s->sectorcounts, new_size);
-
-    for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
-        s->types[i] = buff_read_uint32(buffer, offset);
-        if (!dmg_is_known_block_type(s->types[i])) {
-            chunk_count--;
-            i--;
-            offset += 40;
-            continue;
-        }
-
-        /* sector number */
-        s->sectors[i] = buff_read_uint64(buffer, offset + 8);
-        s->sectors[i] += out_offset;
-
-        /* sector count */
-        s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10);
-
-        /* all-zeroes sector (type 2) does not need to be "uncompressed" and can
-         * therefore be unbounded. */
-        if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
-            error_report("sector count %" PRIu64 " for chunk %" PRIu32
-                         " is larger than max (%u)",
-                         s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        /* offset in (compressed) data fork */
-        s->offsets[i] = buff_read_uint64(buffer, offset + 0x18);
-        s->offsets[i] += in_offset;
-
-        /* length in (compressed) data fork */
-        s->lengths[i] = buff_read_uint64(buffer, offset + 0x20);
-
-        if (s->lengths[i] > DMG_LENGTHS_MAX) {
-            error_report("length %" PRIu64 " for chunk %" PRIu32
-                         " is larger than max (%u)",
-                         s->lengths[i], i, DMG_LENGTHS_MAX);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        update_max_chunk_size(s, i, &ds->max_compressed_size,
-                              &ds->max_sectors_per_chunk);
-        offset += 40;
-    }
-    s->n_chunks += chunk_count;
-    return 0;
-
-fail:
-    return ret;
-}
-
-static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
-                                  uint64_t info_begin, uint64_t info_length)
-{
-    BDRVDMGState *s = bs->opaque;
-    int ret;
-    uint32_t count, rsrc_data_offset;
-    uint8_t *buffer = NULL;
-    uint64_t info_end;
-    uint64_t offset;
-
-    /* read offset from begin of resource fork (info_begin) to resource data */
-    ret = read_uint32(bs, info_begin, &rsrc_data_offset);
-    if (ret < 0) {
-        goto fail;
-    } else if (rsrc_data_offset > info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* read length of resource data */
-    ret = read_uint32(bs, info_begin + 8, &count);
-    if (ret < 0) {
-        goto fail;
-    } else if (count == 0 || rsrc_data_offset + count > info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* begin of resource data (consisting of one or more resources) */
-    offset = info_begin + rsrc_data_offset;
-
-    /* end of resource data (there is possibly a following resource map
-     * which will be ignored). */
-    info_end = offset + count;
-
-    /* read offsets (mish blocks) from one or more resources in resource data */
-    while (offset < info_end) {
-        /* size of following resource */
-        ret = read_uint32(bs, offset, &count);
-        if (ret < 0) {
-            goto fail;
-        } else if (count == 0 || count > info_end - offset) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        offset += 4;
-
-        buffer = g_realloc(buffer, count);
-        ret = bdrv_pread(bs->file->bs, offset, buffer, count);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        ret = dmg_read_mish_block(s, ds, buffer, count);
-        if (ret < 0) {
-            goto fail;
-        }
-        /* advance offset by size of resource */
-        offset += count;
-    }
-    ret = 0;
-
-fail:
-    g_free(buffer);
-    return ret;
-}
-
-static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
-                              uint64_t info_begin, uint64_t info_length)
-{
-    BDRVDMGState *s = bs->opaque;
-    int ret;
-    uint8_t *buffer = NULL;
-    char *data_begin, *data_end;
-
-    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
-     * safe upper cap on the data length. A test sample had a XML length of
-     * about 1 MiB. */
-    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    buffer = g_malloc(info_length + 1);
-    buffer[info_length] = '\0';
-    ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
-    if (ret != info_length) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
-     * decode. The actual data element has 431 (0x1af) bytes which includes tabs
-     * and line feeds. */
-    data_end = (char *)buffer;
-    while ((data_begin = strstr(data_end, "<data>")) != NULL) {
-        guchar *mish;
-        gsize out_len = 0;
-
-        data_begin += 6;
-        data_end = strstr(data_begin, "</data>");
-        /* malformed XML? */
-        if (data_end == NULL) {
-            ret = -EINVAL;
-            goto fail;
-        }
-        *data_end++ = '\0';
-        mish = g_base64_decode(data_begin, &out_len);
-        ret = dmg_read_mish_block(s, ds, mish, (uint32_t)out_len);
-        g_free(mish);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-    ret = 0;
-
-fail:
-    g_free(buffer);
-    return ret;
-}
-
-static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVDMGState *s = bs->opaque;
-    DmgHeaderState ds;
-    uint64_t rsrc_fork_offset, rsrc_fork_length;
-    uint64_t plist_xml_offset, plist_xml_length;
-    int64_t offset;
-    int ret;
-
-    bs->read_only = 1;
-    s->n_chunks = 0;
-    s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
-    /* used by dmg_read_mish_block to keep track of the current I/O position */
-    ds.data_fork_offset = 0;
-    ds.max_compressed_size = 1;
-    ds.max_sectors_per_chunk = 1;
-
-    /* locate the UDIF trailer */
-    offset = dmg_find_koly_offset(bs->file->bs, errp);
-    if (offset < 0) {
-        ret = offset;
-        goto fail;
-    }
-
-    /* offset of data fork (DataForkOffset) */
-    ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset);
-    if (ret < 0) {
-        goto fail;
-    } else if (ds.data_fork_offset > offset) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* offset of resource fork (RsrcForkOffset) */
-    ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset);
-    if (ret < 0) {
-        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (rsrc_fork_offset >= offset ||
-        rsrc_fork_length > offset - rsrc_fork_offset) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    /* offset of property list (XMLOffset) */
-    ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
-    if (ret < 0) {
-        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (plist_xml_offset >= offset ||
-        plist_xml_length > offset - plist_xml_offset) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (bs->total_sectors < 0) {
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (rsrc_fork_length != 0) {
-        ret = dmg_read_resource_fork(bs, &ds,
-                                     rsrc_fork_offset, rsrc_fork_length);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else if (plist_xml_length != 0) {
-        ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* initialize zlib engine */
-    s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
-                                              ds.max_compressed_size + 1);
-    s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
-                                                512 * ds.max_sectors_per_chunk);
-    if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    if (inflateInit(&s->zstream) != Z_OK) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    s->current_chunk = s->n_chunks;
-
-    qemu_co_mutex_init(&s->lock);
-    return 0;
-
-fail:
-    g_free(s->types);
-    g_free(s->offsets);
-    g_free(s->lengths);
-    g_free(s->sectors);
-    g_free(s->sectorcounts);
-    qemu_vfree(s->compressed_chunk);
-    qemu_vfree(s->uncompressed_chunk);
-    return ret;
-}
-
-static inline int is_sector_in_chunk(BDRVDMGState* s,
-                uint32_t chunk_num, uint64_t sector_num)
-{
-    if (chunk_num >= s->n_chunks || s->sectors[chunk_num] > sector_num ||
-            s->sectors[chunk_num] + s->sectorcounts[chunk_num] <= sector_num) {
-        return 0;
-    } else {
-        return -1;
-    }
-}
-
-static inline uint32_t search_chunk(BDRVDMGState *s, uint64_t sector_num)
-{
-    /* binary search */
-    uint32_t chunk1 = 0, chunk2 = s->n_chunks, chunk3;
-    while (chunk1 != chunk2) {
-        chunk3 = (chunk1 + chunk2) / 2;
-        if (s->sectors[chunk3] > sector_num) {
-            chunk2 = chunk3;
-        } else if (s->sectors[chunk3] + s->sectorcounts[chunk3] > sector_num) {
-            return chunk3;
-        } else {
-            chunk1 = chunk3;
-        }
-    }
-    return s->n_chunks; /* error */
-}
-
-static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
-{
-    BDRVDMGState *s = bs->opaque;
-
-    if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
-        int ret;
-        uint32_t chunk = search_chunk(s, sector_num);
-#ifdef CONFIG_BZIP2
-        uint64_t total_out;
-#endif
-
-        if (chunk >= s->n_chunks) {
-            return -1;
-        }
-
-        s->current_chunk = s->n_chunks;
-        switch (s->types[chunk]) { /* block entry type */
-        case 0x80000005: { /* zlib compressed */
-            /* we need to buffer, because only the chunk as whole can be
-             * inflated. */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
-                return -1;
-            }
-
-            s->zstream.next_in = s->compressed_chunk;
-            s->zstream.avail_in = s->lengths[chunk];
-            s->zstream.next_out = s->uncompressed_chunk;
-            s->zstream.avail_out = 512 * s->sectorcounts[chunk];
-            ret = inflateReset(&s->zstream);
-            if (ret != Z_OK) {
-                return -1;
-            }
-            ret = inflate(&s->zstream, Z_FINISH);
-            if (ret != Z_STREAM_END ||
-                s->zstream.total_out != 512 * s->sectorcounts[chunk]) {
-                return -1;
-            }
-            break; }
-#ifdef CONFIG_BZIP2
-        case 0x80000006: /* bzip2 compressed */
-            /* we need to buffer, because only the chunk as whole can be
-             * inflated. */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
-                             s->compressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
-                return -1;
-            }
-
-            ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
-            if (ret != BZ_OK) {
-                return -1;
-            }
-            s->bzstream.next_in = (char *)s->compressed_chunk;
-            s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
-            s->bzstream.next_out = (char *)s->uncompressed_chunk;
-            s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
-            ret = BZ2_bzDecompress(&s->bzstream);
-            total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
-                        s->bzstream.total_out_lo32;
-            BZ2_bzDecompressEnd(&s->bzstream);
-            if (ret != BZ_STREAM_END ||
-                total_out != 512 * s->sectorcounts[chunk]) {
-                return -1;
-            }
-            break;
-#endif /* CONFIG_BZIP2 */
-        case 1: /* copy */
-            ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
-                             s->uncompressed_chunk, s->lengths[chunk]);
-            if (ret != s->lengths[chunk]) {
-                return -1;
-            }
-            break;
-        case 2: /* zero */
-            /* see dmg_read, it is treated specially. No buffer needs to be
-             * pre-filled, the zeroes can be set directly. */
-            break;
-        }
-        s->current_chunk = chunk;
-    }
-    return 0;
-}
-
-static int dmg_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVDMGState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < nb_sectors; i++) {
-        uint32_t sector_offset_in_chunk;
-        if (dmg_read_chunk(bs, sector_num + i) != 0) {
-            return -1;
-        }
-        /* Special case: current chunk is all zeroes. Do not perform a memcpy as
-         * s->uncompressed_chunk may be too small to cover the large all-zeroes
-         * section. dmg_read_chunk is called to find s->current_chunk */
-        if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
-            memset(buf + i * 512, 0, 512);
-            continue;
-        }
-        sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
-        memcpy(buf + i * 512,
-               s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
-    }
-    return 0;
-}
-
-static coroutine_fn int dmg_co_read(BlockDriverState *bs, int64_t sector_num,
-                                    uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVDMGState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = dmg_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-static void dmg_close(BlockDriverState *bs)
-{
-    BDRVDMGState *s = bs->opaque;
-
-    g_free(s->types);
-    g_free(s->offsets);
-    g_free(s->lengths);
-    g_free(s->sectors);
-    g_free(s->sectorcounts);
-    qemu_vfree(s->compressed_chunk);
-    qemu_vfree(s->uncompressed_chunk);
-
-    inflateEnd(&s->zstream);
-}
-
-static BlockDriver bdrv_dmg = {
-    .format_name    = "dmg",
-    .instance_size  = sizeof(BDRVDMGState),
-    .bdrv_probe     = dmg_probe,
-    .bdrv_open      = dmg_open,
-    .bdrv_read      = dmg_co_read,
-    .bdrv_close     = dmg_close,
-};
-
-static void bdrv_dmg_init(void)
-{
-    bdrv_register(&bdrv_dmg);
-}
-
-block_init(bdrv_dmg_init);
diff --git a/qemu/block/gluster.c b/qemu/block/gluster.c
deleted file mode 100644
index a8aaacf64..000000000
--- a/qemu/block/gluster.c
+++ /dev/null
@@ -1,866 +0,0 @@
-/*
- * GlusterFS backend for QEMU
- *
- * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- *
- */
-#include "qemu/osdep.h"
-#include <glusterfs/api/glfs.h>
-#include "block/block_int.h"
-#include "qapi/error.h"
-#include "qemu/uri.h"
-
-/*
- * Per-request state shared between the issuing coroutine and the gluster
- * completion callback (gluster_finish_aiocb).
- */
-typedef struct GlusterAIOCB {
-    /* byte count the callback compares the gluster result against */
-    int64_t size;
-    /* final status: 0, -errno or -EIO, filled in by the callback */
-    int ret;
-    /* bottom half created by the callback, deleted when it runs */
-    QEMUBH *bh;
-    /* coroutine re-entered by the bottom half */
-    Coroutine *coroutine;
-    /* AioContext the bottom half is created in */
-    AioContext *aio_context;
-} GlusterAIOCB;
-
-/* Driver state stored in bs->opaque. */
-typedef struct BDRVGlusterState {
-    /* volume connection returned by qemu_gluster_init() */
-    struct glfs *glfs;
-    /* open image handle returned by glfs_open() */
-    struct glfs_fd *fd;
-} BDRVGlusterState;
-
-/*
- * Result of parsing a gluster URI.  The strings are heap copies released by
- * qemu_gluster_gconf_free().
- */
-typedef struct GlusterConf {
-    /* host name, or the unix socket path when the transport is "unix" */
-    char *server;
-    /* port taken from the URI; not assigned for the unix transport */
-    int port;
-    /* first component of the URI path */
-    char *volname;
-    /* remainder of the URI path after the volume name */
-    char *image;
-    /* "tcp", "unix" or "rdma" */
-    char *transport;
-} GlusterConf;
-
-/* Free a GlusterConf together with the strings it owns; NULL is accepted. */
-static void qemu_gluster_gconf_free(GlusterConf *gconf)
-{
-    if (!gconf) {
-        return;
-    }
-
-    g_free(gconf->transport);
-    g_free(gconf->image);
-    g_free(gconf->volname);
-    g_free(gconf->server);
-    g_free(gconf);
-}
-
-/*
- * Split a URI path of the form "/volname/image" into gconf->volname and
- * gconf->image.
- *
- * Leading slashes and the run of slashes between the two components are
- * skipped; the image part is copied verbatim and may itself contain
- * slashes.  Returns 0 on success, -EINVAL when path is NULL or either
- * component is missing (volname may already have been assigned when only
- * the image part is missing).
- */
-static int parse_volume_options(GlusterConf *gconf, char *path)
-{
-    const char *vol_start;
-    const char *vol_end;
-    const char *image;
-
-    if (!path) {
-        return -EINVAL;
-    }
-
-    /* volume */
-    vol_start = path + strspn(path, "/");
-    vol_end = vol_start + strcspn(vol_start, "/");
-    if (*vol_end == '\0') {
-        return -EINVAL;
-    }
-    gconf->volname = g_strndup(vol_start, vol_end - vol_start);
-
-    /* image */
-    image = vol_end + strspn(vol_end, "/");
-    if (*image == '\0') {
-        return -EINVAL;
-    }
-    gconf->image = g_strdup(image);
-
-    return 0;
-}
-
-/*
- * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
- *
- * 'gluster' is the protocol.
- *
- * 'transport' specifies the transport type used to connect to gluster
- * management daemon (glusterd). Valid transport types are
- * tcp, unix and rdma. If a transport type isn't specified, then tcp
- * type is assumed.
- *
- * 'server' specifies the server where the volume file specification for
- * the given volume resides. This can be either hostname, ipv4 address
- * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specified.
- * The 'socket' field needs to be populated with the path to unix domain
- * socket.
- *
- * 'port' is the port number on which glusterd is listening. It is optional;
- * if it is not specified, QEMU sends 0, which makes gluster use the
- * default port. If the transport type is unix, then 'port' should not be
- * specified.
- *
- * 'volname' is the name of the gluster volume which contains the VM image.
- *
- * 'image' is the path to the actual VM image that resides on gluster volume.
- *
- * Examples:
- *
- * file=gluster://1.2.3.4/testvol/a.img
- * file=gluster+tcp://1.2.3.4/testvol/a.img
- * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
- * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
- * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
- * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
- */
-/*
- * Parse a gluster URI into gconf.
- *
- * Fills in transport, volname, image and server, plus port for the non-unix
- * transports.  For the unix transport the value of the mandatory "socket"
- * query parameter is stored in gconf->server.
- *
- * Returns 0 on success or -EINVAL on any syntax error.  Fields assigned
- * before the error was detected are left allocated in gconf.
- */
-static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
-{
-    QueryParams *params = NULL;
-    const char *transport;
-    bool unix_socket = false;
-    bool bad_query;
-    int rc;
-    URI *parsed = uri_parse(filename);
-
-    if (!parsed) {
-        return -EINVAL;
-    }
-
-    /* transport: a missing scheme and plain "gluster" both mean tcp */
-    if (!parsed->scheme || !strcmp(parsed->scheme, "gluster") ||
-        !strcmp(parsed->scheme, "gluster+tcp")) {
-        transport = "tcp";
-    } else if (!strcmp(parsed->scheme, "gluster+unix")) {
-        transport = "unix";
-        unix_socket = true;
-    } else if (!strcmp(parsed->scheme, "gluster+rdma")) {
-        transport = "rdma";
-    } else {
-        rc = -EINVAL;
-        goto done;
-    }
-    gconf->transport = g_strdup(transport);
-
-    rc = parse_volume_options(gconf, parsed->path);
-    if (rc < 0) {
-        goto done;
-    }
-
-    /* unix takes exactly one query parameter, the other transports none */
-    params = query_params_parse(parsed->query);
-    if (unix_socket) {
-        bad_query = params->n > 1 || !params->n;
-    } else {
-        bad_query = params->n > 1 || params->n;
-    }
-    if (bad_query) {
-        rc = -EINVAL;
-        goto done;
-    }
-
-    if (!unix_socket) {
-        gconf->server = g_strdup(parsed->server ? parsed->server
-                                                : "localhost");
-        gconf->port = parsed->port;
-        goto done;
-    }
-
-    /* unix: no host or port allowed, and the parameter must be "socket" */
-    if (parsed->server || parsed->port ||
-        strcmp(params->p[0].name, "socket")) {
-        rc = -EINVAL;
-        goto done;
-    }
-    gconf->server = g_strdup(params->p[0].value);
-
-done:
-    if (params) {
-        query_params_free(params);
-    }
-    uri_free(parsed);
-    return rc;
-}
-
-/*
- * Parse @filename into @gconf and connect to the gluster volume it names.
- *
- * Returns the initialised glfs handle, or NULL on failure.  On every
- * failure path *errp is set and errno is left non-zero: the callers in this
- * file turn errno into their own return value with "-errno", so an errno of
- * 0 would make them report success while holding a NULL handle.
- */
-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
-                                      Error **errp)
-{
-    struct glfs *glfs = NULL;
-    int ret;
-    int old_errno;
-
-    ret = qemu_gluster_parseuri(gconf, filename);
-    if (ret < 0) {
-        error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
-                   "volname/image[?socket=...]");
-        errno = -ret;
-        goto out;
-    }
-
-    glfs = glfs_new(gconf->volname);
-    if (!glfs) {
-        error_setg(errp, "Could not create gluster handle for volume=%s",
-                   gconf->volname);
-        goto out;
-    }
-
-    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
-            gconf->port);
-    if (ret < 0) {
-        error_setg(errp, "Could not set gluster volfile server=%s port=%d "
-                   "transport=%s", gconf->server, gconf->port,
-                   gconf->transport);
-        goto out;
-    }
-
-    /*
-     * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
-     * GlusterFS makes GF_LOG_* macros available to libgfapi users.
-     */
-    ret = glfs_set_logging(glfs, "-", 4);
-    if (ret < 0) {
-        error_setg(errp, "Could not configure gluster logging");
-        goto out;
-    }
-
-    ret = glfs_init(glfs);
-    if (ret) {
-        /* glfs_init sometimes doesn't set errno although docs suggest that;
-         * fix it up before it is formatted into the message */
-        if (errno == 0) {
-            errno = EINVAL;
-        }
-        error_setg_errno(errp, errno,
-                         "Gluster connection failed for server=%s port=%d "
-                         "volume=%s image=%s transport=%s", gconf->server,
-                         gconf->port, gconf->volname, gconf->image,
-                         gconf->transport);
-        goto out;
-    }
-    return glfs;
-
-out:
-    /* Never leave errno at 0 on failure, and keep glfs_fini() from
-     * overwriting the value being reported. */
-    old_errno = errno ? errno : EINVAL;
-    if (glfs) {
-        glfs_fini(glfs);
-    }
-    errno = old_errno;
-    return NULL;
-}
-
-/*
- * Bottom half scheduled by gluster_finish_aiocb(): deletes the bottom half
- * and re-enters the coroutine that issued the request.
- */
-static void qemu_gluster_complete_aio(void *opaque)
-{
-    GlusterAIOCB *request = opaque;
-
-    qemu_bh_delete(request->bh);
-    request->bh = NULL;
-    qemu_coroutine_enter(request->coroutine, NULL);
-}
-
-/*
- * AIO callback routine called from GlusterFS thread.
- *
- * Converts the gluster result into request->ret (0 for an empty or complete
- * transfer, -errno for a failure, -EIO for a short transfer) and schedules
- * a bottom half in the request's AioContext to resume the coroutine.
- */
-static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
-{
-    GlusterAIOCB *request = arg;
-    int status;
-
-    if (ret == 0 || ret == request->size) {
-        status = 0;         /* Success */
-    } else if (ret < 0) {
-        status = -errno;    /* Read/Write failed */
-    } else {
-        status = -EIO;      /* Partial read/write - fail it */
-    }
-    request->ret = status;
-
-    request->bh = aio_bh_new(request->aio_context,
-                             qemu_gluster_complete_aio, request);
-    qemu_bh_schedule(request->bh);
-}
-
-/* TODO Convert to fine grained options */
-/* Options absorbed from the open-time QDict in qemu_gluster_open(); only
- * the gluster URL ("filename") is defined. */
-static QemuOptsList runtime_opts = {
-    .name = "gluster",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the gluster image",
-        },
-        { /* end of list */ }
-    },
-};
-
-/*
- * OR the open(2)-style flags matching bdrv_flags into *open_flags:
- * O_BINARY always, O_RDWR or O_RDONLY depending on BDRV_O_RDWR, and
- * O_DIRECT when BDRV_O_NOCACHE is set.  Bits already present in
- * *open_flags are kept.
- */
-static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
-{
-    int wanted = O_BINARY;
-
-    assert(open_flags != NULL);
-
-    wanted |= (bdrv_flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY;
-    if (bdrv_flags & BDRV_O_NOCACHE) {
-        wanted |= O_DIRECT;
-    }
-
-    *open_flags |= wanted;
-}
-
-/*
- * .bdrv_file_open callback: connect to the volume named by the "filename"
- * option and open the image on it.
- *
- * Returns 0 on success.  When a non-zero status is returned, any handle
- * that had already been opened is closed again.
- */
-static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
-                             int bdrv_flags, Error **errp)
-{
-    BDRVGlusterState *state = bs->opaque;
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
-    Error *local_err = NULL;
-    QemuOpts *opts;
-    const char *filename;
-    int open_flags = 0;
-    int ret = 0;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-
-    state->glfs = qemu_gluster_init(gconf, filename, errp);
-    if (!state->glfs) {
-        ret = -errno;
-        goto out;
-    }
-
-#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
-    /* Without this, if fsync fails for a recoverable reason (for instance,
-     * ENOSPC), gluster will dump its cache, preventing retries.  This means
-     * almost certain data loss.  Not all gluster versions support the
-     * 'resync-failed-syncs-after-fsync' key value, but there is no way to
-     * discover during runtime if it is supported (this api returns success for
-     * unknown key/value pairs) */
-    ret = glfs_set_xlator_option(state->glfs, "*-write-behind",
-                                 "resync-failed-syncs-after-fsync",
-                                 "on");
-    if (ret < 0) {
-        error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
-        ret = -errno;
-        goto out;
-    }
-#endif
-
-    qemu_gluster_parse_flags(bdrv_flags, &open_flags);
-
-    state->fd = glfs_open(state->glfs, gconf->image, open_flags);
-    if (!state->fd) {
-        ret = -errno;
-    }
-
-out:
-    qemu_opts_del(opts);
-    qemu_gluster_gconf_free(gconf);
-
-    if (ret) {
-        /* undo whatever was opened before the failure */
-        if (state->fd) {
-            glfs_close(state->fd);
-        }
-        if (state->glfs) {
-            glfs_fini(state->glfs);
-        }
-    }
-    return ret;
-}
-
-/*
- * Connection and image handle opened by qemu_gluster_reopen_prepare() and
- * held in BDRVReopenState.opaque until _commit adopts them or _abort closes
- * them.
- */
-typedef struct BDRVGlusterReopenState {
-    struct glfs *glfs;
-    struct glfs_fd *fd;
-} BDRVGlusterReopenState;
-
-
-/*
- * First phase of a reopen: open a second connection and image handle with
- * the new flags and park them in state->opaque.
- *
- * Returns 0 on success or a negative errno value.  Nothing is torn down
- * here on failure; state->opaque and whatever it holds are released by
- * the _abort or _commit callback.
- */
-static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
-                                       BlockReopenQueue *queue, Error **errp)
-{
-    BDRVGlusterReopenState *reop_s;
-    GlusterConf *gconf;
-    int open_flags = 0;
-    int ret = 0;
-
-    assert(state != NULL);
-    assert(state->bs != NULL);
-
-    reop_s = g_new0(BDRVGlusterReopenState, 1);
-    state->opaque = reop_s;
-
-    qemu_gluster_parse_flags(state->flags, &open_flags);
-
-    gconf = g_new0(GlusterConf, 1);
-
-    reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
-    if (!reop_s->glfs) {
-        ret = -errno;
-        goto exit;
-    }
-
-#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
-    ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
-                                 "resync-failed-syncs-after-fsync", "on");
-    if (ret < 0) {
-        error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
-        ret = -errno;
-        goto exit;
-    }
-#endif
-
-    reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
-    if (!reop_s->fd) {
-        /* reop_s->glfs will be cleaned up in _abort */
-        ret = -errno;
-    }
-
-exit:
-    /* state->opaque will be freed in either the _abort or _commit */
-    qemu_gluster_gconf_free(gconf);
-    return ret;
-}
-
-/*
- * Second phase of a successful reopen: close the handles currently in use
- * and switch the driver state over to the ones opened by _prepare.
- */
-static void qemu_gluster_reopen_commit(BDRVReopenState *state)
-{
-    BDRVGlusterState *current = state->bs->opaque;
-    BDRVGlusterReopenState *fresh = state->opaque;
-
-    /* close the old */
-    if (current->fd) {
-        glfs_close(current->fd);
-    }
-    if (current->glfs) {
-        glfs_fini(current->glfs);
-    }
-
-    /* use the newly opened image / connection */
-    current->glfs = fresh->glfs;
-    current->fd = fresh->fd;
-
-    g_free(fresh);
-    state->opaque = NULL;
-}
-
-
-/*
- * Roll back a reopen: close whatever _prepare managed to open and release
- * the reopen state.  Does nothing when state->opaque is NULL.
- */
-static void qemu_gluster_reopen_abort(BDRVReopenState *state)
-{
-    BDRVGlusterReopenState *pending = state->opaque;
-
-    if (pending != NULL) {
-        if (pending->fd) {
-            glfs_close(pending->fd);
-        }
-        if (pending->glfs) {
-            glfs_fini(pending->glfs);
-        }
-
-        g_free(pending);
-        state->opaque = NULL;
-    }
-}
-
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-/*
- * Zero nb_sectors sectors starting at sector_num with an asynchronous
- * zerofill request, yielding until the completion callback resumes the
- * coroutine.  Returns the request status, or -errno if the request could
- * not be submitted.
- */
-static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    BDRVGlusterState *s = bs->opaque;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
-    off_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    GlusterAIOCB acb = {
-        .size = size,
-        .ret = 0,
-        .coroutine = qemu_coroutine_self(),
-        .aio_context = bdrv_get_aio_context(bs),
-    };
-    int ret;
-
-    ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
-    if (ret < 0) {
-        return -errno;
-    }
-
-    qemu_coroutine_yield();
-    return acb.ret;
-}
-
-/* The zerofill API was found at build time. */
-static inline bool gluster_supports_zerofill(void)
-{
-    return true;
-}
-
-/* Synchronous zerofill of [offset, offset + size) through glfs_zerofill(). */
-static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
-{
-    return glfs_zerofill(fd, offset, size);
-}
-
-#else
-/* Built without the zerofill API. */
-static inline bool gluster_supports_zerofill(void)
-{
-    return false;
-}
-
-/* Stub used when the zerofill API is missing: does nothing, returns 0. */
-static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
-        int64_t size)
-{
-    return 0;
-}
-#endif
-
-/*
- * .bdrv_create callback: create the image named by the gluster URI
- * @filename and size it from the creation options.
- *
- * The size is BLOCK_OPT_SIZE rounded up to a whole sector.
- * BLOCK_OPT_PREALLOC may be absent, "off" or, when the zerofill API is
- * available, "full"; any other value fails with -EINVAL.  Returns 0 on
- * success or a negative errno value.
- */
-static int qemu_gluster_create(const char *filename,
-                               QemuOpts *opts, Error **errp)
-{
-    GlusterConf *gconf = g_new0(GlusterConf, 1);
-    struct glfs_fd *fd;
-    struct glfs *glfs;
-    char *tmp = NULL;
-    int64_t total_size = 0;
-    int prealloc = 0;
-    int ret = 0;
-
-    glfs = qemu_gluster_init(gconf, filename, errp);
-    if (!glfs) {
-        ret = -errno;
-        goto out;
-    }
-
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    /* absent or "off" keeps prealloc at 0 */
-    tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    if (tmp && strcmp(tmp, "off")) {
-        if (strcmp(tmp, "full") || !gluster_supports_zerofill()) {
-            error_setg(errp, "Invalid preallocation mode: '%s'"
-                " or GlusterFS doesn't support zerofill API",
-                tmp);
-            ret = -EINVAL;
-            goto out;
-        }
-        prealloc = 1;
-    }
-
-    fd = glfs_creat(glfs, gconf->image,
-        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
-    if (!fd) {
-        ret = -errno;
-        goto out;
-    }
-
-    if (glfs_ftruncate(fd, total_size)) {
-        ret = -errno;
-    } else if (prealloc && qemu_gluster_zerofill(fd, 0, total_size)) {
-        ret = -errno;
-    }
-
-    /* a failing close overrides any earlier status */
-    if (glfs_close(fd) != 0) {
-        ret = -errno;
-    }
-
-out:
-    g_free(tmp);
-    qemu_gluster_gconf_free(gconf);
-    if (glfs) {
-        glfs_fini(glfs);
-    }
-    return ret;
-}
-
-/*
- * Common body of the vectored read and write callbacks.
- *
- * Submits an asynchronous preadv (write == 0) or pwritev (write != 0) for
- * nb_sectors sectors at sector_num and yields until the completion callback
- * resumes the coroutine.  Returns the request status, or -errno if the
- * request could not be submitted.
- */
-static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
-{
-    BDRVGlusterState *s = bs->opaque;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
-    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    GlusterAIOCB acb = {
-        .size = size,
-        .ret = 0,
-        .coroutine = qemu_coroutine_self(),
-        .aio_context = bdrv_get_aio_context(bs),
-    };
-    int ret;
-
-    ret = write
-        ? glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-                             gluster_finish_aiocb, &acb)
-        : glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
-                            gluster_finish_aiocb, &acb);
-    if (ret < 0) {
-        return -errno;
-    }
-
-    qemu_coroutine_yield();
-    return acb.ret;
-}
-
-/* Resize the image to offset bytes; returns 0 or -errno. */
-static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVGlusterState *s = bs->opaque;
-
-    if (glfs_ftruncate(s->fd, offset) < 0) {
-        return -errno;
-    }
-    return 0;
-}
-
-/* Vectored read: qemu_gluster_co_rw() with write = 0. */
-static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-/* Vectored write: qemu_gluster_co_rw() with write = 1. */
-static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
-}
-
-/*
- * Close the image handle and the volume connection.
- *
- * Both pointers are checked before use and cleared afterwards, matching the
- * guarded teardown in the open and reopen paths, so a repeated call on the
- * same state does nothing.
- */
-static void qemu_gluster_close(BlockDriverState *bs)
-{
-    BDRVGlusterState *s = bs->opaque;
-
-    if (s->fd) {
-        glfs_close(s->fd);
-        s->fd = NULL;
-    }
-    if (s->glfs) {
-        glfs_fini(s->glfs);
-        s->glfs = NULL;
-    }
-}
-
-/*
- * Flush the image with an asynchronous fsync, yielding until it completes.
- *
- * Returns the request status.  If the request cannot be submitted or
- * completes with an error, the image is closed and bs->drv is cleared
- * before the error is returned (see the comment in the body).
- */
-static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
-{
-    BDRVGlusterState *s = bs->opaque;
-    GlusterAIOCB acb = {
-        .size = 0,
-        .ret = 0,
-        .coroutine = qemu_coroutine_self(),
-        .aio_context = bdrv_get_aio_context(bs),
-    };
-    bool failed;
-    int ret;
-
-    ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
-    if (ret < 0) {
-        /* submission failed */
-        ret = -errno;
-        failed = true;
-    } else {
-        qemu_coroutine_yield();
-        ret = acb.ret;
-        failed = acb.ret < 0;
-    }
-
-    if (!failed) {
-        return ret;
-    }
-
-    /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain its cache
-     * after a fsync failure, so we have no way of allowing the guest to safely
-     * continue.  Gluster versions prior to 3.5.6 don't retain the cache
-     * either, but will invalidate the fd on error, so this is again our only
-     * option.
-     *
-     * The 'resync-failed-syncs-after-fsync' xlator option for the
-     * write-behind cache will cause later gluster versions to retain its
-     * cache after error, so long as the fd remains open.  However, we
-     * currently have no way of knowing if this option is supported.
-     *
-     * TODO: Once gluster provides a way for us to determine if the option
-     * is supported, bypass the closure and setting drv to NULL.  */
-    qemu_gluster_close(bs);
-    bs->drv = NULL;
-    return ret;
-}
-
-#ifdef CONFIG_GLUSTERFS_DISCARD
-/*
- * Discard nb_sectors sectors starting at sector_num with an asynchronous
- * request, yielding until it completes.  The expected size is left at 0,
- * so the callback counts only a zero result as success.  Returns the
- * request status, or -errno if the request could not be submitted.
- */
-static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors)
-{
-    BDRVGlusterState *s = bs->opaque;
-    off_t offset = sector_num * BDRV_SECTOR_SIZE;
-    size_t size = nb_sectors * BDRV_SECTOR_SIZE;
-    GlusterAIOCB acb = {
-        .size = 0,
-        .ret = 0,
-        .coroutine = qemu_coroutine_self(),
-        .aio_context = bdrv_get_aio_context(bs),
-    };
-    int ret;
-
-    ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
-    if (ret < 0) {
-        return -errno;
-    }
-
-    qemu_coroutine_yield();
-    return acb.ret;
-}
-#endif
-
-/*
- * Image length in bytes, obtained by seeking to the end of the file.
- * Returns -errno if the seek fails.
- */
-static int64_t qemu_gluster_getlength(BlockDriverState *bs)
-{
-    BDRVGlusterState *s = bs->opaque;
-    int64_t length = glfs_lseek(s->fd, 0, SEEK_END);
-
-    if (length < 0) {
-        return -errno;
-    }
-    return length;
-}
-
-/*
- * Bytes actually allocated for the image, computed as st_blocks * 512 from
- * glfs_fstat().  Returns -errno if the stat call fails.
- *
- * st_blocks is widened to int64_t before the multiplication so the product
- * cannot overflow where blkcnt_t is narrower than 64 bits.
- */
-static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
-{
-    BDRVGlusterState *s = bs->opaque;
-    struct stat st;
-    int ret;
-
-    ret = glfs_fstat(s->fd, &st);
-    if (ret < 0) {
-        return -errno;
-    }
-
-    return (int64_t)st.st_blocks * 512;
-}
-
-/* Always reports 0: new images are not assumed to read back as zeroes. */
-static int qemu_gluster_has_zero_init(BlockDriverState *bs)
-{
-    /* GlusterFS volume could be backed by a block device */
-    return 0;
-}
-
-/* Creation options read by qemu_gluster_create(): image size and
- * preallocation mode. */
-static QemuOptsList qemu_gluster_create_opts = {
-    .name = "qemu-gluster-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, full)"
-        },
-        { /* end of list */ }
-    }
-};
-
-/*
- * Driver table for the plain "gluster" protocol prefix (parsed as the tcp
- * transport).  The four gluster tables in this file differ only in
- * .protocol_name; the discard and write_zeroes callbacks are compiled in
- * only when the matching CONFIG_GLUSTERFS_* macro is defined.
- */
-static BlockDriver bdrv_gluster = {
-    .format_name                  = "gluster",
-    .protocol_name                = "gluster",
-    .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
-    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
-    .bdrv_close                   = qemu_gluster_close,
-    .bdrv_create                  = qemu_gluster_create,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
-#endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
-};
-
-/* Driver table for the "gluster+tcp" protocol prefix; same callbacks as the
- * plain "gluster" table. */
-static BlockDriver bdrv_gluster_tcp = {
-    .format_name                  = "gluster",
-    .protocol_name                = "gluster+tcp",
-    .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
-    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
-    .bdrv_close                   = qemu_gluster_close,
-    .bdrv_create                  = qemu_gluster_create,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
-#endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
-};
-
-/* Driver table for the "gluster+unix" protocol prefix; same callbacks as
- * the plain "gluster" table. */
-static BlockDriver bdrv_gluster_unix = {
-    .format_name                  = "gluster",
-    .protocol_name                = "gluster+unix",
-    .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
-    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
-    .bdrv_close                   = qemu_gluster_close,
-    .bdrv_create                  = qemu_gluster_create,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
-#endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
-};
-
-/* Driver table for the "gluster+rdma" protocol prefix; same callbacks as
- * the plain "gluster" table. */
-static BlockDriver bdrv_gluster_rdma = {
-    .format_name                  = "gluster",
-    .protocol_name                = "gluster+rdma",
-    .instance_size                = sizeof(BDRVGlusterState),
-    .bdrv_needs_filename          = true,
-    .bdrv_file_open               = qemu_gluster_open,
-    .bdrv_reopen_prepare          = qemu_gluster_reopen_prepare,
-    .bdrv_reopen_commit           = qemu_gluster_reopen_commit,
-    .bdrv_reopen_abort            = qemu_gluster_reopen_abort,
-    .bdrv_close                   = qemu_gluster_close,
-    .bdrv_create                  = qemu_gluster_create,
-    .bdrv_getlength               = qemu_gluster_getlength,
-    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
-    .bdrv_truncate                = qemu_gluster_truncate,
-    .bdrv_co_readv                = qemu_gluster_co_readv,
-    .bdrv_co_writev               = qemu_gluster_co_writev,
-    .bdrv_co_flush_to_disk        = qemu_gluster_co_flush_to_disk,
-    .bdrv_has_zero_init           = qemu_gluster_has_zero_init,
-#ifdef CONFIG_GLUSTERFS_DISCARD
-    .bdrv_co_discard              = qemu_gluster_co_discard,
-#endif
-#ifdef CONFIG_GLUSTERFS_ZEROFILL
-    .bdrv_co_write_zeroes         = qemu_gluster_co_write_zeroes,
-#endif
-    .create_opts                  = &qemu_gluster_create_opts,
-};
-
-/* Register the four gluster driver tables (rdma, unix, tcp, then the plain
- * "gluster" prefix) with the block layer. */
-static void bdrv_gluster_init(void)
-{
-    bdrv_register(&bdrv_gluster_rdma);
-    bdrv_register(&bdrv_gluster_unix);
-    bdrv_register(&bdrv_gluster_tcp);
-    bdrv_register(&bdrv_gluster);
-}
-
-/* NOTE(review): block_init() presumably arranges for the function to run at
- * start-up; its definition is not visible here -- confirm. */
-block_init(bdrv_gluster_init);
diff --git a/qemu/block/io.c b/qemu/block/io.c
deleted file mode 100644
index a7dbf85b1..000000000
--- a/qemu/block/io.c
+++ /dev/null
@@ -1,2810 +0,0 @@
-/*
- * Block layer I/O functions
- *
- * Copyright (c) 2003 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "trace.h"
-#include "sysemu/block-backend.h"
-#include "block/blockjob.h"
-#include "block/block_int.h"
-#include "block/throttle-groups.h"
-#include "qemu/cutils.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-
-#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque);
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov);
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BdrvRequestFlags flags,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque,
-                                         bool is_write);
-static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
-
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
-                        ThrottleConfig *cfg)
-{
-    int i;
-
-    throttle_group_config(bs, cfg);
-
-    for (i = 0; i < 2; i++) {
-        qemu_co_enter_next(&bs->throttled_reqs[i]);
-    }
-}
-
-/* this function drain all the throttled IOs */
-static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
-{
-    bool drained = false;
-    bool enabled = bs->io_limits_enabled;
-    int i;
-
-    bs->io_limits_enabled = false;
-
-    for (i = 0; i < 2; i++) {
-        while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
-            drained = true;
-        }
-    }
-
-    bs->io_limits_enabled = enabled;
-
-    return drained;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
-    bs->io_limits_enabled = false;
-    bdrv_start_throttled_reqs(bs);
-    throttle_group_unregister_bs(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
-    assert(!bs->io_limits_enabled);
-    throttle_group_register_bs(bs, group);
-    bs->io_limits_enabled = true;
-}
-
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
-{
-    /* this bs is not part of any group */
-    if (!bs->throttle_state) {
-        return;
-    }
-
-    /* this bs is a part of the same group than the one we want */
-    if (!g_strcmp0(throttle_group_get_name(bs), group)) {
-        return;
-    }
-
-    /* need to change the group this bs belong to */
-    bdrv_io_limits_disable(bs);
-    bdrv_io_limits_enable(bs, group);
-}
-
-void bdrv_setup_io_funcs(BlockDriver *bdrv)
-{
-    /* Block drivers without coroutine functions need emulation */
-    if (!bdrv->bdrv_co_readv) {
-        bdrv->bdrv_co_readv = bdrv_co_readv_em;
-        bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
-        /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
-         * the block driver lacks aio we need to emulate that too.
-         */
-        if (!bdrv->bdrv_aio_readv) {
-            /* add AIO emulation layer */
-            bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
-            bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
-        }
-    }
-}
-
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BlockDriver *drv = bs->drv;
-    Error *local_err = NULL;
-
-    memset(&bs->bl, 0, sizeof(bs->bl));
-
-    if (!drv) {
-        return;
-    }
-
-    /* Take some limits from the children as a default */
-    if (bs->file) {
-        bdrv_refresh_limits(bs->file->bs, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-        bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
-        bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
-        bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
-        bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
-        bs->bl.max_iov = bs->file->bs->bl.max_iov;
-    } else {
-        bs->bl.min_mem_alignment = 512;
-        bs->bl.opt_mem_alignment = getpagesize();
-
-        /* Safe default since most protocols use readv()/writev()/etc */
-        bs->bl.max_iov = IOV_MAX;
-    }
-
-    if (bs->backing) {
-        bdrv_refresh_limits(bs->backing->bs, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-        bs->bl.opt_transfer_length =
-            MAX(bs->bl.opt_transfer_length,
-                bs->backing->bs->bl.opt_transfer_length);
-        bs->bl.max_transfer_length =
-            MIN_NON_ZERO(bs->bl.max_transfer_length,
-                         bs->backing->bs->bl.max_transfer_length);
-        bs->bl.opt_mem_alignment =
-            MAX(bs->bl.opt_mem_alignment,
-                bs->backing->bs->bl.opt_mem_alignment);
-        bs->bl.min_mem_alignment =
-            MAX(bs->bl.min_mem_alignment,
-                bs->backing->bs->bl.min_mem_alignment);
-        bs->bl.max_iov =
-            MIN(bs->bl.max_iov,
-                bs->backing->bs->bl.max_iov);
-    }
-
-    /* Then let the driver override it */
-    if (drv->bdrv_refresh_limits) {
-        drv->bdrv_refresh_limits(bs, errp);
-    }
-}
-
-/**
- * The copy-on-read flag is actually a reference count so multiple users may
- * use the feature without worrying about clobbering its previous state.
- * Copy-on-read stays enabled until all users have called to disable it.
- */
-void bdrv_enable_copy_on_read(BlockDriverState *bs)
-{
-    bs->copy_on_read++;
-}
-
-void bdrv_disable_copy_on_read(BlockDriverState *bs)
-{
-    assert(bs->copy_on_read > 0);
-    bs->copy_on_read--;
-}
-
-/* Check if any requests are in-flight (including throttled requests) */
-bool bdrv_requests_pending(BlockDriverState *bs)
-{
-    BdrvChild *child;
-
-    if (!QLIST_EMPTY(&bs->tracked_requests)) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
-        return true;
-    }
-    if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
-        return true;
-    }
-
-    QLIST_FOREACH(child, &bs->children, next) {
-        if (bdrv_requests_pending(child->bs)) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-static void bdrv_drain_recurse(BlockDriverState *bs)
-{
-    BdrvChild *child;
-
-    if (bs->drv && bs->drv->bdrv_drain) {
-        bs->drv->bdrv_drain(bs);
-    }
-    QLIST_FOREACH(child, &bs->children, next) {
-        bdrv_drain_recurse(child->bs);
-    }
-}
-
-typedef struct {
-    Coroutine *co;
-    BlockDriverState *bs;
-    QEMUBH *bh;
-    bool done;
-} BdrvCoDrainData;
-
-static void bdrv_co_drain_bh_cb(void *opaque)
-{
-    BdrvCoDrainData *data = opaque;
-    Coroutine *co = data->co;
-
-    qemu_bh_delete(data->bh);
-    bdrv_drain(data->bs);
-    data->done = true;
-    qemu_coroutine_enter(co, NULL);
-}
-
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
-{
-    BdrvCoDrainData data;
-
-    /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
-     * other coroutines run if they were queued from
-     * qemu_co_queue_run_restart(). */
-
-    assert(qemu_in_coroutine());
-    data = (BdrvCoDrainData) {
-        .co = qemu_coroutine_self(),
-        .bs = bs,
-        .done = false,
-        .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data),
-    };
-    qemu_bh_schedule(data.bh);
-
-    qemu_coroutine_yield();
-    /* If we are resumed from some other event (such as an aio completion or a
-     * timer callback), it is a bug in the caller that should be fixed. */
-    assert(data.done);
-}
-
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree,
- * and suspend block driver's internal I/O until next request arrives.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- *
- * Only this BlockDriverState's AioContext is run, so in-flight requests must
- * not depend on events in other AioContexts.  In that case, use
- * bdrv_drain_all() instead.
- */
-void bdrv_drain(BlockDriverState *bs)
-{
-    bool busy = true;
-
-    bdrv_drain_recurse(bs);
-    if (qemu_in_coroutine()) {
-        bdrv_co_drain(bs);
-        return;
-    }
-    while (busy) {
-        /* Keep iterating */
-         bdrv_flush_io_queue(bs);
-         busy = bdrv_requests_pending(bs);
-         busy |= aio_poll(bdrv_get_aio_context(bs), busy);
-    }
-}
-
-/*
- * Wait for pending requests to complete across all BlockDriverStates
- *
- * This function does not flush data to disk, use bdrv_flush_all() for that
- * after calling this function.
- */
-void bdrv_drain_all(void)
-{
-    /* Always run first iteration so any pending completion BHs run */
-    bool busy = true;
-    BlockDriverState *bs = NULL;
-    GSList *aio_ctxs = NULL, *ctx;
-
-    while ((bs = bdrv_next(bs))) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(aio_context);
-        if (bs->job) {
-            block_job_pause(bs->job);
-        }
-        bdrv_drain_recurse(bs);
-        aio_context_release(aio_context);
-
-        if (!g_slist_find(aio_ctxs, aio_context)) {
-            aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
-        }
-    }
-
-    /* Note that completion of an asynchronous I/O operation can trigger any
-     * number of other I/O operations on other devices---for example a
-     * coroutine can submit an I/O request to another device in response to
-     * request completion.  Therefore we must keep looping until there was no
-     * more activity rather than simply draining each device independently.
-     */
-    while (busy) {
-        busy = false;
-
-        for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
-            AioContext *aio_context = ctx->data;
-            bs = NULL;
-
-            aio_context_acquire(aio_context);
-            while ((bs = bdrv_next(bs))) {
-                if (aio_context == bdrv_get_aio_context(bs)) {
-                    bdrv_flush_io_queue(bs);
-                    if (bdrv_requests_pending(bs)) {
-                        busy = true;
-                        aio_poll(aio_context, busy);
-                    }
-                }
-            }
-            busy |= aio_poll(aio_context, false);
-            aio_context_release(aio_context);
-        }
-    }
-
-    bs = NULL;
-    while ((bs = bdrv_next(bs))) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(aio_context);
-        if (bs->job) {
-            block_job_resume(bs->job);
-        }
-        aio_context_release(aio_context);
-    }
-    g_slist_free(aio_ctxs);
-}
-
-/**
- * Remove an active request from the tracked requests list
- *
- * This function should be called when a tracked request is completing.
- */
-static void tracked_request_end(BdrvTrackedRequest *req)
-{
-    if (req->serialising) {
-        req->bs->serialising_in_flight--;
-    }
-
-    QLIST_REMOVE(req, list);
-    qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-/**
- * Add an active request to the tracked requests list
- */
-static void tracked_request_begin(BdrvTrackedRequest *req,
-                                  BlockDriverState *bs,
-                                  int64_t offset,
-                                  unsigned int bytes,
-                                  enum BdrvTrackedRequestType type)
-{
-    *req = (BdrvTrackedRequest){
-        .bs = bs,
-        .offset         = offset,
-        .bytes          = bytes,
-        .type           = type,
-        .co             = qemu_coroutine_self(),
-        .serialising    = false,
-        .overlap_offset = offset,
-        .overlap_bytes  = bytes,
-    };
-
-    qemu_co_queue_init(&req->wait_queue);
-
-    QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-}
-
-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
-{
-    int64_t overlap_offset = req->offset & ~(align - 1);
-    unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
-                               - overlap_offset;
-
-    if (!req->serialising) {
-        req->bs->serialising_in_flight++;
-        req->serialising = true;
-    }
-
-    req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
-    req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
-}
-
-/**
- * Round a region to cluster boundaries
- */
-void bdrv_round_to_clusters(BlockDriverState *bs,
-                            int64_t sector_num, int nb_sectors,
-                            int64_t *cluster_sector_num,
-                            int *cluster_nb_sectors)
-{
-    BlockDriverInfo bdi;
-
-    if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
-        *cluster_sector_num = sector_num;
-        *cluster_nb_sectors = nb_sectors;
-    } else {
-        int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
-        *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
-        *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
-                                            nb_sectors, c);
-    }
-}
-
-static int bdrv_get_cluster_size(BlockDriverState *bs)
-{
-    BlockDriverInfo bdi;
-    int ret;
-
-    ret = bdrv_get_info(bs, &bdi);
-    if (ret < 0 || bdi.cluster_size == 0) {
-        return bs->request_alignment;
-    } else {
-        return bdi.cluster_size;
-    }
-}
-
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
-                                     int64_t offset, unsigned int bytes)
-{
-    /*        aaaa   bbbb */
-    if (offset >= req->overlap_offset + req->overlap_bytes) {
-        return false;
-    }
-    /* bbbb   aaaa        */
-    if (req->overlap_offset >= offset + bytes) {
-        return false;
-    }
-    return true;
-}
-
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
-{
-    BlockDriverState *bs = self->bs;
-    BdrvTrackedRequest *req;
-    bool retry;
-    bool waited = false;
-
-    if (!bs->serialising_in_flight) {
-        return false;
-    }
-
-    do {
-        retry = false;
-        QLIST_FOREACH(req, &bs->tracked_requests, list) {
-            if (req == self || (!req->serialising && !self->serialising)) {
-                continue;
-            }
-            if (tracked_request_overlaps(req, self->overlap_offset,
-                                         self->overlap_bytes))
-            {
-                /* Hitting this means there was a reentrant request, for
-                 * example, a block driver issuing nested requests.  This must
-                 * never happen since it means deadlock.
-                 */
-                assert(qemu_coroutine_self() != req->co);
-
-                /* If the request is already (indirectly) waiting for us, or
-                 * will wait for us as soon as it wakes up, then just go on
-                 * (instead of producing a deadlock in the former case). */
-                if (!req->waiting_for) {
-                    self->waiting_for = req;
-                    qemu_co_queue_wait(&req->wait_queue);
-                    self->waiting_for = NULL;
-                    retry = true;
-                    waited = true;
-                    break;
-                }
-            }
-        }
-    } while (retry);
-
-    return waited;
-}
-
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
-                                   size_t size)
-{
-    if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
-        return -EIO;
-    }
-
-    if (!bdrv_is_inserted(bs)) {
-        return -ENOMEDIUM;
-    }
-
-    if (offset < 0) {
-        return -EIO;
-    }
-
-    return 0;
-}
-
-static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
-                              int nb_sectors)
-{
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EIO;
-    }
-
-    return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
-                                   nb_sectors * BDRV_SECTOR_SIZE);
-}
-
-typedef struct RwCo {
-    BlockDriverState *bs;
-    int64_t offset;
-    QEMUIOVector *qiov;
-    bool is_write;
-    int ret;
-    BdrvRequestFlags flags;
-} RwCo;
-
-static void coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
-    RwCo *rwco = opaque;
-
-    if (!rwco->is_write) {
-        rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
-                                      rwco->qiov->size, rwco->qiov,
-                                      rwco->flags);
-    } else {
-        rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
-                                       rwco->qiov->size, rwco->qiov,
-                                       rwco->flags);
-    }
-}
-
-/*
- * Process a vectored synchronous request using coroutines
- */
-static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
-                        QEMUIOVector *qiov, bool is_write,
-                        BdrvRequestFlags flags)
-{
-    Coroutine *co;
-    RwCo rwco = {
-        .bs = bs,
-        .offset = offset,
-        .qiov = qiov,
-        .is_write = is_write,
-        .ret = NOT_DONE,
-        .flags = flags,
-    };
-
-    /**
-     * In sync call context, when the vcpu is blocked, this throttling timer
-     * will not fire; so the I/O throttling function has to be disabled here
-     * if it has been enabled.
-     */
-    if (bs->io_limits_enabled) {
-        fprintf(stderr, "Disabling I/O throttling on '%s' due "
-                        "to synchronous I/O.\n", bdrv_get_device_name(bs));
-        bdrv_io_limits_disable(bs);
-    }
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_rw_co_entry(&rwco);
-    } else {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
-
-        co = qemu_coroutine_create(bdrv_rw_co_entry);
-        qemu_coroutine_enter(co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(aio_context, true);
-        }
-    }
-    return rwco.ret;
-}
-
-/*
- * Process a synchronous request using coroutines
- */
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
-                      int nb_sectors, bool is_write, BdrvRequestFlags flags)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base = (void *)buf,
-        .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-    };
-
-    if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-        return -EINVAL;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
-                        &qiov, is_write, flags);
-}
-
-/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
-              uint8_t *buf, int nb_sectors)
-{
-    return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
-}
-
-/* Return < 0 if error. Important errors are:
-  -EIO         generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL      Invalid sector number or nb_sectors
-  -EACCES      Trying to write a read-only device
-*/
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
-               const uint8_t *buf, int nb_sectors)
-{
-    return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
-}
-
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
-                      int nb_sectors, BdrvRequestFlags flags)
-{
-    return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
-                      BDRV_REQ_ZERO_WRITE | flags);
-}
-
-/*
- * Completely zero out a block device with the help of bdrv_write_zeroes.
- * The operation is sped up by checking the block status and only writing
- * zeroes to the device if they currently do not return zeroes. Optional
- * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
- *
- * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
- */
-int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
-{
-    int64_t target_sectors, ret, nb_sectors, sector_num = 0;
-    BlockDriverState *file;
-    int n;
-
-    target_sectors = bdrv_nb_sectors(bs);
-    if (target_sectors < 0) {
-        return target_sectors;
-    }
-
-    for (;;) {
-        nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
-        if (nb_sectors <= 0) {
-            return 0;
-        }
-        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file);
-        if (ret < 0) {
-            error_report("error getting block status at sector %" PRId64 ": %s",
-                         sector_num, strerror(-ret));
-            return ret;
-        }
-        if (ret & BDRV_BLOCK_ZERO) {
-            sector_num += n;
-            continue;
-        }
-        ret = bdrv_write_zeroes(bs, sector_num, n, flags);
-        if (ret < 0) {
-            error_report("error writing zeroes at sector %" PRId64 ": %s",
-                         sector_num, strerror(-ret));
-            return ret;
-        }
-        sector_num += n;
-    }
-}
-
-int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base = (void *)buf,
-        .iov_len = bytes,
-    };
-    int ret;
-
-    if (bytes < 0) {
-        return -EINVAL;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return bytes;
-}
-
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
-{
-    int ret;
-
-    ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return qiov->size;
-}
-
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
-                const void *buf, int bytes)
-{
-    QEMUIOVector qiov;
-    struct iovec iov = {
-        .iov_base   = (void *) buf,
-        .iov_len    = bytes,
-    };
-
-    if (bytes < 0) {
-        return -EINVAL;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    return bdrv_pwritev(bs, offset, &qiov);
-}
-
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
-    const void *buf, int count)
-{
-    int ret;
-
-    ret = bdrv_pwrite(bs, offset, buf, count);
-    if (ret < 0) {
-        return ret;
-    }
-
-    ret = bdrv_flush(bs);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    /* Perform I/O through a temporary buffer so that users who scribble over
-     * their read buffer while the operation is in progress do not end up
-     * modifying the image file.  This is critical for zero-copy guest I/O
-     * where anything might happen inside guest memory.
-     */
-    void *bounce_buffer;
-
-    BlockDriver *drv = bs->drv;
-    struct iovec iov;
-    QEMUIOVector bounce_qiov;
-    int64_t cluster_sector_num;
-    int cluster_nb_sectors;
-    size_t skip_bytes;
-    int ret;
-
-    /* Cover entire cluster so no additional backing file I/O is required when
-     * allocating cluster in the image file.
-     */
-    bdrv_round_to_clusters(bs, sector_num, nb_sectors,
-                           &cluster_sector_num, &cluster_nb_sectors);
-
-    trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
-                                   cluster_sector_num, cluster_nb_sectors);
-
-    iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
-    iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
-    if (bounce_buffer == NULL) {
-        ret = -ENOMEM;
-        goto err;
-    }
-
-    qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
-    ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
-                             &bounce_qiov);
-    if (ret < 0) {
-        goto err;
-    }
-
-    if (drv->bdrv_co_write_zeroes &&
-        buffer_is_zero(bounce_buffer, iov.iov_len)) {
-        ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
-                                      cluster_nb_sectors, 0);
-    } else {
-        /* This does not change the data on the disk, it is not necessary
-         * to flush even in cache=writethrough mode.
-         */
-        ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
-                                  &bounce_qiov);
-    }
-
-    if (ret < 0) {
-        /* It might be okay to ignore write errors for guest requests.  If this
-         * is a deliberate copy-on-read then we don't want to ignore the error.
-         * Simply report it in all cases.
-         */
-        goto err;
-    }
-
-    skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
-    qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
-                        nb_sectors * BDRV_SECTOR_SIZE);
-
-err:
-    qemu_vfree(bounce_buffer);
-    return ret;
-}
-
-/*
- * Forwards an already correctly aligned request to the BlockDriver. This
- * handles copy on read and zeroing after EOF; any other features must be
- * implemented by the caller.
- */
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-    int64_t align, QEMUIOVector *qiov, int flags)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert(!qiov || bytes == qiov->size);
-    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
-
-    /* Handle Copy on Read and associated serialisation */
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        /* If we touch the same cluster it counts as an overlap.  This
-         * guarantees that allocating writes will be serialized and not race
-         * with each other for the same cluster.  For example, in copy-on-read
-         * it ensures that the CoR read and write operations are atomic and
-         * guest writes cannot interleave between them. */
-        mark_request_serialising(req, bdrv_get_cluster_size(bs));
-    }
-
-    if (!(flags & BDRV_REQ_NO_SERIALISING)) {
-        wait_serialising_requests(req);
-    }
-
-    if (flags & BDRV_REQ_COPY_ON_READ) {
-        int pnum;
-
-        ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
-        if (ret < 0) {
-            goto out;
-        }
-
-        if (!ret || pnum != nb_sectors) {
-            ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
-            goto out;
-        }
-    }
-
-    /* Forward the request to the BlockDriver */
-    if (!bs->zero_beyond_eof) {
-        ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-    } else {
-        /* Read zeros after EOF */
-        int64_t total_sectors, max_nb_sectors;
-
-        total_sectors = bdrv_nb_sectors(bs);
-        if (total_sectors < 0) {
-            ret = total_sectors;
-            goto out;
-        }
-
-        max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
-                                  align >> BDRV_SECTOR_BITS);
-        if (nb_sectors < max_nb_sectors) {
-            ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
-        } else if (max_nb_sectors > 0) {
-            QEMUIOVector local_qiov;
-
-            qemu_iovec_init(&local_qiov, qiov->niov);
-            qemu_iovec_concat(&local_qiov, qiov, 0,
-                              max_nb_sectors * BDRV_SECTOR_SIZE);
-
-            ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
-                                     &local_qiov);
-
-            qemu_iovec_destroy(&local_qiov);
-        } else {
-            ret = 0;
-        }
-
-        /* Reading beyond end of file is supposed to produce zeroes */
-        if (ret == 0 && total_sectors < sector_num + nb_sectors) {
-            uint64_t offset = MAX(0, total_sectors - sector_num);
-            uint64_t bytes = (sector_num + nb_sectors - offset) *
-                              BDRV_SECTOR_SIZE;
-            qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
-        }
-    }
-
-out:
-    return ret;
-}
-
-/*
- * Handle a read request in coroutine context
- *
- * Reads 'bytes' bytes starting at byte 'offset' of 'bs' into 'qiov'.  If the
- * request is not aligned to MAX(BDRV_SECTOR_SIZE, bs->request_alignment), it
- * is widened by padding a local I/O vector with temporary head and/or tail
- * buffers; the padded request is then passed to bdrv_aligned_preadv() as a
- * tracked request.
- *
- * Returns -ENOMEDIUM if 'bs' has no driver, the negative result of
- * bdrv_check_byte_request() if that check fails, and otherwise whatever
- * bdrv_aligned_preadv() returns.
- */
-int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    BlockDriver *drv = bs->drv;
-    BdrvTrackedRequest req;
-
-    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
-    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
-    uint8_t *head_buf = NULL;
-    uint8_t *tail_buf = NULL;
-    QEMUIOVector local_qiov;
-    bool use_local_qiov = false;
-    int ret;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-
-    ret = bdrv_check_byte_request(bs, offset, bytes);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Don't do copy-on-read if we read data before write operation */
-    if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
-        flags |= BDRV_REQ_COPY_ON_READ;
-    }
-
-    /* throttling disk I/O */
-    if (bs->io_limits_enabled) {
-        throttle_group_co_io_limits_intercept(bs, bytes, false);
-    }
-
-    /* Align read if necessary by padding qiov */
-    if (offset & (align - 1)) {
-        /* Unaligned start: prepend a scratch buffer that receives the bytes
-         * between the aligned-down offset and the caller's offset. */
-        head_buf = qemu_blockalign(bs, align);
-        qemu_iovec_init(&local_qiov, qiov->niov + 2);
-        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
-        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-        use_local_qiov = true;
-
-        bytes += offset & (align - 1);
-        offset = offset & ~(align - 1);
-    }
-
-    if ((offset + bytes) & (align - 1)) {
-        /* Unaligned end: append a scratch buffer covering the bytes up to the
-         * next alignment boundary.  local_qiov may already exist from the
-         * head padding above. */
-        if (!use_local_qiov) {
-            qemu_iovec_init(&local_qiov, qiov->niov + 1);
-            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-            use_local_qiov = true;
-        }
-        tail_buf = qemu_blockalign(bs, align);
-        qemu_iovec_add(&local_qiov, tail_buf,
-                       align - ((offset + bytes) & (align - 1)));
-
-        bytes = ROUND_UP(bytes, align);
-    }
-
-    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
-    ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
-                              use_local_qiov ? &local_qiov : qiov,
-                              flags);
-    tracked_request_end(&req);
-
-    /* The scratch buffers only exist when a local vector was built. */
-    if (use_local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-        qemu_vfree(head_buf);
-        qemu_vfree(tail_buf);
-    }
-
-    return ret;
-}
-
-/*
- * Sector-based front end for bdrv_co_do_preadv(): rejects sector counts
- * outside [0, BDRV_REQUEST_MAX_SECTORS] with -EINVAL, otherwise converts the
- * sector position and count to bytes and forwards the request.
- */
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    int64_t byte_offset;
-    unsigned int byte_count;
-
-    if (!(nb_sectors >= 0 && nb_sectors <= BDRV_REQUEST_MAX_SECTORS)) {
-        return -EINVAL;
-    }
-
-    byte_offset = sector_num << BDRV_SECTOR_BITS;
-    byte_count = nb_sectors << BDRV_SECTOR_BITS;
-
-    return bdrv_co_do_preadv(bs, byte_offset, byte_count, qiov, flags);
-}
-
-/*
- * Trace and issue a sector-based coroutine read with no request flags set.
- * Returns the result of bdrv_co_do_readv().
- */
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, QEMUIOVector *qiov)
-{
-    const BdrvRequestFlags no_flags = 0;
-
-    trace_bdrv_co_readv(bs, sector_num, nb_sectors);
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, no_flags);
-}
-
-/*
- * Trace and issue a sector-based coroutine read with
- * BDRV_REQ_NO_SERIALISING set.  Returns the result of bdrv_co_do_readv().
- */
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    const BdrvRequestFlags read_flags = BDRV_REQ_NO_SERIALISING;
-
-    trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, read_flags);
-}
-
-/*
- * Trace and issue a sector-based coroutine read with BDRV_REQ_COPY_ON_READ
- * set.  Returns the result of bdrv_co_do_readv().
- */
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    const BdrvRequestFlags read_flags = BDRV_REQ_COPY_ON_READ;
-
-    trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-    return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, read_flags);
-}
-
-/* Upper bound, in sectors, on the zeroed bounce buffer used by the
- * bdrv_co_writev() fallback in bdrv_co_do_write_zeroes(). */
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
-
-/*
- * Write zeroes to 'nb_sectors' sectors starting at 'sector_num'.
- *
- * The range is processed in chunks.  Each chunk is first offered to the
- * driver's bdrv_co_write_zeroes callback (if any); when that is missing or
- * returns -ENOTSUP, a zero-filled bounce buffer is written with
- * bdrv_co_writev() instead.
- *
- * Returns 0 on success (including when nb_sectors <= 0), -ENOMEM if the
- * bounce buffer cannot be allocated, or the first non-zero result returned
- * by the driver, at which point the loop stops.
- */
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    BlockDriver *drv = bs->drv;
-    QEMUIOVector qiov;
-    struct iovec iov = {0};
-    int ret = 0;
-
-    int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
-                                        BDRV_REQUEST_MAX_SECTORS);
-
-    while (nb_sectors > 0 && !ret) {
-        int num = nb_sectors;
-
-        /* Align request.  Block drivers can expect the "bulk" of the request
-         * to be aligned.
-         */
-        if (bs->bl.write_zeroes_alignment
-            && num > bs->bl.write_zeroes_alignment) {
-            if (sector_num % bs->bl.write_zeroes_alignment != 0) {
-                /* Make a small request up to the first aligned sector.  */
-                num = bs->bl.write_zeroes_alignment;
-                num -= sector_num % bs->bl.write_zeroes_alignment;
-            } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
-                /* Shorten the request to the last aligned sector.  num cannot
-                 * underflow because num > bs->bl.write_zeroes_alignment.
-                 */
-                num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
-            }
-        }
-
-        /* limit request size */
-        if (num > max_write_zeroes) {
-            num = max_write_zeroes;
-        }
-
-        /* Default to "unsupported" so that a driver without the callback
-         * falls through to the bounce-buffer path below. */
-        ret = -ENOTSUP;
-        /* First try the efficient write zeroes operation */
-        if (drv->bdrv_co_write_zeroes) {
-            ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
-        }
-
-        if (ret == -ENOTSUP) {
-            /* Fall back to bounce buffer if write zeroes is unsupported */
-            int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
-                                            MAX_WRITE_ZEROES_BOUNCE_BUFFER);
-            num = MIN(num, max_xfer_len);
-            iov.iov_len = num * BDRV_SECTOR_SIZE;
-            /* A buffer kept from a previous iteration is reused as is. */
-            if (iov.iov_base == NULL) {
-                iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
-                if (iov.iov_base == NULL) {
-                    ret = -ENOMEM;
-                    goto fail;
-                }
-                memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
-            }
-            qemu_iovec_init_external(&qiov, &iov, 1);
-
-            ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
-
-            /* Keep bounce buffer around if it is big enough for
-             * all future requests.
-             */
-            if (num < max_xfer_len) {
-                qemu_vfree(iov.iov_base);
-                iov.iov_base = NULL;
-            }
-        }
-
-        sector_num += num;
-        nb_sectors -= num;
-    }
-
-fail:
-    /* iov.iov_base is NULL here unless a full-size buffer was retained. */
-    qemu_vfree(iov.iov_base);
-    return ret;
-}
-
-/*
- * Forwards an already correctly aligned write request to the BlockDriver.
- *
- * 'offset' and 'bytes' must be multiples of BDRV_SECTOR_SIZE and, when 'qiov'
- * is non-NULL, 'bytes' must equal qiov->size (all asserted below).  'req' is
- * the tracked request covering the range.
- *
- * After waiting for serialising requests and running the before-write
- * notifiers, the request is dispatched as a zero write, a flags-aware write
- * or a plain write depending on 'flags' and on which callbacks the driver
- * provides.
- *
- * Returns the negative value from a failing notifier, otherwise the result
- * of the driver call (or of bdrv_co_flush() when BDRV_REQ_FUA has to be
- * emulated after a successful write).
- */
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
-    BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
-    QEMUIOVector *qiov, int flags)
-{
-    BlockDriver *drv = bs->drv;
-    bool waited;
-    int ret;
-
-    int64_t sector_num = offset >> BDRV_SECTOR_BITS;
-    unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
-    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    assert(!qiov || bytes == qiov->size);
-    assert((bs->open_flags & BDRV_O_NO_IO) == 0);
-
-    waited = wait_serialising_requests(req);
-    assert(!waited || !req->serialising);
-    assert(req->overlap_offset <= offset);
-    assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
-
-    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
-
-    /* Zero detection: if enabled and the driver has a write_zeroes callback,
-     * turn an all-zero payload into a zero write (optionally allowing
-     * unmap). */
-    if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
-        !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
-        qemu_iovec_is_zero(qiov)) {
-        flags |= BDRV_REQ_ZERO_WRITE;
-        if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
-            flags |= BDRV_REQ_MAY_UNMAP;
-        }
-    }
-
-    if (ret < 0) {
-        /* Do nothing, write notifier decided to fail this request */
-    } else if (flags & BDRV_REQ_ZERO_WRITE) {
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
-        ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
-    } else if (drv->bdrv_co_writev_flags) {
-        bdrv_debug_event(bs, BLKDBG_PWRITEV);
-        ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
-                                        flags);
-    } else {
-        assert(drv->supported_write_flags == 0);
-        bdrv_debug_event(bs, BLKDBG_PWRITEV);
-        ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
-    }
-    bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
-
-    /* Emulate FUA with an explicit flush when the driver does not advertise
-     * native support for the flag. */
-    if (ret == 0 && (flags & BDRV_REQ_FUA) &&
-        !(drv->supported_write_flags & BDRV_REQ_FUA))
-    {
-        ret = bdrv_co_flush(bs);
-    }
-
-    /* Note: the range is marked dirty and wr_highest_offset is advanced
-     * regardless of 'ret'; only total_sectors depends on success. */
-    bdrv_set_dirty(bs, sector_num, nb_sectors);
-
-    if (bs->wr_highest_offset < offset + bytes) {
-        bs->wr_highest_offset = offset + bytes;
-    }
-
-    if (ret >= 0) {
-        bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
-    }
-
-    return ret;
-}
-
-/*
- * Zero the byte range [offset, offset + bytes) of 'bs' for a write request
- * that carries no data.  'flags' must contain BDRV_REQ_ZERO_WRITE and 'req'
- * is the caller's tracked request.
- *
- * With align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment), the range is
- * split into up to three parts:
- *   - an unaligned head, handled by reading the enclosing aligned block,
- *     zeroing the affected bytes and writing the block back;
- *   - an aligned middle, passed to bdrv_aligned_pwritev() without a qiov;
- *   - an unaligned tail, handled by read-modify-write like the head.
- *
- * Returns 0 on success or the first negative value returned by
- * bdrv_aligned_preadv()/bdrv_aligned_pwritev().
- */
-static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
-                                                int64_t offset,
-                                                unsigned int bytes,
-                                                BdrvRequestFlags flags,
-                                                BdrvTrackedRequest *req)
-{
-    uint8_t *buf = NULL;
-    QEMUIOVector local_qiov;
-    struct iovec iov;
-    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
-    unsigned int head_padding_bytes, tail_padding_bytes;
-    int ret = 0;
-
-    head_padding_bytes = offset & (align - 1);
-    /* Distance from the end of the request to the next alignment boundary.
-     * The final mask makes this 0 (rather than 'align') when the end is
-     * already aligned, so a fully aligned request does not allocate a bounce
-     * buffer that it would never use. */
-    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
-
-
-    assert(flags & BDRV_REQ_ZERO_WRITE);
-    if (head_padding_bytes || tail_padding_bytes) {
-        /* One aligned block shared by the head and tail RMW cycles. */
-        buf = qemu_blockalign(bs, align);
-        iov = (struct iovec) {
-            .iov_base   = buf,
-            .iov_len    = align,
-        };
-        qemu_iovec_init_external(&local_qiov, &iov, 1);
-    }
-    if (head_padding_bytes) {
-        uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
-
-        /* RMW the unaligned part before head. */
-        mark_request_serialising(req, align);
-        wait_serialising_requests(req);
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
-                                  align, &local_qiov, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
-
-        memset(buf + head_padding_bytes, 0, zero_bytes);
-        /* The block now carries real data, so it must not be written as a
-         * zero write. */
-        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
-                                   &local_qiov,
-                                   flags & ~BDRV_REQ_ZERO_WRITE);
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += zero_bytes;
-        bytes -= zero_bytes;
-    }
-
-    assert(!bytes || (offset & (align - 1)) == 0);
-    if (bytes >= align) {
-        /* Write the aligned part in the middle. */
-        uint64_t aligned_bytes = bytes & ~(align - 1);
-        ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes,
-                                   NULL, flags);
-        if (ret < 0) {
-            goto fail;
-        }
-        bytes -= aligned_bytes;
-        offset += aligned_bytes;
-    }
-
-    assert(!bytes || (offset & (align - 1)) == 0);
-    if (bytes) {
-        /* Here 0 < bytes < align and offset is aligned, so the end of the
-         * request is unaligned and tail_padding_bytes is non-zero. */
-        assert(align == tail_padding_bytes + bytes);
-        /* RMW the unaligned part after tail. */
-        mark_request_serialising(req, align);
-        wait_serialising_requests(req);
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, req, offset, align,
-                                  align, &local_qiov, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
-
-        memset(buf, 0, bytes);
-        ret = bdrv_aligned_pwritev(bs, req, offset, align,
-                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
-    }
-fail:
-    /* buf is still NULL when the request needed no padding. */
-    qemu_vfree(buf);
-    return ret;
-
-}
-
-/*
- * Handle a write request in coroutine context
- *
- * Writes 'bytes' bytes from 'qiov' at byte 'offset' of 'bs'.  A NULL 'qiov'
- * is delegated to bdrv_co_do_zero_pwritev().  Otherwise, if the range is not
- * aligned to MAX(BDRV_SECTOR_SIZE, bs->request_alignment), the enclosing
- * aligned head and/or tail blocks are read first and the caller's vector is
- * padded with the bytes that must be preserved, so that
- * bdrv_aligned_pwritev() sees an aligned request.
- *
- * Returns -ENOMEDIUM if 'bs' has no driver, -EPERM if it is read-only, the
- * negative result of bdrv_check_byte_request() if that check fails, and
- * otherwise the result of the read/write helpers.
- */
-int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
-    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    BdrvTrackedRequest req;
-    /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
-    uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
-    uint8_t *head_buf = NULL;
-    uint8_t *tail_buf = NULL;
-    QEMUIOVector local_qiov;
-    bool use_local_qiov = false;
-    int ret;
-
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-    if (bs->read_only) {
-        return -EPERM;
-    }
-    assert(!(bs->open_flags & BDRV_O_INACTIVE));
-
-    ret = bdrv_check_byte_request(bs, offset, bytes);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* throttling disk I/O */
-    if (bs->io_limits_enabled) {
-        throttle_group_co_io_limits_intercept(bs, bytes, true);
-    }
-
-    /*
-     * Align write if necessary by performing a read-modify-write cycle.
-     * Pad qiov with the read parts and be sure to have a tracked request not
-     * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
-     */
-    tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
-
-    if (!qiov) {
-        /* No payload: this is a zero write; nothing was allocated yet, so
-         * skip straight to ending the tracked request. */
-        ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
-        goto out;
-    }
-
-    if (offset & (align - 1)) {
-        QEMUIOVector head_qiov;
-        struct iovec head_iov;
-
-        mark_request_serialising(&req, align);
-        wait_serialising_requests(&req);
-
-        head_buf = qemu_blockalign(bs, align);
-        head_iov = (struct iovec) {
-            .iov_base   = head_buf,
-            .iov_len    = align,
-        };
-        qemu_iovec_init_external(&head_qiov, &head_iov, 1);
-
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
-        ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
-                                  align, &head_qiov, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
-
-        /* Prepend the bytes preceding the caller's data in the head block. */
-        qemu_iovec_init(&local_qiov, qiov->niov + 2);
-        qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
-        qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-        use_local_qiov = true;
-
-        bytes += offset & (align - 1);
-        offset = offset & ~(align - 1);
-    }
-
-    if ((offset + bytes) & (align - 1)) {
-        QEMUIOVector tail_qiov;
-        struct iovec tail_iov;
-        size_t tail_bytes;
-        bool waited;
-
-        mark_request_serialising(&req, align);
-        waited = wait_serialising_requests(&req);
-        assert(!waited || !use_local_qiov);
-
-        tail_buf = qemu_blockalign(bs, align);
-        tail_iov = (struct iovec) {
-            .iov_base   = tail_buf,
-            .iov_len    = align,
-        };
-        qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
-
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
-        ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
-                                  align, &tail_qiov, 0);
-        if (ret < 0) {
-            goto fail;
-        }
-        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
-
-        if (!use_local_qiov) {
-            qemu_iovec_init(&local_qiov, qiov->niov + 1);
-            qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
-            use_local_qiov = true;
-        }
-
-        /* Append the bytes following the caller's data in the tail block. */
-        tail_bytes = (offset + bytes) & (align - 1);
-        qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
-
-        bytes = ROUND_UP(bytes, align);
-    }
-
-    ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
-                               use_local_qiov ? &local_qiov : qiov,
-                               flags);
-
-fail:
-
-    /* head_buf/tail_buf are NULL unless the corresponding padding was set
-     * up; local_qiov is only valid when use_local_qiov is true. */
-    if (use_local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-    }
-    qemu_vfree(head_buf);
-    qemu_vfree(tail_buf);
-out:
-    tracked_request_end(&req);
-    return ret;
-}
-
-/*
- * Sector-based front end for bdrv_co_do_pwritev(): rejects sector counts
- * outside [0, BDRV_REQUEST_MAX_SECTORS] with -EINVAL, otherwise converts the
- * sector position and count to bytes and forwards the request.
- */
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
-    BdrvRequestFlags flags)
-{
-    int64_t byte_offset;
-    unsigned int byte_count;
-
-    if (!(nb_sectors >= 0 && nb_sectors <= BDRV_REQUEST_MAX_SECTORS)) {
-        return -EINVAL;
-    }
-
-    byte_offset = sector_num << BDRV_SECTOR_BITS;
-    byte_count = nb_sectors << BDRV_SECTOR_BITS;
-
-    return bdrv_co_do_pwritev(bs, byte_offset, byte_count, qiov, flags);
-}
-
-/*
- * Trace and issue a sector-based coroutine write with no request flags set.
- * Returns the result of bdrv_co_do_writev().
- */
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, QEMUIOVector *qiov)
-{
-    const BdrvRequestFlags no_flags = 0;
-
-    trace_bdrv_co_writev(bs, sector_num, nb_sectors);
-    return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, no_flags);
-}
-
-/*
- * Trace and issue a zero write for 'nb_sectors' sectors at 'sector_num'.
- *
- * BDRV_REQ_MAY_UNMAP is stripped from 'flags' unless 'bs' was opened with
- * BDRV_O_UNMAP; BDRV_REQ_ZERO_WRITE is always added.  The request is sent
- * through bdrv_co_do_writev() with a NULL qiov, whose result is returned.
- */
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      BdrvRequestFlags flags)
-{
-    bool unmap_allowed;
-
-    trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
-
-    unmap_allowed = (bs->open_flags & BDRV_O_UNMAP) != 0;
-    if (!unmap_allowed) {
-        flags &= ~BDRV_REQ_MAY_UNMAP;
-    }
-    flags |= BDRV_REQ_ZERO_WRITE;
-
-    return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, flags);
-}
-
-/*
- * Argument/result bundle handed to bdrv_get_block_status_above_co_entry():
- * the first six fields are forwarded to bdrv_co_get_block_status_above(),
- * the last two carry the outcome back to the synchronous caller.
- */
-typedef struct BdrvCoGetBlockStatusData {
-    BlockDriverState *bs;       /* node at which the query starts */
-    BlockDriverState *base;     /* node at which the query stops */
-    BlockDriverState **file;    /* output pointer forwarded to the query */
-    int64_t sector_num;         /* first sector of the queried range */
-    int nb_sectors;             /* maximum number of sectors to examine */
-    int *pnum;                  /* output pointer forwarded to the query */
-    int64_t ret;                /* result stored by the coroutine entry */
-    bool done;                  /* set to true once 'ret' has been stored */
-} BdrvCoGetBlockStatusData;
-
-/*
- * Returns the allocation status of the specified sectors.
- * Drivers not implementing the functionality are assumed to not support
- * backing files, hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- *
- * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
- * points to the BDS which the sector range is allocated in.  '*file' is
- * always written: it is reset to NULL on entry, so callers never see a stale
- * or uninitialised pointer on the early-return paths.  'file' must therefore
- * not be NULL.
- *
- * A negative return value is an error: either from bdrv_nb_sectors() or from
- * the driver callback (in the latter case 'pnum' is set to 0).
- */
-static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
-                                                     int64_t sector_num,
-                                                     int nb_sectors, int *pnum,
-                                                     BlockDriverState **file)
-{
-    int64_t total_sectors;
-    int64_t n;
-    int64_t ret, ret2;
-
-    /* Give the output a defined value on every path, including the early
-     * returns below. */
-    *file = NULL;
-
-    total_sectors = bdrv_nb_sectors(bs);
-    if (total_sectors < 0) {
-        return total_sectors;
-    }
-
-    if (sector_num >= total_sectors) {
-        *pnum = 0;
-        return 0;
-    }
-
-    n = total_sectors - sector_num;
-    if (n < nb_sectors) {
-        nb_sectors = n;
-    }
-
-    if (!bs->drv->bdrv_co_get_block_status) {
-        /* No driver callback: report the whole (clamped) range as allocated
-         * data. */
-        *pnum = nb_sectors;
-        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
-        if (bs->drv->protocol_name) {
-            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
-            /* The offset reported above refers to this node itself, so
-             * honour the documented contract for BDRV_BLOCK_OFFSET_VALID. */
-            *file = bs;
-        }
-        return ret;
-    }
-
-    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
-                                            file);
-    if (ret < 0) {
-        *pnum = 0;
-        return ret;
-    }
-
-    if (ret & BDRV_BLOCK_RAW) {
-        /* The driver defers to bs->file at the returned offset. */
-        assert(ret & BDRV_BLOCK_OFFSET_VALID);
-        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
-                                     *pnum, pnum, file);
-    }
-
-    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
-        ret |= BDRV_BLOCK_ALLOCATED;
-    } else {
-        if (bdrv_unallocated_blocks_are_zero(bs)) {
-            ret |= BDRV_BLOCK_ZERO;
-        } else if (bs->backing) {
-            /* Unallocated sectors past the end of the backing file are
-             * reported as zero. */
-            BlockDriverState *bs2 = bs->backing->bs;
-            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
-            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
-                ret |= BDRV_BLOCK_ZERO;
-            }
-        }
-    }
-
-    if (*file && *file != bs &&
-        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
-        (ret & BDRV_BLOCK_OFFSET_VALID)) {
-        BlockDriverState *file2;
-        int file_pnum;
-
-        ret2 = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
-                                        *pnum, &file_pnum, &file2);
-        if (ret2 >= 0) {
-            /* Ignore errors.  This is just providing extra information, it
-             * is useful but not necessary.
-             */
-            if (!file_pnum) {
-                /* !file_pnum indicates an offset at or beyond the EOF; it is
-                 * perfectly valid for the format block driver to point to such
-                 * offsets, so catch it and mark everything as zero */
-                ret |= BDRV_BLOCK_ZERO;
-            } else {
-                /* Limit request to the range reported by the protocol driver */
-                *pnum = file_pnum;
-                ret |= (ret2 & BDRV_BLOCK_ZERO);
-            }
-        }
-    }
-
-    return ret;
-}
-
-/*
- * Query the block status of [sector_num, sector_num + nb_sectors) on 'bs'
- * and then on each node reached through backing_bs(), stopping before
- * 'base'.  The walk ends at the first node that reports an error or
- * BDRV_BLOCK_ALLOCATED; that node's result is returned.  'bs' must differ
- * from 'base'.
- */
-static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
-        BlockDriverState *base,
-        int64_t sector_num,
-        int nb_sectors,
-        int *pnum,
-        BlockDriverState **file)
-{
-    BlockDriverState *layer = bs;
-    int64_t status = 0;
-
-    assert(bs != base);
-
-    while (layer != base) {
-        status = bdrv_co_get_block_status(layer, sector_num, nb_sectors,
-                                          pnum, file);
-        if (status < 0 || (status & BDRV_BLOCK_ALLOCATED)) {
-            break;
-        }
-
-        /* [sector_num, pnum] unallocated on this layer, which could be only
-         * the first part of [sector_num, nb_sectors].  */
-        if (*pnum < nb_sectors) {
-            nb_sectors = *pnum;
-        }
-        layer = backing_bs(layer);
-    }
-
-    return status;
-}
-
-/*
- * Coroutine entry point used by bdrv_get_block_status_above(): unpacks the
- * BdrvCoGetBlockStatusData in 'opaque', runs the query, stores the result in
- * ->ret and finally sets ->done.
- */
-static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
-{
-    BdrvCoGetBlockStatusData *args = opaque;
-    int64_t status;
-
-    status = bdrv_co_get_block_status_above(args->bs, args->base,
-                                            args->sector_num,
-                                            args->nb_sectors,
-                                            args->pnum,
-                                            args->file);
-    args->ret = status;
-    args->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_get_block_status_above().
- *
- * When already running inside a coroutine the query is executed directly;
- * otherwise a coroutine is created for it and the AioContext of 'bs' is
- * polled until the coroutine has flagged completion.
- *
- * See bdrv_co_get_block_status_above() for details.
- */
-int64_t bdrv_get_block_status_above(BlockDriverState *bs,
-                                    BlockDriverState *base,
-                                    int64_t sector_num,
-                                    int nb_sectors, int *pnum,
-                                    BlockDriverState **file)
-{
-    BdrvCoGetBlockStatusData data = {
-        .bs = bs,
-        .base = base,
-        .file = file,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .pnum = pnum,
-        .done = false,
-    };
-    AioContext *aio_context;
-    Coroutine *co;
-
-    if (qemu_in_coroutine()) {
-        /* Fast-path if already in coroutine context */
-        bdrv_get_block_status_above_co_entry(&data);
-        return data.ret;
-    }
-
-    aio_context = bdrv_get_aio_context(bs);
-    co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
-    qemu_coroutine_enter(co, &data);
-    for (;;) {
-        if (data.done) {
-            break;
-        }
-        aio_poll(aio_context, true);
-    }
-
-    return data.ret;
-}
-
-/*
- * Block status of 'bs' alone: calls bdrv_get_block_status_above() with
- * backing_bs(bs) as the node at which the walk stops.
- */
-int64_t bdrv_get_block_status(BlockDriverState *bs,
-                              int64_t sector_num,
-                              int nb_sectors, int *pnum,
-                              BlockDriverState **file)
-{
-    BlockDriverState *stop_at = backing_bs(bs);
-
-    return bdrv_get_block_status_above(bs, stop_at, sector_num, nb_sectors,
-                                       pnum, file);
-}
-
-/*
- * Reduce bdrv_get_block_status() to a yes/no answer: returns a negative
- * value if the status query fails, 1 if BDRV_BLOCK_ALLOCATED is set in the
- * result and 0 otherwise.  'pnum' is filled in by the status query.
- */
-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                                   int nb_sectors, int *pnum)
-{
-    BlockDriverState *unused_file;
-    int64_t status;
-
-    status = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
-                                   &unused_file);
-    if (status < 0) {
-        return status;
-    }
-
-    return (status & BDRV_BLOCK_ALLOCATED) ? 1 : 0;
-}
-
-/*
- * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
- *
- * Walk from TOP through backing_bs() and stop before BASE (or at the end of
- * the chain when BASE is NULL).  Return 1 as soon as one of the visited
- * images reports the given sector as allocated, 0 if none does, or the
- * negative value returned by bdrv_is_allocated() on failure.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- *  the specified sector) that are known to be in the same
- *  allocated/unallocated state.  It is left untouched on failure.
- *
- */
-int bdrv_is_allocated_above(BlockDriverState *top,
-                            BlockDriverState *base,
-                            int64_t sector_num,
-                            int nb_sectors, int *pnum)
-{
-    BlockDriverState *layer;
-    int shortest = nb_sectors;
-
-    for (layer = top; layer && layer != base; layer = backing_bs(layer)) {
-        int layer_pnum;
-        int allocated = bdrv_is_allocated(layer, sector_num, nb_sectors,
-                                          &layer_pnum);
-
-        if (allocated < 0) {
-            return allocated;
-        }
-        if (allocated) {
-            *pnum = layer_pnum;
-            return 1;
-        }
-
-        /*
-         * [sector_num, nb_sectors] is unallocated on top but intermediate
-         * might have
-         *
-         * [sector_num+x, nr_sectors] allocated.
-         */
-        if (shortest > layer_pnum &&
-            (layer == top ||
-             sector_num + layer_pnum < layer->total_sectors)) {
-            shortest = layer_pnum;
-        }
-    }
-
-    *pnum = shortest;
-    return 0;
-}
-
-/*
- * Hand a compressed write over to the driver.
- *
- * Returns -ENOMEDIUM if 'bs' has no driver, -ENOTSUP if the driver has no
- * bdrv_write_compressed callback, the negative result of
- * bdrv_check_request() if the range is rejected, and otherwise the
- * callback's return value.  Asserts that 'bs' has no dirty bitmaps.
- */
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                          const uint8_t *buf, int nb_sectors)
-{
-    BlockDriver *driver = bs->drv;
-    int status;
-
-    if (driver == NULL) {
-        return -ENOMEDIUM;
-    }
-    if (driver->bdrv_write_compressed == NULL) {
-        return -ENOTSUP;
-    }
-
-    status = bdrv_check_request(bs, sector_num, nb_sectors);
-    if (status < 0) {
-        return status;
-    }
-
-    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
-    return driver->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
-}
-
-/*
- * Flat-buffer convenience wrapper: wraps 'buf'/'size' in a one-element I/O
- * vector and passes it to bdrv_writev_vmstate(), whose result is returned.
- */
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
-                      int64_t pos, int size)
-{
-    struct iovec single;
-    QEMUIOVector vec;
-
-    single = (struct iovec) {
-        .iov_base   = (void *) buf,
-        .iov_len    = size,
-    };
-    qemu_iovec_init_external(&vec, &single, 1);
-
-    return bdrv_writev_vmstate(bs, &vec, pos);
-}
-
-/*
- * Save VM state from 'qiov' at position 'pos'.
- *
- * Uses the driver's bdrv_save_vmstate callback when present; otherwise the
- * request is retried on bs->file's node.  Returns -ENOMEDIUM if 'bs' has no
- * driver and -ENOTSUP if there is neither a callback nor a bs->file.
- */
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
-    BlockDriver *driver = bs->drv;
-
-    if (!driver) {
-        return -ENOMEDIUM;
-    }
-    if (driver->bdrv_save_vmstate) {
-        return driver->bdrv_save_vmstate(bs, qiov, pos);
-    }
-    if (bs->file) {
-        return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
-    }
-
-    return -ENOTSUP;
-}
-
-/*
- * Load 'size' bytes of VM state from position 'pos' into 'buf'.
- *
- * Uses the driver's bdrv_load_vmstate callback when present; otherwise the
- * request is retried on bs->file's node.  Returns -ENOMEDIUM if 'bs' has no
- * driver and -ENOTSUP if there is neither a callback nor a bs->file.
- */
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                      int64_t pos, int size)
-{
-    BlockDriver *driver = bs->drv;
-    int result;
-
-    if (!driver) {
-        result = -ENOMEDIUM;
-    } else if (driver->bdrv_load_vmstate) {
-        result = driver->bdrv_load_vmstate(bs, buf, pos, size);
-    } else if (bs->file) {
-        result = bdrv_load_vmstate(bs->file->bs, buf, pos, size);
-    } else {
-        result = -ENOTSUP;
-    }
-
-    return result;
-}
-
-/**************************************************************/
-/* async I/Os */
-
-/*
- * Trace and start an asynchronous vectored read through
- * bdrv_co_aio_rw_vector() with no request flags.  Returns the BlockAIOCB
- * produced by that call.
- */
-BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
-                           QEMUIOVector *qiov, int nb_sectors,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    const bool is_write = false;
-
-    trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
-    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, is_write);
-}
-
-/*
- * Trace and start an asynchronous vectored write through
- * bdrv_co_aio_rw_vector() with no request flags.  Returns the BlockAIOCB
- * produced by that call.
- */
-BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
-                            QEMUIOVector *qiov, int nb_sectors,
-                            BlockCompletionFunc *cb, void *opaque)
-{
-    const bool is_write = true;
-
-    trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
-    return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
-                                 cb, opaque, is_write);
-}
-
-/*
- * Trace and start an asynchronous zero write: a write through
- * bdrv_co_aio_rw_vector() with a NULL qiov and BDRV_REQ_ZERO_WRITE added to
- * 'flags'.  Returns the BlockAIOCB produced by that call.
- */
-BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    const bool is_write = true;
-
-    trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
-
-    return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
-                                 BDRV_REQ_ZERO_WRITE | flags,
-                                 cb, opaque, is_write);
-}
-
-
-/*
- * Shared completion state for one bdrv_aio_multiwrite() call.  It is
- * allocated with room for one callbacks[] entry per original request and is
- * freed by multiwrite_cb() when the last submitted request completes.
- */
-typedef struct MultiwriteCB {
-    int error;              /* first negative completion code seen, else 0 */
-    int num_requests;       /* submitted requests that have not completed */
-    int num_callbacks;      /* number of valid entries in callbacks[] */
-    struct {
-        BlockCompletionFunc *cb;    /* caller's completion function */
-        void *opaque;               /* argument passed to 'cb' */
-        QEMUIOVector *free_qiov;    /* merged vector to destroy and free
-                                     * after 'cb' ran, or NULL */
-    } callbacks[];
-} MultiwriteCB;
-
-/*
- * Invoke every recorded caller callback with the batch-wide error code and
- * release the merged I/O vector attached to each entry, if any.
- */
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
-    int idx = 0;
-
-    while (idx < mcb->num_callbacks) {
-        QEMUIOVector *merged;
-
-        mcb->callbacks[idx].cb(mcb->callbacks[idx].opaque, mcb->error);
-
-        /* Read the pointer only after the callback has run. */
-        merged = mcb->callbacks[idx].free_qiov;
-        if (merged != NULL) {
-            qemu_iovec_destroy(merged);
-        }
-        g_free(merged);
-
-        idx++;
-    }
-}
-
-/*
- * Completion callback for each request submitted by bdrv_aio_multiwrite().
- * Records the first negative 'ret' and, once the last outstanding request
- * has completed, runs the caller callbacks and frees the MultiwriteCB.
- */
-static void multiwrite_cb(void *opaque, int ret)
-{
-    MultiwriteCB *batch = opaque;
-
-    trace_multiwrite_cb(batch, ret);
-
-    if (batch->error == 0 && ret < 0) {
-        batch->error = ret;
-    }
-
-    batch->num_requests -= 1;
-    if (batch->num_requests != 0) {
-        return;
-    }
-
-    multiwrite_user_cb(batch);
-    g_free(batch);
-}
-
-/*
- * qsort() comparator ordering BlockRequest entries by ascending start
- * sector.  Returns 1, -1 or 0.
- */
-static int multiwrite_req_compare(const void *a, const void *b)
-{
-    const BlockRequest *lhs = a;
-    const BlockRequest *rhs = b;
-
-    /*
-     * Compare rather than subtract: the difference of two sector numbers
-     * could overflow the int return value.
-     */
-    return (lhs->sector > rhs->sector) - (lhs->sector < rhs->sector);
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- *
- * 'reqs' is sorted in place by start sector and then compacted: a request
- * that starts at or before the end of the previous surviving request is
- * folded into it, unless that would exceed bs->bl.max_iov or a non-zero
- * bs->bl.max_transfer_length.  Each merged I/O vector is heap-allocated and
- * recorded in mcb->callbacks[i].free_qiov so that multiwrite_user_cb() can
- * release it.  If 'bs' has a BlockBackend, the number of merged requests is
- * reported to its accounting stats.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
-    int num_reqs, MultiwriteCB *mcb)
-{
-    int i, outidx;
-
-    // Sort requests by start sector
-    qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
-    // Check if adjacent requests are exactly sequential or overlapping. If
-    // so, combine them. Requests separated by a gap are never merged.
-    outidx = 0;
-    for (i = 1; i < num_reqs; i++) {
-        int merge = 0;
-        int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
-        // Handle exactly sequential writes and overlapping writes.
-        if (reqs[i].sector <= oldreq_last) {
-            merge = 1;
-        }
-
-        // The merged vector needs room for both inputs plus one extra entry
-        // for a possible tail of the first request.
-        if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
-            bs->bl.max_iov) {
-            merge = 0;
-        }
-
-        if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
-            reqs[i].nb_sectors > bs->bl.max_transfer_length) {
-            merge = 0;
-        }
-
-        if (merge) {
-            size_t size;
-            QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
-            qemu_iovec_init(qiov,
-                reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
-            // Add the first request to the merged one. If the requests are
-            // overlapping, drop the last sectors of the first request.
-            size = (reqs[i].sector - reqs[outidx].sector) << BDRV_SECTOR_BITS;
-            qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
-            // We shouldn't need to add any zeros between the two requests
-            assert (reqs[i].sector <= oldreq_last);
-
-            // Add the second request
-            qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
-            // Add tail of first request, if necessary
-            if (qiov->size < reqs[outidx].qiov->size) {
-                qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
-                                  reqs[outidx].qiov->size - qiov->size);
-            }
-
-            reqs[outidx].nb_sectors = qiov->size >> BDRV_SECTOR_BITS;
-            reqs[outidx].qiov = qiov;
-
-            // Remember the merged vector so it is released after completion
-            mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
-        } else {
-            outidx++;
-            reqs[outidx].sector     = reqs[i].sector;
-            reqs[outidx].nb_sectors = reqs[i].nb_sectors;
-            reqs[outidx].qiov       = reqs[i].qiov;
-        }
-    }
-
-    if (bs->blk) {
-        block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
-                              num_reqs - outidx - 1);
-    }
-
-    return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
-    MultiwriteCB *mcb;
-    int idx;
-    int remaining;
-
-    /* don't submit writes if we don't have a medium */
-    if (!bs->drv) {
-        for (idx = 0; idx < num_reqs; idx++) {
-            reqs[idx].error = -ENOMEDIUM;
-        }
-        return -1;
-    }
-
-    if (!num_reqs) {
-        return 0;
-    }
-
-    /* Create the shared completion state, one callback slot per request */
-    mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
-    mcb->num_requests = 0;
-    mcb->num_callbacks = num_reqs;
-
-    idx = 0;
-    while (idx < num_reqs) {
-        mcb->callbacks[idx].cb = reqs[idx].cb;
-        mcb->callbacks[idx].opaque = reqs[idx].opaque;
-        idx++;
-    }
-
-    /* Check for mergeable requests */
-    remaining = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
-    trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, remaining);
-
-    /* Run the aio requests. */
-    mcb->num_requests = remaining;
-    for (idx = 0; idx < remaining; idx++) {
-        bdrv_co_aio_rw_vector(bs, reqs[idx].sector, reqs[idx].qiov,
-                              reqs[idx].nb_sectors, reqs[idx].flags,
-                              multiwrite_cb, mcb,
-                              true);
-    }
-
-    return 0;
-}
-
-/*
- * Cancel @acb and wait for it: request asynchronous cancellation, then poll
- * the request's AioContext until ours is the only reference left.
- */
-void bdrv_aio_cancel(BlockAIOCB *acb)
-{
-    /* Hold our own reference so acb stays valid while we poll. */
-    qemu_aio_ref(acb);
-    bdrv_aio_cancel_async(acb);
-
-    for (;;) {
-        AioContext *ctx;
-
-        if (!(acb->refcnt > 1)) {
-            break;
-        }
-
-        /* Prefer the context the AIOCB type reports, else the BDS one. */
-        if (acb->aiocb_info->get_aio_context) {
-            ctx = acb->aiocb_info->get_aio_context(acb);
-        } else if (acb->bs) {
-            ctx = bdrv_get_aio_context(acb->bs);
-        } else {
-            abort();
-        }
-        aio_poll(ctx, true);
-    }
-
-    qemu_aio_unref(acb);
-}
-
-/* Non-blocking cancel.  If the AIOCB type provides cancel_async it is
- * invoked; otherwise nothing is done and the request completes normally.
- * Either way the completion callback must still be called. */
-void bdrv_aio_cancel_async(BlockAIOCB *acb)
-{
-    const AIOCBInfo *info = acb->aiocb_info;
-
-    if (!info->cancel_async) {
-        return;
-    }
-    info->cancel_async(acb);
-}
-
-/**************************************************************/
-/* async block device emulation */
-
-/* AIOCB used to emulate AIO on top of a driver's synchronous
- * bdrv_read/bdrv_write (see bdrv_aio_rw_vector). */
-typedef struct BlockAIOCBSync {
-    BlockAIOCB common;
-    /* bottom half that delivers the completion callback */
-    QEMUBH *bh;
-    /* result of the synchronous call, or -ENOMEM if no bounce buffer */
-    int ret;
-    /* vector translation state */
-    QEMUIOVector *qiov;
-    /* linear buffer the data is staged in; freed by the bottom half */
-    uint8_t *bounce;
-    /* non-zero for writes, zero for reads */
-    int is_write;
-} BlockAIOCBSync;
-
-/* AIOCB type descriptor for BlockAIOCBSync; only the size is set, so no
- * cancel_async or get_aio_context hook is provided. */
-static const AIOCBInfo bdrv_em_aiocb_info = {
-    .aiocb_size         = sizeof(BlockAIOCBSync),
-};
-
-/*
- * Bottom half finishing an emulated synchronous request: copy read data
- * back into the caller's vector, release the bounce buffer, run the
- * completion callback and drop the AIOCB reference.
- */
-static void bdrv_aio_bh_cb(void *opaque)
-{
-    BlockAIOCBSync *sync_acb = opaque;
-    int successful_read = !sync_acb->is_write && sync_acb->ret >= 0;
-
-    if (successful_read) {
-        qemu_iovec_from_buf(sync_acb->qiov, 0, sync_acb->bounce,
-                            sync_acb->qiov->size);
-    }
-    qemu_vfree(sync_acb->bounce);
-
-    sync_acb->common.cb(sync_acb->common.opaque, sync_acb->ret);
-
-    qemu_bh_delete(sync_acb->bh);
-    sync_acb->bh = NULL;
-    qemu_aio_unref(sync_acb);
-}
-
-/*
- * Emulate an AIO read or write with the driver's synchronous
- * bdrv_read/bdrv_write.  The I/O is performed immediately through a bounce
- * buffer; the completion callback is delivered later from a bottom half
- * (bdrv_aio_bh_cb).  A failed bounce allocation is reported as -ENOMEM
- * through the same callback.
- */
-static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque,
-                                      int is_write)
-
-{
-    BlockAIOCBSync *sync_acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb,
-                                            opaque);
-
-    sync_acb->is_write = is_write;
-    sync_acb->qiov = qiov;
-    sync_acb->bounce = qemu_try_blockalign(bs, qiov->size);
-    sync_acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb,
-                              sync_acb);
-
-    if (!sync_acb->bounce) {
-        sync_acb->ret = -ENOMEM;
-    } else if (!is_write) {
-        sync_acb->ret = bs->drv->bdrv_read(bs, sector_num, sync_acb->bounce,
-                                           nb_sectors);
-    } else {
-        /* Linearize the caller's vector before handing it to the driver. */
-        qemu_iovec_to_buf(sync_acb->qiov, 0, sync_acb->bounce, qiov->size);
-        sync_acb->ret = bs->drv->bdrv_write(bs, sector_num, sync_acb->bounce,
-                                            nb_sectors);
-    }
-
-    qemu_bh_schedule(sync_acb->bh);
-
-    return &sync_acb->common;
-}
-
-/* Emulated AIO read: forwards to bdrv_aio_rw_vector() with is_write == 0. */
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
-                                     int64_t sector_num,
-                                     QEMUIOVector *qiov,
-                                     int nb_sectors,
-                                     BlockCompletionFunc *cb,
-                                     void *opaque)
-{
-    const int is_write = 0;
-
-    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
-                              is_write);
-}
-
-/* Emulated AIO write: forwards to bdrv_aio_rw_vector() with is_write == 1. */
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
-{
-    const int is_write = 1;
-
-    return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque,
-                              is_write);
-}
-
-
-/* AIOCB for AIO requests that are implemented by running a coroutine
- * (read/write, flush, discard, ioctl). */
-typedef struct BlockAIOCBCoroutine {
-    BlockAIOCB common;
-    /* request parameters; req.error carries the result (-EINPROGRESS while
-     * the coroutine has not finished) */
-    BlockRequest req;
-    /* selects bdrv_co_do_writev vs. bdrv_co_do_readv in bdrv_co_do_rw */
-    bool is_write;
-    /* true while the submitting function is still running; completion is
-     * then deferred to a bottom half instead of being delivered directly */
-    bool need_bh;
-    /* not referenced by the code visible in this section */
-    bool *done;
-    /* bottom half used for the deferred completion */
-    QEMUBH* bh;
-} BlockAIOCBCoroutine;
-
-/* AIOCB type descriptor for BlockAIOCBCoroutine; only the size is set. */
-static const AIOCBInfo bdrv_em_co_aiocb_info = {
-    .aiocb_size         = sizeof(BlockAIOCBCoroutine),
-};
-
-/*
- * Deliver the completion callback and drop the AIOCB reference, unless
- * need_bh is still set, in which case nothing happens here.
- */
-static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
-{
-    if (acb->need_bh) {
-        return;
-    }
-
-    acb->common.cb(acb->common.opaque, acb->req.error);
-    qemu_aio_unref(acb);
-}
-
-/* Bottom half scheduled by bdrv_co_maybe_schedule_bh(): deletes itself and
- * completes the request. */
-static void bdrv_co_em_bh(void *opaque)
-{
-    BlockAIOCBCoroutine *co_acb = opaque;
-
-    assert(!co_acb->need_bh);
-
-    qemu_bh_delete(co_acb->bh);
-    bdrv_co_complete(co_acb);
-}
-
-/*
- * Called by the submitting function after the coroutine was entered.
- * Clears need_bh; if the request already has a result (error is no longer
- * -EINPROGRESS) a bottom half is scheduled to deliver the completion.
- */
-static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
-{
-    AioContext *ctx;
-
-    acb->need_bh = false;
-    if (acb->req.error == -EINPROGRESS) {
-        /* Still running: bdrv_co_complete() will finish it directly. */
-        return;
-    }
-
-    ctx = bdrv_get_aio_context(acb->common.bs);
-    acb->bh = aio_bh_new(ctx, bdrv_co_em_bh, acb);
-    qemu_bh_schedule(acb->bh);
-}
-
-/* Coroutine entry point: run bdrv_co_do_readv or bdrv_co_do_writev for the
- * request stored in the AIOCB, record the result and complete it. */
-static void coroutine_fn bdrv_co_do_rw(void *opaque)
-{
-    BlockAIOCBCoroutine *acb = opaque;
-    BlockRequest *rq = &acb->req;
-    BlockDriverState *bs = acb->common.bs;
-
-    if (acb->is_write) {
-        rq->error = bdrv_co_do_writev(bs, rq->sector, rq->nb_sectors,
-                                      rq->qiov, rq->flags);
-    } else {
-        rq->error = bdrv_co_do_readv(bs, rq->sector, rq->nb_sectors,
-                                     rq->qiov, rq->flags);
-    }
-
-    bdrv_co_complete(acb);
-}
-
-/*
- * Start an AIO read or write by running bdrv_co_do_rw in a new coroutine.
- * If the coroutine finishes before this function returns, completion is
- * deferred to a bottom half by bdrv_co_maybe_schedule_bh().
- */
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
-                                         int64_t sector_num,
-                                         QEMUIOVector *qiov,
-                                         int nb_sectors,
-                                         BdrvRequestFlags flags,
-                                         BlockCompletionFunc *cb,
-                                         void *opaque,
-                                         bool is_write)
-{
-    BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb,
-                                            opaque);
-    Coroutine *rw_co;
-
-    /* Completion state first, then the request description. */
-    acb->need_bh = true;
-    acb->is_write = is_write;
-    acb->req.error = -EINPROGRESS;
-    acb->req.sector = sector_num;
-    acb->req.nb_sectors = nb_sectors;
-    acb->req.qiov = qiov;
-    acb->req.flags = flags;
-
-    rw_co = qemu_coroutine_create(bdrv_co_do_rw);
-    qemu_coroutine_enter(rw_co, acb);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
-/* Coroutine entry point for bdrv_aio_flush(): flush and complete. */
-static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *flush_acb = opaque;
-
-    flush_acb->req.error = bdrv_co_flush(flush_acb->common.bs);
-    bdrv_co_complete(flush_acb);
-}
-
-/*
- * Asynchronous flush: runs bdrv_co_flush() in a new coroutine and reports
- * the result through @cb.
- */
-BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCBCoroutine *acb;
-    Coroutine *flush_co;
-
-    trace_bdrv_aio_flush(bs, opaque);
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->req.error = -EINPROGRESS;
-    acb->need_bh = true;
-
-    flush_co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
-    qemu_coroutine_enter(flush_co, acb);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
-/* Coroutine entry point for bdrv_aio_discard(): discard and complete. */
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *discard_acb = opaque;
-    BlockRequest *rq = &discard_acb->req;
-
-    rq->error = bdrv_co_discard(discard_acb->common.bs, rq->sector,
-                                rq->nb_sectors);
-    bdrv_co_complete(discard_acb);
-}
-
-/*
- * Asynchronous discard of @nb_sectors sectors starting at @sector_num:
- * runs bdrv_co_discard() in a new coroutine and reports the result
- * through @cb.
- */
-BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCBCoroutine *acb;
-    Coroutine *discard_co;
-
-    trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->req.sector = sector_num;
-    acb->req.nb_sectors = nb_sectors;
-    acb->req.error = -EINPROGRESS;
-    acb->need_bh = true;
-
-    discard_co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
-    qemu_coroutine_enter(discard_co, acb);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
-/*
- * Allocate an AIOCB of the size given by @aiocb_info and fill in the common
- * header.  The block is not zeroed; type-specific fields are left for the
- * caller to initialize.  The new AIOCB starts with one reference.
- */
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
-                   BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCB *fresh = g_malloc(aiocb_info->aiocb_size);
-
-    fresh->refcnt = 1;
-    fresh->aiocb_info = aiocb_info;
-    fresh->bs = bs;
-    fresh->opaque = opaque;
-    fresh->cb = cb;
-
-    return fresh;
-}
-
-/* Take an additional reference on the AIOCB @p. */
-void qemu_aio_ref(void *p)
-{
-    BlockAIOCB *aiocb = p;
-
-    aiocb->refcnt += 1;
-}
-
-/* Drop one reference on the AIOCB @p and free it when none remain. */
-void qemu_aio_unref(void *p)
-{
-    BlockAIOCB *aiocb = p;
-
-    assert(aiocb->refcnt > 0);
-
-    aiocb->refcnt--;
-    if (aiocb->refcnt != 0) {
-        return;
-    }
-    g_free(aiocb);
-}
-
-/**************************************************************/
-/* Coroutine block device emulation */
-
-/* Completion record handed as opaque to AIO callbacks so that a yielded
- * coroutine can be resumed with the request's result. */
-typedef struct CoroutineIOCompletion {
-    /* coroutine to re-enter when the request completes */
-    Coroutine *coroutine;
-    /* result passed to the completion callback */
-    int ret;
-} CoroutineIOCompletion;
-
-/* AIO completion callback: store @ret in the CoroutineIOCompletion passed
- * as @opaque and re-enter the coroutine recorded there. */
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
-    CoroutineIOCompletion *completion = opaque;
-
-    completion->ret = ret;
-    qemu_coroutine_enter(completion->coroutine, NULL);
-}
-
-/*
- * Emulate a coroutine read/write with the driver's AIO interface: submit
- * the request, yield until bdrv_co_io_em_complete() re-enters us, and
- * return the result.  Returns -EIO if the driver hands back no AIOCB.
- */
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *iov,
-                                      bool is_write)
-{
-    BlockAIOCB *acb;
-    CoroutineIOCompletion completion = {
-        .coroutine = qemu_coroutine_self(),
-    };
-
-    if (!is_write) {
-        acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
-                                      bdrv_co_io_em_complete, &completion);
-    } else {
-        acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
-                                       bdrv_co_io_em_complete, &completion);
-    }
-
-    trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
-    if (acb == NULL) {
-        return -EIO;
-    }
-
-    /* Resumed by the completion callback, which has filled in ret. */
-    qemu_coroutine_yield();
-
-    return completion.ret;
-}
-
-/* Coroutine read emulated on top of the driver's AIO read. */
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov)
-{
-    const bool is_write = false;
-
-    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
-}
-
-/* Coroutine write emulated on top of the driver's AIO write. */
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
-                                         int64_t sector_num, int nb_sectors,
-                                         QEMUIOVector *iov)
-{
-    const bool is_write = true;
-
-    return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, is_write);
-}
-
-/* Coroutine entry point for bdrv_flush(): stores the flush result in the
- * RwCo passed as @opaque. */
-static void coroutine_fn bdrv_flush_co_entry(void *opaque)
-{
-    RwCo *flush_state = opaque;
-    int result = bdrv_co_flush(flush_state->bs);
-
-    flush_state->ret = result;
-}
-
-/*
- * Flush @bs and, recursively, its bs->file child.
- *
- * Returns 0 without doing anything when @bs is NULL, has no medium
- * inserted, is read-only or is an sg device.  Otherwise returns the first
- * negative error reported by a driver flush callback, or the result of
- * flushing bs->file (0 if there is none).
- */
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
-{
-    int ret;
-    BdrvTrackedRequest req;
-
-    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
-        bdrv_is_sg(bs)) {
-        return 0;
-    }
-
-    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
-
-    /* Write back all layers by calling one driver function */
-    if (bs->drv->bdrv_co_flush) {
-        ret = bs->drv->bdrv_co_flush(bs);
-        /* the driver took care of everything, including bs->file */
-        goto out;
-    }
-
-    /* Write back cached data to the OS even with cache=unsafe */
-    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
-    if (bs->drv->bdrv_co_flush_to_os) {
-        ret = bs->drv->bdrv_co_flush_to_os(bs);
-        if (ret < 0) {
-            goto out;
-        }
-    }
-
-    /* But don't actually force it to the disk with cache=unsafe */
-    if (bs->open_flags & BDRV_O_NO_FLUSH) {
-        goto flush_parent;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
-    if (bs->drv->bdrv_co_flush_to_disk) {
-        ret = bs->drv->bdrv_co_flush_to_disk(bs);
-    } else if (bs->drv->bdrv_aio_flush) {
-        /* Emulate with the AIO flush: submit, then yield until
-         * bdrv_co_io_em_complete() re-enters this coroutine. */
-        BlockAIOCB *acb;
-        CoroutineIOCompletion co = {
-            .coroutine = qemu_coroutine_self(),
-        };
-
-        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
-        if (acb == NULL) {
-            ret = -EIO;
-        } else {
-            qemu_coroutine_yield();
-            ret = co.ret;
-        }
-    } else {
-        /*
-         * Some block drivers always operate in either writethrough or unsafe
-         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
-         * know how the server works (because the behaviour is hardcoded or
-         * depends on server-side configuration), so we can't ensure that
-         * everything is safe on disk. Returning an error doesn't work because
-         * that would break guests even if the server operates in writethrough
-         * mode.
-         *
-         * Let's hope the user knows what he's doing.
-         */
-        ret = 0;
-    }
-    if (ret < 0) {
-        goto out;
-    }
-
-    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
-     * in the case of cache=unsafe, so there are no useless flushes.
-     */
-flush_parent:
-    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
-out:
-    /* every path past tracked_request_begin() must end the tracked request */
-    tracked_request_end(&req);
-    return ret;
-}
-
-/*
- * Synchronous flush.  Inside a coroutine the flush runs directly;
- * otherwise a coroutine is created and the AioContext of @bs is polled
- * until it has produced a result.
- */
-int bdrv_flush(BlockDriverState *bs)
-{
-    RwCo rwco = {
-        .bs = bs,
-        .ret = NOT_DONE,
-    };
-
-    if (!qemu_in_coroutine()) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-        Coroutine *flush_co = qemu_coroutine_create(bdrv_flush_co_entry);
-
-        qemu_coroutine_enter(flush_co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(ctx, true);
-        }
-    } else {
-        /* Fast-path if already in coroutine context */
-        bdrv_flush_co_entry(&rwco);
-    }
-
-    return rwco.ret;
-}
-
-/* Arguments and result slot for running bdrv_co_discard() from
- * bdrv_discard(); ret stays NOT_DONE until the coroutine has finished. */
-typedef struct DiscardCo {
-    BlockDriverState *bs;
-    int64_t sector_num;
-    int nb_sectors;
-    int ret;
-} DiscardCo;
-/* Coroutine entry point for bdrv_discard(): stores the discard result in
- * the DiscardCo passed as @opaque. */
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
-{
-    DiscardCo *args = opaque;
-    int result = bdrv_co_discard(args->bs, args->sector_num,
-                                 args->nb_sectors);
-
-    args->ret = result;
-}
-
-/*
- * Discard @nb_sectors sectors starting at @sector_num.
- *
- * The range is processed in chunks: a chunk is shortened so that the next
- * one starts on a discard_alignment boundary, and no chunk exceeds
- * bs->bl.max_discard (or BDRV_REQUEST_MAX_SECTORS).
- *
- * Returns:
- *   -ENOMEDIUM  if @bs has no driver
- *   -EPERM      if @bs is read-only
- *   a negative error from bdrv_check_request() or from the driver
- *   0           on success, when discard is disabled (BDRV_O_UNMAP unset),
- *               or when the driver has no discard callback
- * -ENOTSUP from the driver for a chunk is not treated as a failure.
- */
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors)
-{
-    BdrvTrackedRequest req;
-    int max_discard, ret;
-
-    if (!bs->drv) {
-        return -ENOMEDIUM;
-    }
-
-    ret = bdrv_check_request(bs, sector_num, nb_sectors);
-    if (ret < 0) {
-        return ret;
-    } else if (bs->read_only) {
-        return -EPERM;
-    }
-    assert(!(bs->open_flags & BDRV_O_INACTIVE));
-
-    /* Do nothing if disabled.  */
-    if (!(bs->open_flags & BDRV_O_UNMAP)) {
-        return 0;
-    }
-
-    if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
-        return 0;
-    }
-
-    tracked_request_begin(&req, bs, sector_num, nb_sectors,
-                          BDRV_TRACKED_DISCARD);
-    bdrv_set_dirty(bs, sector_num, nb_sectors);
-
-    max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
-    while (nb_sectors > 0) {
-        /*
-         * No per-iteration 'ret' here: it has to be the function-level
-         * variable, otherwise the 'goto out' paths below would return the
-         * stale value left by bdrv_check_request() and lose the error.
-         */
-        int num = nb_sectors;
-
-        /* align request */
-        if (bs->bl.discard_alignment &&
-            num >= bs->bl.discard_alignment &&
-            sector_num % bs->bl.discard_alignment) {
-            if (num > bs->bl.discard_alignment) {
-                num = bs->bl.discard_alignment;
-            }
-            num -= sector_num % bs->bl.discard_alignment;
-        }
-
-        /* limit request size */
-        if (num > max_discard) {
-            num = max_discard;
-        }
-
-        if (bs->drv->bdrv_co_discard) {
-            ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
-        } else {
-            BlockAIOCB *acb;
-            CoroutineIOCompletion co = {
-                .coroutine = qemu_coroutine_self(),
-            };
-
-            /* Submit only this chunk (num), not the whole remainder. */
-            acb = bs->drv->bdrv_aio_discard(bs, sector_num, num,
-                                            bdrv_co_io_em_complete, &co);
-            if (acb == NULL) {
-                ret = -EIO;
-                goto out;
-            } else {
-                qemu_coroutine_yield();
-                ret = co.ret;
-            }
-        }
-        if (ret && ret != -ENOTSUP) {
-            goto out;
-        }
-
-        sector_num += num;
-        nb_sectors -= num;
-    }
-    ret = 0;
-out:
-    tracked_request_end(&req);
-    return ret;
-}
-
-/*
- * Synchronous discard.  Inside a coroutine the discard runs directly;
- * otherwise a coroutine is created and the AioContext of @bs is polled
- * until it has produced a result.
- */
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
-{
-    DiscardCo rwco = {
-        .bs = bs,
-        .sector_num = sector_num,
-        .nb_sectors = nb_sectors,
-        .ret = NOT_DONE,
-    };
-
-    if (!qemu_in_coroutine()) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-        Coroutine *discard_co = qemu_coroutine_create(bdrv_discard_co_entry);
-
-        qemu_coroutine_enter(discard_co, &rwco);
-        while (rwco.ret == NOT_DONE) {
-            aio_poll(ctx, true);
-        }
-    } else {
-        /* Fast-path if already in coroutine context */
-        bdrv_discard_co_entry(&rwco);
-    }
-
-    return rwco.ret;
-}
-
-/* State for the bottom half that bdrv_co_do_ioctl() schedules when the
- * driver's bdrv_aio_ioctl returns no AIOCB. */
-typedef struct {
-    /* completion record of the waiting coroutine */
-    CoroutineIOCompletion *co;
-    /* the bottom half itself, so the callback can delete it */
-    QEMUBH *bh;
-} BdrvIoctlCompletionData;
-
-/*
- * Bottom half for an ioctl the driver did not start: resume the waiting
- * coroutine with -ENOTSUP, then release the bottom half and the
- * heap-allocated completion data passed as @opaque.
- */
-static void bdrv_ioctl_bh_cb(void *opaque)
-{
-    BdrvIoctlCompletionData *data = opaque;
-
-    bdrv_co_io_em_complete(data->co, -ENOTSUP);
-    qemu_bh_delete(data->bh);
-    /* data came from g_new() in the scheduler and has no other owner */
-    g_free(data);
-}
-
-/*
- * Issue ioctl @req with argument @buf through the driver's bdrv_aio_ioctl
- * and wait for the result in the calling coroutine.
- *
- * Returns -ENOTSUP if there is no driver or it has no bdrv_aio_ioctl, and
- * also when the driver returns no AIOCB; otherwise the value passed to the
- * completion callback.
- */
-static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
-{
-    BlockDriver *drv = bs->drv;
-    BdrvTrackedRequest tracked_req;
-    CoroutineIOCompletion co = {
-        .coroutine = qemu_coroutine_self(),
-    };
-    BlockAIOCB *acb;
-
-    tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
-    if (!drv || !drv->bdrv_aio_ioctl) {
-        co.ret = -ENOTSUP;
-        goto out;
-    }
-
-    acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
-    if (!acb) {
-        /* Nothing was submitted, so no callback will wake us up.  Schedule
-         * a bottom half (bdrv_ioctl_bh_cb) that completes the request with
-         * -ENOTSUP after we have yielded below. */
-        BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
-        data->bh = aio_bh_new(bdrv_get_aio_context(bs),
-                                bdrv_ioctl_bh_cb, data);
-        data->co = &co;
-        qemu_bh_schedule(data->bh);
-    }
-    /* resumed by bdrv_co_io_em_complete(), which fills in co.ret */
-    qemu_coroutine_yield();
-out:
-    tracked_request_end(&tracked_req);
-    return co.ret;
-}
-
-/* Arguments and result slot for running bdrv_co_do_ioctl() from
- * bdrv_ioctl(); ret stays -EINPROGRESS until the coroutine has finished. */
-typedef struct {
-    BlockDriverState *bs;
-    /* NOTE(review): bdrv_ioctl() takes an unsigned long request but stores
-     * it here as int - confirm no request code needs the wider range */
-    int req;
-    void *buf;
-    int ret;
-} BdrvIoctlCoData;
-
-/* Coroutine entry point for bdrv_ioctl(): stores the ioctl result in the
- * BdrvIoctlCoData passed as @opaque. */
-static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
-{
-    BdrvIoctlCoData *args = opaque;
-    int result = bdrv_co_do_ioctl(args->bs, args->req, args->buf);
-
-    args->ret = result;
-}
-
-/* Synchronous ioctl, needed for the generic scsi interface.  Inside a
- * coroutine the request runs directly; otherwise a coroutine is created
- * and the AioContext of @bs is polled until a result is available. */
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
-    BdrvIoctlCoData data = {
-        .bs = bs,
-        .req = req,
-        .buf = buf,
-        .ret = -EINPROGRESS,
-    };
-
-    if (!qemu_in_coroutine()) {
-        Coroutine *ioctl_co = qemu_coroutine_create(bdrv_co_ioctl_entry);
-
-        qemu_coroutine_enter(ioctl_co, &data);
-        for (;;) {
-            if (data.ret != -EINPROGRESS) {
-                break;
-            }
-            aio_poll(bdrv_get_aio_context(bs), true);
-        }
-    } else {
-        /* Fast-path if already in coroutine context */
-        bdrv_co_ioctl_entry(&data);
-    }
-
-    return data.ret;
-}
-
-/* Coroutine entry point for bdrv_aio_ioctl(): run the ioctl and complete. */
-static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
-{
-    BlockAIOCBCoroutine *ioctl_acb = opaque;
-    BlockRequest *rq = &ioctl_acb->req;
-
-    rq->error = bdrv_co_do_ioctl(ioctl_acb->common.bs, rq->req, rq->buf);
-    bdrv_co_complete(ioctl_acb);
-}
-
-/*
- * Asynchronous ioctl: runs bdrv_co_do_ioctl() in a new coroutine and
- * reports the result through @cb.
- */
-BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BlockAIOCBCoroutine *acb;
-    Coroutine *ioctl_co;
-
-    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-    acb->req.buf = buf;
-    acb->req.req = req;
-    acb->req.error = -EINPROGRESS;
-    acb->need_bh = true;
-
-    ioctl_co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
-    qemu_coroutine_enter(ioctl_co, acb);
-
-    bdrv_co_maybe_schedule_bh(acb);
-    return &acb->common;
-}
-
-/* Allocate @size bytes aligned to the optimal memory alignment of @bs. */
-void *qemu_blockalign(BlockDriverState *bs, size_t size)
-{
-    size_t align = bdrv_opt_mem_align(bs);
-
-    return qemu_memalign(align, size);
-}
-
-/* Like qemu_blockalign(), but the returned memory is zero-filled. */
-void *qemu_blockalign0(BlockDriverState *bs, size_t size)
-{
-    void *mem = qemu_blockalign(bs, size);
-
-    memset(mem, 0, size);
-    return mem;
-}
-
-/*
- * Allocate @size bytes aligned to the optimal memory alignment of @bs via
- * qemu_try_memalign().  A zero @size is rounded up to one alignment unit
- * so that a NULL result always means failure.
- */
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
-{
-    size_t align = bdrv_opt_mem_align(bs);
-    size_t alloc_size;
-
-    /* Ensure that NULL is never returned on success */
-    assert(align > 0);
-    alloc_size = (size == 0) ? align : size;
-
-    return qemu_try_memalign(align, alloc_size);
-}
-
-/* Like qemu_try_blockalign(), but zero-fills the first @size bytes when the
- * allocation succeeded. */
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
-{
-    void *buf = qemu_try_blockalign(bs, size);
-
-    if (!buf) {
-        return buf;
-    }
-
-    memset(buf, 0, size);
-    return buf;
-}
-
-/*
- * Check whether every element of @qiov has both its base address and its
- * length aligned to the minimum memory alignment of @bs.
- */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
-    size_t alignment = bdrv_min_mem_align(bs);
-    int idx;
-
-    for (idx = 0; idx < qiov->niov; idx++) {
-        uintptr_t base = (uintptr_t) qiov->iov[idx].iov_base;
-        size_t len = qiov->iov[idx].iov_len;
-
-        if (base % alignment || len % alignment) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-/* Add @notifier to the before_write_notifiers list of @bs. */
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
-                                    NotifierWithReturn *notifier)
-{
-    NotifierWithReturnList *list = &bs->before_write_notifiers;
-
-    notifier_with_return_list_add(list, notifier);
-}
-
-/* Call the driver's bdrv_io_plug hook if it has one; otherwise recurse into
- * bs->file when present. */
-void bdrv_io_plug(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (drv != NULL && drv->bdrv_io_plug != NULL) {
-        drv->bdrv_io_plug(bs);
-        return;
-    }
-    if (bs->file != NULL) {
-        bdrv_io_plug(bs->file->bs);
-    }
-}
-
-/* Call the driver's bdrv_io_unplug hook if it has one; otherwise recurse
- * into bs->file when present. */
-void bdrv_io_unplug(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (drv != NULL && drv->bdrv_io_unplug != NULL) {
-        drv->bdrv_io_unplug(bs);
-        return;
-    }
-    if (bs->file != NULL) {
-        bdrv_io_unplug(bs->file->bs);
-    }
-}
-
-/* Call the driver's bdrv_flush_io_queue hook if it has one, otherwise
- * recurse into bs->file when present; in every case restart the throttled
- * requests of @bs afterwards. */
-void bdrv_flush_io_queue(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    int has_hook = drv != NULL && drv->bdrv_flush_io_queue != NULL;
-
-    if (has_hook) {
-        drv->bdrv_flush_io_queue(bs);
-    } else if (bs->file != NULL) {
-        bdrv_flush_io_queue(bs->file->bs);
-    }
-
-    bdrv_start_throttled_reqs(bs);
-}
-
-/* Begin a drained section: the outermost call (quiesce_counter 0 -> 1)
- * disables external events on the AioContext; every call drains @bs. */
-void bdrv_drained_begin(BlockDriverState *bs)
-{
-    if (bs->quiesce_counter++ == 0) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_disable_external(ctx);
-    }
-    bdrv_drain(bs);
-}
-
-/* End a drained section: when the last nested section is closed, external
- * events on the AioContext are enabled again. */
-void bdrv_drained_end(BlockDriverState *bs)
-{
-    assert(bs->quiesce_counter > 0);
-
-    bs->quiesce_counter--;
-    if (bs->quiesce_counter > 0) {
-        /* still inside an outer drained section */
-        return;
-    }
-
-    aio_enable_external(bdrv_get_aio_context(bs));
-}
diff --git a/qemu/block/iscsi.c b/qemu/block/iscsi.c
deleted file mode 100644
index 302baf84c..000000000
--- a/qemu/block/iscsi.c
+++ /dev/null
@@ -1,1904 +0,0 @@
-/*
- * QEMU Block driver for iSCSI images
- *
- * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-
-#include <poll.h>
-#include <math.h>
-#include <arpa/inet.h>
-#include "qemu-common.h"
-#include "qemu/config-file.h"
-#include "qemu/error-report.h"
-#include "qemu/bitops.h"
-#include "qemu/bitmap.h"
-#include "block/block_int.h"
-#include "block/scsi.h"
-#include "qemu/iov.h"
-#include "sysemu/sysemu.h"
-#include "qmp-commands.h"
-#include "qapi/qmp/qstring.h"
-#include "crypto/secret.h"
-
-#include <iscsi/iscsi.h>
-#include <iscsi/scsi-lowlevel.h>
-
-#ifdef __linux__
-#include <scsi/sg.h>
-#include <block/scsi.h>
-#endif
-
-/* Per-LUN driver state of the iSCSI block driver. */
-typedef struct IscsiLun {
-    /* libiscsi connection context */
-    struct iscsi_context *iscsi;
-    /* AioContext in which bottom halves and retry timers are created */
-    AioContext *aio_context;
-    int lun;
-    enum scsi_inquiry_peripheral_device_type type;
-    int block_size;
-    uint64_t num_blocks;
-    /* event mask last seen from iscsi_which_events(); compared against the
-     * current mask in iscsi_set_events() */
-    int events;
-    QEMUTimer *nop_timer;
-    QEMUTimer *event_timer;
-    struct scsi_inquiry_logical_block_provisioning lbp;
-    struct scsi_inquiry_block_limits bl;
-    unsigned char *zeroblock;
-    unsigned long *allocationmap;
-    int cluster_sectors;
-    bool use_16_for_rw;
-    bool write_protected;
-    bool lbpme;
-    bool lbprz;
-    bool dpofua;
-    bool has_write_same;
-    /* set by iscsi_co_generic_cb() when a command ends with
-     * SCSI_STATUS_TIMEOUT */
-    bool request_timed_out;
-} IscsiLun;
-
-/* State of one SCSI command issued from a coroutine; filled in by
- * iscsi_co_generic_cb(). */
-typedef struct IscsiTask {
-    /* SCSI status reported for the command */
-    int status;
-    /* set to 1 once the command (or its retry timer) has finished */
-    int complete;
-    /* number of failed attempts so far, bounded by ISCSI_CMD_RETRIES */
-    int retries;
-    /* set to 1 when the callback decided the command should be retried */
-    int do_retry;
-    struct scsi_task *task;
-    /* coroutine to re-enter on completion; may be NULL */
-    Coroutine *co;
-    /* bottom half used to re-enter the coroutine */
-    QEMUBH *bh;
-    IscsiLun *iscsilun;
-    /* delays a retry after Busy / TaskSetFull / TimeOut */
-    QEMUTimer retry_timer;
-    /* negative errno produced by iscsi_translate_sense() on failure */
-    int err_code;
-} IscsiTask;
-
-/* AIOCB of the iSCSI driver's asynchronous requests. */
-typedef struct IscsiAIOCB {
-    BlockAIOCB common;
-    QEMUIOVector *qiov;
-    /* completion bottom half; non-NULL once iscsi_schedule_bh() ran */
-    QEMUBH *bh;
-    IscsiLun *iscsilun;
-    /* SCSI task; freed by iscsi_bh_cb() after the callback ran */
-    struct scsi_task *task;
-    /* buffer freed with g_free() by iscsi_bh_cb() */
-    uint8_t *buf;
-    /* value handed to the completion callback; -EINPROGRESS while the
-     * request can still be cancelled */
-    int status;
-    int64_t sector_num;
-    int nb_sectors;
-    int ret;
-#ifdef __linux__
-    sg_io_hdr_t *ioh;
-#endif
-} IscsiAIOCB;
-
-/* libiscsi uses time_t so it's enough to process events every second */
-#define EVENT_INTERVAL 1000
-#define NOP_INTERVAL 5000
-#define MAX_NOP_FAILURES 3
-/* Maximum number of retries per command: one per entry of the table below. */
-#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
-/* Mean retry delay, in ms, for the n-th retry; iscsi_co_generic_cb() feeds
- * the value to exp_random() to pick the actual delay. */
-static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
-
-/* This threshold is a trade-off knob to choose between
- * the potential additional overhead of an extra GET_LBA_STATUS request
- * vs. unnecessarily reading a lot of zero sectors over the wire.
- * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
- * sectors, we first check the allocation status of the area covered by the
- * request if the allocationmap indicates that the area might be
- * unallocated. */
-#define ISCSI_CHECKALLOC_THRES 64
-
-/*
- * Completion bottom half of an IscsiAIOCB: deletes itself, frees the data
- * buffer, invokes the completion callback with acb->status, releases the
- * SCSI task if there is one and drops the AIOCB reference.
- */
-static void
-iscsi_bh_cb(void *p)
-{
-    IscsiAIOCB *iacb = p;
-
-    qemu_bh_delete(iacb->bh);
-
-    g_free(iacb->buf);
-    iacb->buf = NULL;
-
-    iacb->common.cb(iacb->common.opaque, iacb->status);
-
-    if (iacb->task) {
-        scsi_free_scsi_task(iacb->task);
-        iacb->task = NULL;
-    }
-
-    qemu_aio_unref(iacb);
-}
-
-/* Schedule the completion bottom half for @acb unless one already exists. */
-static void
-iscsi_schedule_bh(IscsiAIOCB *acb)
-{
-    if (!acb->bh) {
-        acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
-        qemu_bh_schedule(acb->bh);
-    }
-}
-
-/* Bottom half scheduled by iscsi_co_generic_cb(): mark the task complete,
- * delete the bottom half and re-enter the waiting coroutine. */
-static void iscsi_co_generic_bh_cb(void *opaque)
-{
-    struct IscsiTask *task_state = opaque;
-
-    task_state->complete = 1;
-    qemu_bh_delete(task_state->bh);
-    qemu_coroutine_enter(task_state->co, NULL);
-}
-
-/* Retry timer callback: mark the task complete and, if a coroutine is
- * attached, re-enter it. */
-static void iscsi_retry_timer_expired(void *opaque)
-{
-    struct IscsiTask *task_state = opaque;
-
-    task_state->complete = 1;
-    if (!task_state->co) {
-        return;
-    }
-    qemu_coroutine_enter(task_state->co, NULL);
-}
-
-/*
- * Draw an exponentially distributed random value with the given @mean,
- * truncated to an unsigned integer.
- *
- * The uniform sample is taken from (0, 1] rather than [0, 1]: rand() may
- * return 0, and log(0) is -infinity, which would make the result +infinity
- * and its conversion to unsigned undefined behaviour.
- */
-static inline unsigned exp_random(double mean)
-{
-    double uniform = ((double)rand() + 1.0) / ((double)RAND_MAX + 1.0);
-
-    return -mean * log(uniform);
-}
-
-/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
- * libiscsi 1.10.0, together with other constants we need.  Use it as
- * a hint that we have to define them ourselves if needed, to keep the
- * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
- * the test because SCSI_STATUS_* is an enum.
- *
- * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
- * an enum, check against the LIBISCSI_API_VERSION macro, which was
- * introduced in 1.11.0.  If it is present, there is no need to define
- * anything.
- */
-#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
-    !defined(LIBISCSI_API_VERSION)
-#define SCSI_STATUS_TASK_SET_FULL                          0x28
-#define SCSI_STATUS_TIMEOUT                                0x0f000002
-#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
-#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
-#endif /* fallback definitions for libiscsi 1.9.0 */
-
-/*
- * Map SCSI sense data to a negative errno value.  The sense key decides
- * directly for every key except ILLEGAL REQUEST, for which the ASCQ is
- * decoded; anything unrecognized becomes -EIO.
- */
-static int iscsi_translate_sense(struct scsi_sense *sense)
-{
-    switch (sense->key) {
-    case SCSI_SENSE_ILLEGAL_REQUEST:
-        /* Parse ASCQ */
-        break;
-    case SCSI_SENSE_NOT_READY:
-        return -EBUSY;
-    case SCSI_SENSE_DATA_PROTECTION:
-        return -EACCES;
-    case SCSI_SENSE_COMMAND_ABORTED:
-        return -ECANCELED;
-    default:
-        return -EIO;
-    }
-
-    switch (sense->ascq) {
-    case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
-    case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
-    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
-    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
-        return -EINVAL;
-    case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
-        return -ENOSPC;
-    case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
-        return -ENOTSUP;
-    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
-    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
-    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
-        return -ENOMEDIUM;
-    case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
-        return -EACCES;
-    default:
-        break;
-    }
-
-    return -EIO;
-}
-
-/*
- * Generic libiscsi completion callback for commands issued from a
- * coroutine.  @opaque is the IscsiTask of the command, @command_data its
- * scsi_task.
- *
- * Records the status and task, then:
- *  - on UNIT ATTENTION (while retries remain): flags do_retry and wakes
- *    the waiter right away;
- *  - on Busy / TimeOut / TaskSetFull (while retries remain): flags
- *    do_retry and arms retry_timer; the waiter is woken by
- *    iscsi_retry_timer_expired() instead of from here;
- *  - on any other failure: stores the translated sense in err_code.
- * The waiter is woken through a bottom half when a coroutine is attached;
- * otherwise only the complete flag is set.
- */
-static void
-iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
-                        void *command_data, void *opaque)
-{
-    struct IscsiTask *iTask = opaque;
-    struct scsi_task *task = command_data;
-
-    iTask->status = status;
-    iTask->do_retry = 0;
-    iTask->task = task;
-
-    if (status != SCSI_STATUS_GOOD) {
-        /* retries is post-incremented, so inside this branch it is the
-         * 1-based number of the current retry */
-        if (iTask->retries++ < ISCSI_CMD_RETRIES) {
-            if (status == SCSI_STATUS_CHECK_CONDITION
-                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
-                error_report("iSCSI CheckCondition: %s",
-                             iscsi_get_error(iscsi));
-                iTask->do_retry = 1;
-                goto out;
-            }
-            if (status == SCSI_STATUS_BUSY ||
-                status == SCSI_STATUS_TIMEOUT ||
-                status == SCSI_STATUS_TASK_SET_FULL) {
-                /* randomized delay whose mean grows with each retry */
-                unsigned retry_time =
-                    exp_random(iscsi_retry_times[iTask->retries - 1]);
-                if (status == SCSI_STATUS_TIMEOUT) {
-                    /* make sure the request is rescheduled AFTER the
-                     * reconnect is initiated */
-                    retry_time = EVENT_INTERVAL * 2;
-                    iTask->iscsilun->request_timed_out = true;
-                }
-                error_report("iSCSI Busy/TaskSetFull/TimeOut"
-                             " (retry #%u in %u ms): %s",
-                             iTask->retries, retry_time,
-                             iscsi_get_error(iscsi));
-                aio_timer_init(iTask->iscsilun->aio_context,
-                               &iTask->retry_timer, QEMU_CLOCK_REALTIME,
-                               SCALE_MS, iscsi_retry_timer_expired, iTask);
-                timer_mod(&iTask->retry_timer,
-                          qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
-                iTask->do_retry = 1;
-                /* completion is signalled by the timer, not from here */
-                return;
-            }
-        }
-        iTask->err_code = iscsi_translate_sense(&task->sense);
-        error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
-    }
-
-out:
-    if (iTask->co) {
-        /* re-enter the coroutine from a bottom half rather than from
-         * inside this callback */
-        iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
-                               iscsi_co_generic_bh_cb, iTask);
-        qemu_bh_schedule(iTask->bh);
-    } else {
-        iTask->complete = 1;
-    }
-}
-
-static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
-{
-    *iTask = (struct IscsiTask) {
-        .co         = qemu_coroutine_self(),
-        .iscsilun   = iscsilun,
-    };
-}
-
-static void
-iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
-                    void *private_data)
-{
-    IscsiAIOCB *acb = private_data;
-
-    acb->status = -ECANCELED;
-    iscsi_schedule_bh(acb);
-}
-
-static void
-iscsi_aio_cancel(BlockAIOCB *blockacb)
-{
-    IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
-    IscsiLun *iscsilun = acb->iscsilun;
-
-    if (acb->status != -EINPROGRESS) {
-        return;
-    }
-
-    /* send a task mgmt call to the target to cancel the task on the target */
-    iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
-                                     iscsi_abort_task_cb, acb);
-
-}
-
-static const AIOCBInfo iscsi_aiocb_info = {
-    .aiocb_size         = sizeof(IscsiAIOCB),
-    .cancel_async       = iscsi_aio_cancel,
-};
-
-
-static void iscsi_process_read(void *arg);
-static void iscsi_process_write(void *arg);
-
-static void
-iscsi_set_events(IscsiLun *iscsilun)
-{
-    struct iscsi_context *iscsi = iscsilun->iscsi;
-    int ev = iscsi_which_events(iscsi);
-
-    if (ev != iscsilun->events) {
-        aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
-                           false,
-                           (ev & POLLIN) ? iscsi_process_read : NULL,
-                           (ev & POLLOUT) ? iscsi_process_write : NULL,
-                           iscsilun);
-        iscsilun->events = ev;
-    }
-}
-
-static void iscsi_timed_check_events(void *opaque)
-{
-    IscsiLun *iscsilun = opaque;
-
-    /* check for timed out requests */
-    iscsi_service(iscsilun->iscsi, 0);
-
-    if (iscsilun->request_timed_out) {
-        iscsilun->request_timed_out = false;
-        iscsi_reconnect(iscsilun->iscsi);
-    }
-
-    /* newer versions of libiscsi may return zero events. Ensure we are able
-     * to return to service once this situation changes. */
-    iscsi_set_events(iscsilun);
-
-    timer_mod(iscsilun->event_timer,
-              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
-}
-
-static void
-iscsi_process_read(void *arg)
-{
-    IscsiLun *iscsilun = arg;
-    struct iscsi_context *iscsi = iscsilun->iscsi;
-
-    iscsi_service(iscsi, POLLIN);
-    iscsi_set_events(iscsilun);
-}
-
-static void
-iscsi_process_write(void *arg)
-{
-    IscsiLun *iscsilun = arg;
-    struct iscsi_context *iscsi = iscsilun->iscsi;
-
-    iscsi_service(iscsi, POLLOUT);
-    iscsi_set_events(iscsilun);
-}
-
-static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
-    return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
-}
-
-static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
-{
-    return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
-}
-
-static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
-                                      IscsiLun *iscsilun)
-{
-    if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
-        (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
-            error_report("iSCSI misaligned request: "
-                         "iscsilun->block_size %u, sector_num %" PRIi64
-                         ", nb_sectors %d",
-                         iscsilun->block_size, sector_num, nb_sectors);
-            return 0;
-    }
-    return 1;
-}
-
-static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
-{
-    return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
-                                                       iscsilun),
-                                       iscsilun->cluster_sectors));
-}
-
-static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
-                                    int nb_sectors)
-{
-    if (iscsilun->allocationmap == NULL) {
-        return;
-    }
-    bitmap_set(iscsilun->allocationmap,
-               sector_num / iscsilun->cluster_sectors,
-               DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
-}
-
-static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
-                                      int nb_sectors)
-{
-    int64_t cluster_num, nb_clusters;
-    if (iscsilun->allocationmap == NULL) {
-        return;
-    }
-    cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
-    nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
-                  - cluster_num;
-    if (nb_clusters > 0) {
-        bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
-    }
-}
-
-static int coroutine_fn
-iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                      QEMUIOVector *iov, int flags)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
-    uint64_t lba;
-    uint32_t num_sectors;
-    bool fua;
-
-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return -EINVAL;
-    }
-
-    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
-        error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
-                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
-        return -EINVAL;
-    }
-
-    lba = sector_qemu2lun(sector_num, iscsilun);
-    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-retry:
-    fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA);
-    if (iscsilun->use_16_for_rw) {
-        iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
-                                        NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, fua, 0, 0,
-                                        iscsi_co_generic_cb, &iTask);
-    } else {
-        iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
-                                        NULL, num_sectors * iscsilun->block_size,
-                                        iscsilun->block_size, 0, 0, fua, 0, 0,
-                                        iscsi_co_generic_cb, &iTask);
-    }
-    if (iTask.task == NULL) {
-        return -ENOMEM;
-    }
-    scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
-                          iov->niov);
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
-    }
-
-    if (iTask.do_retry) {
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
-    }
-
-    iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
-
-    return 0;
-}
-
-static int coroutine_fn
-iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                QEMUIOVector *iov)
-{
-    return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0);
-}
-
-
-static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
-                                             int64_t sector_num, int nb_sectors)
-{
-    unsigned long size;
-    if (iscsilun->allocationmap == NULL) {
-        return true;
-    }
-    size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
-    return !(find_next_bit(iscsilun->allocationmap, size,
-                           sector_num / iscsilun->cluster_sectors) == size);
-}
-
-static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
-                                                  int64_t sector_num,
-                                                  int nb_sectors, int *pnum,
-                                                  BlockDriverState **file)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct scsi_get_lba_status *lbas = NULL;
-    struct scsi_lba_status_descriptor *lbasd = NULL;
-    struct IscsiTask iTask;
-    int64_t ret;
-
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-
-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* default to all sectors allocated */
-    ret = BDRV_BLOCK_DATA;
-    ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
-    *pnum = nb_sectors;
-
-    /* LUN does not support logical block provisioning */
-    if (!iscsilun->lbpme) {
-        goto out;
-    }
-
-retry:
-    if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
-                                  sector_qemu2lun(sector_num, iscsilun),
-                                  8 + 16, iscsi_co_generic_cb,
-                                  &iTask) == NULL) {
-        ret = -ENOMEM;
-        goto out;
-    }
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.do_retry) {
-        if (iTask.task != NULL) {
-            scsi_free_scsi_task(iTask.task);
-            iTask.task = NULL;
-        }
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        /* in case the get_lba_status_callout fails (i.e.
-         * because the device is busy or the cmd is not
-         * supported) we pretend all blocks are allocated
-         * for backwards compatibility */
-        goto out;
-    }
-
-    lbas = scsi_datain_unmarshall(iTask.task);
-    if (lbas == NULL) {
-        ret = -EIO;
-        goto out;
-    }
-
-    lbasd = &lbas->descriptors[0];
-
-    if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
-        ret = -EIO;
-        goto out;
-    }
-
-    *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
-
-    if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
-        lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
-        ret &= ~BDRV_BLOCK_DATA;
-        if (iscsilun->lbprz) {
-            ret |= BDRV_BLOCK_ZERO;
-        }
-    }
-
-    if (ret & BDRV_BLOCK_ZERO) {
-        iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
-    } else {
-        iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
-    }
-
-    if (*pnum > nb_sectors) {
-        *pnum = nb_sectors;
-    }
-out:
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-    }
-    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
-        *file = bs;
-    }
-    return ret;
-}
-
-static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors,
-                                       QEMUIOVector *iov)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
-    uint64_t lba;
-    uint32_t num_sectors;
-
-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return -EINVAL;
-    }
-
-    if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
-        error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
-                     "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
-        return -EINVAL;
-    }
-
-    if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
-        !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
-        int64_t ret;
-        int pnum;
-        BlockDriverState *file;
-        ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file);
-        if (ret < 0) {
-            return ret;
-        }
-        if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
-            qemu_iovec_memset(iov, 0, 0x00, iov->size);
-            return 0;
-        }
-    }
-
-    lba = sector_qemu2lun(sector_num, iscsilun);
-    num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
-
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-retry:
-    if (iscsilun->use_16_for_rw) {
-        iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
-                                       num_sectors * iscsilun->block_size,
-                                       iscsilun->block_size, 0, 0, 0, 0, 0,
-                                       iscsi_co_generic_cb, &iTask);
-    } else {
-        iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
-                                       num_sectors * iscsilun->block_size,
-                                       iscsilun->block_size,
-                                       0, 0, 0, 0, 0,
-                                       iscsi_co_generic_cb, &iTask);
-    }
-    if (iTask.task == NULL) {
-        return -ENOMEM;
-    }
-    scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
-    }
-
-    if (iTask.do_retry) {
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
-    }
-
-    return 0;
-}
-
-static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
-
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-retry:
-    if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
-                                      0, iscsi_co_generic_cb, &iTask) == NULL) {
-        return -ENOMEM;
-    }
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
-    }
-
-    if (iTask.do_retry) {
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
-    }
-
-    return 0;
-}
-
-#ifdef __linux__
-static void
-iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
-                     void *command_data, void *opaque)
-{
-    IscsiAIOCB *acb = opaque;
-
-    g_free(acb->buf);
-    acb->buf = NULL;
-
-    acb->status = 0;
-    if (status < 0) {
-        error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
-                     iscsi_get_error(iscsi));
-        acb->status = iscsi_translate_sense(&acb->task->sense);
-    }
-
-    acb->ioh->driver_status = 0;
-    acb->ioh->host_status   = 0;
-    acb->ioh->resid         = 0;
-
-#define SG_ERR_DRIVER_SENSE    0x08
-
-    if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
-        int ss;
-
-        acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
-
-        acb->ioh->sb_len_wr = acb->task->datain.size - 2;
-        ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
-             acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
-        memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
-    }
-
-    iscsi_schedule_bh(acb);
-}
-
-static void iscsi_ioctl_bh_completion(void *opaque)
-{
-    IscsiAIOCB *acb = opaque;
-
-    qemu_bh_delete(acb->bh);
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}
-
-static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
-{
-    BlockDriverState *bs = acb->common.bs;
-    IscsiLun *iscsilun = bs->opaque;
-    int ret = 0;
-
-    switch (req) {
-    case SG_GET_VERSION_NUM:
-        *(int *)buf = 30000;
-        break;
-    case SG_GET_SCSI_ID:
-        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
-        break;
-    default:
-        ret = -EINVAL;
-    }
-    assert(!acb->bh);
-    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
-                         iscsi_ioctl_bh_completion, acb);
-    acb->ret = ret;
-    qemu_bh_schedule(acb->bh);
-}
-
-static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct iscsi_context *iscsi = iscsilun->iscsi;
-    struct iscsi_data data;
-    IscsiAIOCB *acb;
-
-    acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
-
-    acb->iscsilun = iscsilun;
-    acb->bh          = NULL;
-    acb->status      = -EINPROGRESS;
-    acb->buf         = NULL;
-    acb->ioh         = buf;
-
-    if (req != SG_IO) {
-        iscsi_ioctl_handle_emulated(acb, req, buf);
-        return &acb->common;
-    }
-
-    acb->task = malloc(sizeof(struct scsi_task));
-    if (acb->task == NULL) {
-        error_report("iSCSI: Failed to allocate task for scsi command. %s",
-                     iscsi_get_error(iscsi));
-        qemu_aio_unref(acb);
-        return NULL;
-    }
-    memset(acb->task, 0, sizeof(struct scsi_task));
-
-    switch (acb->ioh->dxfer_direction) {
-    case SG_DXFER_TO_DEV:
-        acb->task->xfer_dir = SCSI_XFER_WRITE;
-        break;
-    case SG_DXFER_FROM_DEV:
-        acb->task->xfer_dir = SCSI_XFER_READ;
-        break;
-    default:
-        acb->task->xfer_dir = SCSI_XFER_NONE;
-        break;
-    }
-
-    acb->task->cdb_size = acb->ioh->cmd_len;
-    memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
-    acb->task->expxferlen = acb->ioh->dxfer_len;
-
-    data.size = 0;
-    if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
-        if (acb->ioh->iovec_count == 0) {
-            data.data = acb->ioh->dxferp;
-            data.size = acb->ioh->dxfer_len;
-        } else {
-            scsi_task_set_iov_out(acb->task,
-                                 (struct scsi_iovec *) acb->ioh->dxferp,
-                                 acb->ioh->iovec_count);
-        }
-    }
-
-    if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
-                                 iscsi_aio_ioctl_cb,
-                                 (data.size > 0) ? &data : NULL,
-                                 acb) != 0) {
-        scsi_free_scsi_task(acb->task);
-        qemu_aio_unref(acb);
-        return NULL;
-    }
-
-    /* tell libiscsi to read straight into the buffer we got from ioctl */
-    if (acb->task->xfer_dir == SCSI_XFER_READ) {
-        if (acb->ioh->iovec_count == 0) {
-            scsi_task_add_data_in_buffer(acb->task,
-                                         acb->ioh->dxfer_len,
-                                         acb->ioh->dxferp);
-        } else {
-            scsi_task_set_iov_in(acb->task,
-                                 (struct scsi_iovec *) acb->ioh->dxferp,
-                                 acb->ioh->iovec_count);
-        }
-    }
-
-    iscsi_set_events(iscsilun);
-
-    return &acb->common;
-}
-
-#endif
-
-static int64_t
-iscsi_getlength(BlockDriverState *bs)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    int64_t len;
-
-    len  = iscsilun->num_blocks;
-    len *= iscsilun->block_size;
-
-    return len;
-}
-
-static int
-coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                   int nb_sectors)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
-    struct unmap_list list;
-
-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return -EINVAL;
-    }
-
-    if (!iscsilun->lbp.lbpu) {
-        /* UNMAP is not supported by the target */
-        return 0;
-    }
-
-    list.lba = sector_qemu2lun(sector_num, iscsilun);
-    list.num = sector_qemu2lun(nb_sectors, iscsilun);
-
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-retry:
-    if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
-                     iscsi_co_generic_cb, &iTask) == NULL) {
-        return -ENOMEM;
-    }
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
-    }
-
-    if (iTask.do_retry) {
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
-        /* the target might fail with a check condition if it
-           is not happy with the alignment of the UNMAP request
-           we silently fail in this case */
-        return 0;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
-    }
-
-    iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
-
-    return 0;
-}
-
-static int
-coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
-                                   int nb_sectors, BdrvRequestFlags flags)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct IscsiTask iTask;
-    uint64_t lba;
-    uint32_t nb_blocks;
-    bool use_16_for_ws = iscsilun->use_16_for_rw;
-
-    if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
-        return -EINVAL;
-    }
-
-    if (flags & BDRV_REQ_MAY_UNMAP) {
-        if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
-            /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
-            use_16_for_ws = true;
-        }
-        if (use_16_for_ws && !iscsilun->lbp.lbpws) {
-            /* WRITESAME16 with UNMAP is not supported by the target,
-             * fall back and try WRITESAME10/16 without UNMAP */
-            flags &= ~BDRV_REQ_MAY_UNMAP;
-            use_16_for_ws = iscsilun->use_16_for_rw;
-        }
-    }
-
-    if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
-        /* WRITESAME without UNMAP is not supported by the target */
-        return -ENOTSUP;
-    }
-
-    lba = sector_qemu2lun(sector_num, iscsilun);
-    nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
-
-    if (iscsilun->zeroblock == NULL) {
-        iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
-        if (iscsilun->zeroblock == NULL) {
-            return -ENOMEM;
-        }
-    }
-
-    iscsi_co_init_iscsitask(iscsilun, &iTask);
-retry:
-    if (use_16_for_ws) {
-        iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
-                                            iscsilun->zeroblock, iscsilun->block_size,
-                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
-                                            0, 0, iscsi_co_generic_cb, &iTask);
-    } else {
-        iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
-                                            iscsilun->zeroblock, iscsilun->block_size,
-                                            nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
-                                            0, 0, iscsi_co_generic_cb, &iTask);
-    }
-    if (iTask.task == NULL) {
-        return -ENOMEM;
-    }
-
-    while (!iTask.complete) {
-        iscsi_set_events(iscsilun);
-        qemu_coroutine_yield();
-    }
-
-    if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
-        iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
-        (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
-         iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
-        /* WRITE SAME is not supported by the target */
-        iscsilun->has_write_same = false;
-        scsi_free_scsi_task(iTask.task);
-        return -ENOTSUP;
-    }
-
-    if (iTask.task != NULL) {
-        scsi_free_scsi_task(iTask.task);
-        iTask.task = NULL;
-    }
-
-    if (iTask.do_retry) {
-        iTask.complete = 0;
-        goto retry;
-    }
-
-    if (iTask.status != SCSI_STATUS_GOOD) {
-        return iTask.err_code;
-    }
-
-    if (flags & BDRV_REQ_MAY_UNMAP) {
-        iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
-    } else {
-        iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
-    }
-
-    return 0;
-}
-
-static void parse_chap(struct iscsi_context *iscsi, const char *target,
-                       Error **errp)
-{
-    QemuOptsList *list;
-    QemuOpts *opts;
-    const char *user = NULL;
-    const char *password = NULL;
-    const char *secretid;
-    char *secret = NULL;
-
-    list = qemu_find_opts("iscsi");
-    if (!list) {
-        return;
-    }
-
-    opts = qemu_opts_find(list, target);
-    if (opts == NULL) {
-        opts = QTAILQ_FIRST(&list->head);
-        if (!opts) {
-            return;
-        }
-    }
-
-    user = qemu_opt_get(opts, "user");
-    if (!user) {
-        return;
-    }
-
-    secretid = qemu_opt_get(opts, "password-secret");
-    password = qemu_opt_get(opts, "password");
-    if (secretid && password) {
-        error_setg(errp, "'password' and 'password-secret' properties are "
-                   "mutually exclusive");
-        return;
-    }
-    if (secretid) {
-        secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
-        if (!secret) {
-            return;
-        }
-        password = secret;
-    } else if (!password) {
-        error_setg(errp, "CHAP username specified but no password was given");
-        return;
-    }
-
-    if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
-        error_setg(errp, "Failed to set initiator username and password");
-    }
-
-    g_free(secret);
-}
-
-static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
-                                Error **errp)
-{
-    QemuOptsList *list;
-    QemuOpts *opts;
-    const char *digest = NULL;
-
-    list = qemu_find_opts("iscsi");
-    if (!list) {
-        return;
-    }
-
-    opts = qemu_opts_find(list, target);
-    if (opts == NULL) {
-        opts = QTAILQ_FIRST(&list->head);
-        if (!opts) {
-            return;
-        }
-    }
-
-    digest = qemu_opt_get(opts, "header-digest");
-    if (!digest) {
-        return;
-    }
-
-    if (!strcmp(digest, "CRC32C")) {
-        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
-    } else if (!strcmp(digest, "NONE")) {
-        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
-    } else if (!strcmp(digest, "CRC32C-NONE")) {
-        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
-    } else if (!strcmp(digest, "NONE-CRC32C")) {
-        iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
-    } else {
-        error_setg(errp, "Invalid header-digest setting : %s", digest);
-    }
-}
-
-static char *parse_initiator_name(const char *target)
-{
-    QemuOptsList *list;
-    QemuOpts *opts;
-    const char *name;
-    char *iscsi_name;
-    UuidInfo *uuid_info;
-
-    list = qemu_find_opts("iscsi");
-    if (list) {
-        opts = qemu_opts_find(list, target);
-        if (!opts) {
-            opts = QTAILQ_FIRST(&list->head);
-        }
-        if (opts) {
-            name = qemu_opt_get(opts, "initiator-name");
-            if (name) {
-                return g_strdup(name);
-            }
-        }
-    }
-
-    uuid_info = qmp_query_uuid(NULL);
-    if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
-        name = qemu_get_vm_name();
-    } else {
-        name = uuid_info->UUID;
-    }
-    iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
-                                 name ? ":" : "", name ? name : "");
-    qapi_free_UuidInfo(uuid_info);
-    return iscsi_name;
-}
-
-static int parse_timeout(const char *target)
-{
-    QemuOptsList *list;
-    QemuOpts *opts;
-    const char *timeout;
-
-    list = qemu_find_opts("iscsi");
-    if (list) {
-        opts = qemu_opts_find(list, target);
-        if (!opts) {
-            opts = QTAILQ_FIRST(&list->head);
-        }
-        if (opts) {
-            timeout = qemu_opt_get(opts, "timeout");
-            if (timeout) {
-                return atoi(timeout);
-            }
-        }
-    }
-
-    return 0;
-}
-
-static void iscsi_nop_timed_event(void *opaque)
-{
-    IscsiLun *iscsilun = opaque;
-
-    if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
-        error_report("iSCSI: NOP timeout. Reconnecting...");
-        iscsilun->request_timed_out = true;
-    } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
-        error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
-        return;
-    }
-
-    timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-    iscsi_set_events(iscsilun);
-}
-
-static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
-{
-    struct scsi_task *task = NULL;
-    struct scsi_readcapacity10 *rc10 = NULL;
-    struct scsi_readcapacity16 *rc16 = NULL;
-    int retries = ISCSI_CMD_RETRIES; 
-
-    do {
-        if (task != NULL) {
-            scsi_free_scsi_task(task);
-            task = NULL;
-        }
-
-        switch (iscsilun->type) {
-        case TYPE_DISK:
-            task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
-            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
-                rc16 = scsi_datain_unmarshall(task);
-                if (rc16 == NULL) {
-                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
-                } else {
-                    iscsilun->block_size = rc16->block_length;
-                    iscsilun->num_blocks = rc16->returned_lba + 1;
-                    iscsilun->lbpme = !!rc16->lbpme;
-                    iscsilun->lbprz = !!rc16->lbprz;
-                    iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
-                }
-                break;
-            }
-            if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
-                && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
-                break;
-            }
-            /* Fall through and try READ CAPACITY(10) instead.  */
-        case TYPE_ROM:
-            task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
-            if (task != NULL && task->status == SCSI_STATUS_GOOD) {
-                rc10 = scsi_datain_unmarshall(task);
-                if (rc10 == NULL) {
-                    error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
-                } else {
-                    iscsilun->block_size = rc10->block_size;
-                    if (rc10->lba == 0) {
-                        /* blank disk loaded */
-                        iscsilun->num_blocks = 0;
-                    } else {
-                        iscsilun->num_blocks = rc10->lba + 1;
-                    }
-                }
-            }
-            break;
-        default:
-            return;
-        }
-    } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
-             && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
-             && retries-- > 0);
-
-    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-        error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
-    } else if (!iscsilun->block_size ||
-               iscsilun->block_size % BDRV_SECTOR_SIZE) {
-        error_setg(errp, "iSCSI: the target returned an invalid "
-                   "block size of %d.", iscsilun->block_size);
-    }
-    if (task) {
-        scsi_free_scsi_task(task);
-    }
-}
-
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "iscsi",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the iscsi image",
-        },
-        { /* end of list */ }
-    },
-};
-
-static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
-                                          int evpd, int pc, void **inq, Error **errp)
-{
-    int full_size;
-    struct scsi_task *task = NULL;
-    task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
-    if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-        goto fail;
-    }
-    full_size = scsi_datain_getfullsize(task);
-    if (full_size > task->datain.size) {
-        scsi_free_scsi_task(task);
-
-        /* we need more data for the full list */
-        task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
-        if (task == NULL || task->status != SCSI_STATUS_GOOD) {
-            goto fail;
-        }
-    }
-
-    *inq = scsi_datain_unmarshall(task);
-    if (*inq == NULL) {
-        error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
-        goto fail_with_err;
-    }
-
-    return task;
-
-fail:
-    error_setg(errp, "iSCSI: Inquiry command failed : %s",
-               iscsi_get_error(iscsi));
-fail_with_err:
-    if (task != NULL) {
-        scsi_free_scsi_task(task);
-    }
-    return NULL;
-}
-
-static void iscsi_detach_aio_context(BlockDriverState *bs)
-{
-    IscsiLun *iscsilun = bs->opaque;
-
-    aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
-                       false, NULL, NULL, NULL);
-    iscsilun->events = 0;
-
-    if (iscsilun->nop_timer) {
-        timer_del(iscsilun->nop_timer);
-        timer_free(iscsilun->nop_timer);
-        iscsilun->nop_timer = NULL;
-    }
-    if (iscsilun->event_timer) {
-        timer_del(iscsilun->event_timer);
-        timer_free(iscsilun->event_timer);
-        iscsilun->event_timer = NULL;
-    }
-}
-
-static void iscsi_attach_aio_context(BlockDriverState *bs,
-                                     AioContext *new_context)
-{
-    IscsiLun *iscsilun = bs->opaque;
-
-    iscsilun->aio_context = new_context;
-    iscsi_set_events(iscsilun);
-
-    /* Set up a timer for sending out iSCSI NOPs */
-    iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
-                                        QEMU_CLOCK_REALTIME, SCALE_MS,
-                                        iscsi_nop_timed_event, iscsilun);
-    timer_mod(iscsilun->nop_timer,
-              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-
-    /* Set up a timer for periodic calls to iscsi_set_events and to
-     * scan for command timeout */
-    iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
-                                          QEMU_CLOCK_REALTIME, SCALE_MS,
-                                          iscsi_timed_check_events, iscsilun);
-    timer_mod(iscsilun->event_timer,
-              qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
-}
-
-static void iscsi_modesense_sync(IscsiLun *iscsilun)
-{
-    struct scsi_task *task;
-    struct scsi_mode_sense *ms = NULL;
-    iscsilun->write_protected = false;
-    iscsilun->dpofua = false;
-
-    task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
-                                 1, SCSI_MODESENSE_PC_CURRENT,
-                                 0x3F, 0, 255);
-    if (task == NULL) {
-        error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
-                     iscsi_get_error(iscsilun->iscsi));
-        goto out;
-    }
-
-    if (task->status != SCSI_STATUS_GOOD) {
-        error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
-        goto out;
-    }
-    ms = scsi_datain_unmarshall(task);
-    if (!ms) {
-        error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
-                     iscsi_get_error(iscsilun->iscsi));
-        goto out;
-    }
-    iscsilun->write_protected = ms->device_specific_parameter & 0x80;
-    iscsilun->dpofua          = ms->device_specific_parameter & 0x10;
-
-out:
-    if (task) {
-        scsi_free_scsi_task(task);
-    }
-}
-
-/*
- * We support iscsi url's on the form
- * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
- */
-static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct iscsi_context *iscsi = NULL;
-    struct iscsi_url *iscsi_url = NULL;
-    struct scsi_task *task = NULL;
-    struct scsi_inquiry_standard *inq = NULL;
-    struct scsi_inquiry_supported_pages *inq_vpd;
-    char *initiator_name = NULL;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *filename;
-    int i, ret = 0, timeout = 0;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-
-    iscsi_url = iscsi_parse_full_url(iscsi, filename);
-    if (iscsi_url == NULL) {
-        error_setg(errp, "Failed to parse URL : %s", filename);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    memset(iscsilun, 0, sizeof(IscsiLun));
-
-    initiator_name = parse_initiator_name(iscsi_url->target);
-
-    iscsi = iscsi_create_context(initiator_name);
-    if (iscsi == NULL) {
-        error_setg(errp, "iSCSI: Failed to create iSCSI context.");
-        ret = -ENOMEM;
-        goto out;
-    }
-
-    if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
-        error_setg(errp, "iSCSI: Failed to set target name.");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (iscsi_url->user[0] != '\0') {
-        ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
-                                              iscsi_url->passwd);
-        if (ret != 0) {
-            error_setg(errp, "Failed to set initiator username and password");
-            ret = -EINVAL;
-            goto out;
-        }
-    }
-
-    /* check if we got CHAP username/password via the options */
-    parse_chap(iscsi, iscsi_url->target, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
-        error_setg(errp, "iSCSI: Failed to set session type to normal.");
-        ret = -EINVAL;
-        goto out;
-    }
-
-    iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
-
-    /* check if we got HEADER_DIGEST via the options */
-    parse_header_digest(iscsi, iscsi_url->target, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    /* timeout handling is broken in libiscsi before 1.15.0 */
-    timeout = parse_timeout(iscsi_url->target);
-#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
-    iscsi_set_timeout(iscsi, timeout);
-#else
-    if (timeout) {
-        error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
-    }
-#endif
-
-    if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
-        error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
-            iscsi_get_error(iscsi));
-        ret = -EINVAL;
-        goto out;
-    }
-
-    iscsilun->iscsi = iscsi;
-    iscsilun->aio_context = bdrv_get_aio_context(bs);
-    iscsilun->lun   = iscsi_url->lun;
-    iscsilun->has_write_same = true;
-
-    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
-                            (void **) &inq, errp);
-    if (task == NULL) {
-        ret = -EINVAL;
-        goto out;
-    }
-    iscsilun->type = inq->periperal_device_type;
-    scsi_free_scsi_task(task);
-    task = NULL;
-
-    iscsi_modesense_sync(iscsilun);
-
-    /* Check the write protect flag of the LUN if we want to write */
-    if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
-        iscsilun->write_protected) {
-        error_setg(errp, "Cannot open a write protected LUN as read-write");
-        ret = -EACCES;
-        goto out;
-    }
-
-    iscsi_readcapacity_sync(iscsilun, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-    bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
-    bs->request_alignment = iscsilun->block_size;
-
-    /* We don't have any emulation for devices other than disks and CD-ROMs, so
-     * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
-     * will try to read from the device to guess the image format.
-     */
-    if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
-        bs->sg = 1;
-    }
-
-    task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                            SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
-                            (void **) &inq_vpd, errp);
-    if (task == NULL) {
-        ret = -EINVAL;
-        goto out;
-    }
-    for (i = 0; i < inq_vpd->num_pages; i++) {
-        struct scsi_task *inq_task;
-        struct scsi_inquiry_logical_block_provisioning *inq_lbp;
-        struct scsi_inquiry_block_limits *inq_bl;
-        switch (inq_vpd->pages[i]) {
-        case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
-            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                                        SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
-                                        (void **) &inq_lbp, errp);
-            if (inq_task == NULL) {
-                ret = -EINVAL;
-                goto out;
-            }
-            memcpy(&iscsilun->lbp, inq_lbp,
-                   sizeof(struct scsi_inquiry_logical_block_provisioning));
-            scsi_free_scsi_task(inq_task);
-            break;
-        case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
-            inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
-                                    SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
-                                    (void **) &inq_bl, errp);
-            if (inq_task == NULL) {
-                ret = -EINVAL;
-                goto out;
-            }
-            memcpy(&iscsilun->bl, inq_bl,
-                   sizeof(struct scsi_inquiry_block_limits));
-            scsi_free_scsi_task(inq_task);
-            break;
-        default:
-            break;
-        }
-    }
-    scsi_free_scsi_task(task);
-    task = NULL;
-
-    iscsi_attach_aio_context(bs, iscsilun->aio_context);
-
-    /* Guess the internal cluster (page) size of the iscsi target by the means
-     * of opt_unmap_gran. Transfer the unmap granularity only if it has a
-     * reasonable size */
-    if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
-        iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
-        iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
-                                     iscsilun->block_size) >> BDRV_SECTOR_BITS;
-        if (iscsilun->lbprz) {
-            iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
-            if (iscsilun->allocationmap == NULL) {
-                ret = -ENOMEM;
-            }
-        }
-    }
-
-out:
-    qemu_opts_del(opts);
-    g_free(initiator_name);
-    if (iscsi_url != NULL) {
-        iscsi_destroy_url(iscsi_url);
-    }
-    if (task != NULL) {
-        scsi_free_scsi_task(task);
-    }
-
-    if (ret) {
-        if (iscsi != NULL) {
-            if (iscsi_is_logged_in(iscsi)) {
-                iscsi_logout_sync(iscsi);
-            }
-            iscsi_destroy_context(iscsi);
-        }
-        memset(iscsilun, 0, sizeof(IscsiLun));
-    }
-    return ret;
-}
-
-static void iscsi_close(BlockDriverState *bs)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    struct iscsi_context *iscsi = iscsilun->iscsi;
-
-    iscsi_detach_aio_context(bs);
-    if (iscsi_is_logged_in(iscsi)) {
-        iscsi_logout_sync(iscsi);
-    }
-    iscsi_destroy_context(iscsi);
-    g_free(iscsilun->zeroblock);
-    g_free(iscsilun->allocationmap);
-    memset(iscsilun, 0, sizeof(IscsiLun));
-}
-
-static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
-    return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
-}
-
-static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    /* We don't actually refresh here, but just return data queried in
-     * iscsi_open(): iscsi targets don't change their limits. */
-
-    IscsiLun *iscsilun = bs->opaque;
-    uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
-
-    if (iscsilun->bl.max_xfer_len) {
-        max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
-    }
-
-    bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
-
-    if (iscsilun->lbp.lbpu) {
-        if (iscsilun->bl.max_unmap < 0xffffffff) {
-            bs->bl.max_discard =
-                sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
-        }
-        bs->bl.discard_alignment =
-            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
-    }
-
-    if (iscsilun->bl.max_ws_len < 0xffffffff) {
-        bs->bl.max_write_zeroes =
-            sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
-    }
-    if (iscsilun->lbp.lbpws) {
-        bs->bl.write_zeroes_alignment =
-            sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
-    }
-    bs->bl.opt_transfer_length =
-        sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
-}
-
-/* Note that this will not re-establish a connection with an iSCSI target - it
- * is effectively a NOP.  */
-static int iscsi_reopen_prepare(BDRVReopenState *state,
-                                BlockReopenQueue *queue, Error **errp)
-{
-    IscsiLun *iscsilun = state->bs->opaque;
-
-    if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
-        error_setg(errp, "Cannot open a write protected LUN as read-write");
-        return -EACCES;
-    }
-    return 0;
-}
-
-static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    Error *local_err = NULL;
-
-    if (iscsilun->type != TYPE_DISK) {
-        return -ENOTSUP;
-    }
-
-    iscsi_readcapacity_sync(iscsilun, &local_err);
-    if (local_err != NULL) {
-        error_free(local_err);
-        return -EIO;
-    }
-
-    if (offset > iscsi_getlength(bs)) {
-        return -EINVAL;
-    }
-
-    if (iscsilun->allocationmap != NULL) {
-        g_free(iscsilun->allocationmap);
-        iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
-    }
-
-    return 0;
-}
-
-static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int ret = 0;
-    int64_t total_size = 0;
-    BlockDriverState *bs;
-    IscsiLun *iscsilun = NULL;
-    QDict *bs_options;
-
-    bs = bdrv_new();
-
-    /* Read out options */
-    total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                              BDRV_SECTOR_SIZE);
-    bs->opaque = g_new0(struct IscsiLun, 1);
-    iscsilun = bs->opaque;
-
-    bs_options = qdict_new();
-    qdict_put(bs_options, "filename", qstring_from_str(filename));
-    ret = iscsi_open(bs, bs_options, 0, NULL);
-    QDECREF(bs_options);
-
-    if (ret != 0) {
-        goto out;
-    }
-    iscsi_detach_aio_context(bs);
-    if (iscsilun->type != TYPE_DISK) {
-        ret = -ENODEV;
-        goto out;
-    }
-    if (bs->total_sectors < total_size) {
-        ret = -ENOSPC;
-        goto out;
-    }
-
-    ret = 0;
-out:
-    if (iscsilun->iscsi != NULL) {
-        iscsi_destroy_context(iscsilun->iscsi);
-    }
-    g_free(bs->opaque);
-    bs->opaque = NULL;
-    bdrv_unref(bs);
-    return ret;
-}
-
-static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    IscsiLun *iscsilun = bs->opaque;
-    bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
-    bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
-    bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
-    return 0;
-}
-
-static QemuOptsList iscsi_create_opts = {
-    .name = "iscsi-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_iscsi = {
-    .format_name     = "iscsi",
-    .protocol_name   = "iscsi",
-
-    .instance_size   = sizeof(IscsiLun),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open  = iscsi_open,
-    .bdrv_close      = iscsi_close,
-    .bdrv_create     = iscsi_create,
-    .create_opts     = &iscsi_create_opts,
-    .bdrv_reopen_prepare  = iscsi_reopen_prepare,
-
-    .bdrv_getlength  = iscsi_getlength,
-    .bdrv_get_info   = iscsi_get_info,
-    .bdrv_truncate   = iscsi_truncate,
-    .bdrv_refresh_limits = iscsi_refresh_limits,
-
-    .bdrv_co_get_block_status = iscsi_co_get_block_status,
-    .bdrv_co_discard      = iscsi_co_discard,
-    .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
-    .bdrv_co_readv         = iscsi_co_readv,
-    .bdrv_co_writev        = iscsi_co_writev,
-    .bdrv_co_writev_flags  = iscsi_co_writev_flags,
-    .supported_write_flags = BDRV_REQ_FUA,
-    .bdrv_co_flush_to_disk = iscsi_co_flush,
-
-#ifdef __linux__
-    .bdrv_aio_ioctl   = iscsi_aio_ioctl,
-#endif
-
-    .bdrv_detach_aio_context = iscsi_detach_aio_context,
-    .bdrv_attach_aio_context = iscsi_attach_aio_context,
-};
-
-static QemuOptsList qemu_iscsi_opts = {
-    .name = "iscsi",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
-    .desc = {
-        {
-            .name = "user",
-            .type = QEMU_OPT_STRING,
-            .help = "username for CHAP authentication to target",
-        },{
-            .name = "password",
-            .type = QEMU_OPT_STRING,
-            .help = "password for CHAP authentication to target",
-        },{
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of the secret providing password for CHAP "
-                    "authentication to target",
-        },{
-            .name = "header-digest",
-            .type = QEMU_OPT_STRING,
-            .help = "HeaderDigest setting. "
-                    "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
-        },{
-            .name = "initiator-name",
-            .type = QEMU_OPT_STRING,
-            .help = "Initiator iqn name to use when connecting",
-        },{
-            .name = "timeout",
-            .type = QEMU_OPT_NUMBER,
-            .help = "Request timeout in seconds (default 0 = no timeout)",
-        },
-        { /* end of list */ }
-    },
-};
-
-static void iscsi_block_init(void)
-{
-    bdrv_register(&bdrv_iscsi);
-    qemu_add_opts(&qemu_iscsi_opts);
-}
-
-block_init(iscsi_block_init);
diff --git a/qemu/block/linux-aio.c b/qemu/block/linux-aio.c
deleted file mode 100644
index 805757e02..000000000
--- a/qemu/block/linux-aio.c
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Linux native AIO support.
- *
- * Copyright (C) 2009 IBM, Corp.
- * Copyright (C) 2009 Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "block/aio.h"
-#include "qemu/queue.h"
-#include "block/raw-aio.h"
-#include "qemu/event_notifier.h"
-
-#include <libaio.h>
-
-/*
- * Queue size (per-device).
- *
- * XXX: eventually we need to communicate this to the guest and/or make it
- *      tunable by the guest.  If we get more outstanding requests at a time
- *      than this we will get EAGAIN from io_submit which is communicated to
- *      the guest as an I/O error.
- */
-#define MAX_EVENTS 128
-
-#define MAX_QUEUED_IO  128
-
-struct qemu_laiocb {
-    BlockAIOCB common;
-    struct qemu_laio_state *ctx;
-    struct iocb iocb;
-    ssize_t ret;
-    size_t nbytes;
-    QEMUIOVector *qiov;
-    bool is_read;
-    QSIMPLEQ_ENTRY(qemu_laiocb) next;
-};
-
-typedef struct {
-    int plugged;
-    unsigned int n;
-    bool blocked;
-    QSIMPLEQ_HEAD(, qemu_laiocb) pending;
-} LaioQueue;
-
-struct qemu_laio_state {
-    io_context_t ctx;
-    EventNotifier e;
-
-    /* io queue for submit at batch */
-    LaioQueue io_q;
-
-    /* I/O completion processing */
-    QEMUBH *completion_bh;
-    struct io_event events[MAX_EVENTS];
-    int event_idx;
-    int event_max;
-};
-
-static void ioq_submit(struct qemu_laio_state *s);
-
-static inline ssize_t io_event_ret(struct io_event *ev)
-{
-    return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
-}
-
-/*
- * Completes an AIO request (calls the callback and frees the ACB).
- */
-static void qemu_laio_process_completion(struct qemu_laio_state *s,
-    struct qemu_laiocb *laiocb)
-{
-    int ret;
-
-    ret = laiocb->ret;
-    if (ret != -ECANCELED) {
-        if (ret == laiocb->nbytes) {
-            ret = 0;
-        } else if (ret >= 0) {
-            /* Short reads mean EOF, pad with zeros. */
-            if (laiocb->is_read) {
-                qemu_iovec_memset(laiocb->qiov, ret, 0,
-                    laiocb->qiov->size - ret);
-            } else {
-                ret = -EINVAL;
-            }
-        }
-    }
-    laiocb->common.cb(laiocb->common.opaque, ret);
-
-    qemu_aio_unref(laiocb);
-}
-
-/* The completion BH fetches completed I/O requests and invokes their
- * callbacks.
- *
- * The function is somewhat tricky because it supports nested event loops, for
- * example when a request callback invokes aio_poll().  In order to do this,
- * the completion events array and index are kept in qemu_laio_state.  The BH
- * reschedules itself as long as there are completions pending so it will
- * either be called again in a nested event loop or will be called after all
- * events have been completed.  When there are no events left to complete, the
- * BH returns without rescheduling.
- */
-static void qemu_laio_completion_bh(void *opaque)
-{
-    struct qemu_laio_state *s = opaque;
-
-    /* Fetch more completion events when empty */
-    if (s->event_idx == s->event_max) {
-        do {
-            struct timespec ts = { 0 };
-            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
-                                        s->events, &ts);
-        } while (s->event_max == -EINTR);
-
-        s->event_idx = 0;
-        if (s->event_max <= 0) {
-            s->event_max = 0;
-            return; /* no more events */
-        }
-    }
-
-    /* Reschedule so nested event loops see currently pending completions */
-    qemu_bh_schedule(s->completion_bh);
-
-    /* Process completion events */
-    while (s->event_idx < s->event_max) {
-        struct iocb *iocb = s->events[s->event_idx].obj;
-        struct qemu_laiocb *laiocb =
-                container_of(iocb, struct qemu_laiocb, iocb);
-
-        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
-        s->event_idx++;
-
-        qemu_laio_process_completion(s, laiocb);
-    }
-
-    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ioq_submit(s);
-    }
-}
-
-static void qemu_laio_completion_cb(EventNotifier *e)
-{
-    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
-
-    if (event_notifier_test_and_clear(&s->e)) {
-        qemu_bh_schedule(s->completion_bh);
-    }
-}
-
-static void laio_cancel(BlockAIOCB *blockacb)
-{
-    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
-    struct io_event event;
-    int ret;
-
-    if (laiocb->ret != -EINPROGRESS) {
-        return;
-    }
-    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
-    laiocb->ret = -ECANCELED;
-    if (ret != 0) {
-        /* iocb is not cancelled, cb will be called by the event loop later */
-        return;
-    }
-
-    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
-}
-
-static const AIOCBInfo laio_aiocb_info = {
-    .aiocb_size         = sizeof(struct qemu_laiocb),
-    .cancel_async       = laio_cancel,
-};
-
-static void ioq_init(LaioQueue *io_q)
-{
-    QSIMPLEQ_INIT(&io_q->pending);
-    io_q->plugged = 0;
-    io_q->n = 0;
-    io_q->blocked = false;
-}
-
-static void ioq_submit(struct qemu_laio_state *s)
-{
-    int ret, len;
-    struct qemu_laiocb *aiocb;
-    struct iocb *iocbs[MAX_QUEUED_IO];
-    QSIMPLEQ_HEAD(, qemu_laiocb) completed;
-
-    do {
-        len = 0;
-        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
-            iocbs[len++] = &aiocb->iocb;
-            if (len == MAX_QUEUED_IO) {
-                break;
-            }
-        }
-
-        ret = io_submit(s->ctx, len, iocbs);
-        if (ret == -EAGAIN) {
-            break;
-        }
-        if (ret < 0) {
-            abort();
-        }
-
-        s->io_q.n -= ret;
-        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
-        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
-    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
-    s->io_q.blocked = (s->io_q.n > 0);
-}
-
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
-{
-    struct qemu_laio_state *s = aio_ctx;
-
-    s->io_q.plugged++;
-}
-
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
-{
-    struct qemu_laio_state *s = aio_ctx;
-
-    assert(s->io_q.plugged > 0 || !unplug);
-
-    if (unplug && --s->io_q.plugged > 0) {
-        return;
-    }
-
-    if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
-        ioq_submit(s);
-    }
-}
-
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    struct qemu_laio_state *s = aio_ctx;
-    struct qemu_laiocb *laiocb;
-    struct iocb *iocbs;
-    off_t offset = sector_num * 512;
-
-    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
-    laiocb->nbytes = nb_sectors * 512;
-    laiocb->ctx = s;
-    laiocb->ret = -EINPROGRESS;
-    laiocb->is_read = (type == QEMU_AIO_READ);
-    laiocb->qiov = qiov;
-
-    iocbs = &laiocb->iocb;
-
-    switch (type) {
-    case QEMU_AIO_WRITE:
-        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
-	break;
-    case QEMU_AIO_READ:
-        io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
-	break;
-    /* Currently Linux kernel does not support other operations */
-    default:
-        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
-                        __func__, type);
-        goto out_free_aiocb;
-    }
-    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
-
-    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
-    s->io_q.n++;
-    if (!s->io_q.blocked &&
-        (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
-        ioq_submit(s);
-    }
-    return &laiocb->common;
-
-out_free_aiocb:
-    qemu_aio_unref(laiocb);
-    return NULL;
-}
-
-void laio_detach_aio_context(void *s_, AioContext *old_context)
-{
-    struct qemu_laio_state *s = s_;
-
-    aio_set_event_notifier(old_context, &s->e, false, NULL);
-    qemu_bh_delete(s->completion_bh);
-}
-
-void laio_attach_aio_context(void *s_, AioContext *new_context)
-{
-    struct qemu_laio_state *s = s_;
-
-    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
-    aio_set_event_notifier(new_context, &s->e, false,
-                           qemu_laio_completion_cb);
-}
-
-void *laio_init(void)
-{
-    struct qemu_laio_state *s;
-
-    s = g_malloc0(sizeof(*s));
-    if (event_notifier_init(&s->e, false) < 0) {
-        goto out_free_state;
-    }
-
-    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
-        goto out_close_efd;
-    }
-
-    ioq_init(&s->io_q);
-
-    return s;
-
-out_close_efd:
-    event_notifier_cleanup(&s->e);
-out_free_state:
-    g_free(s);
-    return NULL;
-}
-
-void laio_cleanup(void *s_)
-{
-    struct qemu_laio_state *s = s_;
-
-    event_notifier_cleanup(&s->e);
-
-    if (io_destroy(s->ctx) != 0) {
-        fprintf(stderr, "%s: destroy AIO context %p failed\n",
-                        __func__, &s->ctx);
-    }
-    g_free(s);
-}
diff --git a/qemu/block/mirror.c b/qemu/block/mirror.c
deleted file mode 100644
index 039f48125..000000000
--- a/qemu/block/mirror.c
+++ /dev/null
@@ -1,976 +0,0 @@
-/*
- * Image mirroring
- *
- * Copyright Red Hat, Inc. 2012
- *
- * Authors:
- *  Paolo Bonzini  <pbonzini@redhat.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "trace.h"
-#include "block/blockjob.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
-#include "qemu/ratelimit.h"
-#include "qemu/bitmap.h"
-#include "qemu/error-report.h"
-
-#define SLICE_TIME    100000000ULL /* ns */
-#define MAX_IN_FLIGHT 16
-#define DEFAULT_MIRROR_BUF_SIZE   (10 << 20)
-
-/* The mirroring buffer is a list of granularity-sized chunks.
- * Free chunks are organized in a list.
- *
- * The list node is stored inside the free chunk itself: mirror_free_init()
- * and mirror_iteration_done() cast chunk addresses to MirrorBuffer pointers.
- */
-typedef struct MirrorBuffer {
-    /* Link to the next free chunk in MirrorBlockJob.buf_free */
-    QSIMPLEQ_ENTRY(MirrorBuffer) next;
-} MirrorBuffer;
-
-/* State of one mirror (or active commit) block job. */
-typedef struct MirrorBlockJob {
-    /* Generic block job; callbacks recover the MirrorBlockJob from it with
-     * container_of() */
-    BlockJob common;
-    /* Rate limiter, configured by mirror_set_speed() */
-    RateLimit limit;
-    /* Destination of the copy */
-    BlockDriverState *target;
-    /* Passed to bdrv_is_allocated_above() when the initial dirty bitmap is
-     * built in mirror_run() */
-    BlockDriverState *base;
-    /* The name of the graph node to replace */
-    char *replaces;
-    /* The BDS to replace */
-    BlockDriverState *to_replace;
-    /* Used to block operations on the drive-mirror-replace target */
-    Error *replace_blocker;
-    /* When true, mirror_run() skips the initial allocation scan */
-    bool is_none_mode;
-    BlockdevOnError on_source_error, on_target_error;
-    /* Set when the ready event has been emitted; cleared again by
-     * mirror_error_action() */
-    bool synced;
-    /* Set by mirror_complete() to request completion */
-    bool should_complete;
-    /* Chunk size in bytes */
-    int64_t granularity;
-    /* Size of buf in bytes */
-    size_t buf_size;
-    /* Source length in bytes, as returned by bdrv_getlength() */
-    int64_t bdev_length;
-    /* One bit per chunk, set once the chunk was copied successfully; only
-     * allocated when mirror_run() decides the job must handle COW itself */
-    unsigned long *cow_bitmap;
-    BdrvDirtyBitmap *dirty_bitmap;
-    /* Iterator over dirty_bitmap */
-    HBitmapIter hbi;
-    /* Bounce buffer, carved into granularity-sized chunks */
-    uint8_t *buf;
-    /* Free chunks of buf and their count */
-    QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
-    int buf_free_count;
-
-    /* One bit per chunk, set while an operation covering it is in flight */
-    unsigned long *in_flight_bitmap;
-    /* Number of operations / sectors currently in flight */
-    int in_flight;
-    int sectors_in_flight;
-    /* First error recorded by the completion callbacks, or >= 0 if none */
-    int ret;
-    /* Whether zero writes to the target are issued with BDRV_REQ_MAY_UNMAP */
-    bool unmap;
-    /* True while the job coroutine is yielded in mirror_wait_for_io() */
-    bool waiting_for_io;
-    /* Target cluster size in sectors */
-    int target_cluster_sectors;
-    /* Minimum of the source and target bl.max_iov limits */
-    int max_iov;
-} MirrorBlockJob;
-
-/* One in-flight mirror operation; passed as the opaque value to the AIO
- * completion callbacks and freed in mirror_iteration_done(). */
-typedef struct MirrorOp {
-    /* Owning job */
-    MirrorBlockJob *s;
-    /* Buffers of the request; left zeroed for zero/discard operations */
-    QEMUIOVector qiov;
-    /* Start and length of the request, in sectors */
-    int64_t sector_num;
-    int nb_sectors;
-} MirrorOp;
-
-static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
-                                            int error)
-{
-    s->synced = false;
-    if (read) {
-        return block_job_error_action(&s->common, s->common.bs,
-                                      s->on_source_error, true, error);
-    } else {
-        return block_job_error_action(&s->common, s->target,
-                                      s->on_target_error, false, error);
-    }
-}
-
-/* Common completion path of one mirror operation @op with final status @ret.
- *
- * Gives the operation's buffers back to the free list, clears its chunks in
- * the in-flight bitmap, accounts the progress on success, frees @op and
- * re-enters the job coroutine if it is waiting for I/O. */
-static void mirror_iteration_done(MirrorOp *op, int ret)
-{
-    MirrorBlockJob *job = op->s;
-    int sectors_per_chunk, chunk_count, idx;
-    int64_t first_chunk;
-
-    trace_mirror_iteration_done(job, op->sector_num, op->nb_sectors, ret);
-
-    job->in_flight--;
-    job->sectors_in_flight -= op->nb_sectors;
-
-    /* Each iovec element is a chunk taken from buf_free by mirror_do_read();
-     * zero/discard operations have an empty qiov. */
-    for (idx = 0; idx < op->qiov.niov; idx++) {
-        MirrorBuffer *chunk = (MirrorBuffer *) op->qiov.iov[idx].iov_base;
-
-        QSIMPLEQ_INSERT_TAIL(&job->buf_free, chunk, next);
-        job->buf_free_count++;
-    }
-
-    sectors_per_chunk = job->granularity >> BDRV_SECTOR_BITS;
-    first_chunk = op->sector_num / sectors_per_chunk;
-    chunk_count = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
-    bitmap_clear(job->in_flight_bitmap, first_chunk, chunk_count);
-
-    if (ret >= 0) {
-        if (job->cow_bitmap) {
-            bitmap_set(job->cow_bitmap, first_chunk, chunk_count);
-        }
-        job->common.offset += (uint64_t)op->nb_sectors * BDRV_SECTOR_SIZE;
-    }
-
-    qemu_iovec_destroy(&op->qiov);
-    g_free(op);
-
-    if (job->waiting_for_io) {
-        qemu_coroutine_enter(job->common.co, NULL);
-    }
-}
-
-/* AIO callback for the write (or zero/discard) half of a mirror operation.
- *
- * On failure the range is marked dirty again and, if the error policy asks
- * for the error to be reported and none is recorded yet, @ret becomes the
- * job's result. The operation is then finished in every case. */
-static void mirror_write_complete(void *opaque, int ret)
-{
-    MirrorOp *op = opaque;
-    MirrorBlockJob *job = op->s;
-
-    if (ret < 0) {
-        bdrv_set_dirty_bitmap(job->dirty_bitmap, op->sector_num,
-                              op->nb_sectors);
-        if (mirror_error_action(job, false, -ret) == BLOCK_ERROR_ACTION_REPORT
-            && job->ret >= 0) {
-            job->ret = ret;
-        }
-    }
-
-    mirror_iteration_done(op, ret);
-}
-
-/* AIO callback for the read half of a mirror operation.
- *
- * A successful read is followed by the write of the same buffers to the
- * target. A failed read marks the range dirty again, applies the source
- * error policy and finishes the operation without writing. */
-static void mirror_read_complete(void *opaque, int ret)
-{
-    MirrorOp *op = opaque;
-    MirrorBlockJob *job = op->s;
-
-    if (ret >= 0) {
-        bdrv_aio_writev(job->target, op->sector_num, &op->qiov,
-                        op->nb_sectors, mirror_write_complete, op);
-        return;
-    }
-
-    bdrv_set_dirty_bitmap(job->dirty_bitmap, op->sector_num, op->nb_sectors);
-    if (mirror_error_action(job, true, -ret) == BLOCK_ERROR_ACTION_REPORT
-        && job->ret >= 0) {
-        job->ret = ret;
-    }
-
-    mirror_iteration_done(op, ret);
-}
-
-/* Limit *nb_sectors so that the range starting at @sector_num does not extend
- * past the source length recorded in s->bdev_length. *nb_sectors is only
- * ever reduced, never increased. */
-static inline void mirror_clip_sectors(MirrorBlockJob *s,
-                                       int64_t sector_num,
-                                       int *nb_sectors)
-{
-    /* NOTE(review): the operands of MIN() have different integer types; keep
-     * the expression as is unless the promotion rules have been checked. */
-    *nb_sectors = MIN(*nb_sectors,
-                      s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
-}
-
-/* Widen the request to target cluster boundaries when COW is needed, i.e.
- * when the first or the last chunk of the range is not yet set in the COW
- * bitmap. The result is capped at max_iov chunks (rounded down to a whole
- * target cluster if COW is needed) and clipped to the source length.
- *
- * *sector_num and *nb_sectors are updated in place.
- *
- * Returns the offset of the adjusted tail sector against the original one. */
-static int mirror_cow_align(MirrorBlockJob *s,
-                            int64_t *sector_num,
-                            int *nb_sectors)
-{
-    int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
-    int max_sectors = chunk_sectors * s->max_iov;
-    int64_t new_start = *sector_num;
-    int new_count = *nb_sectors;
-    int64_t last_sector = *sector_num + *nb_sectors - 1;
-    bool need_cow;
-    int tail_delta;
-
-    need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap) ||
-               !test_bit(last_sector / chunk_sectors, s->cow_bitmap);
-    if (need_cow) {
-        bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
-                               &new_start, &new_count);
-    }
-
-    if (new_count > max_sectors) {
-        new_count = need_cow
-            ? QEMU_ALIGN_DOWN(max_sectors, s->target_cluster_sectors)
-            : max_sectors;
-    }
-
-    /* Clipping may leave new_count unaligned to a chunk boundary, but that
-     * doesn't matter because it's already the end of the source image. */
-    mirror_clip_sectors(s, new_start, &new_count);
-
-    tail_delta = new_start + new_count - (*sector_num + *nb_sectors);
-    *sector_num = new_start;
-    *nb_sectors = new_count;
-    assert(tail_delta >= 0);
-    return tail_delta;
-}
-
-/* Yield the job coroutine until mirror_iteration_done() re-enters it.
- * waiting_for_io tells the completion path that a wake-up is wanted; the
- * assertion rejects nested waits. */
-static inline void mirror_wait_for_io(MirrorBlockJob *s)
-{
-    assert(!s->waiting_for_io);
-    s->waiting_for_io = true;
-    qemu_coroutine_yield();
-    s->waiting_for_io = false;
-}
-
-/* Submit async read while handling COW.
- *
- * At most buf_size bytes are handled per call: the request is first clipped
- * to that limit and, when a COW bitmap exists, realigned by
- * mirror_cow_align(). The function waits (yielding the coroutine) until
- * enough free buffer chunks are available.
- *
- * Returns: the number of sectors actually submitted if no alignment is
- *          necessary, or (new_end - sector_num) if tail is rounded up or
- *          down due to alignment or buffer limit.
- */
-static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
-                          int nb_sectors)
-{
-    BlockDriverState *source = s->common.bs;
-    int sectors_per_chunk, nb_chunks;
-    int ret;
-    MirrorOp *op;
-
-    sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-
-    /* We can only handle as much as buf_size at a time. */
-    nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
-    assert(nb_sectors);
-    /* The return value must be based on the clipped length, otherwise the
-     * caller would skip sectors that were never submitted. */
-    ret = nb_sectors;
-
-    if (s->cow_bitmap) {
-        ret += mirror_cow_align(s, &sector_num, &nb_sectors);
-    }
-    assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
-    /* The sector range must meet granularity because:
-     * 1) Caller passes in aligned values;
-     * 2) mirror_cow_align is used only when target cluster is larger. */
-    assert(!(sector_num % sectors_per_chunk));
-    nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
-
-    while (s->buf_free_count < nb_chunks) {
-        trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
-        mirror_wait_for_io(s);
-    }
-
-    /* Allocate a MirrorOp that is used as an AIO callback.  */
-    op = g_new(MirrorOp, 1);
-    op->s = s;
-    op->sector_num = sector_num;
-    op->nb_sectors = nb_sectors;
-
-    /* Now make a QEMUIOVector taking enough granularity-sized chunks
-     * from s->buf_free.
-     */
-    qemu_iovec_init(&op->qiov, nb_chunks);
-    while (nb_chunks-- > 0) {
-        MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
-        /* The last chunk may be only partially used */
-        size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
-
-        QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
-        s->buf_free_count--;
-        qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-    }
-
-    /* Copy the dirty cluster.  */
-    s->in_flight++;
-    s->sectors_in_flight += nb_sectors;
-    trace_mirror_one_iteration(s, sector_num, nb_sectors);
-
-    bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
-                   mirror_read_complete, op);
-    return ret;
-}
-
-/* Submit an asynchronous write-zeroes (or, if @is_discard, discard) request
- * for @nb_sectors sectors starting at @sector_num on the target.
- * Completion is handled by mirror_write_complete(). */
-static void mirror_do_zero_or_discard(MirrorBlockJob *s,
-                                      int64_t sector_num,
-                                      int nb_sectors,
-                                      bool is_discard)
-{
-    /* The MirrorOp serves as the AIO callback argument. g_new0 leaves the
-     * qiov zeroed, so freeing it in mirror_iteration_done is a nop. */
-    MirrorOp *op = g_new0(MirrorOp, 1);
-
-    op->s = s;
-    op->sector_num = sector_num;
-    op->nb_sectors = nb_sectors;
-
-    s->in_flight++;
-    s->sectors_in_flight += nb_sectors;
-
-    if (!is_discard) {
-        bdrv_aio_write_zeroes(s->target, sector_num, nb_sectors,
-                              s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
-                              mirror_write_complete, op);
-        return;
-    }
-
-    bdrv_aio_discard(s->target, sector_num, nb_sectors,
-                     mirror_write_complete, op);
-}
-
-/* Run one mirroring iteration: pick the next dirty chunk from the dirty
- * bitmap iterator, extend it over following consecutive dirty chunks, and
- * submit copy, zero or discard requests for that range.
- *
- * Returns the accumulated delay computed by the rate limiter for the
- * submitted sectors. */
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
-{
-    BlockDriverState *source = s->common.bs;
-    int64_t sector_num, first_chunk;
-    uint64_t delay_ns = 0;
-    /* At least the first dirty chunk is mirrored in one iteration. */
-    int nb_chunks = 1;
-    int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
-    int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
-
-    sector_num = hbitmap_iter_next(&s->hbi);
-    if (sector_num < 0) {
-        /* Iterator exhausted: restart it from the beginning of the bitmap */
-        bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
-        sector_num = hbitmap_iter_next(&s->hbi);
-        trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
-        assert(sector_num >= 0);
-    }
-
-    /* Do not touch a chunk that still has an operation in flight */
-    first_chunk = sector_num / sectors_per_chunk;
-    while (test_bit(first_chunk, s->in_flight_bitmap)) {
-        trace_mirror_yield_in_flight(s, first_chunk, s->in_flight);
-        mirror_wait_for_io(s);
-    }
-
-    /* Find the number of consecutive dirty chunks following the first dirty
-     * one, and wait for in flight requests in them. */
-    while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
-        int64_t hbitmap_next;
-        int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
-        int64_t next_chunk = next_sector / sectors_per_chunk;
-        if (next_sector >= end ||
-            !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
-            break;
-        }
-        if (test_bit(next_chunk, s->in_flight_bitmap)) {
-            break;
-        }
-
-        hbitmap_next = hbitmap_iter_next(&s->hbi);
-        if (hbitmap_next > next_sector || hbitmap_next < 0) {
-            /* The bitmap iterator's cache is stale, refresh it */
-            bdrv_set_dirty_iter(&s->hbi, next_sector);
-            hbitmap_next = hbitmap_iter_next(&s->hbi);
-        }
-        assert(hbitmap_next == next_sector);
-        nb_chunks++;
-    }
-
-    /* Clear dirty bits before querying the block status, because
-     * calling bdrv_get_block_status_above could yield - if some blocks are
-     * marked dirty in this window, we need to know.
-     */
-    bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
-                            nb_chunks * sectors_per_chunk);
-    bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
-    while (nb_chunks > 0 && sector_num < end) {
-        int ret;
-        int io_sectors;
-        BlockDriverState *file;
-        enum MirrorMethod {
-            MIRROR_METHOD_COPY,
-            MIRROR_METHOD_ZERO,
-            MIRROR_METHOD_DISCARD
-        } mirror_method = MIRROR_METHOD_COPY;
-
-        assert(!(sector_num % sectors_per_chunk));
-        ret = bdrv_get_block_status_above(source, NULL, sector_num,
-                                          nb_chunks * sectors_per_chunk,
-                                          &io_sectors, &file);
-        if (ret < 0) {
-            /* Status unknown: fall back to copying the whole range */
-            io_sectors = nb_chunks * sectors_per_chunk;
-        }
-
-        /* Work on whole chunks only, and on at least one chunk */
-        io_sectors -= io_sectors % sectors_per_chunk;
-        if (io_sectors < sectors_per_chunk) {
-            io_sectors = sectors_per_chunk;
-        } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
-            /* No data in the source: zero or discard the target instead of
-             * copying, but only if the range is aligned to target clusters */
-            int64_t target_sector_num;
-            int target_nb_sectors;
-            bdrv_round_to_clusters(s->target, sector_num, io_sectors,
-                                   &target_sector_num, &target_nb_sectors);
-            if (target_sector_num == sector_num &&
-                target_nb_sectors == io_sectors) {
-                mirror_method = ret & BDRV_BLOCK_ZERO ?
-                                    MIRROR_METHOD_ZERO :
-                                    MIRROR_METHOD_DISCARD;
-            }
-        }
-
-        mirror_clip_sectors(s, sector_num, &io_sectors);
-        switch (mirror_method) {
-        case MIRROR_METHOD_COPY:
-            /* mirror_do_read() reports how far it actually advanced */
-            io_sectors = mirror_do_read(s, sector_num, io_sectors);
-            break;
-        case MIRROR_METHOD_ZERO:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
-            break;
-        case MIRROR_METHOD_DISCARD:
-            mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
-            break;
-        default:
-            abort();
-        }
-        assert(io_sectors);
-        sector_num += io_sectors;
-        nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
-        delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
-    }
-    return delay_ns;
-}
-
-/* Build the initial free list: walk s->buf in granularity-sized steps and
- * append every chunk to s->buf_free. The free list must be empty (count 0)
- * on entry. */
-static void mirror_free_init(MirrorBlockJob *s)
-{
-    int granularity = s->granularity;
-    uint8_t *cursor = s->buf;
-    size_t remaining;
-
-    assert(s->buf_free_count == 0);
-    QSIMPLEQ_INIT(&s->buf_free);
-
-    for (remaining = s->buf_size; remaining != 0;
-         remaining -= granularity, cursor += granularity) {
-        MirrorBuffer *chunk = (MirrorBuffer *)cursor;
-
-        QSIMPLEQ_INSERT_TAIL(&s->buf_free, chunk, next);
-        s->buf_free_count++;
-    }
-}
-
-/* Wait, yielding the job coroutine, until no mirror operation is in flight
- * any more. */
-static void mirror_drain(MirrorBlockJob *s)
-{
-    while (s->in_flight > 0) {
-        mirror_wait_for_io(s);
-    }
-}
-
-/* Argument handed from mirror_run() to mirror_exit(); allocated by the
- * former and freed by the latter. */
-typedef struct {
-    /* Result of the job as computed by mirror_run() */
-    int ret;
-} MirrorExitData;
-
-/* Final stage of the job, deferred from mirror_run() through
- * block_job_defer_to_main_loop().
- *
- * If completion was requested and the job succeeded, the target replaces the
- * source (or the node named by "replaces") in the backing chain. All
- * blockers and references held by the job are then dropped, the job is
- * reported as completed with data->ret, and the drained section opened in
- * mirror_run() is ended. @opaque is the MirrorExitData and is freed here. */
-static void mirror_exit(BlockJob *job, void *opaque)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    MirrorExitData *data = opaque;
-    AioContext *replace_aio_context = NULL;
-    BlockDriverState *src = s->common.bs;
-
-    /* Make sure that the source BDS doesn't go away before we called
-     * block_job_completed(). */
-    bdrv_ref(src);
-
-    if (s->to_replace) {
-        replace_aio_context = bdrv_get_aio_context(s->to_replace);
-        aio_context_acquire(replace_aio_context);
-    }
-
-    if (s->should_complete && data->ret == 0) {
-        BlockDriverState *to_replace = s->common.bs;
-        if (s->to_replace) {
-            to_replace = s->to_replace;
-        }
-
-        /* This was checked in mirror_start_job(), but meanwhile one of the
-         * nodes could have been newly attached to a BlockBackend. */
-        if (to_replace->blk && s->target->blk) {
-            error_report("block job: Can't create node with two BlockBackends");
-            data->ret = -EINVAL;
-            goto out;
-        }
-
-        /* Give the target the same open flags as the node it replaces.
-         * NOTE(review): the result of bdrv_reopen() is not checked here. */
-        if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
-            bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
-        }
-        bdrv_replace_in_backing_chain(to_replace, s->target);
-    }
-
-out:
-    if (s->to_replace) {
-        /* Undo the blocker and reference taken in mirror_complete() */
-        bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
-        error_free(s->replace_blocker);
-        bdrv_unref(s->to_replace);
-    }
-    if (replace_aio_context) {
-        aio_context_release(replace_aio_context);
-    }
-    g_free(s->replaces);
-    bdrv_op_unblock_all(s->target, s->common.blocker);
-    bdrv_unref(s->target);
-    block_job_completed(&s->common, data->ret);
-    g_free(data);
-    /* Pairs with bdrv_drained_begin() and aio_disable_external() at the end
-     * of mirror_run() */
-    bdrv_drained_end(src);
-    if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
-        aio_enable_external(iohandler_get_aio_context());
-    }
-    bdrv_unref(src);
-}
-
-/* Coroutine entry point of the mirror job (@opaque is the MirrorBlockJob).
- *
- * Phases:
- *  1. Set up lengths, bitmaps and the bounce buffer.
- *  2. Unless in "none" mode, scan the source and mark allocated ranges dirty.
- *  3. Main loop: copy dirty chunks until the bitmap is clean, emit the ready
- *     event, then keep mirroring until completion or cancellation.
- *  4. Release resources and defer the final steps to mirror_exit().
- */
-static void coroutine_fn mirror_run(void *opaque)
-{
-    MirrorBlockJob *s = opaque;
-    MirrorExitData *data;
-    BlockDriverState *bs = s->common.bs;
-    int64_t sector_num, end, length;
-    uint64_t last_pause_ns;
-    BlockDriverInfo bdi;
-    char backing_filename[2]; /* we only need 2 characters because we are only
-                                 checking for a NULL string */
-    int ret = 0;
-    int n;
-    int target_cluster_size = BDRV_SECTOR_SIZE;
-
-    if (block_job_is_cancelled(&s->common)) {
-        goto immediate_exit;
-    }
-
-    s->bdev_length = bdrv_getlength(bs);
-    if (s->bdev_length < 0) {
-        ret = s->bdev_length;
-        goto immediate_exit;
-    } else if (s->bdev_length == 0) {
-        /* Report BLOCK_JOB_READY and wait for complete. */
-        block_job_event_ready(&s->common);
-        s->synced = true;
-        while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
-            block_job_yield(&s->common);
-        }
-        s->common.cancelled = false;
-        goto immediate_exit;
-    }
-
-    /* Number of granularity-sized chunks in the source */
-    length = DIV_ROUND_UP(s->bdev_length, s->granularity);
-    s->in_flight_bitmap = bitmap_new(length);
-
-    /* If we have no backing file yet in the destination, we cannot let
-     * the destination do COW.  Instead, we copy sectors around the
-     * dirty data if needed.  We need a bitmap to do that.
-     */
-    bdrv_get_backing_filename(s->target, backing_filename,
-                              sizeof(backing_filename));
-    if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
-        target_cluster_size = bdi.cluster_size;
-    }
-    if (backing_filename[0] && !s->target->backing
-        && s->granularity < target_cluster_size) {
-        /* The buffer must hold at least one whole target cluster */
-        s->buf_size = MAX(s->buf_size, target_cluster_size);
-        s->cow_bitmap = bitmap_new(length);
-    }
-    s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
-    s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
-
-    end = s->bdev_length / BDRV_SECTOR_SIZE;
-    s->buf = qemu_try_blockalign(bs, s->buf_size);
-    if (s->buf == NULL) {
-        ret = -ENOMEM;
-        goto immediate_exit;
-    }
-
-    mirror_free_init(s);
-
-    last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    if (!s->is_none_mode) {
-        /* First part, loop on the sectors and initialize the dirty bitmap.  */
-        BlockDriverState *base = s->base;
-        bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
-
-        for (sector_num = 0; sector_num < end; ) {
-            /* Just to make sure we are not exceeding int limit. */
-            int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
-                                 end - sector_num);
-            int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
-            /* Give other users of the thread a chance to run every
-             * SLICE_TIME nanoseconds */
-            if (now - last_pause_ns > SLICE_TIME) {
-                last_pause_ns = now;
-                block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
-            }
-
-            if (block_job_is_cancelled(&s->common)) {
-                goto immediate_exit;
-            }
-
-            ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
-
-            if (ret < 0) {
-                goto immediate_exit;
-            }
-
-            assert(n > 0);
-            if (ret == 1 || mark_all_dirty) {
-                bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
-            }
-            sector_num += n;
-        }
-    }
-
-    bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
-    for (;;) {
-        uint64_t delay_ns = 0;
-        int64_t cnt;
-        bool should_complete;
-
-        /* An error recorded by a completion callback ends the job */
-        if (s->ret < 0) {
-            ret = s->ret;
-            goto immediate_exit;
-        }
-
-        cnt = bdrv_get_dirty_count(s->dirty_bitmap);
-        /* s->common.offset contains the number of bytes already processed so
-         * far, cnt is the number of dirty sectors remaining and
-         * s->sectors_in_flight is the number of sectors currently being
-         * processed; together those are the current total operation length */
-        s->common.len = s->common.offset +
-                        (cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
-
-        /* Note that even when no rate limit is applied we need to yield
-         * periodically with no pending I/O so that bdrv_drain_all() returns.
-         * We do so every SLICE_TIME nanoseconds, or when there is an error,
-         * or when the source is clean, whichever comes first.
-         */
-        if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
-            s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-            if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
-                (cnt == 0 && s->in_flight > 0)) {
-                trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
-                mirror_wait_for_io(s);
-                continue;
-            } else if (cnt != 0) {
-                delay_ns = mirror_iteration(s);
-            }
-        }
-
-        should_complete = false;
-        if (s->in_flight == 0 && cnt == 0) {
-            trace_mirror_before_flush(s);
-            ret = bdrv_flush(s->target);
-            if (ret < 0) {
-                if (mirror_error_action(s, false, -ret) ==
-                    BLOCK_ERROR_ACTION_REPORT) {
-                    goto immediate_exit;
-                }
-            } else {
-                /* We're out of the streaming phase.  From now on, if the job
-                 * is cancelled we will actually complete all pending I/O and
-                 * report completion.  This way, block-job-cancel will leave
-                 * the target in a consistent state.
-                 */
-                if (!s->synced) {
-                    block_job_event_ready(&s->common);
-                    s->synced = true;
-                }
-
-                should_complete = s->should_complete ||
-                    block_job_is_cancelled(&s->common);
-                cnt = bdrv_get_dirty_count(s->dirty_bitmap);
-            }
-        }
-
-        if (cnt == 0 && should_complete) {
-            /* The dirty bitmap is not updated while operations are pending.
-             * If we're about to exit, wait for pending operations before
-             * calling bdrv_get_dirty_count(bs), or we may exit while the
-             * source has dirty data to copy!
-             *
-             * Note that I/O can be submitted by the guest while
-             * mirror_populate runs.
-             */
-            trace_mirror_before_drain(s, cnt);
-            bdrv_co_drain(bs);
-            cnt = bdrv_get_dirty_count(s->dirty_bitmap);
-        }
-
-        ret = 0;
-        trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
-        if (!s->synced) {
-            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
-            if (block_job_is_cancelled(&s->common)) {
-                break;
-            }
-        } else if (!should_complete) {
-            /* Synced and idle: sleep a full slice; otherwise only yield */
-            delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
-            block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
-        } else if (cnt == 0) {
-            /* The two disks are in sync.  Exit and report successful
-             * completion.
-             */
-            assert(QLIST_EMPTY(&bs->tracked_requests));
-            s->common.cancelled = false;
-            break;
-        }
-        last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    }
-
-immediate_exit:
-    if (s->in_flight > 0) {
-        /* We get here only if something went wrong.  Either the job failed,
-         * or it was cancelled prematurely so that we do not guarantee that
-         * the target is a copy of the source.
-         */
-        assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
-        mirror_drain(s);
-    }
-
-    assert(s->in_flight == 0);
-    qemu_vfree(s->buf);
-    g_free(s->cow_bitmap);
-    g_free(s->in_flight_bitmap);
-    bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
-    if (s->target->blk) {
-        blk_iostatus_disable(s->target->blk);
-    }
-
-    /* Ownership of data passes to mirror_exit(), which frees it */
-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    /* Before we switch to target in mirror_exit, make sure data doesn't
-     * change. */
-    bdrv_drained_begin(s->common.bs);
-    if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
-        /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
-         * above bdrv_drained_begin isn't enough to quiesce it. This is ugly,
-         * we need a block layer API change to achieve this. */
-        aio_disable_external(iohandler_get_aio_context());
-    }
-    block_job_defer_to_main_loop(&s->common, mirror_exit, data);
-}
-
-/* BlockJobDriver.set_speed callback.
- *
- * @speed is divided by BDRV_SECTOR_SIZE before it is handed to the rate
- * limiter, with SLICE_TIME as the slice length. A negative @speed is
- * rejected with an "invalid parameter" error in @errp. */
-static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
-    if (speed >= 0) {
-        ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
-        return;
-    }
-
-    error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-}
-
-/* BlockJobDriver.iostatus_reset callback: reset the I/O status of the
- * target's BlockBackend, if the target has one. */
-static void mirror_iostatus_reset(BlockJob *job)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
-    if (!s->target->blk) {
-        return;
-    }
-
-    blk_iostatus_reset(s->target->blk);
-}
-
-/* BlockJobDriver.complete callback: ask a synced job to finish.
- *
- * Fails with QERR_BLOCK_JOB_NOT_READY if the job has not reached the synced
- * state yet; in that case the target is left untouched. Otherwise the
- * target's backing file is opened, the node named by "replaces" (if any) is
- * looked up, blocked and referenced, and the job coroutine is entered with
- * should_complete set.
- *
- * Errors are returned through @errp. */
-static void mirror_complete(BlockJob *job, Error **errp)
-{
-    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-    Error *local_err = NULL;
-    int ret;
-
-    /* Check readiness first so that a premature request has no side effects
-     * on the target. */
-    if (!s->synced) {
-        error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
-        return;
-    }
-
-    ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    /* check the target bs is not blocked and block all operations on it */
-    if (s->replaces) {
-        AioContext *replace_aio_context;
-
-        s->to_replace = bdrv_find_node(s->replaces);
-        if (!s->to_replace) {
-            error_setg(errp, "Node name '%s' not found", s->replaces);
-            return;
-        }
-
-        replace_aio_context = bdrv_get_aio_context(s->to_replace);
-        aio_context_acquire(replace_aio_context);
-
-        /* The blocker and the reference are released in mirror_exit() */
-        error_setg(&s->replace_blocker,
-                   "block device is in use by block-job-complete");
-        bdrv_op_block_all(s->to_replace, s->replace_blocker);
-        bdrv_ref(s->to_replace);
-
-        aio_context_release(replace_aio_context);
-    }
-
-    s->should_complete = true;
-    block_job_enter(&s->common);
-}
-
-/* Driver used by mirror_start() */
-static const BlockJobDriver mirror_job_driver = {
-    .job_type       = BLOCK_JOB_TYPE_MIRROR,
-    .instance_size  = sizeof(MirrorBlockJob),
-    .complete       = mirror_complete,
-    .iostatus_reset = mirror_iostatus_reset,
-    .set_speed      = mirror_set_speed,
-};
-
-/* Driver used by commit_active_start(); same callbacks as the mirror driver
- * but reported with the commit job type. */
-static const BlockJobDriver commit_active_job_driver = {
-    .job_type       = BLOCK_JOB_TYPE_COMMIT,
-    .instance_size  = sizeof(MirrorBlockJob),
-    .complete       = mirror_complete,
-    .iostatus_reset = mirror_iostatus_reset,
-    .set_speed      = mirror_set_speed,
-};
-
-/* Common implementation of mirror_start() and commit_active_start().
- *
- * Validates the parameters, creates the block job with @driver, sets up the
- * dirty bitmap on @bs, blocks operations on @target and enters the job
- * coroutine. @granularity == 0 selects the target's default bitmap
- * granularity and @buf_size == 0 selects DEFAULT_MIRROR_BUF_SIZE.
- *
- * On failure an error is set in @errp and no job is left behind. */
-static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
-                             const char *replaces,
-                             int64_t speed, uint32_t granularity,
-                             int64_t buf_size,
-                             BlockdevOnError on_source_error,
-                             BlockdevOnError on_target_error,
-                             bool unmap,
-                             BlockCompletionFunc *cb,
-                             void *opaque, Error **errp,
-                             const BlockJobDriver *driver,
-                             bool is_none_mode, BlockDriverState *base)
-{
-    MirrorBlockJob *s;
-    BlockDriverState *replaced_bs;
-
-    if (granularity == 0) {
-        granularity = bdrv_get_default_bitmap_granularity(target);
-    }
-
-    /* The granularity must be a power of two */
-    assert ((granularity & (granularity - 1)) == 0);
-
-    /* The stop/enospc policies on the source need a BlockBackend with
-     * iostatus enabled */
-    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
-         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
-        return;
-    }
-
-    if (buf_size < 0) {
-        error_setg(errp, "Invalid parameter 'buf-size'");
-        return;
-    }
-
-    if (buf_size == 0) {
-        buf_size = DEFAULT_MIRROR_BUF_SIZE;
-    }
-
-    /* We can't support this case as long as the block layer can't handle
-     * multiple BlockBackends per BlockDriverState. */
-    if (replaces) {
-        replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
-        if (replaced_bs == NULL) {
-            return;
-        }
-    } else {
-        replaced_bs = bs;
-    }
-    if (replaced_bs->blk && target->blk) {
-        error_setg(errp, "Can't create node with two BlockBackends");
-        return;
-    }
-
-    s = block_job_create(driver, bs, speed, cb, opaque, errp);
-    if (!s) {
-        return;
-    }
-
-    s->replaces = g_strdup(replaces);
-    s->on_source_error = on_source_error;
-    s->on_target_error = on_target_error;
-    s->target = target;
-    s->is_none_mode = is_none_mode;
-    s->base = base;
-    s->granularity = granularity;
-    /* The buffer is carved into whole granularity-sized chunks */
-    s->buf_size = ROUND_UP(buf_size, granularity);
-    s->unmap = unmap;
-
-    s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
-    if (!s->dirty_bitmap) {
-        g_free(s->replaces);
-        block_job_unref(&s->common);
-        return;
-    }
-
-    bdrv_op_block_all(s->target, s->common.blocker);
-
-    if (s->target->blk) {
-        blk_set_on_error(s->target->blk, on_target_error, on_target_error);
-        blk_iostatus_enable(s->target->blk);
-    }
-    s->common.co = qemu_coroutine_create(mirror_run);
-    trace_mirror_start(bs, s, s->common.co, opaque);
-    qemu_coroutine_enter(s->common.co, s);
-}
-
-/* Start a mirror job copying @bs to @target.
- *
- * @mode selects what is copied initially: "incremental" is rejected with an
- * error, "none" skips the initial scan, and "top" uses the backing file of
- * @bs as the base of the scan (every other mode uses no base). All remaining
- * arguments are passed through to mirror_start_job(). */
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
-                  const char *replaces,
-                  int64_t speed, uint32_t granularity, int64_t buf_size,
-                  MirrorSyncMode mode, BlockdevOnError on_source_error,
-                  BlockdevOnError on_target_error,
-                  bool unmap,
-                  BlockCompletionFunc *cb,
-                  void *opaque, Error **errp)
-{
-    BlockDriverState *base = NULL;
-
-    if (mode == MIRROR_SYNC_MODE_INCREMENTAL) {
-        error_setg(errp, "Sync mode 'incremental' not supported");
-        return;
-    }
-
-    if (mode == MIRROR_SYNC_MODE_TOP) {
-        base = backing_bs(bs);
-    }
-
-    mirror_start_job(bs, target, replaces,
-                     speed, granularity, buf_size,
-                     on_source_error, on_target_error, unmap, cb, opaque, errp,
-                     &mirror_job_driver, mode == MIRROR_SYNC_MODE_NONE, base);
-}
-
-/* Start an active commit of @bs into @base, implemented as a mirror job with
- * @base as the target.
- *
- * @base is reopened with the open flags of @bs and, if it is shorter than
- * @bs, grown to the length of @bs before the job is started. On any failure
- * an error is set in @errp, the original flags of @base are restored on a
- * best-effort basis and the reference taken for the job is dropped. */
-void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
-                         int64_t speed,
-                         BlockdevOnError on_error,
-                         BlockCompletionFunc *cb,
-                         void *opaque, Error **errp)
-{
-    int64_t length, base_length;
-    int orig_base_flags;
-    int ret;
-    Error *local_err = NULL;
-
-    orig_base_flags = bdrv_get_flags(base);
-
-    if (bdrv_reopen(base, bs->open_flags, errp)) {
-        return;
-    }
-
-    length = bdrv_getlength(bs);
-    if (length < 0) {
-        error_setg_errno(errp, -length,
-                         "Unable to determine length of %s", bs->filename);
-        goto error_restore_flags;
-    }
-
-    base_length = bdrv_getlength(base);
-    if (base_length < 0) {
-        error_setg_errno(errp, -base_length,
-                         "Unable to determine length of %s", base->filename);
-        goto error_restore_flags;
-    }
-
-    if (length > base_length) {
-        ret = bdrv_truncate(base, length);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret,
-                            "Top image %s is larger than base image %s, and "
-                             "resize of base image failed",
-                             bs->filename, base->filename);
-            goto error_restore_flags;
-        }
-    }
-
-    /* The job drops this reference on its target in mirror_exit() */
-    bdrv_ref(base);
-    mirror_start_job(bs, base, NULL, speed, 0, 0,
-                     on_error, on_error, false, cb, opaque, &local_err,
-                     &commit_active_job_driver, false, base);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        /* No job was created, so nobody else will release the reference
-         * taken above; restore the flags first, then drop it. */
-        bdrv_reopen(base, orig_base_flags, NULL);
-        bdrv_unref(base);
-        return;
-    }
-
-    return;
-
-error_restore_flags:
-    /* ignore error and errp for bdrv_reopen, because we want to propagate
-     * the original error */
-    bdrv_reopen(base, orig_base_flags, NULL);
-    return;
-}
diff --git a/qemu/block/nbd-client.c b/qemu/block/nbd-client.c
deleted file mode 100644
index 878e879ac..000000000
--- a/qemu/block/nbd-client.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/*
- * QEMU Block driver for  NBD
- *
- * Copyright (C) 2008 Bull S.A.S.
- *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- * Some parts:
- *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "nbd-client.h"
-
-#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
-#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)bs))
-
-static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
-{
-    int i;
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i]) {
-            qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        }
-    }
-}
-
-static void nbd_teardown_connection(BlockDriverState *bs)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-
-    if (!client->ioc) { /* Already closed */
-        return;
-    }
-
-    /* finish any pending coroutines */
-    qio_channel_shutdown(client->ioc,
-                         QIO_CHANNEL_SHUTDOWN_BOTH,
-                         NULL);
-    nbd_recv_coroutines_enter_all(client);
-
-    nbd_client_detach_aio_context(bs);
-    object_unref(OBJECT(client->sioc));
-    client->sioc = NULL;
-    object_unref(OBJECT(client->ioc));
-    client->ioc = NULL;
-}
-
-static void nbd_reply_ready(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    NbdClientSession *s = nbd_get_client_session(bs);
-    uint64_t i;
-    int ret;
-
-    if (!s->ioc) { /* Already closed */
-        return;
-    }
-
-    if (s->reply.handle == 0) {
-        /* No reply already in flight.  Fetch a header.  It is possible
-         * that another thread has done the same thing in parallel, so
-         * the socket is not readable anymore.
-         */
-        ret = nbd_receive_reply(s->ioc, &s->reply);
-        if (ret == -EAGAIN) {
-            return;
-        }
-        if (ret < 0) {
-            s->reply.handle = 0;
-            goto fail;
-        }
-    }
-
-    /* There's no need for a mutex on the receive side, because the
-     * handler acts as a synchronization point and ensures that only
-     * one coroutine is called until the reply finishes.  */
-    i = HANDLE_TO_INDEX(s, s->reply.handle);
-    if (i >= MAX_NBD_REQUESTS) {
-        goto fail;
-    }
-
-    if (s->recv_coroutine[i]) {
-        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
-        return;
-    }
-
-fail:
-    nbd_teardown_connection(bs);
-}
-
-static void nbd_restart_write(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-
-    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
-}
-
-static int nbd_co_send_request(BlockDriverState *bs,
-                               struct nbd_request *request,
-                               QEMUIOVector *qiov, int offset)
-{
-    NbdClientSession *s = nbd_get_client_session(bs);
-    AioContext *aio_context;
-    int rc, ret, i;
-
-    qemu_co_mutex_lock(&s->send_mutex);
-
-    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
-        if (s->recv_coroutine[i] == NULL) {
-            s->recv_coroutine[i] = qemu_coroutine_self();
-            break;
-        }
-    }
-
-    g_assert(qemu_in_coroutine());
-    assert(i < MAX_NBD_REQUESTS);
-    request->handle = INDEX_TO_HANDLE(s, i);
-
-    if (!s->ioc) {
-        qemu_co_mutex_unlock(&s->send_mutex);
-        return -EPIPE;
-    }
-
-    s->send_coroutine = qemu_coroutine_self();
-    aio_context = bdrv_get_aio_context(bs);
-
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, nbd_restart_write, bs);
-    if (qiov) {
-        qio_channel_set_cork(s->ioc, true);
-        rc = nbd_send_request(s->ioc, request);
-        if (rc >= 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
-                               offset, request->len, 0);
-            if (ret != request->len) {
-                rc = -EIO;
-            }
-        }
-        qio_channel_set_cork(s->ioc, false);
-    } else {
-        rc = nbd_send_request(s->ioc, request);
-    }
-    aio_set_fd_handler(aio_context, s->sioc->fd, false,
-                       nbd_reply_ready, NULL, bs);
-    s->send_coroutine = NULL;
-    qemu_co_mutex_unlock(&s->send_mutex);
-    return rc;
-}
-
-static void nbd_co_receive_reply(NbdClientSession *s,
-    struct nbd_request *request, struct nbd_reply *reply,
-    QEMUIOVector *qiov, int offset)
-{
-    int ret;
-
-    /* Wait until we're woken up by the read handler.  TODO: perhaps
-     * peek at the next reply and avoid yielding if it's ours?  */
-    qemu_coroutine_yield();
-    *reply = s->reply;
-    if (reply->handle != request->handle ||
-        !s->ioc) {
-        reply->error = EIO;
-    } else {
-        if (qiov && reply->error == 0) {
-            ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
-                               offset, request->len, 1);
-            if (ret != request->len) {
-                reply->error = EIO;
-            }
-        }
-
-        /* Tell the read handler to read another header.  */
-        s->reply.handle = 0;
-    }
-}
-
-static void nbd_coroutine_start(NbdClientSession *s,
-   struct nbd_request *request)
-{
-    /* Poor man semaphore.  The free_sema is locked when no other request
-     * can be accepted, and unlocked after receiving one reply.  */
-    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
-        qemu_co_mutex_lock(&s->free_sema);
-        assert(s->in_flight < MAX_NBD_REQUESTS);
-    }
-    s->in_flight++;
-
-    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
-}
-
-static void nbd_coroutine_end(NbdClientSession *s,
-    struct nbd_request *request)
-{
-    int i = HANDLE_TO_INDEX(s, request->handle);
-    s->recv_coroutine[i] = NULL;
-    if (s->in_flight-- == MAX_NBD_REQUESTS) {
-        qemu_co_mutex_unlock(&s->free_sema);
-    }
-}
-
-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov,
-                          int offset)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_READ };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, qiov, offset);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-
-}
-
-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
-                           int nb_sectors, QEMUIOVector *qiov,
-                           int offset, int *flags)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_WRITE };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) {
-        *flags &= ~BDRV_REQ_FUA;
-        request.type |= NBD_CMD_FLAG_FUA;
-    }
-
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, qiov, offset);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-}
-
-/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
-{
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov, int *flags)
-{
-    int offset = 0;
-    int ret;
-    while (nb_sectors > NBD_MAX_SECTORS) {
-        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
-                              flags);
-        if (ret < 0) {
-            return ret;
-        }
-        offset += NBD_MAX_SECTORS * 512;
-        sector_num += NBD_MAX_SECTORS;
-        nb_sectors -= NBD_MAX_SECTORS;
-    }
-    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
-}
-
-int nbd_client_co_flush(BlockDriverState *bs)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_FLUSH };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
-        return 0;
-    }
-
-    request.from = 0;
-    request.len = 0;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-}
-
-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = { .type = NBD_CMD_TRIM };
-    struct nbd_reply reply;
-    ssize_t ret;
-
-    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
-        return 0;
-    }
-    request.from = sector_num * 512;
-    request.len = nb_sectors * 512;
-
-    nbd_coroutine_start(client, &request);
-    ret = nbd_co_send_request(bs, &request, NULL, 0);
-    if (ret < 0) {
-        reply.error = -ret;
-    } else {
-        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
-    }
-    nbd_coroutine_end(client, &request);
-    return -reply.error;
-
-}
-
-void nbd_client_detach_aio_context(BlockDriverState *bs)
-{
-    aio_set_fd_handler(bdrv_get_aio_context(bs),
-                       nbd_get_client_session(bs)->sioc->fd,
-                       false, NULL, NULL, NULL);
-}
-
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
-                       false, nbd_reply_ready, NULL, bs);
-}
-
-void nbd_client_close(BlockDriverState *bs)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    struct nbd_request request = {
-        .type = NBD_CMD_DISC,
-        .from = 0,
-        .len = 0
-    };
-
-    if (client->ioc == NULL) {
-        return;
-    }
-
-    nbd_send_request(client->ioc, &request);
-
-    nbd_teardown_connection(bs);
-}
-
-int nbd_client_init(BlockDriverState *bs,
-                    QIOChannelSocket *sioc,
-                    const char *export,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    Error **errp)
-{
-    NbdClientSession *client = nbd_get_client_session(bs);
-    int ret;
-
-    /* NBD handshake */
-    logout("session init %s\n", export);
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
-
-    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
-                                &client->nbdflags,
-                                tlscreds, hostname,
-                                &client->ioc,
-                                &client->size, errp);
-    if (ret < 0) {
-        logout("Failed to negotiate with the NBD server\n");
-        return ret;
-    }
-
-    qemu_co_mutex_init(&client->send_mutex);
-    qemu_co_mutex_init(&client->free_sema);
-    client->sioc = sioc;
-    object_ref(OBJECT(client->sioc));
-
-    if (!client->ioc) {
-        client->ioc = QIO_CHANNEL(sioc);
-        object_ref(OBJECT(client->ioc));
-    }
-
-    /* Now that we're connected, set the socket to be non-blocking and
-     * kick the reply mechanism.  */
-    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
-
-    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
-    logout("Established connection with NBD server\n");
-    return 0;
-}
diff --git a/qemu/block/nbd-client.h b/qemu/block/nbd-client.h
deleted file mode 100644
index bc7aec079..000000000
--- a/qemu/block/nbd-client.h
+++ /dev/null
@@ -1,59 +0,0 @@
-#ifndef NBD_CLIENT_H
-#define NBD_CLIENT_H
-
-#include "qemu-common.h"
-#include "block/nbd.h"
-#include "block/block_int.h"
-#include "io/channel-socket.h"
-
-/* #define DEBUG_NBD */
-
-#if defined(DEBUG_NBD)
-#define logout(fmt, ...) \
-    fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
-#else
-#define logout(fmt, ...) ((void)0)
-#endif
-
-#define MAX_NBD_REQUESTS    16
-
-typedef struct NbdClientSession {
-    QIOChannelSocket *sioc; /* The master data channel */
-    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
-    uint32_t nbdflags;
-    off_t size;
-
-    CoMutex send_mutex;
-    CoMutex free_sema;
-    Coroutine *send_coroutine;
-    int in_flight;
-
-    Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
-    struct nbd_reply reply;
-
-    bool is_unix;
-} NbdClientSession;
-
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
-
-int nbd_client_init(BlockDriverState *bs,
-                    QIOChannelSocket *sock,
-                    const char *export_name,
-                    QCryptoTLSCreds *tlscreds,
-                    const char *hostname,
-                    Error **errp);
-void nbd_client_close(BlockDriverState *bs);
-
-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors);
-int nbd_client_co_flush(BlockDriverState *bs);
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov, int *flags);
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov);
-
-void nbd_client_detach_aio_context(BlockDriverState *bs);
-void nbd_client_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context);
-
-#endif /* NBD_CLIENT_H */
diff --git a/qemu/block/nbd.c b/qemu/block/nbd.c
deleted file mode 100644
index f7ea3b360..000000000
--- a/qemu/block/nbd.c
+++ /dev/null
@@ -1,539 +0,0 @@
-/*
- * QEMU Block driver for  NBD
- *
- * Copyright (C) 2008 Bull S.A.S.
- *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
- *
- * Some parts:
- *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "block/nbd-client.h"
-#include "qapi/error.h"
-#include "qemu/uri.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qjson.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"
-#include "qemu/cutils.h"
-
-#define EN_OPTSTR ":exportname="
-
-typedef struct BDRVNBDState {
-    NbdClientSession client;
-} BDRVNBDState;
-
-static int nbd_parse_uri(const char *filename, QDict *options)
-{
-    URI *uri;
-    const char *p;
-    QueryParams *qp = NULL;
-    int ret = 0;
-    bool is_unix;
-
-    uri = uri_parse(filename);
-    if (!uri) {
-        return -EINVAL;
-    }
-
-    /* transport */
-    if (!strcmp(uri->scheme, "nbd")) {
-        is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+tcp")) {
-        is_unix = false;
-    } else if (!strcmp(uri->scheme, "nbd+unix")) {
-        is_unix = true;
-    } else {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    p = uri->path ? uri->path : "/";
-    p += strspn(p, "/");
-    if (p[0]) {
-        qdict_put(options, "export", qstring_from_str(p));
-    }
-
-    qp = query_params_parse(uri->query);
-    if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (is_unix) {
-        /* nbd+unix:///export?socket=path */
-        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
-            ret = -EINVAL;
-            goto out;
-        }
-        qdict_put(options, "path", qstring_from_str(qp->p[0].value));
-    } else {
-        QString *host;
-        /* nbd[+tcp]://host[:port]/export */
-        if (!uri->server) {
-            ret = -EINVAL;
-            goto out;
-        }
-
-        /* strip braces from literal IPv6 address */
-        if (uri->server[0] == '[') {
-            host = qstring_from_substr(uri->server, 1,
-                                       strlen(uri->server) - 2);
-        } else {
-            host = qstring_from_str(uri->server);
-        }
-
-        qdict_put(options, "host", host);
-        if (uri->port) {
-            char* port_str = g_strdup_printf("%d", uri->port);
-            qdict_put(options, "port", qstring_from_str(port_str));
-            g_free(port_str);
-        }
-    }
-
-out:
-    if (qp) {
-        query_params_free(qp);
-    }
-    uri_free(uri);
-    return ret;
-}
-
-static void nbd_parse_filename(const char *filename, QDict *options,
-                               Error **errp)
-{
-    char *file;
-    char *export_name;
-    const char *host_spec;
-    const char *unixpath;
-
-    if (qdict_haskey(options, "host")
-        || qdict_haskey(options, "port")
-        || qdict_haskey(options, "path"))
-    {
-        error_setg(errp, "host/port/path and a file name may not be specified "
-                         "at the same time");
-        return;
-    }
-
-    if (strstr(filename, "://")) {
-        int ret = nbd_parse_uri(filename, options);
-        if (ret < 0) {
-            error_setg(errp, "No valid URL specified");
-        }
-        return;
-    }
-
-    file = g_strdup(filename);
-
-    export_name = strstr(file, EN_OPTSTR);
-    if (export_name) {
-        if (export_name[strlen(EN_OPTSTR)] == 0) {
-            goto out;
-        }
-        export_name[0] = 0; /* truncate 'file' */
-        export_name += strlen(EN_OPTSTR);
-
-        qdict_put(options, "export", qstring_from_str(export_name));
-    }
-
-    /* extract the host_spec - fail if it's not nbd:... */
-    if (!strstart(file, "nbd:", &host_spec)) {
-        error_setg(errp, "File name string for NBD must start with 'nbd:'");
-        goto out;
-    }
-
-    if (!*host_spec) {
-        goto out;
-    }
-
-    /* are we a UNIX or TCP socket? */
-    if (strstart(host_spec, "unix:", &unixpath)) {
-        qdict_put(options, "path", qstring_from_str(unixpath));
-    } else {
-        InetSocketAddress *addr = NULL;
-
-        addr = inet_parse(host_spec, errp);
-        if (!addr) {
-            goto out;
-        }
-
-        qdict_put(options, "host", qstring_from_str(addr->host));
-        qdict_put(options, "port", qstring_from_str(addr->port));
-        qapi_free_InetSocketAddress(addr);
-    }
-
-out:
-    g_free(file);
-}
-
-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
-                                 Error **errp)
-{
-    SocketAddress *saddr;
-
-    if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
-        if (qdict_haskey(options, "path")) {
-            error_setg(errp, "path and host may not be used at the same time.");
-        } else {
-            error_setg(errp, "one of path and host must be specified.");
-        }
-        return NULL;
-    }
-
-    saddr = g_new0(SocketAddress, 1);
-
-    if (qdict_haskey(options, "path")) {
-        UnixSocketAddress *q_unix;
-        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
-        q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
-        q_unix->path = g_strdup(qdict_get_str(options, "path"));
-        qdict_del(options, "path");
-    } else {
-        InetSocketAddress *inet;
-        saddr->type = SOCKET_ADDRESS_KIND_INET;
-        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
-        inet->host = g_strdup(qdict_get_str(options, "host"));
-        if (!qdict_get_try_str(options, "port")) {
-            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
-        } else {
-            inet->port = g_strdup(qdict_get_str(options, "port"));
-        }
-        qdict_del(options, "host");
-        qdict_del(options, "port");
-    }
-
-    s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
-
-    *export = g_strdup(qdict_get_try_str(options, "export"));
-    if (*export) {
-        qdict_del(options, "export");
-    }
-
-    return saddr;
-}
-
-NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-    return &s->client;
-}
-
-static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
-                                                  Error **errp)
-{
-    QIOChannelSocket *sioc;
-    Error *local_err = NULL;
-
-    sioc = qio_channel_socket_new();
-
-    qio_channel_socket_connect_sync(sioc,
-                                    saddr,
-                                    &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return NULL;
-    }
-
-    qio_channel_set_delay(QIO_CHANNEL(sioc), false);
-
-    return sioc;
-}
-
-
-static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
-{
-    Object *obj;
-    QCryptoTLSCreds *creds;
-
-    obj = object_resolve_path_component(
-        object_get_objects_root(), id);
-    if (!obj) {
-        error_setg(errp, "No TLS credentials with id '%s'",
-                   id);
-        return NULL;
-    }
-    creds = (QCryptoTLSCreds *)
-        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
-    if (!creds) {
-        error_setg(errp, "Object with id '%s' is not TLS credentials",
-                   id);
-        return NULL;
-    }
-
-    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
-        error_setg(errp,
-                   "Expecting TLS credentials with a client endpoint");
-        return NULL;
-    }
-    object_ref(obj);
-    return creds;
-}
-
-
-static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVNBDState *s = bs->opaque;
-    char *export = NULL;
-    QIOChannelSocket *sioc = NULL;
-    SocketAddress *saddr;
-    const char *tlscredsid;
-    QCryptoTLSCreds *tlscreds = NULL;
-    const char *hostname = NULL;
-    int ret = -EINVAL;
-
-    /* Pop the config into our state object. Exit if invalid. */
-    saddr = nbd_config(s, options, &export, errp);
-    if (!saddr) {
-        goto error;
-    }
-
-    tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
-    if (tlscredsid) {
-        qdict_del(options, "tls-creds");
-        tlscreds = nbd_get_tls_creds(tlscredsid, errp);
-        if (!tlscreds) {
-            goto error;
-        }
-
-        if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
-            error_setg(errp, "TLS only supported over IP sockets");
-            goto error;
-        }
-        hostname = saddr->u.inet.data->host;
-    }
-
-    /* establish TCP connection, return error if it fails
-     * TODO: Configurable retry-until-timeout behaviour.
-     */
-    sioc = nbd_establish_connection(saddr, errp);
-    if (!sioc) {
-        ret = -ECONNREFUSED;
-        goto error;
-    }
-
-    /* NBD handshake */
-    ret = nbd_client_init(bs, sioc, export,
-                          tlscreds, hostname, errp);
- error:
-    if (sioc) {
-        object_unref(OBJECT(sioc));
-    }
-    if (tlscreds) {
-        object_unref(OBJECT(tlscreds));
-    }
-    qapi_free_SocketAddress(saddr);
-    g_free(export);
-    return ret;
-}
-
-static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
-{
-    return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
-}
-
-static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num,
-                               int nb_sectors, QEMUIOVector *qiov, int flags)
-{
-    int ret;
-
-    ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* The flag wasn't sent to the server, so we need to emulate it with an
-     * explicit flush */
-    if (flags & BDRV_REQ_FUA) {
-        ret = nbd_client_co_flush(bs);
-    }
-
-    return ret;
-}
-
-static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov)
-{
-    return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-static int nbd_co_flush(BlockDriverState *bs)
-{
-    return nbd_client_co_flush(bs);
-}
-
-static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
-    bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
-}
-
-static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors)
-{
-    return nbd_client_co_discard(bs, sector_num, nb_sectors);
-}
-
-static void nbd_close(BlockDriverState *bs)
-{
-    nbd_client_close(bs);
-}
-
-static int64_t nbd_getlength(BlockDriverState *bs)
-{
-    BDRVNBDState *s = bs->opaque;
-
-    return s->client.size;
-}
-
-static void nbd_detach_aio_context(BlockDriverState *bs)
-{
-    nbd_client_detach_aio_context(bs);
-}
-
-static void nbd_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    nbd_client_attach_aio_context(bs, new_context);
-}
-
-static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    QDict *opts = qdict_new();
-    const char *path   = qdict_get_try_str(options, "path");
-    const char *host   = qdict_get_try_str(options, "host");
-    const char *port   = qdict_get_try_str(options, "port");
-    const char *export = qdict_get_try_str(options, "export");
-    const char *tlscreds = qdict_get_try_str(options, "tls-creds");
-
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
-
-    if (path && export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix:///%s?socket=%s", export, path);
-    } else if (path && !export) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd+unix://?socket=%s", path);
-    } else if (!path && export && port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s/%s", host, port, export);
-    } else if (!path && export && !port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s/%s", host, export);
-    } else if (!path && !export && port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s:%s", host, port);
-    } else if (!path && !export && !port) {
-        snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "nbd://%s", host);
-    }
-
-    if (path) {
-        qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
-    } else if (port) {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-        qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
-    } else {
-        qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
-    }
-    if (export) {
-        qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
-    }
-    if (tlscreds) {
-        qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
-    }
-
-    bs->full_open_options = opts;
-}
-
-static BlockDriver bdrv_nbd = {
-    .format_name                = "nbd",
-    .protocol_name              = "nbd",
-    .instance_size              = sizeof(BDRVNBDState),
-    .bdrv_parse_filename        = nbd_parse_filename,
-    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev             = nbd_co_writev,
-    .bdrv_co_writev_flags       = nbd_co_writev_flags,
-    .supported_write_flags      = BDRV_REQ_FUA,
-    .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
-};
-
-static BlockDriver bdrv_nbd_tcp = {
-    .format_name                = "nbd",
-    .protocol_name              = "nbd+tcp",
-    .instance_size              = sizeof(BDRVNBDState),
-    .bdrv_parse_filename        = nbd_parse_filename,
-    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev             = nbd_co_writev,
-    .bdrv_co_writev_flags       = nbd_co_writev_flags,
-    .supported_write_flags      = BDRV_REQ_FUA,
-    .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
-};
-
-static BlockDriver bdrv_nbd_unix = {
-    .format_name                = "nbd",
-    .protocol_name              = "nbd+unix",
-    .instance_size              = sizeof(BDRVNBDState),
-    .bdrv_parse_filename        = nbd_parse_filename,
-    .bdrv_file_open             = nbd_open,
-    .bdrv_co_readv              = nbd_co_readv,
-    .bdrv_co_writev             = nbd_co_writev,
-    .bdrv_co_writev_flags       = nbd_co_writev_flags,
-    .supported_write_flags      = BDRV_REQ_FUA,
-    .bdrv_close                 = nbd_close,
-    .bdrv_co_flush_to_os        = nbd_co_flush,
-    .bdrv_co_discard            = nbd_co_discard,
-    .bdrv_refresh_limits        = nbd_refresh_limits,
-    .bdrv_getlength             = nbd_getlength,
-    .bdrv_detach_aio_context    = nbd_detach_aio_context,
-    .bdrv_attach_aio_context    = nbd_attach_aio_context,
-    .bdrv_refresh_filename      = nbd_refresh_filename,
-};
-
-static void bdrv_nbd_init(void)
-{
-    bdrv_register(&bdrv_nbd);
-    bdrv_register(&bdrv_nbd_tcp);
-    bdrv_register(&bdrv_nbd_unix);
-}
-
-block_init(bdrv_nbd_init);
diff --git a/qemu/block/nfs.c b/qemu/block/nfs.c
deleted file mode 100644
index 9f51cc3f1..000000000
--- a/qemu/block/nfs.c
+++ /dev/null
@@ -1,563 +0,0 @@
-/*
- * QEMU Block driver for native access to files on NFS shares
- *
- * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-
-#include <poll.h>
-#include "qemu-common.h"
-#include "qemu/config-file.h"
-#include "qemu/error-report.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "trace.h"
-#include "qemu/iov.h"
-#include "qemu/uri.h"
-#include "qemu/cutils.h"
-#include "sysemu/sysemu.h"
-#include <nfsc/libnfs.h>
-
-#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
-#define QEMU_NFS_MAX_DEBUG_LEVEL 2
-
-typedef struct NFSClient {
-    struct nfs_context *context;
-    struct nfsfh *fh;
-    int events;
-    bool has_zero_init;
-    AioContext *aio_context;
-    blkcnt_t st_blocks;
-} NFSClient;
-
-typedef struct NFSRPC {
-    int ret;
-    int complete;
-    QEMUIOVector *iov;
-    struct stat *st;
-    Coroutine *co;
-    QEMUBH *bh;
-    NFSClient *client;
-} NFSRPC;
-
-static void nfs_process_read(void *arg);
-static void nfs_process_write(void *arg);
-
-static void nfs_set_events(NFSClient *client)
-{
-    int ev = nfs_which_events(client->context);
-    if (ev != client->events) {
-        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false,
-                           (ev & POLLIN) ? nfs_process_read : NULL,
-                           (ev & POLLOUT) ? nfs_process_write : NULL, client);
-
-    }
-    client->events = ev;
-}
-
-static void nfs_process_read(void *arg)
-{
-    NFSClient *client = arg;
-    nfs_service(client->context, POLLIN);
-    nfs_set_events(client);
-}
-
-static void nfs_process_write(void *arg)
-{
-    NFSClient *client = arg;
-    nfs_service(client->context, POLLOUT);
-    nfs_set_events(client);
-}
-
-static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
-{
-    *task = (NFSRPC) {
-        .co             = qemu_coroutine_self(),
-        .client         = client,
-    };
-}
-
-static void nfs_co_generic_bh_cb(void *opaque)
-{
-    NFSRPC *task = opaque;
-    task->complete = 1;
-    qemu_bh_delete(task->bh);
-    qemu_coroutine_enter(task->co, NULL);
-}
-
-static void
-nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
-                  void *private_data)
-{
-    NFSRPC *task = private_data;
-    task->ret = ret;
-    if (task->ret > 0 && task->iov) {
-        if (task->ret <= task->iov->size) {
-            qemu_iovec_from_buf(task->iov, 0, data, task->ret);
-        } else {
-            task->ret = -EIO;
-        }
-    }
-    if (task->ret == 0 && task->st) {
-        memcpy(task->st, data, sizeof(struct stat));
-    }
-    if (task->ret < 0) {
-        error_report("NFS Error: %s", nfs_get_error(nfs));
-    }
-    if (task->co) {
-        task->bh = aio_bh_new(task->client->aio_context,
-                              nfs_co_generic_bh_cb, task);
-        qemu_bh_schedule(task->bh);
-    } else {
-        task->complete = 1;
-    }
-}
-
-static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
-                                     int64_t sector_num, int nb_sectors,
-                                     QEMUIOVector *iov)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task;
-
-    nfs_co_init_task(client, &task);
-    task.iov = iov;
-
-    if (nfs_pread_async(client->context, client->fh,
-                        sector_num * BDRV_SECTOR_SIZE,
-                        nb_sectors * BDRV_SECTOR_SIZE,
-                        nfs_co_generic_cb, &task) != 0) {
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        qemu_coroutine_yield();
-    }
-
-    if (task.ret < 0) {
-        return task.ret;
-    }
-
-    /* zero pad short reads */
-    if (task.ret < iov->size) {
-        qemu_iovec_memset(iov, task.ret, 0, iov->size - task.ret);
-    }
-
-    return 0;
-}
-
-static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
-                                        int64_t sector_num, int nb_sectors,
-                                        QEMUIOVector *iov)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task;
-    char *buf = NULL;
-
-    nfs_co_init_task(client, &task);
-
-    buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
-    if (nb_sectors && buf == NULL) {
-        return -ENOMEM;
-    }
-
-    qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
-
-    if (nfs_pwrite_async(client->context, client->fh,
-                         sector_num * BDRV_SECTOR_SIZE,
-                         nb_sectors * BDRV_SECTOR_SIZE,
-                         buf, nfs_co_generic_cb, &task) != 0) {
-        g_free(buf);
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        qemu_coroutine_yield();
-    }
-
-    g_free(buf);
-
-    if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
-        return task.ret < 0 ? task.ret : -EIO;
-    }
-
-    return 0;
-}
-
-static int coroutine_fn nfs_co_flush(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task;
-
-    nfs_co_init_task(client, &task);
-
-    if (nfs_fsync_async(client->context, client->fh, nfs_co_generic_cb,
-                        &task) != 0) {
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        qemu_coroutine_yield();
-    }
-
-    return task.ret;
-}
-
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "nfs",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the NFS file",
-        },
-        { /* end of list */ }
-    },
-};
-
-static void nfs_detach_aio_context(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-
-    aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                       false, NULL, NULL, NULL);
-    client->events = 0;
-}
-
-static void nfs_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    NFSClient *client = bs->opaque;
-
-    client->aio_context = new_context;
-    nfs_set_events(client);
-}
-
-static void nfs_client_close(NFSClient *client)
-{
-    if (client->context) {
-        if (client->fh) {
-            nfs_close(client->context, client->fh);
-        }
-        aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
-                           false, NULL, NULL, NULL);
-        nfs_destroy_context(client->context);
-    }
-    memset(client, 0, sizeof(NFSClient));
-}
-
-static void nfs_file_close(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    nfs_client_close(client);
-}
-
-static int64_t nfs_client_open(NFSClient *client, const char *filename,
-                               int flags, Error **errp)
-{
-    int ret = -EINVAL, i;
-    struct stat st;
-    URI *uri;
-    QueryParams *qp = NULL;
-    char *file = NULL, *strp = NULL;
-
-    uri = uri_parse(filename);
-    if (!uri) {
-        error_setg(errp, "Invalid URL specified");
-        goto fail;
-    }
-    if (!uri->server) {
-        error_setg(errp, "Invalid URL specified");
-        goto fail;
-    }
-    strp = strrchr(uri->path, '/');
-    if (strp == NULL) {
-        error_setg(errp, "Invalid URL specified");
-        goto fail;
-    }
-    file = g_strdup(strp);
-    *strp = 0;
-
-    client->context = nfs_init_context();
-    if (client->context == NULL) {
-        error_setg(errp, "Failed to init NFS context");
-        goto fail;
-    }
-
-    qp = query_params_parse(uri->query);
-    for (i = 0; i < qp->n; i++) {
-        unsigned long long val;
-        if (!qp->p[i].value) {
-            error_setg(errp, "Value for NFS parameter expected: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-        if (parse_uint_full(qp->p[i].value, &val, 0)) {
-            error_setg(errp, "Illegal value for NFS parameter: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-        if (!strcmp(qp->p[i].name, "uid")) {
-            nfs_set_uid(client->context, val);
-        } else if (!strcmp(qp->p[i].name, "gid")) {
-            nfs_set_gid(client->context, val);
-        } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
-            nfs_set_tcp_syncnt(client->context, val);
-#ifdef LIBNFS_FEATURE_READAHEAD
-        } else if (!strcmp(qp->p[i].name, "readahead")) {
-            if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
-                error_report("NFS Warning: Truncating NFS readahead"
-                             " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
-                val = QEMU_NFS_MAX_READAHEAD_SIZE;
-            }
-            nfs_set_readahead(client->context, val);
-#endif
-#ifdef LIBNFS_FEATURE_DEBUG
-        } else if (!strcmp(qp->p[i].name, "debug")) {
-            /* limit the maximum debug level to avoid potential flooding
-             * of our log files. */
-            if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
-                error_report("NFS Warning: Limiting NFS debug level"
-                             " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
-                val = QEMU_NFS_MAX_DEBUG_LEVEL;
-            }
-            nfs_set_debug(client->context, val);
-#endif
-        } else {
-            error_setg(errp, "Unknown NFS parameter name: %s",
-                       qp->p[i].name);
-            goto fail;
-        }
-    }
-
-    ret = nfs_mount(client->context, uri->server, uri->path);
-    if (ret < 0) {
-        error_setg(errp, "Failed to mount nfs share: %s",
-                   nfs_get_error(client->context));
-        goto fail;
-    }
-
-    if (flags & O_CREAT) {
-        ret = nfs_creat(client->context, file, 0600, &client->fh);
-        if (ret < 0) {
-            error_setg(errp, "Failed to create file: %s",
-                       nfs_get_error(client->context));
-            goto fail;
-        }
-    } else {
-        ret = nfs_open(client->context, file, flags, &client->fh);
-        if (ret < 0) {
-            error_setg(errp, "Failed to open file : %s",
-                       nfs_get_error(client->context));
-            goto fail;
-        }
-    }
-
-    ret = nfs_fstat(client->context, client->fh, &st);
-    if (ret < 0) {
-        error_setg(errp, "Failed to fstat file: %s",
-                   nfs_get_error(client->context));
-        goto fail;
-    }
-
-    ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
-    client->st_blocks = st.st_blocks;
-    client->has_zero_init = S_ISREG(st.st_mode);
-    goto out;
-fail:
-    nfs_client_close(client);
-out:
-    if (qp) {
-        query_params_free(qp);
-    }
-    uri_free(uri);
-    g_free(file);
-    return ret;
-}
-
-static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp) {
-    NFSClient *client = bs->opaque;
-    int64_t ret;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-
-    client->aio_context = bdrv_get_aio_context(bs);
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-    ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
-                          (flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
-                          errp);
-    if (ret < 0) {
-        goto out;
-    }
-    bs->total_sectors = ret;
-    ret = 0;
-out:
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static QemuOptsList nfs_create_opts = {
-    .name = "nfs-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
-};
-
-static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
-{
-    int ret = 0;
-    int64_t total_size = 0;
-    NFSClient *client = g_new0(NFSClient, 1);
-
-    client->aio_context = qemu_get_aio_context();
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    ret = nfs_client_open(client, url, O_CREAT, errp);
-    if (ret < 0) {
-        goto out;
-    }
-    ret = nfs_ftruncate(client->context, client->fh, total_size);
-    nfs_client_close(client);
-out:
-    g_free(client);
-    return ret;
-}
-
-static int nfs_has_zero_init(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    return client->has_zero_init;
-}
-
-static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
-{
-    NFSClient *client = bs->opaque;
-    NFSRPC task = {0};
-    struct stat st;
-
-    if (bdrv_is_read_only(bs) &&
-        !(bs->open_flags & BDRV_O_NOCACHE)) {
-        return client->st_blocks * 512;
-    }
-
-    task.st = &st;
-    if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
-                        &task) != 0) {
-        return -ENOMEM;
-    }
-
-    while (!task.complete) {
-        nfs_set_events(client);
-        aio_poll(client->aio_context, true);
-    }
-
-    return (task.ret < 0 ? task.ret : st.st_blocks * 512);
-}
-
-static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
-{
-    NFSClient *client = bs->opaque;
-    return nfs_ftruncate(client->context, client->fh, offset);
-}
-
-/* Note that this will not re-establish a connection with the NFS server
- * - it is effectively a NOP.  */
-static int nfs_reopen_prepare(BDRVReopenState *state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    NFSClient *client = state->bs->opaque;
-    struct stat st;
-    int ret = 0;
-
-    if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
-        error_setg(errp, "Cannot open a read-only mount as read-write");
-        return -EACCES;
-    }
-
-    /* Update cache for read-only reopens */
-    if (!(state->flags & BDRV_O_RDWR)) {
-        ret = nfs_fstat(client->context, client->fh, &st);
-        if (ret < 0) {
-            error_setg(errp, "Failed to fstat file: %s",
-                       nfs_get_error(client->context));
-            return ret;
-        }
-        client->st_blocks = st.st_blocks;
-    }
-
-    return 0;
-}
-
-static BlockDriver bdrv_nfs = {
-    .format_name                    = "nfs",
-    .protocol_name                  = "nfs",
-
-    .instance_size                  = sizeof(NFSClient),
-    .bdrv_needs_filename            = true,
-    .create_opts                    = &nfs_create_opts,
-
-    .bdrv_has_zero_init             = nfs_has_zero_init,
-    .bdrv_get_allocated_file_size   = nfs_get_allocated_file_size,
-    .bdrv_truncate                  = nfs_file_truncate,
-
-    .bdrv_file_open                 = nfs_file_open,
-    .bdrv_close                     = nfs_file_close,
-    .bdrv_create                    = nfs_file_create,
-    .bdrv_reopen_prepare            = nfs_reopen_prepare,
-
-    .bdrv_co_readv                  = nfs_co_readv,
-    .bdrv_co_writev                 = nfs_co_writev,
-    .bdrv_co_flush_to_disk          = nfs_co_flush,
-
-    .bdrv_detach_aio_context        = nfs_detach_aio_context,
-    .bdrv_attach_aio_context        = nfs_attach_aio_context,
-};
-
-static void nfs_block_init(void)
-{
-    bdrv_register(&bdrv_nfs);
-}
-
-block_init(nfs_block_init);
diff --git a/qemu/block/null.c b/qemu/block/null.c
deleted file mode 100644
index 396500bab..000000000
--- a/qemu/block/null.c
+++ /dev/null
@@ -1,266 +0,0 @@
-/*
- * Null block driver
- *
- * Authors:
- *  Fam Zheng <famz@redhat.com>
- *
- * Copyright (C) 2014 Red Hat, Inc.
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-
-#define NULL_OPT_LATENCY "latency-ns"
-#define NULL_OPT_ZEROES  "read-zeroes"
-
-typedef struct {
-    int64_t length;
-    int64_t latency_ns;
-    bool read_zeroes;
-} BDRVNullState;
-
-static QemuOptsList runtime_opts = {
-    .name = "null",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "",
-        },
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "size of the null block",
-        },
-        {
-            .name = NULL_OPT_LATENCY,
-            .type = QEMU_OPT_NUMBER,
-            .help = "nanoseconds (approximated) to wait "
-                    "before completing request",
-        },
-        {
-            .name = NULL_OPT_ZEROES,
-            .type = QEMU_OPT_BOOL,
-            .help = "return zeroes when read",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp)
-{
-    QemuOpts *opts;
-    BDRVNullState *s = bs->opaque;
-    int ret = 0;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &error_abort);
-    s->length =
-        qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
-    s->latency_ns =
-        qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0);
-    if (s->latency_ns < 0) {
-        error_setg(errp, "latency-ns is invalid");
-        ret = -EINVAL;
-    }
-    s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static void null_close(BlockDriverState *bs)
-{
-}
-
-static int64_t null_getlength(BlockDriverState *bs)
-{
-    BDRVNullState *s = bs->opaque;
-    return s->length;
-}
-
-static coroutine_fn int null_co_common(BlockDriverState *bs)
-{
-    BDRVNullState *s = bs->opaque;
-
-    if (s->latency_ns) {
-        co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
-                        s->latency_ns);
-    }
-    return 0;
-}
-
-static coroutine_fn int null_co_readv(BlockDriverState *bs,
-                                      int64_t sector_num, int nb_sectors,
-                                      QEMUIOVector *qiov)
-{
-    BDRVNullState *s = bs->opaque;
-
-    if (s->read_zeroes) {
-        qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
-    }
-
-    return null_co_common(bs);
-}
-
-static coroutine_fn int null_co_writev(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors,
-                                       QEMUIOVector *qiov)
-{
-    return null_co_common(bs);
-}
-
-static coroutine_fn int null_co_flush(BlockDriverState *bs)
-{
-    return null_co_common(bs);
-}
-
-typedef struct {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    QEMUTimer timer;
-} NullAIOCB;
-
-static const AIOCBInfo null_aiocb_info = {
-    .aiocb_size = sizeof(NullAIOCB),
-};
-
-static void null_bh_cb(void *opaque)
-{
-    NullAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, 0);
-    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
-}
-
-static void null_timer_cb(void *opaque)
-{
-    NullAIOCB *acb = opaque;
-    acb->common.cb(acb->common.opaque, 0);
-    timer_deinit(&acb->timer);
-    qemu_aio_unref(acb);
-}
-
-static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
-                                          BlockCompletionFunc *cb,
-                                          void *opaque)
-{
-    NullAIOCB *acb;
-    BDRVNullState *s = bs->opaque;
-
-    acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
-    /* Only emulate latency after vcpu is running. */
-    if (s->latency_ns) {
-        aio_timer_init(bdrv_get_aio_context(bs), &acb->timer,
-                       QEMU_CLOCK_REALTIME, SCALE_NS,
-                       null_timer_cb, acb);
-        timer_mod_ns(&acb->timer,
-                     qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
-    } else {
-        acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
-        qemu_bh_schedule(acb->bh);
-    }
-    return &acb->common;
-}
-
-static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
-                                  int64_t sector_num, QEMUIOVector *qiov,
-                                  int nb_sectors,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
-{
-    BDRVNullState *s = bs->opaque;
-
-    if (s->read_zeroes) {
-        qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
-    }
-
-    return null_aio_common(bs, cb, opaque);
-}
-
-static BlockAIOCB *null_aio_writev(BlockDriverState *bs,
-                                   int64_t sector_num, QEMUIOVector *qiov,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static BlockAIOCB *null_aio_flush(BlockDriverState *bs,
-                                  BlockCompletionFunc *cb,
-                                  void *opaque)
-{
-    return null_aio_common(bs, cb, opaque);
-}
-
-static int null_reopen_prepare(BDRVReopenState *reopen_state,
-                               BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
-                                                     int64_t sector_num,
-                                                     int nb_sectors, int *pnum,
-                                                     BlockDriverState **file)
-{
-    BDRVNullState *s = bs->opaque;
-    off_t start = sector_num * BDRV_SECTOR_SIZE;
-
-    *pnum = nb_sectors;
-    *file = bs;
-
-    if (s->read_zeroes) {
-        return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
-    } else {
-        return BDRV_BLOCK_OFFSET_VALID | start;
-    }
-}
-
-static BlockDriver bdrv_null_co = {
-    .format_name            = "null-co",
-    .protocol_name          = "null-co",
-    .instance_size          = sizeof(BDRVNullState),
-
-    .bdrv_file_open         = null_file_open,
-    .bdrv_close             = null_close,
-    .bdrv_getlength         = null_getlength,
-
-    .bdrv_co_readv          = null_co_readv,
-    .bdrv_co_writev         = null_co_writev,
-    .bdrv_co_flush_to_disk  = null_co_flush,
-    .bdrv_reopen_prepare    = null_reopen_prepare,
-
-    .bdrv_co_get_block_status   = null_co_get_block_status,
-};
-
-static BlockDriver bdrv_null_aio = {
-    .format_name            = "null-aio",
-    .protocol_name          = "null-aio",
-    .instance_size          = sizeof(BDRVNullState),
-
-    .bdrv_file_open         = null_file_open,
-    .bdrv_close             = null_close,
-    .bdrv_getlength         = null_getlength,
-
-    .bdrv_aio_readv         = null_aio_readv,
-    .bdrv_aio_writev        = null_aio_writev,
-    .bdrv_aio_flush         = null_aio_flush,
-    .bdrv_reopen_prepare    = null_reopen_prepare,
-
-    .bdrv_co_get_block_status   = null_co_get_block_status,
-};
-
-static void bdrv_null_init(void)
-{
-    bdrv_register(&bdrv_null_co);
-    bdrv_register(&bdrv_null_aio);
-}
-
-block_init(bdrv_null_init);
diff --git a/qemu/block/parallels.c b/qemu/block/parallels.c
deleted file mode 100644
index 324ed43ac..000000000
--- a/qemu/block/parallels.c
+++ /dev/null
@@ -1,766 +0,0 @@
-/*
- * Block driver for Parallels disk image format
- *
- * Copyright (c) 2007 Alex Beregszaszi
- * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
- *
- * This code was originally based on comparing different disk images created
- * by Parallels. Currently it is based on opened OpenVZ sources
- * available at
- *     http://git.openvz.org/?p=ploop;a=summary
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/module.h"
-#include "qemu/bitmap.h"
-#include "qapi/util.h"
-
-/**************************************************************/
-
-#define HEADER_MAGIC "WithoutFreeSpace"
-#define HEADER_MAGIC2 "WithouFreSpacExt"
-#define HEADER_VERSION 2
-#define HEADER_INUSE_MAGIC  (0x746F6E59)
-
-#define DEFAULT_CLUSTER_SIZE 1048576        /* 1 MiB */
-
-
-// always little-endian
-typedef struct ParallelsHeader {
-    char magic[16]; // "WithoutFreeSpace"
-    uint32_t version;
-    uint32_t heads;
-    uint32_t cylinders;
-    uint32_t tracks;
-    uint32_t bat_entries;
-    uint64_t nb_sectors;
-    uint32_t inuse;
-    uint32_t data_off;
-    char padding[12];
-} QEMU_PACKED ParallelsHeader;
-
-
-typedef enum ParallelsPreallocMode {
-    PRL_PREALLOC_MODE_FALLOCATE = 0,
-    PRL_PREALLOC_MODE_TRUNCATE = 1,
-    PRL_PREALLOC_MODE__MAX = 2,
-} ParallelsPreallocMode;
-
-static const char *prealloc_mode_lookup[] = {
-    "falloc",
-    "truncate",
-    NULL,
-};
-
-
-typedef struct BDRVParallelsState {
-    /** Locking is conservative, the lock protects
-     *   - image file extending (truncate, fallocate)
-     *   - any access to block allocation table
-     */
-    CoMutex lock;
-
-    ParallelsHeader *header;
-    uint32_t header_size;
-    bool header_unclean;
-
-    unsigned long *bat_dirty_bmap;
-    unsigned int  bat_dirty_block;
-
-    uint32_t *bat_bitmap;
-    unsigned int bat_size;
-
-    int64_t  data_end;
-    uint64_t prealloc_size;
-    ParallelsPreallocMode prealloc_mode;
-
-    unsigned int tracks;
-
-    unsigned int off_multiplier;
-} BDRVParallelsState;
-
-
-#define PARALLELS_OPT_PREALLOC_MODE     "prealloc-mode"
-#define PARALLELS_OPT_PREALLOC_SIZE     "prealloc-size"
-
-static QemuOptsList parallels_runtime_opts = {
-    .name = "parallels",
-    .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
-    .desc = {
-        {
-            .name = PARALLELS_OPT_PREALLOC_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Preallocation size on image expansion",
-            .def_value_str = "128MiB",
-        },
-        {
-            .name = PARALLELS_OPT_PREALLOC_MODE,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode on image expansion "
-                    "(allowed values: falloc, truncate)",
-            .def_value_str = "falloc",
-        },
-        { /* end of list */ },
-    },
-};
-
-
-static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
-{
-    return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
-}
-
-static uint32_t bat_entry_off(uint32_t idx)
-{
-    return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
-}
-
-static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
-{
-    uint32_t index, offset;
-
-    index = sector_num / s->tracks;
-    offset = sector_num % s->tracks;
-
-    /* not allocated */
-    if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
-        return -1;
-    }
-    return bat2sect(s, index) + offset;
-}
-
-static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
-        int nb_sectors)
-{
-    int ret = s->tracks - sector_num % s->tracks;
-    return MIN(nb_sectors, ret);
-}
-
-static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
-                            int nb_sectors, int *pnum)
-{
-    int64_t start_off = -2, prev_end_off = -2;
-
-    *pnum = 0;
-    while (nb_sectors > 0 || start_off == -2) {
-        int64_t offset = seek_to_sector(s, sector_num);
-        int to_end;
-
-        if (start_off == -2) {
-            start_off = offset;
-            prev_end_off = offset;
-        } else if (offset != prev_end_off) {
-            break;
-        }
-
-        to_end = cluster_remainder(s, sector_num, nb_sectors);
-        nb_sectors -= to_end;
-        sector_num += to_end;
-        *pnum += to_end;
-
-        if (offset > 0) {
-            prev_end_off += to_end;
-        }
-    }
-    return start_off;
-}
-
-static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
-                                 int nb_sectors, int *pnum)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint32_t idx, to_allocate, i;
-    int64_t pos, space;
-
-    pos = block_status(s, sector_num, nb_sectors, pnum);
-    if (pos > 0) {
-        return pos;
-    }
-
-    idx = sector_num / s->tracks;
-    if (idx >= s->bat_size) {
-        return -EINVAL;
-    }
-
-    to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
-    space = to_allocate * s->tracks;
-    if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
-        int ret;
-        space += s->prealloc_size;
-        if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
-            ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
-        } else {
-            ret = bdrv_truncate(bs->file->bs,
-                                (s->data_end + space) << BDRV_SECTOR_BITS);
-        }
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    for (i = 0; i < to_allocate; i++) {
-        s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
-        s->data_end += s->tracks;
-        bitmap_set(s->bat_dirty_bmap,
-                   bat_entry_off(idx + i) / s->bat_dirty_block, 1);
-    }
-
-    return bat2sect(s, idx) + sector_num % s->tracks;
-}
-
-
-static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
-    unsigned long bit;
-
-    qemu_co_mutex_lock(&s->lock);
-
-    bit = find_first_bit(s->bat_dirty_bmap, size);
-    while (bit < size) {
-        uint32_t off = bit * s->bat_dirty_block;
-        uint32_t to_write = s->bat_dirty_block;
-        int ret;
-
-        if (off + to_write > s->header_size) {
-            to_write = s->header_size - off;
-        }
-        ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
-                          to_write);
-        if (ret < 0) {
-            qemu_co_mutex_unlock(&s->lock);
-            return ret;
-        }
-        bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
-    }
-    bitmap_zero(s->bat_dirty_bmap, size);
-
-    qemu_co_mutex_unlock(&s->lock);
-    return 0;
-}
-
-
-static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int64_t offset;
-
-    qemu_co_mutex_lock(&s->lock);
-    offset = block_status(s, sector_num, nb_sectors, pnum);
-    qemu_co_mutex_unlock(&s->lock);
-
-    if (offset < 0) {
-        return 0;
-    }
-
-    *file = bs->file->bs;
-    return (offset << BDRV_SECTOR_BITS) |
-        BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
-}
-
-static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    while (nb_sectors > 0) {
-        int64_t position;
-        int n, nbytes;
-
-        qemu_co_mutex_lock(&s->lock);
-        position = allocate_clusters(bs, sector_num, nb_sectors, &n);
-        qemu_co_mutex_unlock(&s->lock);
-        if (position < 0) {
-            ret = (int)position;
-            break;
-        }
-
-        nbytes = n << BDRV_SECTOR_BITS;
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
-
-        ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
-        if (ret < 0) {
-            break;
-        }
-
-        nb_sectors -= n;
-        sector_num += n;
-        bytes_done += nbytes;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVParallelsState *s = bs->opaque;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    int ret = 0;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    while (nb_sectors > 0) {
-        int64_t position;
-        int n, nbytes;
-
-        qemu_co_mutex_lock(&s->lock);
-        position = block_status(s, sector_num, nb_sectors, &n);
-        qemu_co_mutex_unlock(&s->lock);
-
-        nbytes = n << BDRV_SECTOR_BITS;
-
-        if (position < 0) {
-            qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
-        } else {
-            qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
-
-            ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
-            if (ret < 0) {
-                break;
-            }
-        }
-
-        nb_sectors -= n;
-        sector_num += n;
-        bytes_done += nbytes;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-
-static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
-                           BdrvCheckMode fix)
-{
-    BDRVParallelsState *s = bs->opaque;
-    int64_t size, prev_off, high_off;
-    int ret;
-    uint32_t i;
-    bool flush_bat = false;
-    int cluster_size = s->tracks << BDRV_SECTOR_BITS;
-
-    size = bdrv_getlength(bs->file->bs);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
-    }
-
-    if (s->header_unclean) {
-        fprintf(stderr, "%s image was not closed correctly\n",
-                fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
-        res->corruptions++;
-        if (fix & BDRV_FIX_ERRORS) {
-            /* parallels_close will do the job right */
-            res->corruptions_fixed++;
-            s->header_unclean = false;
-        }
-    }
-
-    res->bfi.total_clusters = s->bat_size;
-    res->bfi.compressed_clusters = 0; /* compression is not supported */
-
-    high_off = 0;
-    prev_off = 0;
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
-        if (off == 0) {
-            prev_off = 0;
-            continue;
-        }
-
-        /* cluster outside the image */
-        if (off > size) {
-            fprintf(stderr, "%s cluster %u is outside image\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
-            res->corruptions++;
-            if (fix & BDRV_FIX_ERRORS) {
-                prev_off = 0;
-                s->bat_bitmap[i] = 0;
-                res->corruptions_fixed++;
-                flush_bat = true;
-                continue;
-            }
-        }
-
-        res->bfi.allocated_clusters++;
-        if (off > high_off) {
-            high_off = off;
-        }
-
-        if (prev_off != 0 && (prev_off + cluster_size) != off) {
-            res->bfi.fragmented_clusters++;
-        }
-        prev_off = off;
-    }
-
-    if (flush_bat) {
-        ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
-        if (ret < 0) {
-            res->check_errors++;
-            return ret;
-        }
-    }
-
-    res->image_end_offset = high_off + cluster_size;
-    if (size > res->image_end_offset) {
-        int64_t count;
-        count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
-        fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
-                fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
-                size - res->image_end_offset);
-        res->leaks += count;
-        if (fix & BDRV_FIX_LEAKS) {
-            ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
-            if (ret < 0) {
-                res->check_errors++;
-                return ret;
-            }
-            res->leaks_fixed += count;
-        }
-    }
-
-    return 0;
-}
-
-
-static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int64_t total_size, cl_size;
-    uint8_t tmp[BDRV_SECTOR_SIZE];
-    Error *local_err = NULL;
-    BlockBackend *file;
-    uint32_t bat_entries, bat_sectors;
-    ParallelsHeader header;
-    int ret;
-
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
-                          DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return ret;
-    }
-
-    file = blk_new_open(filename, NULL, NULL,
-                        BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (file == NULL) {
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    blk_set_allow_write_beyond_eof(file, true);
-
-    ret = blk_truncate(file, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    bat_entries = DIV_ROUND_UP(total_size, cl_size);
-    bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
-    bat_sectors = (bat_sectors *  cl_size) >> BDRV_SECTOR_BITS;
-
-    memset(&header, 0, sizeof(header));
-    memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
-    header.version = cpu_to_le32(HEADER_VERSION);
-    /* don't care much about geometry, it is not used on image level */
-    header.heads = cpu_to_le32(16);
-    header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
-    header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
-    header.bat_entries = cpu_to_le32(bat_entries);
-    header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
-    header.data_off = cpu_to_le32(bat_sectors);
-
-    /* write all the data */
-    memset(tmp, 0, sizeof(tmp));
-    memcpy(tmp, &header, sizeof(header));
-
-    ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = 0;
-
-done:
-    blk_unref(file);
-    return ret;
-
-exit:
-    error_setg_errno(errp, -ret, "Failed to create Parallels image");
-    goto done;
-}
-
-
-static int parallels_probe(const uint8_t *buf, int buf_size,
-                           const char *filename)
-{
-    const ParallelsHeader *ph = (const void *)buf;
-
-    if (buf_size < sizeof(ParallelsHeader)) {
-        return 0;
-    }
-
-    if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
-           !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
-           (le32_to_cpu(ph->version) == HEADER_VERSION)) {
-        return 100;
-    }
-
-    return 0;
-}
-
-static int parallels_update_header(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-    unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
-                        sizeof(ParallelsHeader));
-
-    if (size > s->header_size) {
-        size = s->header_size;
-    }
-    return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
-}
-
-static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
-                          Error **errp)
-{
-    BDRVParallelsState *s = bs->opaque;
-    ParallelsHeader ph;
-    int ret, size, i;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    char *buf;
-
-    ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
-    if (ret < 0) {
-        goto fail;
-    }
-
-    bs->total_sectors = le64_to_cpu(ph.nb_sectors);
-
-    if (le32_to_cpu(ph.version) != HEADER_VERSION) {
-        goto fail_format;
-    }
-    if (!memcmp(ph.magic, HEADER_MAGIC, 16)) {
-        s->off_multiplier = 1;
-        bs->total_sectors = 0xffffffff & bs->total_sectors;
-    } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) {
-        s->off_multiplier = le32_to_cpu(ph.tracks);
-    } else {
-        goto fail_format;
-    }
-
-    s->tracks = le32_to_cpu(ph.tracks);
-    if (s->tracks == 0) {
-        error_setg(errp, "Invalid image: Zero sectors per track");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (s->tracks > INT32_MAX/513) {
-        error_setg(errp, "Invalid image: Too big cluster");
-        ret = -EFBIG;
-        goto fail;
-    }
-
-    s->bat_size = le32_to_cpu(ph.bat_entries);
-    if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
-        error_setg(errp, "Catalog too large");
-        ret = -EFBIG;
-        goto fail;
-    }
-
-    size = bat_entry_off(s->bat_size);
-    s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
-    s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
-    if (s->header == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-    s->data_end = le32_to_cpu(ph.data_off);
-    if (s->data_end == 0) {
-        s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
-    }
-    if (s->data_end < s->header_size) {
-        /* there is not enough unused space to fit to block align between BAT
-           and actual data. We can't avoid read-modify-write... */
-        s->header_size = size;
-    }
-
-    ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
-    if (ret < 0) {
-        goto fail;
-    }
-    s->bat_bitmap = (uint32_t *)(s->header + 1);
-
-    for (i = 0; i < s->bat_size; i++) {
-        int64_t off = bat2sect(s, i);
-        if (off >= s->data_end) {
-            s->data_end = off + s->tracks;
-        }
-    }
-
-    if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
-        /* Image was not closed correctly. The check is mandatory */
-        s->header_unclean = true;
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "parallels: Image was not closed correctly; "
-                       "cannot be opened read/write");
-            ret = -EACCES;
-            goto fail;
-        }
-    }
-
-    opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, &local_err);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-
-    s->prealloc_size =
-        qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
-    s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
-    buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
-    s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
-            PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
-    g_free(buf);
-    if (local_err != NULL) {
-        goto fail_options;
-    }
-    if (!bdrv_has_zero_init(bs->file->bs) ||
-            bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
-        s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
-    }
-
-    if (flags & BDRV_O_RDWR) {
-        s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
-        ret = parallels_update_header(bs);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    s->bat_dirty_block = 4 * getpagesize();
-    s->bat_dirty_bmap =
-        bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
-
-    qemu_co_mutex_init(&s->lock);
-    return 0;
-
-fail_format:
-    error_setg(errp, "Image not in Parallels format");
-    ret = -EINVAL;
-fail:
-    qemu_vfree(s->header);
-    return ret;
-
-fail_options:
-    error_propagate(errp, local_err);
-    ret = -EINVAL;
-    goto fail;
-}
-
-
-static void parallels_close(BlockDriverState *bs)
-{
-    BDRVParallelsState *s = bs->opaque;
-
-    if (bs->open_flags & BDRV_O_RDWR) {
-        s->header->inuse = 0;
-        parallels_update_header(bs);
-    }
-
-    if (bs->open_flags & BDRV_O_RDWR) {
-        bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
-    }
-
-    g_free(s->bat_dirty_bmap);
-    qemu_vfree(s->header);
-}
-
-static QemuOptsList parallels_create_opts = {
-    .name = "parallels-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size",
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Parallels image cluster size",
-            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_parallels = {
-    .format_name	= "parallels",
-    .instance_size	= sizeof(BDRVParallelsState),
-    .bdrv_probe		= parallels_probe,
-    .bdrv_open		= parallels_open,
-    .bdrv_close		= parallels_close,
-    .bdrv_co_get_block_status = parallels_co_get_block_status,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_flush_to_os      = parallels_co_flush_to_os,
-    .bdrv_co_readv  = parallels_co_readv,
-    .bdrv_co_writev = parallels_co_writev,
-
-    .bdrv_create    = parallels_create,
-    .bdrv_check     = parallels_check,
-    .create_opts    = &parallels_create_opts,
-};
-
-static void bdrv_parallels_init(void)
-{
-    bdrv_register(&bdrv_parallels);
-}
-
-block_init(bdrv_parallels_init);
diff --git a/qemu/block/qapi.c b/qemu/block/qapi.c
deleted file mode 100644
index c5f6ba643..000000000
--- a/qemu/block/qapi.c
+++ /dev/null
@@ -1,783 +0,0 @@
-/*
- * Block layer qmp and info dump related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "block/qapi.h"
-#include "block/block_int.h"
-#include "block/throttle-groups.h"
-#include "block/write-threshold.h"
-#include "qmp-commands.h"
-#include "qapi-visit.h"
-#include "qapi/qmp-output-visitor.h"
-#include "qapi/qmp/types.h"
-#include "sysemu/block-backend.h"
-#include "qemu/cutils.h"
-
-BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
-                                        BlockDriverState *bs, Error **errp)
-{
-    ImageInfo **p_image_info;
-    BlockDriverState *bs0;
-    BlockDeviceInfo *info = g_malloc0(sizeof(*info));
-
-    info->file                   = g_strdup(bs->filename);
-    info->ro                     = bs->read_only;
-    info->drv                    = g_strdup(bs->drv->format_name);
-    info->encrypted              = bs->encrypted;
-    info->encryption_key_missing = bdrv_key_required(bs);
-
-    info->cache = g_new(BlockdevCacheInfo, 1);
-    *info->cache = (BlockdevCacheInfo) {
-        .writeback      = blk ? blk_enable_write_cache(blk) : true,
-        .direct         = !!(bs->open_flags & BDRV_O_NOCACHE),
-        .no_flush       = !!(bs->open_flags & BDRV_O_NO_FLUSH),
-    };
-
-    if (bs->node_name[0]) {
-        info->has_node_name = true;
-        info->node_name = g_strdup(bs->node_name);
-    }
-
-    if (bs->backing_file[0]) {
-        info->has_backing_file = true;
-        info->backing_file = g_strdup(bs->backing_file);
-    }
-
-    info->backing_file_depth = bdrv_get_backing_file_depth(bs);
-    info->detect_zeroes = bs->detect_zeroes;
-
-    if (bs->throttle_state) {
-        ThrottleConfig cfg;
-
-        throttle_group_get_config(bs, &cfg);
-
-        info->bps     = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
-        info->bps_rd  = cfg.buckets[THROTTLE_BPS_READ].avg;
-        info->bps_wr  = cfg.buckets[THROTTLE_BPS_WRITE].avg;
-
-        info->iops    = cfg.buckets[THROTTLE_OPS_TOTAL].avg;
-        info->iops_rd = cfg.buckets[THROTTLE_OPS_READ].avg;
-        info->iops_wr = cfg.buckets[THROTTLE_OPS_WRITE].avg;
-
-        info->has_bps_max     = cfg.buckets[THROTTLE_BPS_TOTAL].max;
-        info->bps_max         = cfg.buckets[THROTTLE_BPS_TOTAL].max;
-        info->has_bps_rd_max  = cfg.buckets[THROTTLE_BPS_READ].max;
-        info->bps_rd_max      = cfg.buckets[THROTTLE_BPS_READ].max;
-        info->has_bps_wr_max  = cfg.buckets[THROTTLE_BPS_WRITE].max;
-        info->bps_wr_max      = cfg.buckets[THROTTLE_BPS_WRITE].max;
-
-        info->has_iops_max    = cfg.buckets[THROTTLE_OPS_TOTAL].max;
-        info->iops_max        = cfg.buckets[THROTTLE_OPS_TOTAL].max;
-        info->has_iops_rd_max = cfg.buckets[THROTTLE_OPS_READ].max;
-        info->iops_rd_max     = cfg.buckets[THROTTLE_OPS_READ].max;
-        info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
-        info->iops_wr_max     = cfg.buckets[THROTTLE_OPS_WRITE].max;
-
-        info->has_bps_max_length     = info->has_bps_max;
-        info->bps_max_length         =
-            cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
-        info->has_bps_rd_max_length  = info->has_bps_rd_max;
-        info->bps_rd_max_length      =
-            cfg.buckets[THROTTLE_BPS_READ].burst_length;
-        info->has_bps_wr_max_length  = info->has_bps_wr_max;
-        info->bps_wr_max_length      =
-            cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
-
-        info->has_iops_max_length    = info->has_iops_max;
-        info->iops_max_length        =
-            cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
-        info->has_iops_rd_max_length = info->has_iops_rd_max;
-        info->iops_rd_max_length     =
-            cfg.buckets[THROTTLE_OPS_READ].burst_length;
-        info->has_iops_wr_max_length = info->has_iops_wr_max;
-        info->iops_wr_max_length     =
-            cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
-
-        info->has_iops_size = cfg.op_size;
-        info->iops_size = cfg.op_size;
-
-        info->has_group = true;
-        info->group = g_strdup(throttle_group_get_name(bs));
-    }
-
-    info->write_threshold = bdrv_write_threshold_get(bs);
-
-    bs0 = bs;
-    p_image_info = &info->image;
-    while (1) {
-        Error *local_err = NULL;
-        bdrv_query_image_info(bs0, p_image_info, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            qapi_free_BlockDeviceInfo(info);
-            return NULL;
-        }
-        if (bs0->drv && bs0->backing) {
-            bs0 = bs0->backing->bs;
-            (*p_image_info)->has_backing_image = true;
-            p_image_info = &((*p_image_info)->backing_image);
-        } else {
-            break;
-        }
-    }
-
-    return info;
-}
-
-/*
- * Returns 0 on success, with *p_list either set to describe snapshot
- * information, or NULL because there are no snapshots.  Returns -errno on
- * error, with *p_list untouched.
- */
-int bdrv_query_snapshot_info_list(BlockDriverState *bs,
-                                  SnapshotInfoList **p_list,
-                                  Error **errp)
-{
-    int i, sn_count;
-    QEMUSnapshotInfo *sn_tab = NULL;
-    SnapshotInfoList *info_list, *cur_item = NULL, *head = NULL;
-    SnapshotInfo *info;
-
-    sn_count = bdrv_snapshot_list(bs, &sn_tab);
-    if (sn_count < 0) {
-        const char *dev = bdrv_get_device_name(bs);
-        switch (sn_count) {
-        case -ENOMEDIUM:
-            error_setg(errp, "Device '%s' is not inserted", dev);
-            break;
-        case -ENOTSUP:
-            error_setg(errp,
-                       "Device '%s' does not support internal snapshots",
-                       dev);
-            break;
-        default:
-            error_setg_errno(errp, -sn_count,
-                             "Can't list snapshots of device '%s'", dev);
-            break;
-        }
-        return sn_count;
-    }
-
-    for (i = 0; i < sn_count; i++) {
-        info = g_new0(SnapshotInfo, 1);
-        info->id            = g_strdup(sn_tab[i].id_str);
-        info->name          = g_strdup(sn_tab[i].name);
-        info->vm_state_size = sn_tab[i].vm_state_size;
-        info->date_sec      = sn_tab[i].date_sec;
-        info->date_nsec     = sn_tab[i].date_nsec;
-        info->vm_clock_sec  = sn_tab[i].vm_clock_nsec / 1000000000;
-        info->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000;
-
-        info_list = g_new0(SnapshotInfoList, 1);
-        info_list->value = info;
-
-        /* XXX: waiting for the qapi to support qemu-queue.h types */
-        if (!cur_item) {
-            head = cur_item = info_list;
-        } else {
-            cur_item->next = info_list;
-            cur_item = info_list;
-        }
-
-    }
-
-    g_free(sn_tab);
-    *p_list = head;
-    return 0;
-}
-
-/**
- * bdrv_query_image_info:
- * @bs: block device to examine
- * @p_info: location to store image information
- * @errp: location to store error information
- *
- * Store "flat" image information in @p_info.
- *
- * "Flat" means it does *not* query backing image information,
- * i.e. (*pinfo)->has_backing_image will be set to false and
- * (*pinfo)->backing_image to NULL even when the image does in fact have
- * a backing image.
- *
- * @p_info will be set only on success. On error, store error in @errp.
- */
-void bdrv_query_image_info(BlockDriverState *bs,
-                           ImageInfo **p_info,
-                           Error **errp)
-{
-    int64_t size;
-    const char *backing_filename;
-    BlockDriverInfo bdi;
-    int ret;
-    Error *err = NULL;
-    ImageInfo *info;
-
-    aio_context_acquire(bdrv_get_aio_context(bs));
-
-    size = bdrv_getlength(bs);
-    if (size < 0) {
-        error_setg_errno(errp, -size, "Can't get size of device '%s'",
-                         bdrv_get_device_name(bs));
-        goto out;
-    }
-
-    info = g_new0(ImageInfo, 1);
-    info->filename        = g_strdup(bs->filename);
-    info->format          = g_strdup(bdrv_get_format_name(bs));
-    info->virtual_size    = size;
-    info->actual_size     = bdrv_get_allocated_file_size(bs);
-    info->has_actual_size = info->actual_size >= 0;
-    if (bdrv_is_encrypted(bs)) {
-        info->encrypted = true;
-        info->has_encrypted = true;
-    }
-    if (bdrv_get_info(bs, &bdi) >= 0) {
-        if (bdi.cluster_size != 0) {
-            info->cluster_size = bdi.cluster_size;
-            info->has_cluster_size = true;
-        }
-        info->dirty_flag = bdi.is_dirty;
-        info->has_dirty_flag = true;
-    }
-    info->format_specific     = bdrv_get_specific_info(bs);
-    info->has_format_specific = info->format_specific != NULL;
-
-    backing_filename = bs->backing_file;
-    if (backing_filename[0] != '\0') {
-        char *backing_filename2 = g_malloc0(PATH_MAX);
-        info->backing_filename = g_strdup(backing_filename);
-        info->has_backing_filename = true;
-        bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err);
-        if (err) {
-            /* Can't reconstruct the full backing filename, so we must omit
-             * this field and apply a Best Effort to this query. */
-            g_free(backing_filename2);
-            backing_filename2 = NULL;
-            error_free(err);
-            err = NULL;
-        }
-
-        /* Always report the full_backing_filename if present, even if it's the
-         * same as backing_filename. That they are same is useful info. */
-        if (backing_filename2) {
-            info->full_backing_filename = g_strdup(backing_filename2);
-            info->has_full_backing_filename = true;
-        }
-
-        if (bs->backing_format[0]) {
-            info->backing_filename_format = g_strdup(bs->backing_format);
-            info->has_backing_filename_format = true;
-        }
-        g_free(backing_filename2);
-    }
-
-    ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
-    switch (ret) {
-    case 0:
-        if (info->snapshots) {
-            info->has_snapshots = true;
-        }
-        break;
-    /* recoverable error */
-    case -ENOMEDIUM:
-    case -ENOTSUP:
-        error_free(err);
-        break;
-    default:
-        error_propagate(errp, err);
-        qapi_free_ImageInfo(info);
-        goto out;
-    }
-
-    *p_info = info;
-
-out:
-    aio_context_release(bdrv_get_aio_context(bs));
-}
-
-/* @p_info will be set only on success. */
-static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
-                            Error **errp)
-{
-    BlockInfo *info = g_malloc0(sizeof(*info));
-    BlockDriverState *bs = blk_bs(blk);
-    info->device = g_strdup(blk_name(blk));
-    info->type = g_strdup("unknown");
-    info->locked = blk_dev_is_medium_locked(blk);
-    info->removable = blk_dev_has_removable_media(blk);
-
-    if (blk_dev_has_tray(blk)) {
-        info->has_tray_open = true;
-        info->tray_open = blk_dev_is_tray_open(blk);
-    }
-
-    if (blk_iostatus_is_enabled(blk)) {
-        info->has_io_status = true;
-        info->io_status = blk_iostatus(blk);
-    }
-
-    if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
-        info->has_dirty_bitmaps = true;
-        info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
-    }
-
-    if (bs && bs->drv) {
-        info->has_inserted = true;
-        info->inserted = bdrv_block_device_info(blk, bs, errp);
-        if (info->inserted == NULL) {
-            goto err;
-        }
-    }
-
-    *p_info = info;
-    return;
-
- err:
-    qapi_free_BlockInfo(info);
-}
-
-static BlockStats *bdrv_query_stats(BlockBackend *blk,
-                                    const BlockDriverState *bs,
-                                    bool query_backing);
-
-static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
-{
-    BlockAcctStats *stats = blk_get_stats(blk);
-    BlockAcctTimedStats *ts = NULL;
-
-    ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
-    ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
-    ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
-    ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
-
-    ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
-    ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
-    ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
-
-    ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
-    ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
-    ds->invalid_flush_operations =
-        stats->invalid_ops[BLOCK_ACCT_FLUSH];
-
-    ds->rd_merged = stats->merged[BLOCK_ACCT_READ];
-    ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
-    ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
-    ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
-    ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
-    ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
-
-    ds->has_idle_time_ns = stats->last_access_time_ns > 0;
-    if (ds->has_idle_time_ns) {
-        ds->idle_time_ns = block_acct_idle_time_ns(stats);
-    }
-
-    ds->account_invalid = stats->account_invalid;
-    ds->account_failed = stats->account_failed;
-
-    while ((ts = block_acct_interval_next(stats, ts))) {
-        BlockDeviceTimedStatsList *timed_stats =
-            g_malloc0(sizeof(*timed_stats));
-        BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
-        timed_stats->next = ds->timed_stats;
-        timed_stats->value = dev_stats;
-        ds->timed_stats = timed_stats;
-
-        TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
-        TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
-        TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
-
-        dev_stats->interval_length = ts->interval_length;
-
-        dev_stats->min_rd_latency_ns = timed_average_min(rd);
-        dev_stats->max_rd_latency_ns = timed_average_max(rd);
-        dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
-
-        dev_stats->min_wr_latency_ns = timed_average_min(wr);
-        dev_stats->max_wr_latency_ns = timed_average_max(wr);
-        dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
-
-        dev_stats->min_flush_latency_ns = timed_average_min(fl);
-        dev_stats->max_flush_latency_ns = timed_average_max(fl);
-        dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
-
-        dev_stats->avg_rd_queue_depth =
-            block_acct_queue_depth(ts, BLOCK_ACCT_READ);
-        dev_stats->avg_wr_queue_depth =
-            block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
-    }
-}
-
-static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
-                                 bool query_backing)
-{
-    if (bdrv_get_node_name(bs)[0]) {
-        s->has_node_name = true;
-        s->node_name = g_strdup(bdrv_get_node_name(bs));
-    }
-
-    s->stats->wr_highest_offset = bs->wr_highest_offset;
-
-    if (bs->file) {
-        s->has_parent = true;
-        s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
-    }
-
-    if (query_backing && bs->backing) {
-        s->has_backing = true;
-        s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
-    }
-
-}
-
-static BlockStats *bdrv_query_stats(BlockBackend *blk,
-                                    const BlockDriverState *bs,
-                                    bool query_backing)
-{
-    BlockStats *s;
-
-    s = g_malloc0(sizeof(*s));
-    s->stats = g_malloc0(sizeof(*s->stats));
-
-    if (blk) {
-        s->has_device = true;
-        s->device = g_strdup(blk_name(blk));
-        bdrv_query_blk_stats(s->stats, blk);
-    }
-    if (bs) {
-        bdrv_query_bds_stats(s, bs, query_backing);
-    }
-
-    return s;
-}
-
-BlockInfoList *qmp_query_block(Error **errp)
-{
-    BlockInfoList *head = NULL, **p_next = &head;
-    BlockBackend *blk;
-    Error *local_err = NULL;
-
-    for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
-        BlockInfoList *info = g_malloc0(sizeof(*info));
-        bdrv_query_info(blk, &info->value, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            g_free(info);
-            qapi_free_BlockInfoList(head);
-            return NULL;
-        }
-
-        *p_next = info;
-        p_next = &info->next;
-    }
-
-    return head;
-}
-
-static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
-                           bool query_nodes)
-{
-    if (query_nodes) {
-        *bs = bdrv_next_node(*bs);
-        return !!*bs;
-    }
-
-    *blk = blk_next(*blk);
-    *bs = *blk ? blk_bs(*blk) : NULL;
-
-    return !!*blk;
-}
-
-BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
-                                     bool query_nodes,
-                                     Error **errp)
-{
-    BlockStatsList *head = NULL, **p_next = &head;
-    BlockBackend *blk = NULL;
-    BlockDriverState *bs = NULL;
-
-    /* Just to be safe if query_nodes is not always initialized */
-    query_nodes = has_query_nodes && query_nodes;
-
-    while (next_query_bds(&blk, &bs, query_nodes)) {
-        BlockStatsList *info = g_malloc0(sizeof(*info));
-        AioContext *ctx = blk ? blk_get_aio_context(blk)
-                              : bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        info->value = bdrv_query_stats(blk, bs, !query_nodes);
-        aio_context_release(ctx);
-
-        *p_next = info;
-        p_next = &info->next;
-    }
-
-    return head;
-}
-
-#define NB_SUFFIXES 4
-
-static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
-{
-    static const char suffixes[NB_SUFFIXES] = {'K', 'M', 'G', 'T'};
-    int64_t base;
-    int i;
-
-    if (size <= 999) {
-        snprintf(buf, buf_size, "%" PRId64, size);
-    } else {
-        base = 1024;
-        for (i = 0; i < NB_SUFFIXES; i++) {
-            if (size < (10 * base)) {
-                snprintf(buf, buf_size, "%0.1f%c",
-                         (double)size / base,
-                         suffixes[i]);
-                break;
-            } else if (size < (1000 * base) || i == (NB_SUFFIXES - 1)) {
-                snprintf(buf, buf_size, "%" PRId64 "%c",
-                         ((size + (base >> 1)) / base),
-                         suffixes[i]);
-                break;
-            }
-            base = base * 1024;
-        }
-    }
-    return buf;
-}
-
-void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f,
-                        QEMUSnapshotInfo *sn)
-{
-    char buf1[128], date_buf[128], clock_buf[128];
-    struct tm tm;
-    time_t ti;
-    int64_t secs;
-
-    if (!sn) {
-        func_fprintf(f,
-                     "%-10s%-20s%7s%20s%15s",
-                     "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
-    } else {
-        ti = sn->date_sec;
-        localtime_r(&ti, &tm);
-        strftime(date_buf, sizeof(date_buf),
-                 "%Y-%m-%d %H:%M:%S", &tm);
-        secs = sn->vm_clock_nsec / 1000000000;
-        snprintf(clock_buf, sizeof(clock_buf),
-                 "%02d:%02d:%02d.%03d",
-                 (int)(secs / 3600),
-                 (int)((secs / 60) % 60),
-                 (int)(secs % 60),
-                 (int)((sn->vm_clock_nsec / 1000000) % 1000));
-        func_fprintf(f,
-                     "%-10s%-20s%7s%20s%15s",
-                     sn->id_str, sn->name,
-                     get_human_readable_size(buf1, sizeof(buf1),
-                                             sn->vm_state_size),
-                     date_buf,
-                     clock_buf);
-    }
-}
-
-static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
-                       QDict *dict);
-static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
-                       QList *list);
-
-static void dump_qobject(fprintf_function func_fprintf, void *f,
-                         int comp_indent, QObject *obj)
-{
-    switch (qobject_type(obj)) {
-        case QTYPE_QINT: {
-            QInt *value = qobject_to_qint(obj);
-            func_fprintf(f, "%" PRId64, qint_get_int(value));
-            break;
-        }
-        case QTYPE_QSTRING: {
-            QString *value = qobject_to_qstring(obj);
-            func_fprintf(f, "%s", qstring_get_str(value));
-            break;
-        }
-        case QTYPE_QDICT: {
-            QDict *value = qobject_to_qdict(obj);
-            dump_qdict(func_fprintf, f, comp_indent, value);
-            break;
-        }
-        case QTYPE_QLIST: {
-            QList *value = qobject_to_qlist(obj);
-            dump_qlist(func_fprintf, f, comp_indent, value);
-            break;
-        }
-        case QTYPE_QFLOAT: {
-            QFloat *value = qobject_to_qfloat(obj);
-            func_fprintf(f, "%g", qfloat_get_double(value));
-            break;
-        }
-        case QTYPE_QBOOL: {
-            QBool *value = qobject_to_qbool(obj);
-            func_fprintf(f, "%s", qbool_get_bool(value) ? "true" : "false");
-            break;
-        }
-        default:
-            abort();
-    }
-}
-
-static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
-                       QList *list)
-{
-    const QListEntry *entry;
-    int i = 0;
-
-    for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
-        QType type = qobject_type(entry->value);
-        bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
-        func_fprintf(f, "%*s[%i]:%c", indentation * 4, "", i,
-                     composite ? '\n' : ' ');
-        dump_qobject(func_fprintf, f, indentation + 1, entry->value);
-        if (!composite) {
-            func_fprintf(f, "\n");
-        }
-    }
-}
-
-static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
-                       QDict *dict)
-{
-    const QDictEntry *entry;
-
-    for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
-        QType type = qobject_type(entry->value);
-        bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
-        char *key = g_malloc(strlen(entry->key) + 1);
-        int i;
-
-        /* replace dashes with spaces in key (variable) names */
-        for (i = 0; entry->key[i]; i++) {
-            key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
-        }
-        key[i] = 0;
-        func_fprintf(f, "%*s%s:%c", indentation * 4, "", key,
-                     composite ? '\n' : ' ');
-        dump_qobject(func_fprintf, f, indentation + 1, entry->value);
-        if (!composite) {
-            func_fprintf(f, "\n");
-        }
-        g_free(key);
-    }
-}
-
-void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
-                                   ImageInfoSpecific *info_spec)
-{
-    QmpOutputVisitor *ov = qmp_output_visitor_new();
-    QObject *obj, *data;
-
-    visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
-                                 &error_abort);
-    obj = qmp_output_get_qobject(ov);
-    assert(qobject_type(obj) == QTYPE_QDICT);
-    data = qdict_get(qobject_to_qdict(obj), "data");
-    dump_qobject(func_fprintf, f, 1, data);
-    qmp_output_visitor_cleanup(ov);
-}
-
-void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
-                          ImageInfo *info)
-{
-    char size_buf[128], dsize_buf[128];
-    if (!info->has_actual_size) {
-        snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
-    } else {
-        get_human_readable_size(dsize_buf, sizeof(dsize_buf),
-                                info->actual_size);
-    }
-    get_human_readable_size(size_buf, sizeof(size_buf), info->virtual_size);
-    func_fprintf(f,
-                 "image: %s\n"
-                 "file format: %s\n"
-                 "virtual size: %s (%" PRId64 " bytes)\n"
-                 "disk size: %s\n",
-                 info->filename, info->format, size_buf,
-                 info->virtual_size,
-                 dsize_buf);
-
-    if (info->has_encrypted && info->encrypted) {
-        func_fprintf(f, "encrypted: yes\n");
-    }
-
-    if (info->has_cluster_size) {
-        func_fprintf(f, "cluster_size: %" PRId64 "\n",
-                       info->cluster_size);
-    }
-
-    if (info->has_dirty_flag && info->dirty_flag) {
-        func_fprintf(f, "cleanly shut down: no\n");
-    }
-
-    if (info->has_backing_filename) {
-        func_fprintf(f, "backing file: %s", info->backing_filename);
-        if (!info->has_full_backing_filename) {
-            func_fprintf(f, " (cannot determine actual path)");
-        } else if (strcmp(info->backing_filename,
-                          info->full_backing_filename) != 0) {
-            func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
-        }
-        func_fprintf(f, "\n");
-        if (info->has_backing_filename_format) {
-            func_fprintf(f, "backing file format: %s\n",
-                         info->backing_filename_format);
-        }
-    }
-
-    if (info->has_snapshots) {
-        SnapshotInfoList *elem;
-
-        func_fprintf(f, "Snapshot list:\n");
-        bdrv_snapshot_dump(func_fprintf, f, NULL);
-        func_fprintf(f, "\n");
-
-        /* Ideally bdrv_snapshot_dump() would operate on SnapshotInfoList but
-         * we convert to the block layer's native QEMUSnapshotInfo for now.
-         */
-        for (elem = info->snapshots; elem; elem = elem->next) {
-            QEMUSnapshotInfo sn = {
-                .vm_state_size = elem->value->vm_state_size,
-                .date_sec = elem->value->date_sec,
-                .date_nsec = elem->value->date_nsec,
-                .vm_clock_nsec = elem->value->vm_clock_sec * 1000000000ULL +
-                                 elem->value->vm_clock_nsec,
-            };
-
-            pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
-            pstrcpy(sn.name, sizeof(sn.name), elem->value->name);
-            bdrv_snapshot_dump(func_fprintf, f, &sn);
-            func_fprintf(f, "\n");
-        }
-    }
-
-    if (info->has_format_specific) {
-        func_fprintf(f, "Format specific information:\n");
-        bdrv_image_info_specific_dump(func_fprintf, f, info->format_specific);
-    }
-}
diff --git a/qemu/block/qcow.c b/qemu/block/qcow.c
deleted file mode 100644
index 60ddb12ec..000000000
--- a/qemu/block/qcow.c
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*
- * Block driver for the QCOW format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "qemu/error-report.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "qapi/qmp/qerror.h"
-#include "crypto/cipher.h"
-#include "migration/migration.h"
-
-/**************************************************************/
-/* QEMU COW block driver with compression and encryption support */
-
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-#define QCOW_VERSION 1
-
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES  1
-
-#define QCOW_OFLAG_COMPRESSED (1LL << 63)
-
-typedef struct QCowHeader {
-    uint32_t magic;
-    uint32_t version;
-    uint64_t backing_file_offset;
-    uint32_t backing_file_size;
-    uint32_t mtime;
-    uint64_t size; /* in bytes */
-    uint8_t cluster_bits;
-    uint8_t l2_bits;
-    uint16_t padding;
-    uint32_t crypt_method;
-    uint64_t l1_table_offset;
-} QEMU_PACKED QCowHeader;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct BDRVQcowState {
-    int cluster_bits;
-    int cluster_size;
-    int cluster_sectors;
-    int l2_bits;
-    int l2_size;
-    unsigned int l1_size;
-    uint64_t cluster_offset_mask;
-    uint64_t l1_table_offset;
-    uint64_t *l1_table;
-    uint64_t *l2_cache;
-    uint64_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
-    uint8_t *cluster_cache;
-    uint8_t *cluster_data;
-    uint64_t cluster_cache_offset;
-    QCryptoCipher *cipher; /* NULL if no key yet */
-    uint32_t crypt_method_header;
-    CoMutex lock;
-    Error *migration_blocker;
-} BDRVQcowState;
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-
-static int qcow_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const QCowHeader *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(QCowHeader) &&
-        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-        be32_to_cpu(cow_header->version) == QCOW_VERSION)
-        return 100;
-    else
-        return 0;
-}
-
-static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVQcowState *s = bs->opaque;
-    unsigned int len, i, shift;
-    int ret;
-    QCowHeader header;
-
-    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
-    if (ret < 0) {
-        goto fail;
-    }
-    be32_to_cpus(&header.magic);
-    be32_to_cpus(&header.version);
-    be64_to_cpus(&header.backing_file_offset);
-    be32_to_cpus(&header.backing_file_size);
-    be32_to_cpus(&header.mtime);
-    be64_to_cpus(&header.size);
-    be32_to_cpus(&header.crypt_method);
-    be64_to_cpus(&header.l1_table_offset);
-
-    if (header.magic != QCOW_MAGIC) {
-        error_setg(errp, "Image not in qcow format");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (header.version != QCOW_VERSION) {
-        error_setg(errp, "Unsupported qcow version %" PRIu32, header.version);
-        ret = -ENOTSUP;
-        goto fail;
-    }
-
-    if (header.size <= 1) {
-        error_setg(errp, "Image size is too small (must be at least 2 bytes)");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (header.cluster_bits < 9 || header.cluster_bits > 16) {
-        error_setg(errp, "Cluster size must be between 512 and 64k");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* l2_bits specifies number of entries; storing a uint64_t in each entry,
-     * so bytes = num_entries << 3. */
-    if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
-        error_setg(errp, "L2 table size must be between 512 and 64k");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    if (header.crypt_method > QCOW_CRYPT_AES) {
-        error_setg(errp, "invalid encryption method in qcow header");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
-        error_setg(errp, "AES cipher not available");
-        ret = -EINVAL;
-        goto fail;
-    }
-    s->crypt_method_header = header.crypt_method;
-    if (s->crypt_method_header) {
-        if (bdrv_uses_whitelist() &&
-            s->crypt_method_header == QCOW_CRYPT_AES) {
-            error_report("qcow built-in AES encryption is deprecated");
-            error_printf("Support for it will be removed in a future release.\n"
-                         "You can use 'qemu-img convert' to switch to an\n"
-                         "unencrypted qcow image, or a LUKS raw image.\n");
-        }
-
-        bs->encrypted = 1;
-    }
-    s->cluster_bits = header.cluster_bits;
-    s->cluster_size = 1 << s->cluster_bits;
-    s->cluster_sectors = 1 << (s->cluster_bits - 9);
-    s->l2_bits = header.l2_bits;
-    s->l2_size = 1 << s->l2_bits;
-    bs->total_sectors = header.size / 512;
-    s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
-
-    /* read the level 1 table */
-    shift = s->cluster_bits + s->l2_bits;
-    if (header.size > UINT64_MAX - (1LL << shift)) {
-        error_setg(errp, "Image too large");
-        ret = -EINVAL;
-        goto fail;
-    } else {
-        uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift;
-        if (l1_size > INT_MAX / sizeof(uint64_t)) {
-            error_setg(errp, "Image too large");
-            ret = -EINVAL;
-            goto fail;
-        }
-        s->l1_size = l1_size;
-    }
-
-    s->l1_table_offset = header.l1_table_offset;
-    s->l1_table = g_try_new(uint64_t, s->l1_size);
-    if (s->l1_table == NULL) {
-        error_setg(errp, "Could not allocate memory for L1 table");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
-               s->l1_size * sizeof(uint64_t));
-    if (ret < 0) {
-        goto fail;
-    }
-
-    for(i = 0;i < s->l1_size; i++) {
-        be64_to_cpus(&s->l1_table[i]);
-    }
-
-    /* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
-    s->l2_cache =
-        qemu_try_blockalign(bs->file->bs,
-                            s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    if (s->l2_cache == NULL) {
-        error_setg(errp, "Could not allocate L2 table cache");
-        ret = -ENOMEM;
-        goto fail;
-    }
-    s->cluster_cache = g_malloc(s->cluster_size);
-    s->cluster_data = g_malloc(s->cluster_size);
-    s->cluster_cache_offset = -1;
-
-    /* read the backing file name */
-    if (header.backing_file_offset != 0) {
-        len = header.backing_file_size;
-        if (len > 1023 || len >= sizeof(bs->backing_file)) {
-            error_setg(errp, "Backing file name too long");
-            ret = -EINVAL;
-            goto fail;
-        }
-        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
-                   bs->backing_file, len);
-        if (ret < 0) {
-            goto fail;
-        }
-        bs->backing_file[len] = '\0';
-    }
-
-    /* Disable migration when qcow images are used */
-    error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
-
-    qemu_co_mutex_init(&s->lock);
-    return 0;
-
- fail:
-    g_free(s->l1_table);
-    qemu_vfree(s->l2_cache);
-    g_free(s->cluster_cache);
-    g_free(s->cluster_data);
-    return ret;
-}
-
-
-/* We have nothing to do for QCOW reopen, stubs just return
- * success */
-static int qcow_reopen_prepare(BDRVReopenState *state,
-                               BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static int qcow_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-    Error *err;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    assert(bs->encrypted);
-
-    qcrypto_cipher_free(s->cipher);
-    s->cipher = qcrypto_cipher_new(
-        QCRYPTO_CIPHER_ALG_AES_128,
-        QCRYPTO_CIPHER_MODE_CBC,
-        keybuf, G_N_ELEMENTS(keybuf),
-        &err);
-
-    if (!s->cipher) {
-        /* XXX would be nice if errors in this method could
-         * be properly propagate to the caller. Would need
-         * the bdrv_set_key() API signature to be fixed. */
-        error_free(err);
-        return -1;
-    }
-    return 0;
-}
-
-/* The crypt function is compatible with the linux cryptoloop
-   algorithm for < 4 GB images. NOTE: out_buf == in_buf is
-   supported */
-static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                           uint8_t *out_buf, const uint8_t *in_buf,
-                           int nb_sectors, bool enc, Error **errp)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int i;
-    int ret;
-
-    for(i = 0; i < nb_sectors; i++) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-        if (qcrypto_cipher_setiv(s->cipher,
-                                 ivec.b, G_N_ELEMENTS(ivec.b),
-                                 errp) < 0) {
-            return -1;
-        }
-        if (enc) {
-            ret = qcrypto_cipher_encrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        } else {
-            ret = qcrypto_cipher_decrypt(s->cipher,
-                                         in_buf,
-                                         out_buf,
-                                         512,
-                                         errp);
-        }
-        if (ret < 0) {
-            return -1;
-        }
-        sector_num++;
-        in_buf += 512;
-        out_buf += 512;
-    }
-    return 0;
-}
-
-/* 'allocate' is:
- *
- * 0 to not allocate.
- *
- * 1 to allocate a normal cluster (for sector indexes 'n_start' to
- * 'n_end')
- *
- * 2 to allocate a compressed cluster of size
- * 'compressed_size'. 'compressed_size' must be > 0 and <
- * cluster_size
- *
- * return 0 if not allocated.
- */
-static uint64_t get_cluster_offset(BlockDriverState *bs,
-                                   uint64_t offset, int allocate,
-                                   int compressed_size,
-                                   int n_start, int n_end)
-{
-    BDRVQcowState *s = bs->opaque;
-    int min_index, i, j, l1_index, l2_index;
-    uint64_t l2_offset, *l2_table, cluster_offset, tmp;
-    uint32_t min_count;
-    int new_l2_table;
-
-    l1_index = offset >> (s->l2_bits + s->cluster_bits);
-    l2_offset = s->l1_table[l1_index];
-    new_l2_table = 0;
-    if (!l2_offset) {
-        if (!allocate)
-            return 0;
-        /* allocate a new l2 entry */
-        l2_offset = bdrv_getlength(bs->file->bs);
-        /* round to cluster size */
-        l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
-        /* update the L1 entry */
-        s->l1_table[l1_index] = l2_offset;
-        tmp = cpu_to_be64(l2_offset);
-        if (bdrv_pwrite_sync(bs->file->bs,
-                s->l1_table_offset + l1_index * sizeof(tmp),
-                &tmp, sizeof(tmp)) < 0)
-            return 0;
-        new_l2_table = 1;
-    }
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == s->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++s->l2_cache_counts[i] == 0xffffffff) {
-                for(j = 0; j < L2_CACHE_SIZE; j++) {
-                    s->l2_cache_counts[j] >>= 1;
-                }
-            }
-            l2_table = s->l2_cache + (i << s->l2_bits);
-            goto found;
-        }
-    }
-    /* not found: load a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for(i = 0; i < L2_CACHE_SIZE; i++) {
-        if (s->l2_cache_counts[i] < min_count) {
-            min_count = s->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    l2_table = s->l2_cache + (min_index << s->l2_bits);
-    if (new_l2_table) {
-        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-        if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
-                s->l2_size * sizeof(uint64_t)) < 0)
-            return 0;
-    } else {
-        if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
-                       s->l2_size * sizeof(uint64_t)) !=
-            s->l2_size * sizeof(uint64_t))
-            return 0;
-    }
-    s->l2_cache_offsets[min_index] = l2_offset;
-    s->l2_cache_counts[min_index] = 1;
- found:
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-    if (!cluster_offset ||
-        ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
-        if (!allocate)
-            return 0;
-        /* allocate a new cluster */
-        if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
-            (n_end - n_start) < s->cluster_sectors) {
-            /* if the cluster is already compressed, we must
-               decompress it in the case it is not completely
-               overwritten */
-            if (decompress_cluster(bs, cluster_offset) < 0)
-                return 0;
-            cluster_offset = bdrv_getlength(bs->file->bs);
-            cluster_offset = (cluster_offset + s->cluster_size - 1) &
-                ~(s->cluster_size - 1);
-            /* write the cluster content */
-            if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
-                            s->cluster_size) !=
-                s->cluster_size)
-                return -1;
-        } else {
-            cluster_offset = bdrv_getlength(bs->file->bs);
-            if (allocate == 1) {
-                /* round to cluster size */
-                cluster_offset = (cluster_offset + s->cluster_size - 1) &
-                    ~(s->cluster_size - 1);
-                bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
-                /* if encrypted, we must initialize the cluster
-                   content which won't be written */
-                if (bs->encrypted &&
-                    (n_end - n_start) < s->cluster_sectors) {
-                    uint64_t start_sect;
-                    assert(s->cipher);
-                    start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
-                    memset(s->cluster_data + 512, 0x00, 512);
-                    for(i = 0; i < s->cluster_sectors; i++) {
-                        if (i < n_start || i >= n_end) {
-                            Error *err = NULL;
-                            if (encrypt_sectors(s, start_sect + i,
-                                                s->cluster_data,
-                                                s->cluster_data + 512, 1,
-                                                true, &err) < 0) {
-                                error_free(err);
-                                errno = EIO;
-                                return -1;
-                            }
-                            if (bdrv_pwrite(bs->file->bs,
-                                            cluster_offset + i * 512,
-                                            s->cluster_data, 512) != 512)
-                                return -1;
-                        }
-                    }
-                }
-            } else if (allocate == 2) {
-                cluster_offset |= QCOW_OFLAG_COMPRESSED |
-                    (uint64_t)compressed_size << (63 - s->cluster_bits);
-            }
-        }
-        /* update L2 table */
-        tmp = cpu_to_be64(cluster_offset);
-        l2_table[l2_index] = tmp;
-        if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
-                &tmp, sizeof(tmp)) < 0)
-            return 0;
-    }
-    return cluster_offset;
-}
-
-static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster, n;
-    uint64_t cluster_offset;
-
-    qemu_co_mutex_lock(&s->lock);
-    cluster_offset = get_cluster_offset(bs, sector_num << 9, 0, 0, 0, 0);
-    qemu_co_mutex_unlock(&s->lock);
-    index_in_cluster = sector_num & (s->cluster_sectors - 1);
-    n = s->cluster_sectors - index_in_cluster;
-    if (n > nb_sectors)
-        n = nb_sectors;
-    *pnum = n;
-    if (!cluster_offset) {
-        return 0;
-    }
-    if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
-        return BDRV_BLOCK_DATA;
-    }
-    cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
-    *file = bs->file->bs;
-    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
-                             const uint8_t *buf, int buf_size)
-{
-    z_stream strm1, *strm = &strm1;
-    int ret, out_len;
-
-    memset(strm, 0, sizeof(*strm));
-
-    strm->next_in = (uint8_t *)buf;
-    strm->avail_in = buf_size;
-    strm->next_out = out_buf;
-    strm->avail_out = out_buf_size;
-
-    ret = inflateInit2(strm, -12);
-    if (ret != Z_OK)
-        return -1;
-    ret = inflate(strm, Z_FINISH);
-    out_len = strm->next_out - out_buf;
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
-        out_len != out_buf_size) {
-        inflateEnd(strm);
-        return -1;
-    }
-    inflateEnd(strm);
-    return 0;
-}
-
-static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
-    BDRVQcowState *s = bs->opaque;
-    int ret, csize;
-    uint64_t coffset;
-
-    coffset = cluster_offset & s->cluster_offset_mask;
-    if (s->cluster_cache_offset != coffset) {
-        csize = cluster_offset >> (63 - s->cluster_bits);
-        csize &= (s->cluster_size - 1);
-        ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
-        if (ret != csize)
-            return -1;
-        if (decompress_buffer(s->cluster_cache, s->cluster_size,
-                              s->cluster_data, csize) < 0) {
-            return -1;
-        }
-        s->cluster_cache_offset = coffset;
-    }
-    return 0;
-}
-
-static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
-                         int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-    int ret = 0, n;
-    uint64_t cluster_offset;
-    struct iovec hd_iov;
-    QEMUIOVector hd_qiov;
-    uint8_t *buf;
-    void *orig_buf;
-    Error *err = NULL;
-
-    if (qiov->niov > 1) {
-        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
-        if (buf == NULL) {
-            return -ENOMEM;
-        }
-    } else {
-        orig_buf = NULL;
-        buf = (uint8_t *)qiov->iov->iov_base;
-    }
-
-    qemu_co_mutex_lock(&s->lock);
-
-    while (nb_sectors != 0) {
-        /* prepare next request */
-        cluster_offset = get_cluster_offset(bs, sector_num << 9,
-                                                 0, 0, 0, 0);
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-
-        if (!cluster_offset) {
-            if (bs->backing) {
-                /* read from the base image */
-                hd_iov.iov_base = (void *)buf;
-                hd_iov.iov_len = n * 512;
-                qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
-                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                    n, &hd_qiov);
-                qemu_co_mutex_lock(&s->lock);
-                if (ret < 0) {
-                    goto fail;
-                }
-            } else {
-                /* Note: in this case, no need to wait */
-                memset(buf, 0, 512 * n);
-            }
-        } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
-            /* add AIO support for compressed blocks ? */
-            if (decompress_cluster(bs, cluster_offset) < 0) {
-                goto fail;
-            }
-            memcpy(buf,
-                   s->cluster_cache + index_in_cluster * 512, 512 * n);
-        } else {
-            if ((cluster_offset & 511) != 0) {
-                goto fail;
-            }
-            hd_iov.iov_base = (void *)buf;
-            hd_iov.iov_len = n * 512;
-            qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
-            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
-                                (cluster_offset >> 9) + index_in_cluster,
-                                n, &hd_qiov);
-            qemu_co_mutex_lock(&s->lock);
-            if (ret < 0) {
-                break;
-            }
-            if (bs->encrypted) {
-                assert(s->cipher);
-                if (encrypt_sectors(s, sector_num, buf, buf,
-                                    n, false, &err) < 0) {
-                    goto fail;
-                }
-            }
-        }
-        ret = 0;
-
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-
-done:
-    qemu_co_mutex_unlock(&s->lock);
-
-    if (qiov->niov > 1) {
-        qemu_iovec_from_buf(qiov, 0, orig_buf, qiov->size);
-        qemu_vfree(orig_buf);
-    }
-
-    return ret;
-
-fail:
-    error_free(err);
-    ret = -EIO;
-    goto done;
-}
-
-static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
-                          int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVQcowState *s = bs->opaque;
-    int index_in_cluster;
-    uint64_t cluster_offset;
-    const uint8_t *src_buf;
-    int ret = 0, n;
-    uint8_t *cluster_data = NULL;
-    struct iovec hd_iov;
-    QEMUIOVector hd_qiov;
-    uint8_t *buf;
-    void *orig_buf;
-
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-
-    if (qiov->niov > 1) {
-        buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
-        if (buf == NULL) {
-            return -ENOMEM;
-        }
-        qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
-    } else {
-        orig_buf = NULL;
-        buf = (uint8_t *)qiov->iov->iov_base;
-    }
-
-    qemu_co_mutex_lock(&s->lock);
-
-    while (nb_sectors != 0) {
-
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        n = s->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 1, 0,
-                                            index_in_cluster,
-                                            index_in_cluster + n);
-        if (!cluster_offset || (cluster_offset & 511) != 0) {
-            ret = -EIO;
-            break;
-        }
-        if (bs->encrypted) {
-            Error *err = NULL;
-            assert(s->cipher);
-            if (!cluster_data) {
-                cluster_data = g_malloc0(s->cluster_size);
-            }
-            if (encrypt_sectors(s, sector_num, cluster_data, buf,
-                                n, true, &err) < 0) {
-                error_free(err);
-                ret = -EIO;
-                break;
-            }
-            src_buf = cluster_data;
-        } else {
-            src_buf = buf;
-        }
-
-        hd_iov.iov_base = (void *)src_buf;
-        hd_iov.iov_len = n * 512;
-        qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
-        qemu_co_mutex_unlock(&s->lock);
-        ret = bdrv_co_writev(bs->file->bs,
-                             (cluster_offset >> 9) + index_in_cluster,
-                             n, &hd_qiov);
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0) {
-            break;
-        }
-        ret = 0;
-
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    qemu_co_mutex_unlock(&s->lock);
-
-    if (qiov->niov > 1) {
-        qemu_vfree(orig_buf);
-    }
-    g_free(cluster_data);
-
-    return ret;
-}
-
-static void qcow_close(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-
-    qcrypto_cipher_free(s->cipher);
-    s->cipher = NULL;
-    g_free(s->l1_table);
-    qemu_vfree(s->l2_cache);
-    g_free(s->cluster_cache);
-    g_free(s->cluster_data);
-
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
-}
-
-static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int header_size, backing_filename_len, l1_size, shift, i;
-    QCowHeader header;
-    uint8_t *tmp;
-    int64_t total_size = 0;
-    char *backing_file = NULL;
-    int flags = 0;
-    Error *local_err = NULL;
-    int ret;
-    BlockBackend *qcow_blk;
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
-        flags |= BLOCK_FLAG_ENCRYPT;
-    }
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto cleanup;
-    }
-
-    qcow_blk = blk_new_open(filename, NULL, NULL,
-                            BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (qcow_blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto cleanup;
-    }
-
-    blk_set_allow_write_beyond_eof(qcow_blk, true);
-
-    ret = blk_truncate(qcow_blk, 0);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    memset(&header, 0, sizeof(header));
-    header.magic = cpu_to_be32(QCOW_MAGIC);
-    header.version = cpu_to_be32(QCOW_VERSION);
-    header.size = cpu_to_be64(total_size);
-    header_size = sizeof(header);
-    backing_filename_len = 0;
-    if (backing_file) {
-        if (strcmp(backing_file, "fat:")) {
-            header.backing_file_offset = cpu_to_be64(header_size);
-            backing_filename_len = strlen(backing_file);
-            header.backing_file_size = cpu_to_be32(backing_filename_len);
-            header_size += backing_filename_len;
-        } else {
-            /* special backing file for vvfat */
-            backing_file = NULL;
-        }
-        header.cluster_bits = 9; /* 512 byte cluster to avoid copying
-                                    unmodified sectors */
-        header.l2_bits = 12; /* 32 KB L2 tables */
-    } else {
-        header.cluster_bits = 12; /* 4 KB clusters */
-        header.l2_bits = 9; /* 4 KB L2 tables */
-    }
-    header_size = (header_size + 7) & ~7;
-    shift = header.cluster_bits + header.l2_bits;
-    l1_size = (total_size + (1LL << shift) - 1) >> shift;
-
-    header.l1_table_offset = cpu_to_be64(header_size);
-    if (flags & BLOCK_FLAG_ENCRYPT) {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
-    } else {
-        header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
-    }
-
-    /* write all the data */
-    ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
-    if (ret != sizeof(header)) {
-        goto exit;
-    }
-
-    if (backing_file) {
-        ret = blk_pwrite(qcow_blk, sizeof(header),
-            backing_file, backing_filename_len);
-        if (ret != backing_filename_len) {
-            goto exit;
-        }
-    }
-
-    tmp = g_malloc0(BDRV_SECTOR_SIZE);
-    for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
-        BDRV_SECTOR_SIZE); i++) {
-        ret = blk_pwrite(qcow_blk, header_size +
-            BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
-        if (ret != BDRV_SECTOR_SIZE) {
-            g_free(tmp);
-            goto exit;
-        }
-    }
-
-    g_free(tmp);
-    ret = 0;
-exit:
-    blk_unref(qcow_blk);
-cleanup:
-    g_free(backing_file);
-    return ret;
-}
-
-static int qcow_make_empty(BlockDriverState *bs)
-{
-    BDRVQcowState *s = bs->opaque;
-    uint32_t l1_length = s->l1_size * sizeof(uint64_t);
-    int ret;
-
-    memset(s->l1_table, 0, l1_length);
-    if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
-            l1_length) < 0)
-        return -1;
-    ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
-    if (ret < 0)
-        return ret;
-
-    memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
-    memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
-
-    return 0;
-}
-
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
-static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                 const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcowState *s = bs->opaque;
-    z_stream strm;
-    int ret, out_len;
-    uint8_t *out_buf;
-    uint64_t cluster_offset;
-
-    if (nb_sectors != s->cluster_sectors) {
-        ret = -EINVAL;
-
-        /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
-            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-            memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            ret = qcow_write_compressed(bs, sector_num,
-                                        pad_buf, s->cluster_sectors);
-            qemu_vfree(pad_buf);
-        }
-        return ret;
-    }
-
-    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
-    /* best compression, small window, no zlib header */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                       Z_DEFLATED, -12,
-                       9, Z_DEFAULT_STRATEGY);
-    if (ret != 0) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    strm.avail_in = s->cluster_size;
-    strm.next_in = (uint8_t *)buf;
-    strm.avail_out = s->cluster_size;
-    strm.next_out = out_buf;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret != Z_STREAM_END && ret != Z_OK) {
-        deflateEnd(&strm);
-        ret = -EINVAL;
-        goto fail;
-    }
-    out_len = strm.next_out - out_buf;
-
-    deflateEnd(&strm);
-
-    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
-        /* could not compress: write normal cluster */
-        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else {
-        cluster_offset = get_cluster_offset(bs, sector_num << 9, 2,
-                                            out_len, 0, 0);
-        if (cluster_offset == 0) {
-            ret = -EIO;
-            goto fail;
-        }
-
-        cluster_offset &= s->cluster_offset_mask;
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    ret = 0;
-fail:
-    g_free(out_buf);
-    return ret;
-}
-
-static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVQcowState *s = bs->opaque;
-    bdi->cluster_size = s->cluster_size;
-    return 0;
-}
-
-static QemuOptsList qcow_create_opts = {
-    .name = "qcow-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qcow_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_ENCRYPT,
-            .type = QEMU_OPT_BOOL,
-            .help = "Encrypt the image",
-            .def_value_str = "off"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_qcow = {
-    .format_name	= "qcow",
-    .instance_size	= sizeof(BDRVQcowState),
-    .bdrv_probe		= qcow_probe,
-    .bdrv_open		= qcow_open,
-    .bdrv_close		= qcow_close,
-    .bdrv_reopen_prepare    = qcow_reopen_prepare,
-    .bdrv_create            = qcow_create,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-    .supports_backing       = true,
-
-    .bdrv_co_readv          = qcow_co_readv,
-    .bdrv_co_writev         = qcow_co_writev,
-    .bdrv_co_get_block_status   = qcow_co_get_block_status,
-
-    .bdrv_set_key           = qcow_set_key,
-    .bdrv_make_empty        = qcow_make_empty,
-    .bdrv_write_compressed  = qcow_write_compressed,
-    .bdrv_get_info          = qcow_get_info,
-
-    .create_opts            = &qcow_create_opts,
-};
-
-static void bdrv_qcow_init(void)
-{
-    bdrv_register(&bdrv_qcow);
-}
-
-block_init(bdrv_qcow_init);
diff --git a/qemu/block/qcow2-cache.c b/qemu/block/qcow2-cache.c
deleted file mode 100644
index 0fe8edae4..000000000
--- a/qemu/block/qcow2-cache.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * L2/refcount table cache for the QCOW2 format
- *
- * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-/* Needed for CONFIG_MADVISE */
-#include "qemu/osdep.h"
-
-#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
-#include <sys/mman.h>
-#endif
-
-#include "block/block_int.h"
-#include "qemu-common.h"
-#include "qcow2.h"
-#include "trace.h"
-
-typedef struct Qcow2CachedTable {
-    int64_t  offset;
-    uint64_t lru_counter;
-    int      ref;
-    bool     dirty;
-} Qcow2CachedTable;
-
-struct Qcow2Cache {
-    Qcow2CachedTable       *entries;
-    struct Qcow2Cache      *depends;
-    int                     size;
-    bool                    depends_on_flush;
-    void                   *table_array;
-    uint64_t                lru_counter;
-    uint64_t                cache_clean_lru_counter;
-};
-
-static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
-                    Qcow2Cache *c, int table)
-{
-    BDRVQcow2State *s = bs->opaque;
-    return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
-}
-
-static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
-                  Qcow2Cache *c, void *table)
-{
-    BDRVQcow2State *s = bs->opaque;
-    ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
-    int idx = table_offset / s->cluster_size;
-    assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
-    return idx;
-}
-
-static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
-                                      int i, int num_tables)
-{
-#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
-    BDRVQcow2State *s = bs->opaque;
-    void *t = qcow2_cache_get_table_addr(bs, c, i);
-    int align = getpagesize();
-    size_t mem_size = (size_t) s->cluster_size * num_tables;
-    size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
-    size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
-    if (length > 0) {
-        qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
-    }
-#endif
-}
-
-static inline bool can_clean_entry(Qcow2Cache *c, int i)
-{
-    Qcow2CachedTable *t = &c->entries[i];
-    return t->ref == 0 && !t->dirty && t->offset != 0 &&
-        t->lru_counter <= c->cache_clean_lru_counter;
-}
-
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
-{
-    int i = 0;
-    while (i < c->size) {
-        int to_clean = 0;
-
-        /* Skip the entries that we don't need to clean */
-        while (i < c->size && !can_clean_entry(c, i)) {
-            i++;
-        }
-
-        /* And count how many we can clean in a row */
-        while (i < c->size && can_clean_entry(c, i)) {
-            c->entries[i].offset = 0;
-            c->entries[i].lru_counter = 0;
-            i++;
-            to_clean++;
-        }
-
-        if (to_clean > 0) {
-            qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
-        }
-    }
-
-    c->cache_clean_lru_counter = c->lru_counter;
-}
-
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
-{
-    BDRVQcow2State *s = bs->opaque;
-    Qcow2Cache *c;
-
-    c = g_new0(Qcow2Cache, 1);
-    c->size = num_tables;
-    c->entries = g_try_new0(Qcow2CachedTable, num_tables);
-    c->table_array = qemu_try_blockalign(bs->file->bs,
-                                         (size_t) num_tables * s->cluster_size);
-
-    if (!c->entries || !c->table_array) {
-        qemu_vfree(c->table_array);
-        g_free(c->entries);
-        g_free(c);
-        c = NULL;
-    }
-
-    return c;
-}
-
-int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
-{
-    int i;
-
-    for (i = 0; i < c->size; i++) {
-        assert(c->entries[i].ref == 0);
-    }
-
-    qemu_vfree(c->table_array);
-    g_free(c->entries);
-    g_free(c);
-
-    return 0;
-}
-
-static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
-{
-    int ret;
-
-    ret = qcow2_cache_flush(bs, c->depends);
-    if (ret < 0) {
-        return ret;
-    }
-
-    c->depends = NULL;
-    c->depends_on_flush = false;
-
-    return 0;
-}
-
-static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret = 0;
-
-    if (!c->entries[i].dirty || !c->entries[i].offset) {
-        return 0;
-    }
-
-    trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
-                                  c == s->l2_table_cache, i);
-
-    if (c->depends) {
-        ret = qcow2_cache_flush_dependency(bs, c);
-    } else if (c->depends_on_flush) {
-        ret = bdrv_flush(bs->file->bs);
-        if (ret >= 0) {
-            c->depends_on_flush = false;
-        }
-    }
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (c == s->refcount_block_cache) {
-        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
-                c->entries[i].offset, s->cluster_size);
-    } else if (c == s->l2_table_cache) {
-        ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
-                c->entries[i].offset, s->cluster_size);
-    } else {
-        ret = qcow2_pre_write_overlap_check(bs, 0,
-                c->entries[i].offset, s->cluster_size);
-    }
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (c == s->refcount_block_cache) {
-        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
-    } else if (c == s->l2_table_cache) {
-        BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
-    }
-
-    ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
-                      qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
-    if (ret < 0) {
-        return ret;
-    }
-
-    c->entries[i].dirty = false;
-
-    return 0;
-}
-
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int result = 0;
-    int ret;
-    int i;
-
-    trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
-
-    for (i = 0; i < c->size; i++) {
-        ret = qcow2_cache_entry_flush(bs, c, i);
-        if (ret < 0 && result != -ENOSPC) {
-            result = ret;
-        }
-    }
-
-    if (result == 0) {
-        ret = bdrv_flush(bs->file->bs);
-        if (ret < 0) {
-            result = ret;
-        }
-    }
-
-    return result;
-}
-
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
-    Qcow2Cache *dependency)
-{
-    int ret;
-
-    if (dependency->depends) {
-        ret = qcow2_cache_flush_dependency(bs, dependency);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    if (c->depends && (c->depends != dependency)) {
-        ret = qcow2_cache_flush_dependency(bs, c);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    c->depends = dependency;
-    return 0;
-}
-
-void qcow2_cache_depends_on_flush(Qcow2Cache *c)
-{
-    c->depends_on_flush = true;
-}
-
-int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
-{
-    int ret, i;
-
-    ret = qcow2_cache_flush(bs, c);
-    if (ret < 0) {
-        return ret;
-    }
-
-    for (i = 0; i < c->size; i++) {
-        assert(c->entries[i].ref == 0);
-        c->entries[i].offset = 0;
-        c->entries[i].lru_counter = 0;
-    }
-
-    qcow2_cache_table_release(bs, c, 0, c->size);
-
-    c->lru_counter = 0;
-
-    return 0;
-}
-
-static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
-    uint64_t offset, void **table, bool read_from_disk)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int i;
-    int ret;
-    int lookup_index;
-    uint64_t min_lru_counter = UINT64_MAX;
-    int min_lru_index = -1;
-
-    trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
-                          offset, read_from_disk);
-
-    /* Check if the table is already cached */
-    i = lookup_index = (offset / s->cluster_size * 4) % c->size;
-    do {
-        const Qcow2CachedTable *t = &c->entries[i];
-        if (t->offset == offset) {
-            goto found;
-        }
-        if (t->ref == 0 && t->lru_counter < min_lru_counter) {
-            min_lru_counter = t->lru_counter;
-            min_lru_index = i;
-        }
-        if (++i == c->size) {
-            i = 0;
-        }
-    } while (i != lookup_index);
-
-    if (min_lru_index == -1) {
-        /* This can't happen in current synchronous code, but leave the check
-         * here as a reminder for whoever starts using AIO with the cache */
-        abort();
-    }
-
-    /* Cache miss: write a table back and replace it */
-    i = min_lru_index;
-    trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
-                                        c == s->l2_table_cache, i);
-
-    ret = qcow2_cache_entry_flush(bs, c, i);
-    if (ret < 0) {
-        return ret;
-    }
-
-    trace_qcow2_cache_get_read(qemu_coroutine_self(),
-                               c == s->l2_table_cache, i);
-    c->entries[i].offset = 0;
-    if (read_from_disk) {
-        if (c == s->l2_table_cache) {
-            BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
-        }
-
-        ret = bdrv_pread(bs->file->bs, offset,
-                         qcow2_cache_get_table_addr(bs, c, i),
-                         s->cluster_size);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    c->entries[i].offset = offset;
-
-    /* And return the right table */
-found:
-    c->entries[i].ref++;
-    *table = qcow2_cache_get_table_addr(bs, c, i);
-
-    trace_qcow2_cache_get_done(qemu_coroutine_self(),
-                               c == s->l2_table_cache, i);
-
-    return 0;
-}
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-    void **table)
-{
-    return qcow2_cache_do_get(bs, c, offset, table, true);
-}
-
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-    void **table)
-{
-    return qcow2_cache_do_get(bs, c, offset, table, false);
-}
-
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
-{
-    int i = qcow2_cache_get_table_idx(bs, c, *table);
-
-    c->entries[i].ref--;
-    *table = NULL;
-
-    if (c->entries[i].ref == 0) {
-        c->entries[i].lru_counter = ++c->lru_counter;
-    }
-
-    assert(c->entries[i].ref >= 0);
-}
-
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table)
-{
-    int i = qcow2_cache_get_table_idx(bs, c, table);
-    assert(c->entries[i].offset != 0);
-    c->entries[i].dirty = true;
-}
diff --git a/qemu/block/qcow2-cluster.c b/qemu/block/qcow2-cluster.c
deleted file mode 100644
index 31ecc1030..000000000
--- a/qemu/block/qcow2-cluster.c
+++ /dev/null
@@ -1,1899 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include <zlib.h>
-
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-#include "trace.h"
-
-/*
- * Grows the L1 table so that it holds at least @min_size entries.
- *
- * If @exact_size is true the new table has exactly @min_size entries;
- * otherwise the size is bumped by roughly 1.5x steps starting from the
- * current size, to reduce the number of future grow operations.
- *
- * A new table is allocated in the image file, written out, and then
- * activated by rewriting the l1_size / l1_table_offset header fields in a
- * single synced write. Only after that is the in-memory state switched over
- * and the old table's clusters freed.
- *
- * Returns 0 on success (including when no growth is needed), -EFBIG if the
- * requested size is too large, -ENOMEM if the new table cannot be allocated
- * in memory, or another negative errno from the allocation / I/O helpers.
- */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
-                        bool exact_size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int new_l1_size2, ret, i;
-    uint64_t *new_l1_table;
-    int64_t old_l1_table_offset, old_l1_size;
-    int64_t new_l1_table_offset, new_l1_size;
-    uint8_t data[12];
-
-    if (min_size <= s->l1_size) {
-        return 0;
-    }
-
-    /* Do a sanity check on min_size before trying to calculate new_l1_size
-     * (this prevents overflows during the while loop for the calculation of
-     * new_l1_size) */
-    if (min_size > INT_MAX / sizeof(uint64_t)) {
-        return -EFBIG;
-    }
-
-    if (exact_size) {
-        new_l1_size = min_size;
-    } else {
-        /* Bump size up to reduce the number of times we have to grow */
-        new_l1_size = s->l1_size;
-        if (new_l1_size == 0) {
-            new_l1_size = 1;
-        }
-        while (min_size > new_l1_size) {
-            new_l1_size = (new_l1_size * 3 + 1) / 2;
-        }
-    }
-
-    /* The byte size computed below is stored in an int */
-    if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
-        return -EFBIG;
-    }
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
-            s->l1_size, new_l1_size);
-#endif
-
-    new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_try_blockalign(bs->file->bs,
-                                       align_offset(new_l1_size2, 512));
-    if (new_l1_table == NULL) {
-        return -ENOMEM;
-    }
-    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
-
-    memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
-
-    /* write new table (align to cluster) */
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ALLOC_TABLE);
-    new_l1_table_offset = qcow2_alloc_clusters(bs, new_l1_size2);
-    if (new_l1_table_offset < 0) {
-        qemu_vfree(new_l1_table);
-        return new_l1_table_offset;
-    }
-
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* the L1 position has not yet been updated, so these clusters must
-     * indeed be completely free */
-    ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
-                                        new_l1_size2);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Only the entries copied from the old table can be non-zero, so only
-     * those need byte swapping before and after the write */
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
-    for (i = 0; i < s->l1_size; i++) {
-        new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    }
-    ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
-                           new_l1_table, new_l1_size2);
-    if (ret < 0) {
-        goto fail;
-    }
-    for (i = 0; i < s->l1_size; i++) {
-        new_l1_table[i] = be64_to_cpu(new_l1_table[i]);
-    }
-
-    /* set new table */
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
-    /* data is a plain byte buffer: use the byte-wise store helpers rather
-     * than casting it to uint32_t *, which is neither alignment- nor
-     * aliasing-safe */
-    stl_be_p(data, new_l1_size);
-    stq_be_p(data + 4, new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
-                           data, sizeof(data));
-    if (ret < 0) {
-        goto fail;
-    }
-    qemu_vfree(s->l1_table);
-    old_l1_table_offset = s->l1_table_offset;
-    s->l1_table_offset = new_l1_table_offset;
-    s->l1_table = new_l1_table;
-    old_l1_size = s->l1_size;
-    s->l1_size = new_l1_size;
-    qcow2_free_clusters(bs, old_l1_table_offset, old_l1_size * sizeof(uint64_t),
-                        QCOW2_DISCARD_OTHER);
-    return 0;
- fail:
-    qemu_vfree(new_l1_table);
-    qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
-                        QCOW2_DISCARD_OTHER);
-    return ret;
-}
-
-/*
- * l2_load
- *
- * Obtains the L2 table located at l2_offset through the L2 table cache and
- * stores a pointer to it in *l2_table.
- *
- * Returns the result of qcow2_cache_get() (a negative value on failure).
- * Callers in this file release the table again with qcow2_cache_put().
- */
-
-static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
-    uint64_t **l2_table)
-{
-    BDRVQcow2State *s = bs->opaque;
-
-    return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                           (void **) l2_table);
-}
-
-/*
- * Writes the 512-byte sector of the L1 table that contains entry l1_index to
- * the image file. A whole sector is written rather than a single entry;
- * the original note on this function gives the reason as not wanting the
- * block layer to perform a read-modify-write for a single-entry update.
- *
- * Slots of the sector that lie beyond the end of the L1 table are written
- * as zero.
- *
- * Returns 0 on success, a negative value if the overlap check or the write
- * fails.
- */
-#define L1_ENTRIES_PER_SECTOR (512 / 8)
-int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
-    int l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
-    int slot = 0;
-    int ret;
-
-    /* Big-endian copy of every in-range entry of this sector */
-    while (slot < L1_ENTRIES_PER_SECTOR && l1_start_index + slot < s->l1_size) {
-        buf[slot] = cpu_to_be64(s->l1_table[l1_start_index + slot]);
-        slot++;
-    }
-
-    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
-            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
-    if (ret >= 0) {
-        BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-        ret = bdrv_pwrite_sync(bs->file->bs,
-                               s->l1_table_offset + 8 * l1_start_index,
-                               buf, sizeof(buf));
-    }
-
-    return ret < 0 ? ret : 0;
-}
-
-/*
- * l2_allocate
- *
- * Allocate a new L2 table in the image file and hook it into the L1 table at
- * l1_index. If the L1 entry at l1_index already points to an L2 table (i.e.
- * we are doing a copy on write for the L2 table), the contents of the old L2
- * table are copied into the newly allocated one. Otherwise the new table is
- * initialized with zeros.
- *
- * On success, returns 0 and stores in *table a pointer to the new L2 table
- * inside the L2 table cache. The cache reference taken here is not released
- * by this function.
- *
- * On failure, returns a negative value, restores the in-memory L1 entry,
- * releases the cache reference (if one was taken, which also sets *table to
- * NULL) and frees the clusters that were allocated for the new table.
- */
-
-static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t old_l2_offset;
-    uint64_t *l2_table = NULL;
-    int64_t l2_offset;
-    int ret;
-
-    /* Remember the current L1 entry so the fail path can restore it */
-    old_l2_offset = s->l1_table[l1_index];
-
-    trace_qcow2_l2_allocate(bs, l1_index);
-
-    /* allocate clusters for the new L2 table */
-
-    l2_offset = qcow2_alloc_clusters(bs, s->l2_size * sizeof(uint64_t));
-    if (l2_offset < 0) {
-        ret = l2_offset;
-        goto fail;
-    }
-
-    /* Flush the refcount block cache before the new table gets used */
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* allocate a new entry in the l2 cache */
-
-    trace_qcow2_l2_allocate_get_empty(bs, l1_index);
-    ret = qcow2_cache_get_empty(bs, s->l2_table_cache, l2_offset, (void**) table);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* From here on l2_table != NULL tells the fail path that a cache
-     * reference is held */
-    l2_table = *table;
-
-    if ((old_l2_offset & L1E_OFFSET_MASK) == 0) {
-        /* if there was no old l2 table, clear the new table */
-        memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
-    } else {
-        uint64_t* old_table;
-
-        /* if there was an old l2 table, read it from the disk */
-        BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_COW_READ);
-        ret = qcow2_cache_get(bs, s->l2_table_cache,
-            old_l2_offset & L1E_OFFSET_MASK,
-            (void**) &old_table);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        memcpy(l2_table, old_table, s->cluster_size);
-
-        qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
-    }
-
-    /* write the l2 table to the file */
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
-
-    trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-    ret = qcow2_cache_flush(bs, s->l2_table_cache);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* update the L1 entry (in memory first, then on disk) */
-    trace_qcow2_l2_allocate_write_l1(bs, l1_index);
-    s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
-    ret = qcow2_write_l1_entry(bs, l1_index);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    *table = l2_table;
-    trace_qcow2_l2_allocate_done(bs, l1_index, 0);
-    return 0;
-
-fail:
-    trace_qcow2_l2_allocate_done(bs, l1_index, ret);
-    if (l2_table != NULL) {
-        qcow2_cache_put(bs, s->l2_table_cache, (void**) table);
-    }
-    /* Undo the in-memory L1 update, if it already happened */
-    s->l1_table[l1_index] = old_l2_offset;
-    /* l2_offset is only positive if the cluster allocation succeeded */
-    if (l2_offset > 0) {
-        qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
-                            QCOW2_DISCARD_ALWAYS);
-    }
-    return ret;
-}
-
-/*
- * Counts how many of the first nb_clusters entries of l2_table describe
- * clusters that follow each other in the image file, starting at the host
- * offset of entry 0. Entries are compared after masking with the offset
- * bits, the compressed flag and stop_flags, so a change in any stop flag
- * relative to the first entry ends the run and that entry is not counted.
- * (This allows it, for example, to stop at the first compressed cluster,
- * which may require a different handling.)
- *
- * Returns 0 if the masked first entry is zero. Otherwise the first entry
- * must be a normal cluster.
- */
-static int count_contiguous_clusters(int nb_clusters, int cluster_size,
-        uint64_t *l2_table, uint64_t stop_flags)
-{
-    const uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
-    const uint64_t first_entry = be64_to_cpu(l2_table[0]);
-    const uint64_t base = first_entry & mask;
-    int count = 0;
-
-    if (base == 0) {
-        return 0;
-    }
-
-    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
-
-    while (count < nb_clusters) {
-        uint64_t expected = base + (uint64_t) count * cluster_size;
-        uint64_t actual = be64_to_cpu(l2_table[count]) & mask;
-
-        if (actual != expected) {
-            break;
-        }
-        count++;
-    }
-
-    return count;
-}
-
-/*
- * Returns the length of the leading run of entries in l2_table (looking at
- * no more than nb_clusters of them) whose cluster type equals wanted_type.
- */
-static int count_contiguous_clusters_by_type(int nb_clusters,
-                                             uint64_t *l2_table,
-                                             int wanted_type)
-{
-    int matched = 0;
-
-    while (matched < nb_clusters) {
-        int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[matched]));
-
-        if (type != wanted_type) {
-            return matched;
-        }
-        matched++;
-    }
-
-    return matched;
-}
-
-/*
- * Encrypts (enc == true) or decrypts (enc == false) nb_sectors sectors of
- * 512 bytes each from in_buf into out_buf with s->cipher. For every sector
- * a fresh 16-byte IV is set: the little-endian sector number in the first
- * eight bytes, zero in the remaining eight.
- *
- * The crypt function is compatible with the linux cryptoloop algorithm for
- * < 4 GB images. NOTE: out_buf == in_buf is supported.
- *
- * Returns 0 on success and -1 as soon as setting the IV or the cipher
- * operation fails; errp is forwarded to the qcrypto calls.
- */
-int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc,
-                          Error **errp)
-{
-    union {
-        uint64_t ll[2];
-        uint8_t b[16];
-    } ivec;
-    int done;
-    int ret;
-
-    for (done = 0; done < nb_sectors;
-         done++, sector_num++, in_buf += 512, out_buf += 512) {
-        ivec.ll[0] = cpu_to_le64(sector_num);
-        ivec.ll[1] = 0;
-
-        ret = qcrypto_cipher_setiv(s->cipher,
-                                   ivec.b, G_N_ELEMENTS(ivec.b),
-                                   errp);
-        if (ret < 0) {
-            return -1;
-        }
-
-        ret = enc
-            ? qcrypto_cipher_encrypt(s->cipher, in_buf, out_buf, 512, errp)
-            : qcrypto_cipher_decrypt(s->cipher, in_buf, out_buf, 512, errp);
-        if (ret < 0) {
-            return -1;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Copies the sectors [n_start, n_end), counted relative to guest sector
- * start_sect, into the host cluster at byte offset cluster_offset.
- *
- * The data is read through bs's own driver into a temporary bounce buffer,
- * encrypted in place if bs->encrypted is set, and then written to bs->file
- * at cluster_offset plus the same sector displacement.
- *
- * Returns 0 on success (including when the range is empty), -ENOMEM if the
- * bounce buffer cannot be allocated, -ENOMEDIUM if bs has no driver, -EIO if
- * encryption fails, or the negative value returned by the read, the overlap
- * check or the write.
- */
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
-                                     uint64_t start_sect,
-                                     uint64_t cluster_offset,
-                                     int n_start, int n_end)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QEMUIOVector qiov;
-    struct iovec iov;
-    int n, ret;
-
-    /* Nothing to copy for an empty (or inverted) range */
-    n = n_end - n_start;
-    if (n <= 0) {
-        return 0;
-    }
-
-    iov.iov_len = n * BDRV_SECTOR_SIZE;
-    iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
-    if (iov.iov_base == NULL) {
-        return -ENOMEM;
-    }
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
-
-    /* bs->drv is dereferenced directly below, so make sure it is set */
-    if (!bs->drv) {
-        ret = -ENOMEDIUM;
-        goto out;
-    }
-
-    /* Call .bdrv_co_readv() directly instead of using the public block-layer
-     * interface.  This avoids double I/O throttling and request tracking,
-     * which can lead to deadlock when block layer copy-on-read is enabled.
-     */
-    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
-    if (ret < 0) {
-        goto out;
-    }
-
-    if (bs->encrypted) {
-        Error *err = NULL;
-        assert(s->cipher);
-        /* Encrypt the bounce buffer in place before it is written out */
-        if (qcow2_encrypt_sectors(s, start_sect + n_start,
-                                  iov.iov_base, iov.iov_base, n,
-                                  true, &err) < 0) {
-            ret = -EIO;
-            error_free(err);
-            goto out;
-        }
-    }
-
-    ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
-    if (ret < 0) {
-        goto out;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    /* cluster_offset is in bytes; >> 9 converts it to 512-byte sectors */
-    ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
-                         &qiov);
-    if (ret < 0) {
-        goto out;
-    }
-
-    ret = 0;
-out:
-    qemu_vfree(iov.iov_base);
-    return ret;
-}
-
-
-/*
- * get_cluster_offset
- *
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
- *
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
- *
- * on exit, *num is the number of contiguous sectors we can read.
- *
- * For unallocated and zero clusters *cluster_offset is set to 0. For
- * compressed clusters it keeps the offset and size bits of the L2 entry
- * (L2E_COMPRESSED_OFFSET_SIZE_MASK), and only one cluster is reported.
- *
- * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
- * cases. On the error paths *num is not updated.
- */
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset)
-{
-    BDRVQcow2State *s = bs->opaque;
-    unsigned int l2_index;
-    uint64_t l1_index, l2_offset, *l2_table;
-    int l1_bits, c;
-    unsigned int index_in_cluster, nb_clusters;
-    uint64_t nb_available, nb_needed;
-    int ret;
-
-    /* Sector index of offset within its cluster; the sector counts below
-     * are measured from the start of that cluster */
-    index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
-    nb_needed = *num + index_in_cluster;
-
-    l1_bits = s->l2_bits + s->cluster_bits;
-
-    /* compute how many bytes there are between the offset and
-     * the end of the l1 entry
-     */
-
-    nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
-
-    /* compute the number of available sectors */
-
-    nb_available = (nb_available >> 9) + index_in_cluster;
-
-    /* Never look beyond the area covered by the current L1 entry */
-    if (nb_needed > nb_available) {
-        nb_needed = nb_available;
-    }
-    assert(nb_needed <= INT_MAX);
-
-    *cluster_offset = 0;
-
-    /* seek to the l2 offset in the l1 table */
-
-    l1_index = offset >> l1_bits;
-    if (l1_index >= s->l1_size) {
-        ret = QCOW2_CLUSTER_UNALLOCATED;
-        goto out;
-    }
-
-    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
-    if (!l2_offset) {
-        ret = QCOW2_CLUSTER_UNALLOCATED;
-        goto out;
-    }
-
-    if (offset_into_cluster(s, l2_offset)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
-                                " unaligned (L1 index: %#" PRIx64 ")",
-                                l2_offset, l1_index);
-        return -EIO;
-    }
-
-    /* load the l2 table in memory */
-
-    ret = l2_load(bs, l2_offset, &l2_table);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* find the cluster offset for the given disk offset */
-
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-    *cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
-    /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
-    nb_clusters = size_to_clusters(s, nb_needed << 9);
-
-    /* c receives the number of contiguous clusters of the same kind */
-    ret = qcow2_get_cluster_type(*cluster_offset);
-    switch (ret) {
-    case QCOW2_CLUSTER_COMPRESSED:
-        /* Compressed clusters can only be processed one by one */
-        c = 1;
-        *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
-        break;
-    case QCOW2_CLUSTER_ZERO:
-        if (s->qcow_version < 3) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
-                                    " in pre-v3 image (L2 offset: %#" PRIx64
-                                    ", L2 index: %#x)", l2_offset, l2_index);
-            ret = -EIO;
-            goto fail;
-        }
-        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
-                                              QCOW2_CLUSTER_ZERO);
-        *cluster_offset = 0;
-        break;
-    case QCOW2_CLUSTER_UNALLOCATED:
-        /* how many empty clusters ? */
-        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
-                                              QCOW2_CLUSTER_UNALLOCATED);
-        *cluster_offset = 0;
-        break;
-    case QCOW2_CLUSTER_NORMAL:
-        /* how many allocated clusters ? */
-        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], QCOW_OFLAG_ZERO);
-        *cluster_offset &= L2E_OFFSET_MASK;
-        if (offset_into_cluster(s, *cluster_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset %#"
-                                    PRIx64 " unaligned (L2 offset: %#" PRIx64
-                                    ", L2 index: %#x)", *cluster_offset,
-                                    l2_offset, l2_index);
-            ret = -EIO;
-            goto fail;
-        }
-        break;
-    default:
-        abort();
-    }
-
-    qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-
-    nb_available = (c * s->cluster_sectors);
-
-out:
-    /* Reached with ret holding the cluster type; nb_available still counts
-     * from the start of the first cluster, hence the subtraction below */
-    if (nb_available > nb_needed)
-        nb_available = nb_needed;
-
-    *num = nb_available - index_in_cluster;
-
-    return ret;
-
-fail:
-    /* Error after the L2 table was loaded: release the cache reference */
-    qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
-    return ret;
-}
-
-/*
- * get_cluster_table
- *
- * for a given disk offset, load (and allocate if needed)
- * the l2 table.
- *
- * A pointer to the l2 table (obtained through the L2 table cache) and the
- * cluster index in the l2 table are given to the caller in *new_l2_table and
- * *new_l2_index. The callers in this file release the table with
- * qcow2_cache_put() when they are done.
- *
- * The L1 table is grown if offset lies beyond its current end. If the L1
- * entry does not have QCOW_OFLAG_COPIED set, a new L2 table is allocated
- * via l2_allocate() and the old one (if any) is freed.
- *
- * Returns 0 on success, -errno in failure case
- */
-static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
-                             uint64_t **new_l2_table,
-                             int *new_l2_index)
-{
-    BDRVQcow2State *s = bs->opaque;
-    unsigned int l2_index;
-    uint64_t l1_index, l2_offset;
-    uint64_t *l2_table = NULL;
-    int ret;
-
-    /* seek to the l2 offset in the l1 table */
-
-    l1_index = offset >> (s->l2_bits + s->cluster_bits);
-    if (l1_index >= s->l1_size) {
-        /* exact_size == false: let the table grow by more than one entry */
-        ret = qcow2_grow_l1_table(bs, l1_index + 1, false);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    assert(l1_index < s->l1_size);
-    l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
-    if (offset_into_cluster(s, l2_offset)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#" PRIx64
-                                " unaligned (L1 index: %#" PRIx64 ")",
-                                l2_offset, l1_index);
-        return -EIO;
-    }
-
-    /* seek the l2 table of the given l2 offset */
-
-    if (s->l1_table[l1_index] & QCOW_OFLAG_COPIED) {
-        /* load the l2 table in memory */
-        ret = l2_load(bs, l2_offset, &l2_table);
-        if (ret < 0) {
-            return ret;
-        }
-    } else {
-        /* First allocate a new L2 table (and do COW if needed) */
-        ret = l2_allocate(bs, l1_index, &l2_table);
-        if (ret < 0) {
-            return ret;
-        }
-
-        /* Then decrease the refcount of the old table */
-        if (l2_offset) {
-            qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
-                                QCOW2_DISCARD_OTHER);
-        }
-    }
-
-    /* find the cluster offset for the given disk offset */
-
-    l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
-
-    *new_l2_table = l2_table;
-    *new_l2_index = l2_index;
-
-    return 0;
-}
-
-/*
- * alloc_compressed_cluster_offset
- *
- * Allocates space for a compressed cluster of compressed_size bytes for the
- * guest cluster at the given disk offset and records it in the L2 table.
- *
- * Compression can't overwrite anything, so this fails if the L2 entry for
- * that cluster already has an offset set.
- *
- * Returns the new L2 entry value (host offset combined with the compressed
- * flag and the sector count) if successful, 0 otherwise.
- */
-
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
-                                               uint64_t offset,
-                                               int compressed_size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table;
-    uint64_t result = 0;
-    int64_t cluster_offset;
-    int l2_index;
-    int nb_csectors;
-
-    if (get_cluster_table(bs, offset, &l2_table, &l2_index) < 0) {
-        return 0;
-    }
-
-    /* Refuse to replace a cluster that is already allocated */
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-    if (cluster_offset & L2E_OFFSET_MASK) {
-        goto out;
-    }
-
-    cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
-    if (cluster_offset < 0) {
-        goto out;
-    }
-
-    /* Difference between the 512-byte sector indices of the last and the
-     * first byte of the compressed data */
-    nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
-                  (cluster_offset >> 9);
-
-    /* compressed clusters never have the copied flag */
-    cluster_offset |= QCOW_OFLAG_COMPRESSED |
-                      ((uint64_t)nb_csectors << s->csize_shift);
-
-    /* update L2 table */
-    BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-    l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    result = cluster_offset;
-
-out:
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-    return result;
-}
-
-/*
- * Copies the COW region r of the allocation m into the newly allocated
- * cluster(s) at m->alloc_offset. s->lock is dropped while copy_sectors()
- * runs and re-taken afterwards.
- *
- * Returns 0 on success or if the region is empty, otherwise the negative
- * value returned by copy_sectors().
- */
-static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret = 0;
-
-    if (r->nb_sectors != 0) {
-        qemu_co_mutex_unlock(&s->lock);
-        ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
-                           r->offset / BDRV_SECTOR_SIZE,
-                           r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
-        qemu_co_mutex_lock(&s->lock);
-
-        if (ret >= 0) {
-            /*
-             * Before we update the L2 table to actually point to the new
-             * cluster, we need to be sure that the refcounts have been
-             * increased and COW was handled.
-             */
-            qcow2_cache_depends_on_flush(s->l2_table_cache);
-            ret = 0;
-        }
-    }
-
-    return ret;
-}
-
-/*
- * Makes the L2 table point to the clusters that were allocated for the
- * write described by m.
- *
- * First the COW regions at the start and end of the allocation are copied,
- * then the m->nb_clusters L2 entries starting at the entry for m->offset are
- * rewritten to point to consecutive clusters beginning at m->alloc_offset
- * (with QCOW_OFLAG_COPIED set). Clusters that the overwritten entries used
- * to reference are released afterwards.
- *
- * Returns 0 on success, -ENOMEM if the scratch array cannot be allocated,
- * or the negative value returned by perform_cow() / get_cluster_table().
- */
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int i, j = 0, l2_index, ret;
-    uint64_t *old_cluster, *l2_table;
-    uint64_t cluster_offset = m->alloc_offset;
-
-    trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters);
-    assert(m->nb_clusters > 0);
-
-    /* Holds the (still big-endian) L2 entries that get replaced below */
-    old_cluster = g_try_new(uint64_t, m->nb_clusters);
-    if (old_cluster == NULL) {
-        ret = -ENOMEM;
-        goto err;
-    }
-
-    /* copy content of unmodified sectors */
-    ret = perform_cow(bs, m, &m->cow_start);
-    if (ret < 0) {
-        goto err;
-    }
-
-    ret = perform_cow(bs, m, &m->cow_end);
-    if (ret < 0) {
-        goto err;
-    }
-
-    /* Update L2 table. */
-    if (s->use_lazy_refcounts) {
-        qcow2_mark_dirty(bs);
-    }
-    if (qcow2_need_accurate_refcounts(s)) {
-        qcow2_cache_set_dependency(bs, s->l2_table_cache,
-                                   s->refcount_block_cache);
-    }
-
-    ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index);
-    if (ret < 0) {
-        goto err;
-    }
-    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-
-    assert(l2_index + m->nb_clusters <= s->l2_size);
-    for (i = 0; i < m->nb_clusters; i++) {
-        /* if two concurrent writes happen to the same unallocated cluster
-         * each write allocates separate cluster and writes data concurrently.
-         * The first one to complete updates l2 table with pointer to its
-         * cluster the second one has to do RMW (which is done above by
-         * copy_sectors()), update l2 table with its cluster pointer and free
-         * old cluster. This is what this loop does */
-        if (l2_table[l2_index + i] != 0) {
-            old_cluster[j++] = l2_table[l2_index + i];
-        }
-
-        /* Widen i before shifting: a shift done in int could overflow for
-         * large cluster sizes / cluster counts */
-        l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
-                    ((uint64_t) i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
-    }
-
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-
-    /*
-     * If this was a COW, we need to decrease the refcount of the old cluster.
-     *
-     * Don't discard clusters that reach a refcount of 0 (e.g. compressed
-     * clusters), the next write will reuse them anyway.
-     */
-    for (i = 0; i < j; i++) {
-        qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
-                                QCOW2_DISCARD_NEVER);
-    }
-
-    ret = 0;
-err:
-    g_free(old_cluster);
-    return ret;
-}
-
-/*
- * Returns the number of contiguous clusters, starting at l2_index, that can
- * be used for an allocating write but require COW to be performed (this
- * includes yet unallocated space, which must copy from the backing file).
- *
- * Counting stops at the first normal cluster that has QCOW_OFLAG_COPIED set,
- * or after nb_clusters entries. An unknown cluster type aborts.
- */
-static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
-    uint64_t *l2_table, int l2_index)
-{
-    int count;
-
-    for (count = 0; count < nb_clusters; count++) {
-        uint64_t l2_entry = be64_to_cpu(l2_table[l2_index + count]);
-        int cluster_type = qcow2_get_cluster_type(l2_entry);
-
-        if (cluster_type == QCOW2_CLUSTER_NORMAL) {
-            if (l2_entry & QCOW_OFLAG_COPIED) {
-                break;
-            }
-        } else if (cluster_type != QCOW2_CLUSTER_UNALLOCATED &&
-                   cluster_type != QCOW2_CLUSTER_COMPRESSED &&
-                   cluster_type != QCOW2_CLUSTER_ZERO) {
-            abort();
-        }
-    }
-
-    assert(count <= nb_clusters);
-    return count;
-}
-
-/*
- * Check if there already is an AIO write request in flight which allocates
- * the same cluster. In this case we need to wait until the previous
- * request has completed and updated the L2 table accordingly.
- *
- * The in-flight allocations are taken from s->cluster_allocs; the range
- * checked is [guest_offset, guest_offset + *cur_bytes). *m is only inspected
- * to see whether the caller has already gathered an l2meta.
- *
- * Returns:
- *   0       if there was no dependency. *cur_bytes indicates the number of
- *           bytes from guest_offset that can be read before the next
- *           dependency must be processed (or the request is complete)
- *
- *   -EAGAIN if we had to wait for another request, previously gathered
- *           information on cluster allocation may be invalid now. The caller
- *           must start over anyway, so consider *cur_bytes undefined.
- */
-static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *cur_bytes, QCowL2Meta **m)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowL2Meta *old_alloc;
-    uint64_t bytes = *cur_bytes;
-
-    QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) {
-
-        /* end is recomputed per iteration because bytes may have been
-         * shortened by an earlier list entry */
-        uint64_t start = guest_offset;
-        uint64_t end = start + bytes;
-        uint64_t old_start = l2meta_cow_start(old_alloc);
-        uint64_t old_end = l2meta_cow_end(old_alloc);
-
-        if (end <= old_start || start >= old_end) {
-            /* No intersection */
-        } else {
-            if (start < old_start) {
-                /* Stop at the start of a running allocation */
-                bytes = old_start - start;
-            } else {
-                bytes = 0;
-            }
-
-            /* Stop if already an l2meta exists. After yielding, it wouldn't
-             * be valid any more, so we'd have to clean up the old L2Metas
-             * and deal with requests depending on them before starting to
-             * gather new ones. Not worth the trouble. */
-            if (bytes == 0 && *m) {
-                *cur_bytes = 0;
-                return 0;
-            }
-
-            if (bytes == 0) {
-                /* Wait for the dependency to complete. We need to recheck
-                 * the free/allocated clusters when we continue. */
-                qemu_co_mutex_unlock(&s->lock);
-                qemu_co_queue_wait(&old_alloc->dependent_requests);
-                qemu_co_mutex_lock(&s->lock);
-                return -EAGAIN;
-            }
-        }
-    }
-
-    /* Make sure that existing clusters and new allocations are only used up to
-     * the next dependency if we shortened the request above */
-    *cur_bytes = bytes;
-
-    return 0;
-}
-
-/*
- * Checks how many already allocated clusters that don't require a copy on
- * write there are at the given guest_offset (up to *bytes). If
- * *host_offset is not zero, only physically contiguous clusters beginning at
- * this host offset are counted.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * The parameter m is not used by this function.
- *
- * Returns:
- *   0:     if no allocated clusters are available at the given offset.
- *          *bytes is normally unchanged. It is set to 0 if the cluster
- *          is allocated and doesn't need COW, but doesn't have the right
- *          physical offset.
- *
- *   1:     if allocated clusters that don't require a COW are available at
- *          the requested offset. *bytes may have decreased and describes
- *          the length of the area that can be written to.
- *
- *  -errno: in error cases
- */
-static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int l2_index;
-    uint64_t cluster_offset;
-    uint64_t *l2_table;
-    uint64_t nb_clusters;
-    unsigned int keep_clusters;
-    int ret;
-
-    trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
-                              *bytes);
-
-    /* A requested host offset must sit at the same position inside its
-     * cluster as guest_offset does */
-    assert(*host_offset == 0 ||    offset_into_cluster(s, guest_offset)
-                                == offset_into_cluster(s, *host_offset));
-
-    /*
-     * Calculate the number of clusters to look for. We stop at L2 table
-     * boundaries to keep things simple.
-     */
-    nb_clusters =
-        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
-    /* l2_index is computed here only for the clamp below;
-     * get_cluster_table() stores it again further down */
-    l2_index = offset_to_l2_index(s, guest_offset);
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-    assert(nb_clusters <= INT_MAX);
-
-    /* Find L2 entry for the first involved cluster */
-    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
-    if (ret < 0) {
-        return ret;
-    }
-
-    cluster_offset = be64_to_cpu(l2_table[l2_index]);
-
-    /* Check how many clusters are already allocated and don't need COW */
-    if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
-        && (cluster_offset & QCOW_OFLAG_COPIED))
-    {
-        /* If a specific host_offset is required, check it */
-        bool offset_matches =
-            (cluster_offset & L2E_OFFSET_MASK) == *host_offset;
-
-        if (offset_into_cluster(s, cluster_offset & L2E_OFFSET_MASK)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
-                                    "%#llx unaligned (guest offset: %#" PRIx64
-                                    ")", cluster_offset & L2E_OFFSET_MASK,
-                                    guest_offset);
-            ret = -EIO;
-            goto out;
-        }
-
-        if (*host_offset != 0 && !offset_matches) {
-            *bytes = 0;
-            ret = 0;
-            goto out;
-        }
-
-        /* We keep all QCOW_OFLAG_COPIED clusters */
-        keep_clusters =
-            count_contiguous_clusters(nb_clusters, s->cluster_size,
-                                      &l2_table[l2_index],
-                                      QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
-        assert(keep_clusters <= nb_clusters);
-
-        /* The usable area starts at guest_offset, not at the start of the
-         * first cluster */
-        *bytes = MIN(*bytes,
-                 keep_clusters * s->cluster_size
-                 - offset_into_cluster(s, guest_offset));
-
-        ret = 1;
-    } else {
-        ret = 0;
-    }
-
-    /* Cleanup */
-out:
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-
-    /* Only return a host offset if we actually made progress. Otherwise we
-     * would make requirements for handle_alloc() that it can't fulfill */
-    if (ret > 0) {
-        *host_offset = (cluster_offset & L2E_OFFSET_MASK)
-                     + offset_into_cluster(s, guest_offset);
-    }
-
-    return ret;
-}
-
-/*
- * Allocates new clusters for the given guest_offset.
- *
- * At most *nb_clusters are allocated, and on return *nb_clusters is updated to
- * contain the number of clusters that have been allocated and are contiguous
- * in the image file.
- *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
- *
- * *host_offset is updated to contain the offset into the image file at which
- * the first allocated cluster starts.
- *
- * Return 0 on success and -errno in error cases. -EAGAIN means that the
- * function has been waiting for another request and the allocation must be
- * restarted, but the whole request should not be failed.
- */
-static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-                                   uint64_t *host_offset, uint64_t *nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-
-    trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
-                                         *host_offset, *nb_clusters);
-
-    /* Allocate new clusters */
-    trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
-    if (*host_offset == 0) {
-        int64_t cluster_offset =
-            qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
-        if (cluster_offset < 0) {
-            return cluster_offset;
-        }
-        *host_offset = cluster_offset;
-        return 0;
-    } else {
-        int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
-        if (ret < 0) {
-            return ret;
-        }
-        *nb_clusters = ret;
-        return 0;
-    }
-}
-
-/*
- * Allocates new clusters for an area that either is yet unallocated or needs a
- * copy on write. If *host_offset is non-zero, clusters are only allocated if
- * the new allocation can match the specified host offset.
- *
- * Note that guest_offset may not be cluster aligned. In this case, the
- * returned *host_offset points to exact byte referenced by guest_offset and
- * therefore isn't cluster aligned as well.
- *
- * Returns:
- *   0:     if no clusters could be allocated. *bytes is set to 0,
- *          *host_offset is left unchanged.
- *
- *   1:     if new clusters were allocated. *bytes may be decreased if the
- *          new allocation doesn't cover all of the requested area.
- *          *host_offset is updated to contain the host offset of the first
- *          newly allocated cluster.
- *
- *  -errno: in error cases
- */
-static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int l2_index;
-    uint64_t *l2_table;
-    uint64_t entry;
-    uint64_t nb_clusters;
-    int ret;
-
-    uint64_t alloc_cluster_offset;
-
-    trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
-                             *bytes);
-    assert(*bytes > 0);
-
-    /*
-     * Calculate the number of clusters to look for. We stop at L2 table
-     * boundaries to keep things simple.
-     */
-    nb_clusters =
-        size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes);
-
-    l2_index = offset_to_l2_index(s, guest_offset);
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-    assert(nb_clusters <= INT_MAX);
-
-    /* Find L2 entry for the first involved cluster */
-    ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
-    if (ret < 0) {
-        return ret;
-    }
-
-    entry = be64_to_cpu(l2_table[l2_index]);
-
-    /* For the moment, overwrite compressed clusters one by one */
-    if (entry & QCOW_OFLAG_COMPRESSED) {
-        nb_clusters = 1;
-    } else {
-        nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
-    }
-
-    /* This function is only called when there were no non-COW clusters, so if
-     * we can't find any unallocated or COW clusters either, something is
-     * wrong with our code. */
-    assert(nb_clusters > 0);
-
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-
-    /* Allocate, if necessary at a given offset in the image file */
-    alloc_cluster_offset = start_of_cluster(s, *host_offset);
-    ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
-                                  &nb_clusters);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Can't extend contiguous allocation */
-    if (nb_clusters == 0) {
-        *bytes = 0;
-        return 0;
-    }
-
-    /* !*host_offset would overwrite the image header and is reserved for "no
-     * host offset preferred". If 0 was a valid host offset, it'd trigger the
-     * following overlap check; do that now to avoid having an invalid value in
-     * *host_offset. */
-    if (!alloc_cluster_offset) {
-        ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                            nb_clusters * s->cluster_size);
-        assert(ret < 0);
-        goto fail;
-    }
-
-    /*
-     * Save info needed for meta data update.
-     *
-     * requested_sectors: Number of sectors from the start of the first
-     * newly allocated cluster to the end of the (possibly shortened
-     * before) write request.
-     *
-     * avail_sectors: Number of sectors from the start of the first
-     * newly allocated to the end of the last newly allocated cluster.
-     *
-     * nb_sectors: The number of sectors from the start of the first
-     * newly allocated cluster to the end of the area that the write
-     * request actually writes to (excluding COW at the end)
-     */
-    int requested_sectors =
-        (*bytes + offset_into_cluster(s, guest_offset))
-        >> BDRV_SECTOR_BITS;
-    int avail_sectors = nb_clusters
-                        << (s->cluster_bits - BDRV_SECTOR_BITS);
-    int alloc_n_start = offset_into_cluster(s, guest_offset)
-                        >> BDRV_SECTOR_BITS;
-    int nb_sectors = MIN(requested_sectors, avail_sectors);
-    QCowL2Meta *old_m = *m;
-
-    *m = g_malloc0(sizeof(**m));
-
-    **m = (QCowL2Meta) {
-        .next           = old_m,
-
-        .alloc_offset   = alloc_cluster_offset,
-        .offset         = start_of_cluster(s, guest_offset),
-        .nb_clusters    = nb_clusters,
-        .nb_available   = nb_sectors,
-
-        .cow_start = {
-            .offset     = 0,
-            .nb_sectors = alloc_n_start,
-        },
-        .cow_end = {
-            .offset     = nb_sectors * BDRV_SECTOR_SIZE,
-            .nb_sectors = avail_sectors - nb_sectors,
-        },
-    };
-    qemu_co_queue_init(&(*m)->dependent_requests);
-    QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
-
-    *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
-    *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
-                         - offset_into_cluster(s, guest_offset));
-    assert(*bytes != 0);
-
-    return 1;
-
-fail:
-    if (*m && (*m)->nb_clusters > 0) {
-        QLIST_REMOVE(*m, next_in_flight);
-    }
-    return ret;
-}
-
-/*
- * alloc_cluster_offset
- *
- * For a given offset on the virtual disk, find the cluster offset in qcow2
- * file. If the offset is not found, allocate a new cluster.
- *
- * If the cluster was already allocated, m->nb_clusters is set to 0 and
- * other fields in m are meaningless.
- *
- * If the cluster is newly allocated, m->nb_clusters is set to the number of
- * contiguous clusters that have been allocated. In this case, the other
- * fields of m are valid and contain information about the first allocated
- * cluster.
- *
- * If the request conflicts with another write request in flight, the coroutine
- * is queued and will be reentered when the dependency has completed.
- *
- * Return 0 on success and -errno in error cases
- */
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *host_offset, QCowL2Meta **m)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t start, remaining;
-    uint64_t cluster_offset;
-    uint64_t cur_bytes;
-    int ret;
-
-    trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
-
-    assert((offset & ~BDRV_SECTOR_MASK) == 0);
-
-again:
-    start = offset;
-    remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
-    cluster_offset = 0;
-    *host_offset = 0;
-    cur_bytes = 0;
-    *m = NULL;
-
-    while (true) {
-
-        if (!*host_offset) {
-            *host_offset = start_of_cluster(s, cluster_offset);
-        }
-
-        assert(remaining >= cur_bytes);
-
-        start           += cur_bytes;
-        remaining       -= cur_bytes;
-        cluster_offset  += cur_bytes;
-
-        if (remaining == 0) {
-            break;
-        }
-
-        cur_bytes = remaining;
-
-        /*
-         * Now start gathering as many contiguous clusters as possible:
-         *
-         * 1. Check for overlaps with in-flight allocations
-         *
-         *      a) Overlap not in the first cluster -> shorten this request and
-         *         let the caller handle the rest in its next loop iteration.
-         *
-         *      b) Real overlaps of two requests. Yield and restart the search
-         *         for contiguous clusters (the situation could have changed
-         *         while we were sleeping)
-         *
-         *      c) TODO: Request starts in the same cluster as the in-flight
-         *         allocation ends. Shorten the COW of the in-fight allocation,
-         *         set cluster_offset to write to the same cluster and set up
-         *         the right synchronisation between the in-flight request and
-         *         the new one.
-         */
-        ret = handle_dependencies(bs, start, &cur_bytes, m);
-        if (ret == -EAGAIN) {
-            /* Currently handle_dependencies() doesn't yield if we already had
-             * an allocation. If it did, we would have to clean up the L2Meta
-             * structs before starting over. */
-            assert(*m == NULL);
-            goto again;
-        } else if (ret < 0) {
-            return ret;
-        } else if (cur_bytes == 0) {
-            break;
-        } else {
-            /* handle_dependencies() may have decreased cur_bytes (shortened
-             * the allocations below) so that the next dependency is processed
-             * correctly during the next loop iteration. */
-        }
-
-        /*
-         * 2. Count contiguous COPIED clusters.
-         */
-        ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
-        if (ret < 0) {
-            return ret;
-        } else if (ret) {
-            continue;
-        } else if (cur_bytes == 0) {
-            break;
-        }
-
-        /*
-         * 3. If the request still hasn't completed, allocate new clusters,
-         *    considering any cluster_offset of steps 1c or 2.
-         */
-        ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
-        if (ret < 0) {
-            return ret;
-        } else if (ret) {
-            continue;
-        } else {
-            assert(cur_bytes == 0);
-            break;
-        }
-    }
-
-    *num -= remaining >> BDRV_SECTOR_BITS;
-    assert(*num > 0);
-    assert(*host_offset != 0);
-
-    return 0;
-}
-
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
-                             const uint8_t *buf, int buf_size)
-{
-    z_stream strm1, *strm = &strm1;
-    int ret, out_len;
-
-    memset(strm, 0, sizeof(*strm));
-
-    strm->next_in = (uint8_t *)buf;
-    strm->avail_in = buf_size;
-    strm->next_out = out_buf;
-    strm->avail_out = out_buf_size;
-
-    ret = inflateInit2(strm, -12);
-    if (ret != Z_OK)
-        return -1;
-    ret = inflate(strm, Z_FINISH);
-    out_len = strm->next_out - out_buf;
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
-        out_len != out_buf_size) {
-        inflateEnd(strm);
-        return -1;
-    }
-    inflateEnd(strm);
-    return 0;
-}
-
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret, csize, nb_csectors, sector_offset;
-    uint64_t coffset;
-
-    coffset = cluster_offset & s->cluster_offset_mask;
-    if (s->cluster_cache_offset != coffset) {
-        nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
-        sector_offset = coffset & 511;
-        csize = nb_csectors * 512 - sector_offset;
-        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
-                        nb_csectors);
-        if (ret < 0) {
-            return ret;
-        }
-        if (decompress_buffer(s->cluster_cache, s->cluster_size,
-                              s->cluster_data + sector_offset, csize) < 0) {
-            return -EIO;
-        }
-        s->cluster_cache_offset = coffset;
-    }
-    return 0;
-}
-
-/*
- * This discards as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of discarded
- * clusters.
- */
-static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
-                             uint64_t nb_clusters, enum qcow2_discard_type type,
-                             bool full_discard)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table;
-    int l2_index;
-    int ret;
-    int i;
-
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Limit nb_clusters to one L2 table */
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-    assert(nb_clusters <= INT_MAX);
-
-    for (i = 0; i < nb_clusters; i++) {
-        uint64_t old_l2_entry;
-
-        old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
-
-        /*
-         * If full_discard is false, make sure that a discarded area reads back
-         * as zeroes for v3 images (we cannot do it for v2 without actually
-         * writing a zero-filled buffer). We can skip the operation if the
-         * cluster is already marked as zero, or if it's unallocated and we
-         * don't have a backing file.
-         *
-         * TODO We might want to use bdrv_get_block_status(bs) here, but we're
-         * holding s->lock, so that doesn't work today.
-         *
-         * If full_discard is true, the sector should not read back as zeroes,
-         * but rather fall through to the backing file.
-         */
-        switch (qcow2_get_cluster_type(old_l2_entry)) {
-            case QCOW2_CLUSTER_UNALLOCATED:
-                if (full_discard || !bs->backing) {
-                    continue;
-                }
-                break;
-
-            case QCOW2_CLUSTER_ZERO:
-                if (!full_discard) {
-                    continue;
-                }
-                break;
-
-            case QCOW2_CLUSTER_NORMAL:
-            case QCOW2_CLUSTER_COMPRESSED:
-                break;
-
-            default:
-                abort();
-        }
-
-        /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (!full_discard && s->qcow_version >= 3) {
-            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
-        } else {
-            l2_table[l2_index + i] = cpu_to_be64(0);
-        }
-
-        /* Then decrease the refcount */
-        qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
-    }
-
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-
-    return nb_clusters;
-}
-
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t end_offset;
-    uint64_t nb_clusters;
-    int ret;
-
-    end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
-    /* Round start up and end down */
-    offset = align_offset(offset, s->cluster_size);
-    end_offset = start_of_cluster(s, end_offset);
-
-    if (offset > end_offset) {
-        return 0;
-    }
-
-    nb_clusters = size_to_clusters(s, end_offset - offset);
-
-    s->cache_discards = true;
-
-    /* Each L2 table is handled by its own loop iteration */
-    while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        nb_clusters -= ret;
-        offset += (ret * s->cluster_size);
-    }
-
-    ret = 0;
-fail:
-    s->cache_discards = false;
-    qcow2_process_discards(bs, ret);
-
-    return ret;
-}
-
-/*
- * This zeroes as many clusters of nb_clusters as possible at once (i.e.
- * all clusters in the same L2 table) and returns the number of zeroed
- * clusters.
- */
-static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
-                          uint64_t nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table;
-    int l2_index;
-    int ret;
-    int i;
-
-    ret = get_cluster_table(bs, offset, &l2_table, &l2_index);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Limit nb_clusters to one L2 table */
-    nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
-    assert(nb_clusters <= INT_MAX);
-
-    for (i = 0; i < nb_clusters; i++) {
-        uint64_t old_offset;
-
-        old_offset = be64_to_cpu(l2_table[l2_index + i]);
-
-        /* Update L2 entries */
-        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-        if (old_offset & QCOW_OFLAG_COMPRESSED) {
-            l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
-            qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
-        } else {
-            l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
-        }
-    }
-
-    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-
-    return nb_clusters;
-}
-
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t nb_clusters;
-    int ret;
-
-    /* The zero flag is only supported by version 3 and newer */
-    if (s->qcow_version < 3) {
-        return -ENOTSUP;
-    }
-
-    /* Each L2 table is handled by its own loop iteration */
-    nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
-
-    s->cache_discards = true;
-
-    while (nb_clusters > 0) {
-        ret = zero_single_l2(bs, offset, nb_clusters);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        nb_clusters -= ret;
-        offset += (ret * s->cluster_size);
-    }
-
-    ret = 0;
-fail:
-    s->cache_discards = false;
-    qcow2_process_discards(bs, ret);
-
-    return ret;
-}
-
-/*
- * Expands all zero clusters in a specific L1 table (or deallocates them, for
- * non-backed non-pre-allocated zero clusters).
- *
- * l1_entries and *visited_l1_entries are used to keep track of progress for
- * status_cb(). l1_entries contains the total number of L1 entries and
- * *visited_l1_entries counts all visited L1 entries.
- */
-static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
-                                      int l1_size, int64_t *visited_l1_entries,
-                                      int64_t l1_entries,
-                                      BlockDriverAmendStatusCB *status_cb,
-                                      void *cb_opaque)
-{
-    BDRVQcow2State *s = bs->opaque;
-    bool is_active_l1 = (l1_table == s->l1_table);
-    uint64_t *l2_table = NULL;
-    int ret;
-    int i, j;
-
-    if (!is_active_l1) {
-        /* inactive L2 tables require a buffer to be stored in when loading
-         * them from disk */
-        l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
-        if (l2_table == NULL) {
-            return -ENOMEM;
-        }
-    }
-
-    for (i = 0; i < l1_size; i++) {
-        uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
-        bool l2_dirty = false;
-        uint64_t l2_refcount;
-
-        if (!l2_offset) {
-            /* unallocated */
-            (*visited_l1_entries)++;
-            if (status_cb) {
-                status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
-            }
-            continue;
-        }
-
-        if (offset_into_cluster(s, l2_offset)) {
-            qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
-                                    PRIx64 " unaligned (L1 index: %#x)",
-                                    l2_offset, i);
-            ret = -EIO;
-            goto fail;
-        }
-
-        if (is_active_l1) {
-            /* get active L2 tables from cache */
-            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                    (void **)&l2_table);
-        } else {
-            /* load inactive L2 tables from disk */
-            ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
-                            (void *)l2_table, s->cluster_sectors);
-        }
-        if (ret < 0) {
-            goto fail;
-        }
-
-        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
-                                 &l2_refcount);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        for (j = 0; j < s->l2_size; j++) {
-            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
-            int64_t offset = l2_entry & L2E_OFFSET_MASK;
-            int cluster_type = qcow2_get_cluster_type(l2_entry);
-            bool preallocated = offset != 0;
-
-            if (cluster_type != QCOW2_CLUSTER_ZERO) {
-                continue;
-            }
-
-            if (!preallocated) {
-                if (!bs->backing) {
-                    /* not backed; therefore we can simply deallocate the
-                     * cluster */
-                    l2_table[j] = 0;
-                    l2_dirty = true;
-                    continue;
-                }
-
-                offset = qcow2_alloc_clusters(bs, s->cluster_size);
-                if (offset < 0) {
-                    ret = offset;
-                    goto fail;
-                }
-
-                if (l2_refcount > 1) {
-                    /* For shared L2 tables, set the refcount accordingly (it is
-                     * already 1 and needs to be l2_refcount) */
-                    ret = qcow2_update_cluster_refcount(bs,
-                            offset >> s->cluster_bits,
-                            refcount_diff(1, l2_refcount), false,
-                            QCOW2_DISCARD_OTHER);
-                    if (ret < 0) {
-                        qcow2_free_clusters(bs, offset, s->cluster_size,
-                                            QCOW2_DISCARD_OTHER);
-                        goto fail;
-                    }
-                }
-            }
-
-            if (offset_into_cluster(s, offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
-                                        "%#" PRIx64 " unaligned (L2 offset: %#"
-                                        PRIx64 ", L2 index: %#x)", offset,
-                                        l2_offset, j);
-                if (!preallocated) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
-                }
-                ret = -EIO;
-                goto fail;
-            }
-
-            ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
-            if (ret < 0) {
-                if (!preallocated) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
-                }
-                goto fail;
-            }
-
-            ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
-                                    s->cluster_sectors, 0);
-            if (ret < 0) {
-                if (!preallocated) {
-                    qcow2_free_clusters(bs, offset, s->cluster_size,
-                                        QCOW2_DISCARD_ALWAYS);
-                }
-                goto fail;
-            }
-
-            if (l2_refcount == 1) {
-                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
-            } else {
-                l2_table[j] = cpu_to_be64(offset);
-            }
-            l2_dirty = true;
-        }
-
-        if (is_active_l1) {
-            if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-                qcow2_cache_depends_on_flush(s->l2_table_cache);
-            }
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-        } else {
-            if (l2_dirty) {
-                ret = qcow2_pre_write_overlap_check(bs,
-                        QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2, l2_offset,
-                        s->cluster_size);
-                if (ret < 0) {
-                    goto fail;
-                }
-
-                ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
-                                 (void *)l2_table, s->cluster_sectors);
-                if (ret < 0) {
-                    goto fail;
-                }
-            }
-        }
-
-        (*visited_l1_entries)++;
-        if (status_cb) {
-            status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
-        }
-    }
-
-    ret = 0;
-
-fail:
-    if (l2_table) {
-        if (!is_active_l1) {
-            qemu_vfree(l2_table);
-        } else {
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-        }
-    }
-    return ret;
-}
-
-/*
- * For backed images, expands all zero clusters on the image. For non-backed
- * images, deallocates all non-pre-allocated zero clusters (and claims the
- * allocation for pre-allocated ones). This is important for downgrading to a
- * qcow2 version which doesn't yet support metadata zero clusters.
- */
-int qcow2_expand_zero_clusters(BlockDriverState *bs,
-                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table = NULL;
-    int64_t l1_entries = 0, visited_l1_entries = 0;
-    int ret;
-    int i, j;
-
-    if (status_cb) {
-        l1_entries = s->l1_size;
-        for (i = 0; i < s->nb_snapshots; i++) {
-            l1_entries += s->snapshots[i].l1_size;
-        }
-    }
-
-    ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
-                                     &visited_l1_entries, l1_entries,
-                                     status_cb, cb_opaque);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Inactive L1 tables may point to active L2 tables - therefore it is
-     * necessary to flush the L2 table cache before trying to access the L2
-     * tables pointed to by inactive L1 entries (else we might try to expand
-     * zero clusters that have already been expanded); furthermore, it is also
-     * necessary to empty the L2 table cache, since it may contain tables which
-     * are now going to be modified directly on disk, bypassing the cache.
-     * qcow2_cache_empty() does both for us. */
-    ret = qcow2_cache_empty(bs, s->l2_table_cache);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    for (i = 0; i < s->nb_snapshots; i++) {
-        int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
-                BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
-
-        l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
-
-        ret = bdrv_read(bs->file->bs,
-                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
-                        (void *)l1_table, l1_sectors);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        for (j = 0; j < s->snapshots[i].l1_size; j++) {
-            be64_to_cpus(&l1_table[j]);
-        }
-
-        ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
-                                         &visited_l1_entries, l1_entries,
-                                         status_cb, cb_opaque);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    ret = 0;
-
-fail:
-    g_free(l1_table);
-    return ret;
-}
diff --git a/qemu/block/qcow2-refcount.c b/qemu/block/qcow2-refcount.c
deleted file mode 100644
index ca6094ff5..000000000
--- a/qemu/block/qcow2-refcount.c
+++ /dev/null
@@ -1,2921 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-#include "qemu/range.h"
-
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
-                            int64_t offset, int64_t length, uint64_t addend,
-                            bool decrease, enum qcow2_discard_type type);
-
-static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
-static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
-static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index);
-static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index);
-static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index);
-static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index);
-static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index);
-
-static void set_refcount_ro0(void *refcount_array, uint64_t index,
-                             uint64_t value);
-static void set_refcount_ro1(void *refcount_array, uint64_t index,
-                             uint64_t value);
-static void set_refcount_ro2(void *refcount_array, uint64_t index,
-                             uint64_t value);
-static void set_refcount_ro3(void *refcount_array, uint64_t index,
-                             uint64_t value);
-static void set_refcount_ro4(void *refcount_array, uint64_t index,
-                             uint64_t value);
-static void set_refcount_ro5(void *refcount_array, uint64_t index,
-                             uint64_t value);
-static void set_refcount_ro6(void *refcount_array, uint64_t index,
-                             uint64_t value);
-
-
-static Qcow2GetRefcountFunc *const get_refcount_funcs[] = {
-    &get_refcount_ro0,
-    &get_refcount_ro1,
-    &get_refcount_ro2,
-    &get_refcount_ro3,
-    &get_refcount_ro4,
-    &get_refcount_ro5,
-    &get_refcount_ro6
-};
-
-static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
-    &set_refcount_ro0,
-    &set_refcount_ro1,
-    &set_refcount_ro2,
-    &set_refcount_ro3,
-    &set_refcount_ro4,
-    &set_refcount_ro5,
-    &set_refcount_ro6
-};
-
-
-/*********************************************************/
-/* refcount handling */
-
-int qcow2_refcount_init(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    unsigned int refcount_table_size2, i;
-    int ret;
-
-    assert(s->refcount_order >= 0 && s->refcount_order <= 6);
-
-    s->get_refcount = get_refcount_funcs[s->refcount_order];
-    s->set_refcount = set_refcount_funcs[s->refcount_order];
-
-    assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
-    refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
-    s->refcount_table = g_try_malloc(refcount_table_size2);
-
-    if (s->refcount_table_size > 0) {
-        if (s->refcount_table == NULL) {
-            ret = -ENOMEM;
-            goto fail;
-        }
-        BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
-        ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
-                         s->refcount_table, refcount_table_size2);
-        if (ret < 0) {
-            goto fail;
-        }
-        for(i = 0; i < s->refcount_table_size; i++)
-            be64_to_cpus(&s->refcount_table[i]);
-    }
-    return 0;
- fail:
-    return ret;
-}
-
-void qcow2_refcount_close(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    g_free(s->refcount_table);
-}
-
-
-static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index)
-{
-    return (((const uint8_t *)refcount_array)[index / 8] >> (index % 8)) & 0x1;
-}
-
-static void set_refcount_ro0(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    assert(!(value >> 1));
-    ((uint8_t *)refcount_array)[index / 8] &= ~(0x1 << (index % 8));
-    ((uint8_t *)refcount_array)[index / 8] |= value << (index % 8);
-}
-
-static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index)
-{
-    return (((const uint8_t *)refcount_array)[index / 4] >> (2 * (index % 4)))
-           & 0x3;
-}
-
-static void set_refcount_ro1(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    assert(!(value >> 2));
-    ((uint8_t *)refcount_array)[index / 4] &= ~(0x3 << (2 * (index % 4)));
-    ((uint8_t *)refcount_array)[index / 4] |= value << (2 * (index % 4));
-}
-
-static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index)
-{
-    return (((const uint8_t *)refcount_array)[index / 2] >> (4 * (index % 2)))
-           & 0xf;
-}
-
-static void set_refcount_ro2(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    assert(!(value >> 4));
-    ((uint8_t *)refcount_array)[index / 2] &= ~(0xf << (4 * (index % 2)));
-    ((uint8_t *)refcount_array)[index / 2] |= value << (4 * (index % 2));
-}
-
-static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index)
-{
-    return ((const uint8_t *)refcount_array)[index];
-}
-
-static void set_refcount_ro3(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    assert(!(value >> 8));
-    ((uint8_t *)refcount_array)[index] = value;
-}
-
-static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index)
-{
-    return be16_to_cpu(((const uint16_t *)refcount_array)[index]);
-}
-
-static void set_refcount_ro4(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    assert(!(value >> 16));
-    ((uint16_t *)refcount_array)[index] = cpu_to_be16(value);
-}
-
-static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index)
-{
-    return be32_to_cpu(((const uint32_t *)refcount_array)[index]);
-}
-
-static void set_refcount_ro5(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    assert(!(value >> 32));
-    ((uint32_t *)refcount_array)[index] = cpu_to_be32(value);
-}
-
-static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index)
-{
-    return be64_to_cpu(((const uint64_t *)refcount_array)[index]);
-}
-
-static void set_refcount_ro6(void *refcount_array, uint64_t index,
-                             uint64_t value)
-{
-    ((uint64_t *)refcount_array)[index] = cpu_to_be64(value);
-}
-
-
-static int load_refcount_block(BlockDriverState *bs,
-                               int64_t refcount_block_offset,
-                               void **refcount_block)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
-    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
-        refcount_block);
-
-    return ret;
-}
-
-/*
- * Retrieves the refcount of the cluster given by its index and stores it in
- * *refcount. Returns 0 on success and -errno on failure.
- */
-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint64_t *refcount)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t refcount_table_index, block_index;
-    int64_t refcount_block_offset;
-    int ret;
-    void *refcount_block;
-
-    refcount_table_index = cluster_index >> s->refcount_block_bits;
-    if (refcount_table_index >= s->refcount_table_size) {
-        *refcount = 0;
-        return 0;
-    }
-    refcount_block_offset =
-        s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
-    if (!refcount_block_offset) {
-        *refcount = 0;
-        return 0;
-    }
-
-    if (offset_into_cluster(s, refcount_block_offset)) {
-        qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
-                                " unaligned (reftable index: %#" PRIx64 ")",
-                                refcount_block_offset, refcount_table_index);
-        return -EIO;
-    }
-
-    ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
-                          &refcount_block);
-    if (ret < 0) {
-        return ret;
-    }
-
-    block_index = cluster_index & (s->refcount_block_size - 1);
-    *refcount = s->get_refcount(refcount_block, block_index);
-
-    qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
-
-    return 0;
-}
-
-/*
- * Rounds the refcount table size up to avoid growing the table for each single
- * refcount block that is allocated.
- */
-static unsigned int next_refcount_table_size(BDRVQcow2State *s,
-    unsigned int min_size)
-{
-    unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
-    unsigned int refcount_table_clusters =
-        MAX(1, s->refcount_table_size >> (s->cluster_bits - 3));
-
-    while (min_clusters > refcount_table_clusters) {
-        refcount_table_clusters = (refcount_table_clusters * 3 + 1) / 2;
-    }
-
-    return refcount_table_clusters << (s->cluster_bits - 3);
-}
-
-
-/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
-    uint64_t offset_b)
-{
-    uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
-    uint64_t block_b = offset_b >> (s->cluster_bits + s->refcount_block_bits);
-
-    return (block_a == block_b);
-}
-
-/*
- * Loads a refcount block. If it doesn't exist yet, it is allocated first
- * (including growing the refcount table if needed).
- *
- * Returns 0 on success or -errno in error case
- */
-static int alloc_refcount_block(BlockDriverState *bs,
-                                int64_t cluster_index, void **refcount_block)
-{
-    BDRVQcow2State *s = bs->opaque;
-    unsigned int refcount_table_index;
-    int ret;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
-
-    /* Find the refcount block for the given cluster */
-    refcount_table_index = cluster_index >> s->refcount_block_bits;
-
-    if (refcount_table_index < s->refcount_table_size) {
-
-        uint64_t refcount_block_offset =
-            s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
-
-        /* If it's already there, we're done */
-        if (refcount_block_offset) {
-            if (offset_into_cluster(s, refcount_block_offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
-                                        PRIx64 " unaligned (reftable index: "
-                                        "%#x)", refcount_block_offset,
-                                        refcount_table_index);
-                return -EIO;
-            }
-
-             return load_refcount_block(bs, refcount_block_offset,
-                                        refcount_block);
-        }
-    }
-
-    /*
-     * If we came here, we need to allocate something. Something is at least
-     * a cluster for the new refcount block. It may also include a new refcount
-     * table if the old refcount table is too small.
-     *
-     * Note that allocating clusters here needs some special care:
-     *
-     * - We can't use the normal qcow2_alloc_clusters(), it would try to
-     *   increase the refcount and very likely we would end up with an endless
-     *   recursion. Instead we must place the refcount blocks in a way that
-     *   they can describe them themselves.
-     *
-     * - We need to consider that at this point we are inside update_refcounts
-     *   and potentially doing an initial refcount increase. This means that
-     *   some clusters have already been allocated by the caller, but their
-     *   refcount isn't accurate yet. If we allocate clusters for metadata, we
-     *   need to return -EAGAIN to signal the caller that it needs to restart
-     *   the search for free clusters.
-     *
-     * - alloc_clusters_noref and qcow2_free_clusters may load a different
-     *   refcount block into the cache
-     */
-
-    *refcount_block = NULL;
-
-    /* We write to the refcount table, so we might depend on L2 tables */
-    ret = qcow2_cache_flush(bs, s->l2_table_cache);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Allocate the refcount block itself and mark it as used */
-    int64_t new_block = alloc_clusters_noref(bs, s->cluster_size);
-    if (new_block < 0) {
-        return new_block;
-    }
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "qcow2: Allocate refcount block %d for %" PRIx64
-        " at %" PRIx64 "\n",
-        refcount_table_index, cluster_index << s->cluster_bits, new_block);
-#endif
-
-    if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
-        /* Zero the new refcount block before updating it */
-        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
-                                    refcount_block);
-        if (ret < 0) {
-            goto fail_block;
-        }
-
-        memset(*refcount_block, 0, s->cluster_size);
-
-        /* The block describes itself, need to update the cache */
-        int block_index = (new_block >> s->cluster_bits) &
-            (s->refcount_block_size - 1);
-        s->set_refcount(*refcount_block, block_index, 1);
-    } else {
-        /* Described somewhere else. This can recurse at most twice before we
-         * arrive at a block that describes itself. */
-        ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
-                              QCOW2_DISCARD_NEVER);
-        if (ret < 0) {
-            goto fail_block;
-        }
-
-        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-        if (ret < 0) {
-            goto fail_block;
-        }
-
-        /* Initialize the new refcount block only after updating its refcount,
-         * update_refcount uses the refcount cache itself */
-        ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
-                                    refcount_block);
-        if (ret < 0) {
-            goto fail_block;
-        }
-
-        memset(*refcount_block, 0, s->cluster_size);
-    }
-
-    /* Now the new refcount block needs to be written to disk */
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
-    qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        goto fail_block;
-    }
-
-    /* If the refcount table is big enough, just hook the block up there */
-    if (refcount_table_index < s->refcount_table_size) {
-        uint64_t data64 = cpu_to_be64(new_block);
-        BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
-        ret = bdrv_pwrite_sync(bs->file->bs,
-            s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
-            &data64, sizeof(data64));
-        if (ret < 0) {
-            goto fail_block;
-        }
-
-        s->refcount_table[refcount_table_index] = new_block;
-
-        /* The new refcount block may be where the caller intended to put its
-         * data, so let it restart the search. */
-        return -EAGAIN;
-    }
-
-    qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
-
-    /*
-     * If we come here, we need to grow the refcount table. Again, a new
-     * refcount table needs some space and we can't simply allocate to avoid
-     * endless recursion.
-     *
-     * Therefore let's grab new refcount blocks at the end of the image, which
-     * will describe themselves and the new refcount table. This way we can
-     * reference them only in the new table and do the switch to the new
-     * refcount table at once without producing an inconsistent state in
-     * between.
-     */
-    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
-
-    /* Calculate the number of refcount blocks needed so far; this will be the
-     * basis for calculating the index of the first cluster used for the
-     * self-describing refcount structures which we are about to create.
-     *
-     * Because we reached this point, there cannot be any refcount entries for
-     * cluster_index or higher indices yet. However, because new_block has been
-     * allocated to describe that cluster (and it will assume this role later
-     * on), we cannot use that index; also, new_block may actually have a higher
-     * cluster index than cluster_index, so it needs to be taken into account
-     * here (and 1 needs to be added to its value because that cluster is used).
-     */
-    uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1,
-                                            (new_block >> s->cluster_bits) + 1),
-                                        s->refcount_block_size);
-
-    if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
-        return -EFBIG;
-    }
-
-    /* And now we need at least one block more for the new metadata */
-    uint64_t table_size = next_refcount_table_size(s, blocks_used + 1);
-    uint64_t last_table_size;
-    uint64_t blocks_clusters;
-    do {
-        uint64_t table_clusters =
-            size_to_clusters(s, table_size * sizeof(uint64_t));
-        blocks_clusters = 1 +
-            ((table_clusters + s->refcount_block_size - 1)
-            / s->refcount_block_size);
-        uint64_t meta_clusters = table_clusters + blocks_clusters;
-
-        last_table_size = table_size;
-        table_size = next_refcount_table_size(s, blocks_used +
-            ((meta_clusters + s->refcount_block_size - 1)
-            / s->refcount_block_size));
-
-    } while (last_table_size != table_size);
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "qcow2: Grow refcount table %" PRId32 " => %" PRId64 "\n",
-        s->refcount_table_size, table_size);
-#endif
-
-    /* Create the new refcount table and blocks */
-    uint64_t meta_offset = (blocks_used * s->refcount_block_size) *
-        s->cluster_size;
-    uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
-    uint64_t *new_table = g_try_new0(uint64_t, table_size);
-    void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
-
-    assert(table_size > 0 && blocks_clusters > 0);
-    if (new_table == NULL || new_blocks == NULL) {
-        ret = -ENOMEM;
-        goto fail_table;
-    }
-
-    /* Fill the new refcount table */
-    memcpy(new_table, s->refcount_table,
-        s->refcount_table_size * sizeof(uint64_t));
-    new_table[refcount_table_index] = new_block;
-
-    int i;
-    for (i = 0; i < blocks_clusters; i++) {
-        new_table[blocks_used + i] = meta_offset + (i * s->cluster_size);
-    }
-
-    /* Fill the refcount blocks */
-    uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
-    int block = 0;
-    for (i = 0; i < table_clusters + blocks_clusters; i++) {
-        s->set_refcount(new_blocks, block++, 1);
-    }
-
-    /* Write refcount blocks to disk */
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
-    ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
-        blocks_clusters * s->cluster_size);
-    g_free(new_blocks);
-    new_blocks = NULL;
-    if (ret < 0) {
-        goto fail_table;
-    }
-
-    /* Write refcount table to disk */
-    for(i = 0; i < table_size; i++) {
-        cpu_to_be64s(&new_table[i]);
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
-    ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
-        table_size * sizeof(uint64_t));
-    if (ret < 0) {
-        goto fail_table;
-    }
-
-    for(i = 0; i < table_size; i++) {
-        be64_to_cpus(&new_table[i]);
-    }
-
-    /* Hook up the new refcount table in the qcow2 header */
-    struct QEMU_PACKED {
-        uint64_t d64;
-        uint32_t d32;
-    } data;
-    cpu_to_be64w(&data.d64, table_offset);
-    cpu_to_be32w(&data.d32, table_clusters);
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
-    ret = bdrv_pwrite_sync(bs->file->bs,
-                           offsetof(QCowHeader, refcount_table_offset),
-                           &data, sizeof(data));
-    if (ret < 0) {
-        goto fail_table;
-    }
-
-    /* And switch it in memory */
-    uint64_t old_table_offset = s->refcount_table_offset;
-    uint64_t old_table_size = s->refcount_table_size;
-
-    g_free(s->refcount_table);
-    s->refcount_table = new_table;
-    s->refcount_table_size = table_size;
-    s->refcount_table_offset = table_offset;
-
-    /* Free old table. */
-    qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
-                        QCOW2_DISCARD_OTHER);
-
-    ret = load_refcount_block(bs, new_block, refcount_block);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* If we were trying to do the initial refcount update for some cluster
-     * allocation, we might have used the same clusters to store newly
-     * allocated metadata. Make the caller search some new space. */
-    return -EAGAIN;
-
-fail_table:
-    g_free(new_blocks);
-    g_free(new_table);
-fail_block:
-    if (*refcount_block != NULL) {
-        qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
-    }
-    return ret;
-}
-
-void qcow2_process_discards(BlockDriverState *bs, int ret)
-{
-    BDRVQcow2State *s = bs->opaque;
-    Qcow2DiscardRegion *d, *next;
-
-    QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
-        QTAILQ_REMOVE(&s->discards, d, next);
-
-        /* Discard is optional, ignore the return value */
-        if (ret >= 0) {
-            bdrv_discard(bs->file->bs,
-                         d->offset >> BDRV_SECTOR_BITS,
-                         d->bytes >> BDRV_SECTOR_BITS);
-        }
-
-        g_free(d);
-    }
-}
-
-static void update_refcount_discard(BlockDriverState *bs,
-                                    uint64_t offset, uint64_t length)
-{
-    BDRVQcow2State *s = bs->opaque;
-    Qcow2DiscardRegion *d, *p, *next;
-
-    QTAILQ_FOREACH(d, &s->discards, next) {
-        uint64_t new_start = MIN(offset, d->offset);
-        uint64_t new_end = MAX(offset + length, d->offset + d->bytes);
-
-        if (new_end - new_start <= length + d->bytes) {
-            /* There can't be any overlap, areas ending up here have no
-             * references any more and therefore shouldn't get freed another
-             * time. */
-            assert(d->bytes + length == new_end - new_start);
-            d->offset = new_start;
-            d->bytes = new_end - new_start;
-            goto found;
-        }
-    }
-
-    d = g_malloc(sizeof(*d));
-    *d = (Qcow2DiscardRegion) {
-        .bs     = bs,
-        .offset = offset,
-        .bytes  = length,
-    };
-    QTAILQ_INSERT_TAIL(&s->discards, d, next);
-
-found:
-    /* Merge discard requests if they are adjacent now */
-    QTAILQ_FOREACH_SAFE(p, &s->discards, next, next) {
-        if (p == d
-            || p->offset > d->offset + d->bytes
-            || d->offset > p->offset + p->bytes)
-        {
-            continue;
-        }
-
-        /* Still no overlap possible */
-        assert(p->offset == d->offset + d->bytes
-            || d->offset == p->offset + p->bytes);
-
-        QTAILQ_REMOVE(&s->discards, p, next);
-        d->offset = MIN(d->offset, p->offset);
-        d->bytes += p->bytes;
-        g_free(p);
-    }
-}
-
-/* XXX: cache several refcount block clusters ? */
-/* @addend is the absolute value of the addend; if @decrease is set, @addend
- * will be subtracted from the current refcount, otherwise it will be added */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
-                                                   int64_t offset,
-                                                   int64_t length,
-                                                   uint64_t addend,
-                                                   bool decrease,
-                                                   enum qcow2_discard_type type)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t start, last, cluster_offset;
-    void *refcount_block = NULL;
-    int64_t old_table_index = -1;
-    int ret;
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
-            " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
-            addend);
-#endif
-    if (length < 0) {
-        return -EINVAL;
-    } else if (length == 0) {
-        return 0;
-    }
-
-    if (decrease) {
-        qcow2_cache_set_dependency(bs, s->refcount_block_cache,
-            s->l2_table_cache);
-    }
-
-    start = start_of_cluster(s, offset);
-    last = start_of_cluster(s, offset + length - 1);
-    for(cluster_offset = start; cluster_offset <= last;
-        cluster_offset += s->cluster_size)
-    {
-        int block_index;
-        uint64_t refcount;
-        int64_t cluster_index = cluster_offset >> s->cluster_bits;
-        int64_t table_index = cluster_index >> s->refcount_block_bits;
-
-        /* Load the refcount block and allocate it if needed */
-        if (table_index != old_table_index) {
-            if (refcount_block) {
-                qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
-            }
-            ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
-            if (ret < 0) {
-                goto fail;
-            }
-        }
-        old_table_index = table_index;
-
-        qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
-                                     refcount_block);
-
-        /* we can update the count and save it */
-        block_index = cluster_index & (s->refcount_block_size - 1);
-
-        refcount = s->get_refcount(refcount_block, block_index);
-        if (decrease ? (refcount - addend > refcount)
-                     : (refcount + addend < refcount ||
-                        refcount + addend > s->refcount_max))
-        {
-            ret = -EINVAL;
-            goto fail;
-        }
-        if (decrease) {
-            refcount -= addend;
-        } else {
-            refcount += addend;
-        }
-        if (refcount == 0 && cluster_index < s->free_cluster_index) {
-            s->free_cluster_index = cluster_index;
-        }
-        s->set_refcount(refcount_block, block_index, refcount);
-
-        if (refcount == 0 && s->discard_passthrough[type]) {
-            update_refcount_discard(bs, cluster_offset, s->cluster_size);
-        }
-    }
-
-    ret = 0;
-fail:
-    if (!s->cache_discards) {
-        qcow2_process_discards(bs, ret);
-    }
-
-    /* Write last changed block to disk */
-    if (refcount_block) {
-        qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
-    }
-
-    /*
-     * Try do undo any updates if an error is returned (This may succeed in
-     * some cases like ENOSPC for allocating a new refcount block)
-     */
-    if (ret < 0) {
-        int dummy;
-        dummy = update_refcount(bs, offset, cluster_offset - offset, addend,
-                                !decrease, QCOW2_DISCARD_NEVER);
-        (void)dummy;
-    }
-
-    return ret;
-}
-
-/*
- * Increases or decreases the refcount of a given cluster.
- *
- * @addend is the absolute value of the addend; if @decrease is set, @addend
- * will be subtracted from the current refcount, otherwise it will be added.
- *
- * On success 0 is returned; on failure -errno is returned.
- */
-int qcow2_update_cluster_refcount(BlockDriverState *bs,
-                                  int64_t cluster_index,
-                                  uint64_t addend, bool decrease,
-                                  enum qcow2_discard_type type)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret;
-
-    ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
-                          decrease, type);
-    if (ret < 0) {
-        return ret;
-    }
-
-    return 0;
-}
-
-
-
-/*********************************************************/
-/* cluster allocation functions */
-
-
-
-/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t i, nb_clusters, refcount;
-    int ret;
-
-    /* We can't allocate clusters if they may still be queued for discard. */
-    if (s->cache_discards) {
-        qcow2_process_discards(bs, 0);
-    }
-
-    nb_clusters = size_to_clusters(s, size);
-retry:
-    for(i = 0; i < nb_clusters; i++) {
-        uint64_t next_cluster_index = s->free_cluster_index++;
-        ret = qcow2_get_refcount(bs, next_cluster_index, &refcount);
-
-        if (ret < 0) {
-            return ret;
-        } else if (refcount != 0) {
-            goto retry;
-        }
-    }
-
-    /* Make sure that all offsets in the "allocated" range are representable
-     * in an int64_t */
-    if (s->free_cluster_index > 0 &&
-        s->free_cluster_index - 1 > (INT64_MAX >> s->cluster_bits))
-    {
-        return -EFBIG;
-    }
-
-#ifdef DEBUG_ALLOC2
-    fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
-            size,
-            (s->free_cluster_index - nb_clusters) << s->cluster_bits);
-#endif
-    return (s->free_cluster_index - nb_clusters) << s->cluster_bits;
-}
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
-{
-    int64_t offset;
-    int ret;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
-    do {
-        offset = alloc_clusters_noref(bs, size);
-        if (offset < 0) {
-            return offset;
-        }
-
-        ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
-    } while (ret == -EAGAIN);
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    return offset;
-}
-
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t cluster_index, refcount;
-    uint64_t i;
-    int ret;
-
-    assert(nb_clusters >= 0);
-    if (nb_clusters == 0) {
-        return 0;
-    }
-
-    do {
-        /* Check how many clusters there are free */
-        cluster_index = offset >> s->cluster_bits;
-        for(i = 0; i < nb_clusters; i++) {
-            ret = qcow2_get_refcount(bs, cluster_index++, &refcount);
-            if (ret < 0) {
-                return ret;
-            } else if (refcount != 0) {
-                break;
-            }
-        }
-
-        /* And then allocate them */
-        ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false,
-                              QCOW2_DISCARD_NEVER);
-    } while (ret == -EAGAIN);
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    return i;
-}
-
-/*
- * Byte-granularity allocator, only used to allocate compressed sectors.
- * We try to allocate contiguous sectors: a new allocation is placed right
- * behind the previous one (s->free_byte_offset) whenever possible.
- *
- * size must be > 0 and <= s->cluster_size (asserted below).
- *
- * When there is no position to continue from, or the space left is too
- * small, a cluster is requested from alloc_clusters_noref(). The reference
- * on the byte range is then taken with update_refcount(); the whole step is
- * repeated for as long as that call returns -EAGAIN.
- *
- * Returns the byte offset of the allocated area, or a negative errno value
- * on failure.
- */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t offset;
-    size_t free_in_cluster;
-    int ret;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
-    assert(size > 0 && size <= s->cluster_size);
-    assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset)); /* 0, or not cluster aligned */
-
-    offset = s->free_byte_offset;
-
-    if (offset) {
-        uint64_t refcount;
-        ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
-        if (ret < 0) {
-            return ret;
-        }
-
-        if (refcount == s->refcount_max) {
-            offset = 0; /* cluster cannot take another reference: start anew */
-        }
-    }
-
-    free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
-    do {
-        if (!offset || free_in_cluster < size) {
-            int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
-            if (new_cluster < 0) {
-                return new_cluster;
-            }
-
-            if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
-                offset = new_cluster; /* not adjacent: restart at the new cluster */
-                free_in_cluster = s->cluster_size;
-            } else {
-                free_in_cluster += s->cluster_size; /* adjacent: keep offset, span both */
-            }
-        }
-
-        assert(offset);
-        ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
-        if (ret < 0) {
-            offset = 0; /* forces a fresh cluster if the loop retries */
-        }
-    } while (ret == -EAGAIN);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* The cluster refcount was incremented; refcount blocks must be flushed
-     * before the caller's L2 table updates. */
-    qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
-
-    s->free_byte_offset = offset + size; /* where the next call continues */
-    if (!offset_into_cluster(s, s->free_byte_offset)) {
-        s->free_byte_offset = 0; /* ended on a cluster boundary: nothing left */
-    }
-
-    return offset;
-}
-
-/*
- * Drops one reference from every cluster in the byte range
- * [offset, offset + size), passing the discard type on to
- * update_refcount().
- *
- * A failure is only reported on stderr; nothing is returned to the caller.
- */
-void qcow2_free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size,
-                          enum qcow2_discard_type type)
-{
-    int rc;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
-
-    rc = update_refcount(bs, offset, size, 1, true, type);
-    if (rc >= 0) {
-        return;
-    }
-
-    /* TODO Remember the clusters to free them later and avoid leaking */
-    fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-rc));
-}
-
-/*
- * Frees whatever an L2 entry refers to, for every cluster type:
- *
- *  - compressed: the 512-byte sectors covered by the compressed data
- *  - normal/zero: nb_clusters clusters starting at the entry's host offset,
- *    if it has one; an unaligned host offset is reported through
- *    qcow2_signal_corruption() and nothing is freed
- *  - unallocated: nothing to do
- *
- * Any other cluster type aborts.
- */
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
-                             int nb_clusters, enum qcow2_discard_type type)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int csectors;
-
-    switch (qcow2_get_cluster_type(l2_entry)) {
-    case QCOW2_CLUSTER_UNALLOCATED:
-        return;
-
-    case QCOW2_CLUSTER_COMPRESSED:
-        csectors = ((l2_entry >> s->csize_shift) & s->csize_mask) + 1;
-        qcow2_free_clusters(bs,
-                            (l2_entry & s->cluster_offset_mask) & ~511,
-                            csectors * 512, type);
-        return;
-
-    case QCOW2_CLUSTER_NORMAL:
-    case QCOW2_CLUSTER_ZERO:
-        if (!(l2_entry & L2E_OFFSET_MASK)) {
-            /* No host cluster behind this entry */
-            return;
-        }
-        if (!offset_into_cluster(s, l2_entry & L2E_OFFSET_MASK)) {
-            qcow2_free_clusters(bs, l2_entry & L2E_OFFSET_MASK,
-                                nb_clusters << s->cluster_bits, type);
-            return;
-        }
-        qcow2_signal_corruption(bs, false, -1, -1,
-                                "Cannot free unaligned cluster %#llx",
-                                l2_entry & L2E_OFFSET_MASK);
-        return;
-
-    default:
-        abort();
-    }
-}
-
-
-
-/*********************************************************/
-/* snapshots and image creation */
-
-
-
-/*
- * update the refcounts of snapshots and the copied flag
- *
- * Walks the L1 table at l1_table_offset (l1_size entries) and every L2
- * table it references. When addend is non-zero, the refcount of each
- * referenced data cluster and of each L2 table cluster is changed by
- * addend; addend must be -1, 0 or 1 (asserted below). Afterwards
- * QCOW_OFLAG_COPIED is recomputed for every entry: it is set when the
- * refcount is exactly 1.
- *
- * If l1_table_offset is the active L1 table, s->l1_table is used and
- * updated in place; otherwise the table is read from the image into a
- * temporary buffer that is freed before returning.
- *
- * Discards are cached for the duration of the walk (s->cache_discards) and
- * handed to qcow2_process_discards() at the end.
- *
- * Returns 0 on success or a negative errno value on failure.
- */
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
-    int64_t l1_table_offset, int l1_size, int addend)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
-    bool l1_allocated = false;
-    int64_t old_offset, old_l2_offset;
-    int i, j, l1_modified = 0, nb_csectors;
-    int ret;
-
-    assert(addend >= -1 && addend <= 1);
-
-    l2_table = NULL;
-    l1_table = NULL;
-    l1_size2 = l1_size * sizeof(uint64_t); /* L1 table size in bytes */
-
-    s->cache_discards = true;
-
-    /* WARNING: qcow2_snapshot_goto relies on this function not using the
-     * l1_table_offset when it is the current s->l1_table_offset! Be careful
-     * when changing this! */
-    if (l1_table_offset != s->l1_table_offset) {
-        l1_table = g_try_malloc0(align_offset(l1_size2, 512));
-        if (l1_size2 && l1_table == NULL) {
-            ret = -ENOMEM;
-            goto fail;
-        }
-        l1_allocated = true;
-
-        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        for(i = 0;i < l1_size; i++)
-            be64_to_cpus(&l1_table[i]);
-    } else {
-        assert(l1_size == s->l1_size);
-        l1_table = s->l1_table;
-        l1_allocated = false;
-    }
-
-    for(i = 0; i < l1_size; i++) {
-        l2_offset = l1_table[i];
-        if (l2_offset) {
-            old_l2_offset = l2_offset; /* kept to detect a changed L1 entry */
-            l2_offset &= L1E_OFFSET_MASK;
-
-            if (offset_into_cluster(s, l2_offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
-                                        PRIx64 " unaligned (L1 index: %#x)",
-                                        l2_offset, i);
-                ret = -EIO;
-                goto fail;
-            }
-
-            ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
-                (void**) &l2_table);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            for(j = 0; j < s->l2_size; j++) {
-                uint64_t cluster_index;
-
-                offset = be64_to_cpu(l2_table[j]);
-                old_offset = offset; /* kept to detect a changed L2 entry */
-                offset &= ~QCOW_OFLAG_COPIED;
-
-                switch (qcow2_get_cluster_type(offset)) {
-                    case QCOW2_CLUSTER_COMPRESSED:
-                        nb_csectors = ((offset >> s->csize_shift) &
-                                       s->csize_mask) + 1;
-                        if (addend != 0) {
-                            ret = update_refcount(bs,
-                                (offset & s->cluster_offset_mask) & ~511,
-                                nb_csectors * 512, abs(addend), addend < 0,
-                                QCOW2_DISCARD_SNAPSHOT);
-                            if (ret < 0) {
-                                goto fail;
-                            }
-                        }
-                        /* compressed clusters are never modified; a value
-                         * other than 1 keeps QCOW_OFLAG_COPIED from being
-                         * set below */
-                        refcount = 2;
-                        break;
-
-                    case QCOW2_CLUSTER_NORMAL:
-                    case QCOW2_CLUSTER_ZERO:
-                        if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
-                            qcow2_signal_corruption(bs, true, -1, -1, "Data "
-                                                    "cluster offset %#llx "
-                                                    "unaligned (L2 offset: %#"
-                                                    PRIx64 ", L2 index: %#x)",
-                                                    offset & L2E_OFFSET_MASK,
-                                                    l2_offset, j);
-                            ret = -EIO;
-                            goto fail;
-                        }
-
-                        cluster_index = (offset & L2E_OFFSET_MASK) >> s->cluster_bits;
-                        if (!cluster_index) {
-                            /* unallocated */
-                            refcount = 0;
-                            break;
-                        }
-                        if (addend != 0) {
-                            ret = qcow2_update_cluster_refcount(bs,
-                                    cluster_index, abs(addend), addend < 0,
-                                    QCOW2_DISCARD_SNAPSHOT);
-                            if (ret < 0) {
-                                goto fail;
-                            }
-                        }
-
-                        ret = qcow2_get_refcount(bs, cluster_index, &refcount);
-                        if (ret < 0) {
-                            goto fail;
-                        }
-                        break;
-
-                    case QCOW2_CLUSTER_UNALLOCATED:
-                        refcount = 0;
-                        break;
-
-                    default:
-                        abort();
-                }
-
-                if (refcount == 1) {
-                    offset |= QCOW_OFLAG_COPIED;
-                }
-                if (offset != old_offset) {
-                    if (addend > 0) {
-                        qcow2_cache_set_dependency(bs, s->l2_table_cache,
-                            s->refcount_block_cache);
-                    }
-                    l2_table[j] = cpu_to_be64(offset);
-                    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
-                                                 l2_table);
-                }
-            }
-
-            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
-
-            if (addend != 0) { /* now the cluster holding the L2 table itself */
-                ret = qcow2_update_cluster_refcount(bs, l2_offset >>
-                                                        s->cluster_bits,
-                                                    abs(addend), addend < 0,
-                                                    QCOW2_DISCARD_SNAPSHOT);
-                if (ret < 0) {
-                    goto fail;
-                }
-            }
-            ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
-                                     &refcount);
-            if (ret < 0) {
-                goto fail;
-            } else if (refcount == 1) {
-                l2_offset |= QCOW_OFLAG_COPIED;
-            }
-            if (l2_offset != old_l2_offset) {
-                l1_table[i] = l2_offset;
-                l1_modified = 1;
-            }
-        }
-    }
-
-    ret = bdrv_flush(bs);
-fail: /* also reached on success, with ret holding the bdrv_flush() result */
-    if (l2_table) {
-        qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    }
-
-    s->cache_discards = false;
-    qcow2_process_discards(bs, ret);
-
-    /* Update L1 only if it isn't deleted anyway (addend = -1). The table is
-     * converted to big endian for the write and back again afterwards. */
-    if (ret == 0 && addend >= 0 && l1_modified) {
-        for (i = 0; i < l1_size; i++) {
-            cpu_to_be64s(&l1_table[i]);
-        }
-
-        ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
-                               l1_table, l1_size2);
-
-        for (i = 0; i < l1_size; i++) {
-            be64_to_cpus(&l1_table[i]);
-        }
-    }
-    if (l1_allocated)
-        g_free(l1_table);
-    return ret;
-}
-
-
-
-
-/*********************************************************/
-/* refcount checking functions */
-
-
-/*
- * Returns the number of bytes needed to store the given number of refcount
- * entries, each of which is (1 << s->refcount_order) bits wide, rounded up
- * to whole bytes.
- */
-static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
-{
-    uint64_t total_bits;
-
-    /* There is no way to address more than 2^(64 - 9) clusters at once
-     * (the smallest cluster size is 512 = 2^9 bytes and offsets have to be
-     * representable in bytes). Every cluster corresponds to exactly one
-     * refcount entry, so the entry count stays well below that limit. */
-    assert(entries < (UINT64_C(1) << (64 - 9)));
-
-    /* entries << s->refcount_order == entries * s->refcount_bits; given
-     * the assertion above and s->refcount_order < 7, the shift cannot
-     * overflow. */
-    total_bits = entries << s->refcount_order;
-
-    return DIV_ROUND_UP(total_bits, 8);
-}
-
-/**
- * Resizes *array so that it has room for new_size refcount entries. *size
- * is the number of entries *array holds on entry.
- *
- * On success 0 is returned, *array points to the (possibly moved) buffer and
- * *size equals new_size. On failure -errno is returned and neither *array
- * nor *size is touched.
- *
- * The buffer size is always a whole number of clusters, and any area gained
- * by growing the buffer is zero-filled.
- */
-static int realloc_refcount_array(BDRVQcow2State *s, void **array,
-                                  int64_t *size, int64_t new_size)
-{
-    int64_t bytes_before, bytes_after;
-
-    /* Round to clusters so the array can be directly written to disk */
-    bytes_before = size_to_clusters(s, refcount_array_byte_size(s, *size))
-                   * s->cluster_size;
-    bytes_after = size_to_clusters(s, refcount_array_byte_size(s, new_size))
-                  * s->cluster_size;
-
-    if (bytes_after != bytes_before) {
-        void *resized;
-
-        assert(bytes_after > 0);
-
-        if (bytes_after > SIZE_MAX) {
-            return -ENOMEM;
-        }
-
-        resized = g_try_realloc(*array, bytes_after);
-        if (resized == NULL) {
-            return -ENOMEM;
-        }
-
-        /* Only the newly gained tail needs to be cleared */
-        if (bytes_after > bytes_before) {
-            memset((char *)resized + bytes_before, 0,
-                   bytes_after - bytes_before);
-        }
-
-        *array = resized;
-    }
-
-    *size = new_size;
-    return 0;
-}
-
-/*
- * Adds one reference, in the in-memory table *refcount_table, to every
- * cluster touched by the byte range [offset, offset + size). The table is
- * grown on demand when a cluster lies beyond *refcount_table_size.
- *
- * This is used to build a temporary refcount table from the L1 and L2
- * tables, which can then be compared with the refcounts stored in the image.
- *
- * A cluster whose entry already equals s->refcount_max is reported on
- * stderr and counted in res->corruptions instead of being incremented; a
- * failure to grow the table is counted in res->check_errors.
- *
- * Returns 0 on success (also when size <= 0) or -errno if the table could
- * not be grown.
- */
-static int inc_refcounts(BlockDriverState *bs,
-                         BdrvCheckResult *res,
-                         void **refcount_table,
-                         int64_t *refcount_table_size,
-                         int64_t offset, int64_t size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t first, final, pos;
-
-    if (size <= 0) {
-        return 0;
-    }
-
-    first = start_of_cluster(s, offset);
-    final = start_of_cluster(s, offset + size - 1);
-
-    for (pos = first; pos <= final; pos += s->cluster_size) {
-        uint64_t cluster_idx = pos >> s->cluster_bits;
-        uint64_t count;
-
-        if (cluster_idx >= *refcount_table_size) {
-            int ret = realloc_refcount_array(s, refcount_table,
-                                             refcount_table_size,
-                                             cluster_idx + 1);
-            if (ret < 0) {
-                res->check_errors++;
-                return ret;
-            }
-        }
-
-        count = s->get_refcount(*refcount_table, cluster_idx);
-        if (count != s->refcount_max) {
-            s->set_refcount(*refcount_table, cluster_idx, count + 1);
-        } else {
-            /* The entry is saturated and cannot count any higher */
-            fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
-                    "\n", pos);
-            fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
-                    "width or qemu-img convert to create a clean copy if the "
-                    "image cannot be opened for writing\n");
-            res->corruptions++;
-        }
-    }
-
-    return 0;
-}
-
-/* Flags for check_refcounts_l1() and check_refcounts_l2(), passed in their
- * "flags" argument and tested there with a bitwise AND. */
-enum {
-    CHECK_FRAG_INFO = 0x2,      /* update BlockFragInfo counters */
-};
-
-/*
- * Increases the refcount in the given refcount table for all clusters
- * referenced in the L2 table at l2_offset. While doing so, performs some
- * checks on L2 entries.
- *
- * Problems found by the checks are counted in res (res->corruptions); they
- * are not returned. With CHECK_FRAG_INFO set in flags, the fragmentation
- * counters in res->bfi are updated as well.
- *
- * Returns 0 on success or -errno if an internal error occurred (the L2
- * table could not be read, or inc_refcounts() failed).
- */
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
-                              void **refcount_table,
-                              int64_t *refcount_table_size, int64_t l2_offset,
-                              int flags)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table, l2_entry;
-    uint64_t next_contiguous_offset = 0;
-    int i, l2_size, nb_csectors, ret;
-
-    /* Read L2 table from disk (l2_size is its size in bytes here) */
-    l2_size = s->l2_size * sizeof(uint64_t);
-    l2_table = g_malloc(l2_size);
-
-    ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
-    if (ret < 0) {
-        fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
-        res->check_errors++;
-        goto fail;
-    }
-
-    /* Do the actual checks */
-    for(i = 0; i < s->l2_size; i++) {
-        l2_entry = be64_to_cpu(l2_table[i]);
-
-        switch (qcow2_get_cluster_type(l2_entry)) {
-        case QCOW2_CLUSTER_COMPRESSED:
-            /* Compressed clusters don't have QCOW_OFLAG_COPIED */
-            if (l2_entry & QCOW_OFLAG_COPIED) {
-                fprintf(stderr, "ERROR: cluster %" PRId64 ": "
-                    "copied flag must never be set for compressed "
-                    "clusters\n", l2_entry >> s->cluster_bits);
-                l2_entry &= ~QCOW_OFLAG_COPIED;
-                res->corruptions++;
-            }
-
-            /* Mark cluster as used: the reference covers the 512-byte
-             * sectors that hold the compressed data */
-            nb_csectors = ((l2_entry >> s->csize_shift) &
-                           s->csize_mask) + 1;
-            l2_entry &= s->cluster_offset_mask;
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                l2_entry & ~511, nb_csectors * 512);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            if (flags & CHECK_FRAG_INFO) {
-                res->bfi.allocated_clusters++;
-                res->bfi.compressed_clusters++;
-
-                /* Compressed clusters are fragmented by nature.  Since they
-                 * take up sub-sector space but we only have sector granularity
-                 * I/O we need to re-read the same sectors even for adjacent
-                 * compressed clusters.
-                 */
-                res->bfi.fragmented_clusters++;
-            }
-            break;
-
-        case QCOW2_CLUSTER_ZERO:
-            if ((l2_entry & L2E_OFFSET_MASK) == 0) {
-                break; /* zero cluster without a host cluster: nothing to count */
-            }
-            /* fall through */
-
-        case QCOW2_CLUSTER_NORMAL:
-        {
-            uint64_t offset = l2_entry & L2E_OFFSET_MASK;
-
-            if (flags & CHECK_FRAG_INFO) {
-                res->bfi.allocated_clusters++;
-                if (next_contiguous_offset &&
-                    offset != next_contiguous_offset) {
-                    res->bfi.fragmented_clusters++; /* not right behind the previous one */
-                }
-                next_contiguous_offset = offset + s->cluster_size;
-            }
-
-            /* Mark cluster as used */
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                offset, s->cluster_size);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            /* Correct offsets are cluster aligned */
-            if (offset_into_cluster(s, offset)) {
-                fprintf(stderr, "ERROR offset=%" PRIx64 ": Cluster is not "
-                    "properly aligned; L2 entry corrupted.\n", offset);
-                res->corruptions++;
-            }
-            break;
-        }
-
-        case QCOW2_CLUSTER_UNALLOCATED:
-            break;
-
-        default:
-            abort();
-        }
-    }
-
-    g_free(l2_table);
-    return 0;
-
-fail:
-    g_free(l2_table);
-    return ret;
-}
-
-/*
- * Increases the refcount for the L1 table, its L2 tables and all referenced
- * clusters in the given refcount table. While doing so, performs some checks
- * on L1 and L2 entries.
- *
- * The L1 table (l1_size entries) is read from the image at l1_table_offset.
- * Problems found by the checks are counted in res (res->corruptions,
- * res->check_errors); they are not returned. flags is passed on unchanged to
- * check_refcounts_l2().
- *
- * Returns 0 on success or -errno if an internal error occurred.
- */
-static int check_refcounts_l1(BlockDriverState *bs,
-                              BdrvCheckResult *res,
-                              void **refcount_table,
-                              int64_t *refcount_table_size,
-                              int64_t l1_table_offset, int l1_size,
-                              int flags)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l1_table = NULL, l2_offset, l1_size2;
-    int i, ret;
-
-    l1_size2 = l1_size * sizeof(uint64_t); /* L1 table size in bytes */
-
-    /* Mark L1 table as used */
-    ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                        l1_table_offset, l1_size2);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Read L1 table entries from disk and convert them to host byte order;
-     * l1_table stays NULL for an empty table */
-    if (l1_size2 > 0) {
-        l1_table = g_try_malloc(l1_size2);
-        if (l1_table == NULL) {
-            ret = -ENOMEM;
-            res->check_errors++;
-            goto fail;
-        }
-        ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
-            res->check_errors++;
-            goto fail;
-        }
-        for(i = 0;i < l1_size; i++)
-            be64_to_cpus(&l1_table[i]);
-    }
-
-    /* Do the actual checks */
-    for(i = 0; i < l1_size; i++) {
-        l2_offset = l1_table[i];
-        if (l2_offset) {
-            /* Mark L2 table as used */
-            l2_offset &= L1E_OFFSET_MASK;
-            ret = inc_refcounts(bs, res, refcount_table, refcount_table_size,
-                                l2_offset, s->cluster_size);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            /* L2 tables are cluster aligned; a misaligned one is counted as
-             * a corruption but still processed below */
-            if (offset_into_cluster(s, l2_offset)) {
-                fprintf(stderr, "ERROR l2_offset=%" PRIx64 ": Table is not "
-                    "cluster aligned; L1 entry corrupted\n", l2_offset);
-                res->corruptions++;
-            }
-
-            /* Process and check L2 entries */
-            ret = check_refcounts_l2(bs, res, refcount_table,
-                                     refcount_table_size, l2_offset, flags);
-            if (ret < 0) {
-                goto fail;
-            }
-        }
-    }
-    g_free(l1_table);
-    return 0;
-
-fail:
-    g_free(l1_table);
-    return ret;
-}
-
-/*
- * Checks the OFLAG_COPIED flag for all L1 and L2 entries.
- *
- * For every entry of the active L1 table, and for every normal (or zero
- * with a host offset) entry of the L2 tables it references, the flag is
- * expected to be set exactly when the refcount of the referenced cluster
- * is 1. A mismatch is reported on stderr; with BDRV_FIX_ERRORS set in fix
- * the entry is corrected and written back (res->corruptions_fixed),
- * otherwise it is counted in res->corruptions.
- *
- * This function does not print an error message nor does it increment
- * check_errors if qcow2_get_refcount fails (this is because such an error will
- * have been already detected and sufficiently signaled by the calling function
- * (qcow2_check_refcounts) by the time this function is called).
- *
- * Returns 0 on success or -errno if reading or writing a table failed.
- */
-static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
-                              BdrvCheckMode fix)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); /* buffer reused for every L2 table */
-    int ret;
-    uint64_t refcount;
-    int i, j;
-
-    for (i = 0; i < s->l1_size; i++) {
-        uint64_t l1_entry = s->l1_table[i];
-        uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;
-        bool l2_dirty = false;
-
-        if (!l2_offset) {
-            continue;
-        }
-
-        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
-                                 &refcount);
-        if (ret < 0) {
-            /* don't print message nor increment check_errors */
-            continue;
-        }
-        if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
-            fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
-                    "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" :
-                                            "ERROR",
-                    i, l1_entry, refcount);
-            if (fix & BDRV_FIX_ERRORS) {
-                s->l1_table[i] = refcount == 1
-                               ? l1_entry |  QCOW_OFLAG_COPIED
-                               : l1_entry & ~QCOW_OFLAG_COPIED;
-                ret = qcow2_write_l1_entry(bs, i);
-                if (ret < 0) {
-                    res->check_errors++;
-                    goto fail;
-                }
-                res->corruptions_fixed++;
-            } else {
-                res->corruptions++;
-            }
-        }
-
-        ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
-                         s->l2_size * sizeof(uint64_t));
-        if (ret < 0) {
-            fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
-                    strerror(-ret));
-            res->check_errors++;
-            goto fail;
-        }
-
-        for (j = 0; j < s->l2_size; j++) {
-            uint64_t l2_entry = be64_to_cpu(l2_table[j]);
-            uint64_t data_offset = l2_entry & L2E_OFFSET_MASK;
-            int cluster_type = qcow2_get_cluster_type(l2_entry);
-
-            if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
-                ((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
-                ret = qcow2_get_refcount(bs,
-                                         data_offset >> s->cluster_bits,
-                                         &refcount);
-                if (ret < 0) {
-                    /* don't print message nor increment check_errors */
-                    continue;
-                }
-                if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
-                    fprintf(stderr, "%s OFLAG_COPIED data cluster: "
-                            "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
-                            fix & BDRV_FIX_ERRORS ? "Repairing" :
-                                                    "ERROR",
-                            l2_entry, refcount);
-                    if (fix & BDRV_FIX_ERRORS) {
-                        l2_table[j] = cpu_to_be64(refcount == 1
-                                    ? l2_entry |  QCOW_OFLAG_COPIED
-                                    : l2_entry & ~QCOW_OFLAG_COPIED);
-                        l2_dirty = true; /* written back once after the loop */
-                        res->corruptions_fixed++;
-                    } else {
-                        res->corruptions++;
-                    }
-                }
-            }
-        }
-
-        if (l2_dirty) {
-            ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
-                                                l2_offset, s->cluster_size);
-            if (ret < 0) {
-                fprintf(stderr, "ERROR: Could not write L2 table; metadata "
-                        "overlap check failed: %s\n", strerror(-ret));
-                res->check_errors++;
-                goto fail;
-            }
-
-            ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
-                              s->cluster_size);
-            if (ret < 0) {
-                fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
-                        strerror(-ret));
-                res->check_errors++;
-                goto fail;
-            }
-        }
-    }
-
-    ret = 0;
-
-fail: /* also reached on success, with ret == 0 */
-    qemu_vfree(l2_table);
-    return ret;
-}
-
-/*
- * Checks consistency of refblocks and accounts for each refblock in
- * *refcount_table.
- *
- * Every entry of s->refcount_table is examined:
- *  - an offset that is not cluster aligned is counted as a corruption and
- *    requests a rebuild (*rebuild = true);
- *  - a refblock at or beyond *nb_clusters is outside the image. With
- *    BDRV_FIX_ERRORS set in fix, the image file is truncated to end after
- *    that refblock and *refcount_table is grown to match; if that does not
- *    work out, or without BDRV_FIX_ERRORS, it is counted as a corruption;
- *  - any other non-zero offset is accounted for with inc_refcounts(), and
- *    its refcount must then be exactly 1, otherwise a corruption is counted
- *    and a rebuild is requested.
- *
- * Returns 0 on success or -errno if growing or updating *refcount_table
- * failed.
- */
-static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
-                           BdrvCheckMode fix, bool *rebuild,
-                           void **refcount_table, int64_t *nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t i, size;
-    int ret;
-
-    for(i = 0; i < s->refcount_table_size; i++) {
-        uint64_t offset, cluster;
-        offset = s->refcount_table[i];
-        cluster = offset >> s->cluster_bits;
-
-        /* Refcount blocks are cluster aligned */
-        if (offset_into_cluster(s, offset)) {
-            fprintf(stderr, "ERROR refcount block %" PRId64 " is not "
-                "cluster aligned; refcount table entry corrupted\n", i);
-            res->corruptions++;
-            *rebuild = true;
-            continue;
-        }
-
-        if (cluster >= *nb_clusters) {
-            fprintf(stderr, "%s refcount block %" PRId64 " is outside image\n",
-                    fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
-
-            if (fix & BDRV_FIX_ERRORS) {
-                int64_t new_nb_clusters;
-
-                if (offset > INT64_MAX - s->cluster_size) { /* new end would not fit into int64_t */
-                    ret = -EINVAL;
-                    goto resize_fail;
-                }
-
-                ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
-                if (ret < 0) {
-                    goto resize_fail;
-                }
-                size = bdrv_getlength(bs->file->bs);
-                if (size < 0) {
-                    ret = size;
-                    goto resize_fail;
-                }
-
-                new_nb_clusters = size_to_clusters(s, size);
-                assert(new_nb_clusters >= *nb_clusters);
-
-                ret = realloc_refcount_array(s, refcount_table,
-                                             nb_clusters, new_nb_clusters);
-                if (ret < 0) {
-                    res->check_errors++;
-                    return ret;
-                }
-
-                if (cluster >= *nb_clusters) { /* still outside after resizing */
-                    ret = -EINVAL;
-                    goto resize_fail;
-                }
-
-                res->corruptions_fixed++;
-                ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                                    offset, s->cluster_size);
-                if (ret < 0) {
-                    return ret;
-                }
-                /* No need to check whether the refcount is now greater than 1:
-                 * This area was just allocated and zeroed, so it can only be
-                 * exactly 1 after inc_refcounts() */
-                continue;
-
-resize_fail:
-                res->corruptions++;
-                *rebuild = true;
-                fprintf(stderr, "ERROR could not resize image: %s\n",
-                        strerror(-ret));
-            } else {
-                res->corruptions++;
-            }
-            continue;
-        }
-
-        if (offset != 0) {
-            ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                                offset, s->cluster_size);
-            if (ret < 0) {
-                return ret;
-            }
-            if (s->get_refcount(*refcount_table, cluster) != 1) {
-                fprintf(stderr, "ERROR refcount block %" PRId64
-                        " refcount=%" PRIu64 "\n", i,
-                        s->get_refcount(*refcount_table, cluster));
-                res->corruptions++;
-                *rebuild = true;
-            }
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Calculates an in-memory refcount table.
- *
- * If *refcount_table is NULL, it is first set up for *nb_clusters entries
- * through realloc_refcount_array(). References are then counted, in this
- * order, for: the image header (first cluster), the active L1 table and
- * everything below it, the L1 table of every snapshot, the snapshot table,
- * the refcount table and finally the refcount blocks (check_refblocks()).
- *
- * Returns 0 on success or -errno as soon as one of these steps fails.
- */
-static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                               BdrvCheckMode fix, bool *rebuild,
-                               void **refcount_table, int64_t *nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t idx;
-    int ret;
-
-    if (*refcount_table == NULL) {
-        int64_t current_size = 0;
-
-        ret = realloc_refcount_array(s, refcount_table,
-                                     &current_size, *nb_clusters);
-        if (ret < 0) {
-            res->check_errors++;
-            return ret;
-        }
-    }
-
-    /* header */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        0, s->cluster_size);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* current L1 table; only this one feeds the fragmentation statistics */
-    ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
-                             s->l1_table_offset, s->l1_size, CHECK_FRAG_INFO);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* L1 tables of the snapshots */
-    for (idx = 0; idx < s->nb_snapshots; idx++) {
-        QCowSnapshot *snapshot = &s->snapshots[idx];
-
-        ret = check_refcounts_l1(bs, res, refcount_table, nb_clusters,
-                                 snapshot->l1_table_offset,
-                                 snapshot->l1_size, 0);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    /* snapshot table */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        s->snapshots_offset, s->snapshots_size);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* refcount data */
-    ret = inc_refcounts(bs, res, refcount_table, nb_clusters,
-                        s->refcount_table_offset,
-                        s->refcount_table_size * sizeof(uint64_t));
-    if (ret < 0) {
-        return ret;
-    }
-
-    return check_refblocks(bs, res, fix, rebuild, refcount_table, nb_clusters);
-}
-
-/*
- * Compares the actual reference count for each cluster in the image against the
- * refcount as reported by the refcount structures on-disk.
- */
-static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                              BdrvCheckMode fix, bool *rebuild,
-                              int64_t *highest_cluster,
-                              void *refcount_table, int64_t nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t i;
-    uint64_t refcount1, refcount2;
-    int ret;
-
-    for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
-        ret = qcow2_get_refcount(bs, i, &refcount1);
-        if (ret < 0) {
-            fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
-                    i, strerror(-ret));
-            res->check_errors++;
-            continue;
-        }
-
-        refcount2 = s->get_refcount(refcount_table, i);
-
-        if (refcount1 > 0 || refcount2 > 0) {
-            *highest_cluster = i;
-        }
-
-        if (refcount1 != refcount2) {
-            /* Check if we're allowed to fix the mismatch */
-            int *num_fixed = NULL;
-            if (refcount1 == 0) {
-                *rebuild = true;
-            } else if (refcount1 > refcount2 && (fix & BDRV_FIX_LEAKS)) {
-                num_fixed = &res->leaks_fixed;
-            } else if (refcount1 < refcount2 && (fix & BDRV_FIX_ERRORS)) {
-                num_fixed = &res->corruptions_fixed;
-            }
-
-            fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
-                    " reference=%" PRIu64 "\n",
-                   num_fixed != NULL     ? "Repairing" :
-                   refcount1 < refcount2 ? "ERROR" :
-                                           "Leaked",
-                   i, refcount1, refcount2);
-
-            if (num_fixed) {
-                ret = update_refcount(bs, i << s->cluster_bits, 1,
-                                      refcount_diff(refcount1, refcount2),
-                                      refcount1 > refcount2,
-                                      QCOW2_DISCARD_ALWAYS);
-                if (ret >= 0) {
-                    (*num_fixed)++;
-                    continue;
-                }
-            }
-
-            /* And if we couldn't, print an error */
-            if (refcount1 < refcount2) {
-                res->corruptions++;
-            } else {
-                res->leaks++;
-            }
-        }
-    }
-}
-
-/*
- * Allocates clusters using an in-memory refcount table (IMRT) in contrast to
- * the on-disk refcount structures.
- *
- * On input, *first_free_cluster tells where to start looking, and need not
- * actually be a free cluster; the returned offset will not be before that
- * cluster.  On output, *first_free_cluster points to the first gap found, even
- * if that gap was too small to be used as the returned offset.
- *
- * Note that *first_free_cluster is a cluster index whereas the return value is
- * an offset.
- */
-static int64_t alloc_clusters_imrt(BlockDriverState *bs,
-                                   int cluster_count,
-                                   void **refcount_table,
-                                   int64_t *imrt_nb_clusters,
-                                   int64_t *first_free_cluster)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t cluster = *first_free_cluster, i;
-    bool first_gap = true;
-    int contiguous_free_clusters;
-    int ret;
-
-    /* Starting at *first_free_cluster, find a range of at least cluster_count
-     * continuously free clusters */
-    for (contiguous_free_clusters = 0;
-         cluster < *imrt_nb_clusters &&
-         contiguous_free_clusters < cluster_count;
-         cluster++)
-    {
-        if (!s->get_refcount(*refcount_table, cluster)) {
-            contiguous_free_clusters++;
-            if (first_gap) {
-                /* If this is the first free cluster found, update
-                 * *first_free_cluster accordingly */
-                *first_free_cluster = cluster;
-                first_gap = false;
-            }
-        } else if (contiguous_free_clusters) {
-            contiguous_free_clusters = 0;
-        }
-    }
-
-    /* If contiguous_free_clusters is greater than zero, it contains the number
-     * of continuously free clusters until the current cluster; the first free
-     * cluster in the current "gap" is therefore
-     * cluster - contiguous_free_clusters */
-
-    /* If no such range could be found, grow the in-memory refcount table
-     * accordingly to append free clusters at the end of the image */
-    if (contiguous_free_clusters < cluster_count) {
-        /* contiguous_free_clusters clusters are already empty at the image end;
-         * we need cluster_count clusters; therefore, we have to allocate
-         * cluster_count - contiguous_free_clusters new clusters at the end of
-         * the image (which is the current value of cluster; note that cluster
-         * may exceed old_imrt_nb_clusters if *first_free_cluster pointed beyond
-         * the image end) */
-        ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters,
-                                     cluster + cluster_count
-                                     - contiguous_free_clusters);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    /* Go back to the first free cluster */
-    cluster -= contiguous_free_clusters;
-    for (i = 0; i < cluster_count; i++) {
-        s->set_refcount(*refcount_table, cluster + i, 1);
-    }
-
-    return cluster << s->cluster_bits;
-}
-
-/*
- * Creates a new refcount structure based solely on the in-memory information
- * given through *refcount_table. All necessary allocations will be reflected
- * in that array.
- *
- * On success, the old refcount structure is leaked (it will be covered by the
- * new refcount structure).
- */
-static int rebuild_refcount_structure(BlockDriverState *bs,
-                                      BdrvCheckResult *res,
-                                      void **refcount_table,
-                                      int64_t *nb_clusters)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
-    int64_t refblock_offset, refblock_start, refblock_index;
-    uint32_t reftable_size = 0;
-    uint64_t *on_disk_reftable = NULL;
-    void *on_disk_refblock;
-    int ret = 0;
-    struct {
-        uint64_t reftable_offset;
-        uint32_t reftable_clusters;
-    } QEMU_PACKED reftable_offset_and_clusters;
-
-    qcow2_cache_empty(bs, s->refcount_block_cache);
-
-write_refblocks:
-    for (; cluster < *nb_clusters; cluster++) {
-        if (!s->get_refcount(*refcount_table, cluster)) {
-            continue;
-        }
-
-        refblock_index = cluster >> s->refcount_block_bits;
-        refblock_start = refblock_index << s->refcount_block_bits;
-
-        /* Don't allocate a cluster in a refblock already written to disk */
-        if (first_free_cluster < refblock_start) {
-            first_free_cluster = refblock_start;
-        }
-        refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
-                                              nb_clusters, &first_free_cluster);
-        if (refblock_offset < 0) {
-            fprintf(stderr, "ERROR allocating refblock: %s\n",
-                    strerror(-refblock_offset));
-            res->check_errors++;
-            ret = refblock_offset;
-            goto fail;
-        }
-
-        if (reftable_size <= refblock_index) {
-            uint32_t old_reftable_size = reftable_size;
-            uint64_t *new_on_disk_reftable;
-
-            reftable_size = ROUND_UP((refblock_index + 1) * sizeof(uint64_t),
-                                     s->cluster_size) / sizeof(uint64_t);
-            new_on_disk_reftable = g_try_realloc(on_disk_reftable,
-                                                 reftable_size *
-                                                 sizeof(uint64_t));
-            if (!new_on_disk_reftable) {
-                res->check_errors++;
-                ret = -ENOMEM;
-                goto fail;
-            }
-            on_disk_reftable = new_on_disk_reftable;
-
-            memset(on_disk_reftable + old_reftable_size, 0,
-                   (reftable_size - old_reftable_size) * sizeof(uint64_t));
-
-            /* The offset we have for the reftable is now no longer valid;
-             * this will leak that range, but we can easily fix that by running
-             * a leak-fixing check after this rebuild operation */
-            reftable_offset = -1;
-        }
-        on_disk_reftable[refblock_index] = refblock_offset;
-
-        /* If this is apparently the last refblock (for now), try to squeeze the
-         * reftable in */
-        if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
-            reftable_offset < 0)
-        {
-            uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
-                                                          sizeof(uint64_t));
-            reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
-                                                  refcount_table, nb_clusters,
-                                                  &first_free_cluster);
-            if (reftable_offset < 0) {
-                fprintf(stderr, "ERROR allocating reftable: %s\n",
-                        strerror(-reftable_offset));
-                res->check_errors++;
-                ret = reftable_offset;
-                goto fail;
-            }
-        }
-
-        ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
-                                            s->cluster_size);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
-            goto fail;
-        }
-
-        /* The size of *refcount_table is always cluster-aligned, therefore the
-         * write operation will not overflow */
-        on_disk_refblock = (void *)((char *) *refcount_table +
-                                    refblock_index * s->cluster_size);
-
-        ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
-                         on_disk_refblock, s->cluster_sectors);
-        if (ret < 0) {
-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
-            goto fail;
-        }
-
-        /* Go to the end of this refblock */
-        cluster = refblock_start + s->refcount_block_size - 1;
-    }
-
-    if (reftable_offset < 0) {
-        uint64_t post_refblock_start, reftable_clusters;
-
-        post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
-        reftable_clusters = size_to_clusters(s,
-                                             reftable_size * sizeof(uint64_t));
-        /* Not pretty but simple */
-        if (first_free_cluster < post_refblock_start) {
-            first_free_cluster = post_refblock_start;
-        }
-        reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
-                                              refcount_table, nb_clusters,
-                                              &first_free_cluster);
-        if (reftable_offset < 0) {
-            fprintf(stderr, "ERROR allocating reftable: %s\n",
-                    strerror(-reftable_offset));
-            res->check_errors++;
-            ret = reftable_offset;
-            goto fail;
-        }
-
-        goto write_refblocks;
-    }
-
-    assert(on_disk_reftable);
-
-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
-        cpu_to_be64s(&on_disk_reftable[refblock_index]);
-    }
-
-    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
-                                        reftable_size * sizeof(uint64_t));
-    if (ret < 0) {
-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
-        goto fail;
-    }
-
-    assert(reftable_size < INT_MAX / sizeof(uint64_t));
-    ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
-                      reftable_size * sizeof(uint64_t));
-    if (ret < 0) {
-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
-        goto fail;
-    }
-
-    /* Enter new reftable into the image header */
-    cpu_to_be64w(&reftable_offset_and_clusters.reftable_offset,
-                 reftable_offset);
-    cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
-                 size_to_clusters(s, reftable_size * sizeof(uint64_t)));
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
-                                                  refcount_table_offset),
-                           &reftable_offset_and_clusters,
-                           sizeof(reftable_offset_and_clusters));
-    if (ret < 0) {
-        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
-        goto fail;
-    }
-
-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
-        be64_to_cpus(&on_disk_reftable[refblock_index]);
-    }
-    s->refcount_table = on_disk_reftable;
-    s->refcount_table_offset = reftable_offset;
-    s->refcount_table_size = reftable_size;
-
-    return 0;
-
-fail:
-    g_free(on_disk_reftable);
-    return ret;
-}
-
-/*
- * Checks an image for refcount consistency.
- *
- * Returns 0 if no errors are found, the number of errors in case the image is
- * detected as corrupted, and -errno when an internal error occurred.
- */
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                          BdrvCheckMode fix)
-{
-    BDRVQcow2State *s = bs->opaque;
-    BdrvCheckResult pre_compare_res;
-    int64_t size, highest_cluster, nb_clusters;
-    void *refcount_table = NULL;
-    bool rebuild = false;
-    int ret;
-
-    size = bdrv_getlength(bs->file->bs);
-    if (size < 0) {
-        res->check_errors++;
-        return size;
-    }
-
-    nb_clusters = size_to_clusters(s, size);
-    if (nb_clusters > INT_MAX) {
-        res->check_errors++;
-        return -EFBIG;
-    }
-
-    res->bfi.total_clusters =
-        size_to_clusters(s, bs->total_sectors * BDRV_SECTOR_SIZE);
-
-    ret = calculate_refcounts(bs, res, fix, &rebuild, &refcount_table,
-                              &nb_clusters);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* In case we don't need to rebuild the refcount structure (but want to fix
-     * something), this function is immediately called again, in which case the
-     * result should be ignored */
-    pre_compare_res = *res;
-    compare_refcounts(bs, res, 0, &rebuild, &highest_cluster, refcount_table,
-                      nb_clusters);
-
-    if (rebuild && (fix & BDRV_FIX_ERRORS)) {
-        BdrvCheckResult old_res = *res;
-        int fresh_leaks = 0;
-
-        fprintf(stderr, "Rebuilding refcount structure\n");
-        ret = rebuild_refcount_structure(bs, res, &refcount_table,
-                                         &nb_clusters);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        res->corruptions = 0;
-        res->leaks = 0;
-
-        /* Because the old reftable has been exchanged for a new one the
-         * references have to be recalculated */
-        rebuild = false;
-        memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters));
-        ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
-                                  &nb_clusters);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        if (fix & BDRV_FIX_LEAKS) {
-            /* The old refcount structures are now leaked, fix it; the result
-             * can be ignored, aside from leaks which were introduced by
-             * rebuild_refcount_structure() that could not be fixed */
-            BdrvCheckResult saved_res = *res;
-            *res = (BdrvCheckResult){ 0 };
-
-            compare_refcounts(bs, res, BDRV_FIX_LEAKS, &rebuild,
-                              &highest_cluster, refcount_table, nb_clusters);
-            if (rebuild) {
-                fprintf(stderr, "ERROR rebuilt refcount structure is still "
-                        "broken\n");
-            }
-
-            /* Any leaks accounted for here were introduced by
-             * rebuild_refcount_structure() because that function has created a
-             * new refcount structure from scratch */
-            fresh_leaks = res->leaks;
-            *res = saved_res;
-        }
-
-        if (res->corruptions < old_res.corruptions) {
-            res->corruptions_fixed += old_res.corruptions - res->corruptions;
-        }
-        if (res->leaks < old_res.leaks) {
-            res->leaks_fixed += old_res.leaks - res->leaks;
-        }
-        res->leaks += fresh_leaks;
-    } else if (fix) {
-        if (rebuild) {
-            fprintf(stderr, "ERROR need to rebuild refcount structures\n");
-            res->check_errors++;
-            ret = -EIO;
-            goto fail;
-        }
-
-        if (res->leaks || res->corruptions) {
-            *res = pre_compare_res;
-            compare_refcounts(bs, res, fix, &rebuild, &highest_cluster,
-                              refcount_table, nb_clusters);
-        }
-    }
-
-    /* check OFLAG_COPIED */
-    ret = check_oflag_copied(bs, res, fix);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    res->image_end_offset = (highest_cluster + 1) * s->cluster_size;
-    ret = 0;
-
-fail:
-    g_free(refcount_table);
-
-    return ret;
-}
-
-#define overlaps_with(ofs, sz) \
-    ranges_overlap(offset, size, ofs, sz)
-
-/*
- * Checks if the given offset into the image file is actually free to use by
- * looking for overlaps with important metadata sections (L1/L2 tables etc.),
- * i.e. a sanity check without relying on the refcount tables.
- *
- * The ign parameter specifies what checks not to perform (being a bitmask of
- * QCow2MetadataOverlap values), i.e., what sections to ignore.
- *
- * Returns:
- * - 0 if writing to this offset will not affect the mentioned metadata
- * - a positive QCow2MetadataOverlap value indicating one overlapping section
- * - a negative value (-errno) indicating an error while performing a check,
- *   e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
- */
-int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
-                                 int64_t size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int chk = s->overlap_check & ~ign;
-    int i, j;
-
-    if (!size) {
-        return 0;
-    }
-
-    if (chk & QCOW2_OL_MAIN_HEADER) {
-        if (offset < s->cluster_size) {
-            return QCOW2_OL_MAIN_HEADER;
-        }
-    }
-
-    /* align range to test to cluster boundaries */
-    size = align_offset(offset_into_cluster(s, offset) + size, s->cluster_size);
-    offset = start_of_cluster(s, offset);
-
-    if ((chk & QCOW2_OL_ACTIVE_L1) && s->l1_size) {
-        if (overlaps_with(s->l1_table_offset, s->l1_size * sizeof(uint64_t))) {
-            return QCOW2_OL_ACTIVE_L1;
-        }
-    }
-
-    if ((chk & QCOW2_OL_REFCOUNT_TABLE) && s->refcount_table_size) {
-        if (overlaps_with(s->refcount_table_offset,
-            s->refcount_table_size * sizeof(uint64_t))) {
-            return QCOW2_OL_REFCOUNT_TABLE;
-        }
-    }
-
-    if ((chk & QCOW2_OL_SNAPSHOT_TABLE) && s->snapshots_size) {
-        if (overlaps_with(s->snapshots_offset, s->snapshots_size)) {
-            return QCOW2_OL_SNAPSHOT_TABLE;
-        }
-    }
-
-    if ((chk & QCOW2_OL_INACTIVE_L1) && s->snapshots) {
-        for (i = 0; i < s->nb_snapshots; i++) {
-            if (s->snapshots[i].l1_size &&
-                overlaps_with(s->snapshots[i].l1_table_offset,
-                s->snapshots[i].l1_size * sizeof(uint64_t))) {
-                return QCOW2_OL_INACTIVE_L1;
-            }
-        }
-    }
-
-    if ((chk & QCOW2_OL_ACTIVE_L2) && s->l1_table) {
-        for (i = 0; i < s->l1_size; i++) {
-            if ((s->l1_table[i] & L1E_OFFSET_MASK) &&
-                overlaps_with(s->l1_table[i] & L1E_OFFSET_MASK,
-                s->cluster_size)) {
-                return QCOW2_OL_ACTIVE_L2;
-            }
-        }
-    }
-
-    if ((chk & QCOW2_OL_REFCOUNT_BLOCK) && s->refcount_table) {
-        for (i = 0; i < s->refcount_table_size; i++) {
-            if ((s->refcount_table[i] & REFT_OFFSET_MASK) &&
-                overlaps_with(s->refcount_table[i] & REFT_OFFSET_MASK,
-                s->cluster_size)) {
-                return QCOW2_OL_REFCOUNT_BLOCK;
-            }
-        }
-    }
-
-    if ((chk & QCOW2_OL_INACTIVE_L2) && s->snapshots) {
-        for (i = 0; i < s->nb_snapshots; i++) {
-            uint64_t l1_ofs = s->snapshots[i].l1_table_offset;
-            uint32_t l1_sz  = s->snapshots[i].l1_size;
-            uint64_t l1_sz2 = l1_sz * sizeof(uint64_t);
-            uint64_t *l1 = g_try_malloc(l1_sz2);
-            int ret;
-
-            if (l1_sz2 && l1 == NULL) {
-                return -ENOMEM;
-            }
-
-            ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
-            if (ret < 0) {
-                g_free(l1);
-                return ret;
-            }
-
-            for (j = 0; j < l1_sz; j++) {
-                uint64_t l2_ofs = be64_to_cpu(l1[j]) & L1E_OFFSET_MASK;
-                if (l2_ofs && overlaps_with(l2_ofs, s->cluster_size)) {
-                    g_free(l1);
-                    return QCOW2_OL_INACTIVE_L2;
-                }
-            }
-
-            g_free(l1);
-        }
-    }
-
-    return 0;
-}
-
-static const char *metadata_ol_names[] = {
-    [QCOW2_OL_MAIN_HEADER_BITNR]    = "qcow2_header",
-    [QCOW2_OL_ACTIVE_L1_BITNR]      = "active L1 table",
-    [QCOW2_OL_ACTIVE_L2_BITNR]      = "active L2 table",
-    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = "refcount table",
-    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = "refcount block",
-    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = "snapshot table",
-    [QCOW2_OL_INACTIVE_L1_BITNR]    = "inactive L1 table",
-    [QCOW2_OL_INACTIVE_L2_BITNR]    = "inactive L2 table",
-};
-
-/*
- * First performs a check for metadata overlaps (through
- * qcow2_check_metadata_overlap); if that fails with a negative value (error
- * while performing a check), that value is returned. If an impending overlap
- * is detected, the BDS will be made unusable, the qcow2 file marked corrupt
- * and -EIO returned.
- *
- * Returns 0 if there were neither overlaps nor errors while checking for
- * overlaps; or a negative value (-errno) on error.
- */
-int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
-                                  int64_t size)
-{
-    int ret = qcow2_check_metadata_overlap(bs, ign, offset, size);
-
-    if (ret < 0) {
-        return ret;
-    } else if (ret > 0) {
-        int metadata_ol_bitnr = ctz32(ret);
-        assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
-
-        qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
-                                "write on metadata (overlaps with %s)",
-                                metadata_ol_names[metadata_ol_bitnr]);
-        return -EIO;
-    }
-
-    return 0;
-}
-
-/* A pointer to a function of this type is given to walk_over_reftable(). That
- * function will create refblocks and pass them to a RefblockFinishOp once they
- * are completed (@refblock). @refblock_empty is set if the refblock is
- * completely empty.
- *
- * Along with the refblock, a corresponding reftable entry is passed, in the
- * reftable @reftable (which may be reallocated) at @reftable_index.
- *
- * @allocated should be set to true if a new cluster has been allocated.
- */
-typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
-                               uint64_t reftable_index, uint64_t *reftable_size,
-                               void *refblock, bool refblock_empty,
-                               bool *allocated, Error **errp);
-
-/**
- * This "operation" for walk_over_reftable() allocates the refblock on disk (if
- * it is not empty) and inserts its offset into the new reftable. The size of
- * this new reftable is increased as required.
- */
-static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
-                          uint64_t reftable_index, uint64_t *reftable_size,
-                          void *refblock, bool refblock_empty, bool *allocated,
-                          Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t offset;
-
-    if (!refblock_empty && reftable_index >= *reftable_size) {
-        uint64_t *new_reftable;
-        uint64_t new_reftable_size;
-
-        new_reftable_size = ROUND_UP(reftable_index + 1,
-                                     s->cluster_size / sizeof(uint64_t));
-        if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
-            error_setg(errp,
-                       "This operation would make the refcount table grow "
-                       "beyond the maximum size supported by QEMU, aborting");
-            return -ENOTSUP;
-        }
-
-        new_reftable = g_try_realloc(*reftable, new_reftable_size *
-                                                sizeof(uint64_t));
-        if (!new_reftable) {
-            error_setg(errp, "Failed to increase reftable buffer size");
-            return -ENOMEM;
-        }
-
-        memset(new_reftable + *reftable_size, 0,
-               (new_reftable_size - *reftable_size) * sizeof(uint64_t));
-
-        *reftable      = new_reftable;
-        *reftable_size = new_reftable_size;
-    }
-
-    if (!refblock_empty && !(*reftable)[reftable_index]) {
-        offset = qcow2_alloc_clusters(bs, s->cluster_size);
-        if (offset < 0) {
-            error_setg_errno(errp, -offset, "Failed to allocate refblock");
-            return offset;
-        }
-        (*reftable)[reftable_index] = offset;
-        *allocated = true;
-    }
-
-    return 0;
-}
-
-/**
- * This "operation" for walk_over_reftable() writes the refblock to disk at the
- * offset specified by the new reftable's entry. It does not modify the new
- * reftable or change any refcounts.
- */
-static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
-                          uint64_t reftable_index, uint64_t *reftable_size,
-                          void *refblock, bool refblock_empty, bool *allocated,
-                          Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t offset;
-    int ret;
-
-    if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
-        offset = (*reftable)[reftable_index];
-
-        ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Overlap check failed");
-            return ret;
-        }
-
-        ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to write refblock");
-            return ret;
-        }
-    } else {
-        assert(refblock_empty);
-    }
-
-    return 0;
-}
-
-/**
- * This function walks over the existing reftable and every referenced refblock;
- * if @new_set_refcount is non-NULL, it is called for every refcount entry to
- * create an equal new entry in the passed @new_refblock. Once that
- * @new_refblock is completely filled, @operation will be called.
- *
- * @status_cb and @cb_opaque are used for the amend operation's status callback.
- * @index is the index of the walk_over_reftable() calls and @total is the total
- * number of walk_over_reftable() calls per amend operation. Both are used for
- * calculating the parameters for the status callback.
- *
- * @allocated is set to true if a new cluster has been allocated.
- */
-static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
-                              uint64_t *new_reftable_index,
-                              uint64_t *new_reftable_size,
-                              void *new_refblock, int new_refblock_size,
-                              int new_refcount_bits,
-                              RefblockFinishOp *operation, bool *allocated,
-                              Qcow2SetRefcountFunc *new_set_refcount,
-                              BlockDriverAmendStatusCB *status_cb,
-                              void *cb_opaque, int index, int total,
-                              Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t reftable_index;
-    bool new_refblock_empty = true;
-    int refblock_index;
-    int new_refblock_index = 0;
-    int ret;
-
-    for (reftable_index = 0; reftable_index < s->refcount_table_size;
-         reftable_index++)
-    {
-        uint64_t refblock_offset = s->refcount_table[reftable_index]
-                                 & REFT_OFFSET_MASK;
-
-        status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
-                  (uint64_t)total * s->refcount_table_size, cb_opaque);
-
-        if (refblock_offset) {
-            void *refblock;
-
-            if (offset_into_cluster(s, refblock_offset)) {
-                qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
-                                        PRIx64 " unaligned (reftable index: %#"
-                                        PRIx64 ")", refblock_offset,
-                                        reftable_index);
-                error_setg(errp,
-                           "Image is corrupt (unaligned refblock offset)");
-                return -EIO;
-            }
-
-            ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
-                                  &refblock);
-            if (ret < 0) {
-                error_setg_errno(errp, -ret, "Failed to retrieve refblock");
-                return ret;
-            }
-
-            for (refblock_index = 0; refblock_index < s->refcount_block_size;
-                 refblock_index++)
-            {
-                uint64_t refcount;
-
-                if (new_refblock_index >= new_refblock_size) {
-                    /* new_refblock is now complete */
-                    ret = operation(bs, new_reftable, *new_reftable_index,
-                                    new_reftable_size, new_refblock,
-                                    new_refblock_empty, allocated, errp);
-                    if (ret < 0) {
-                        qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
-                        return ret;
-                    }
-
-                    (*new_reftable_index)++;
-                    new_refblock_index = 0;
-                    new_refblock_empty = true;
-                }
-
-                refcount = s->get_refcount(refblock, refblock_index);
-                if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
-                    uint64_t offset;
-
-                    qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
-
-                    offset = ((reftable_index << s->refcount_block_bits)
-                              + refblock_index) << s->cluster_bits;
-
-                    error_setg(errp, "Cannot decrease refcount entry width to "
-                               "%i bits: Cluster at offset %#" PRIx64 " has a "
-                               "refcount of %" PRIu64, new_refcount_bits,
-                               offset, refcount);
-                    return -EINVAL;
-                }
-
-                if (new_set_refcount) {
-                    new_set_refcount(new_refblock, new_refblock_index++,
-                                     refcount);
-                } else {
-                    new_refblock_index++;
-                }
-                new_refblock_empty = new_refblock_empty && refcount == 0;
-            }
-
-            qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
-        } else {
-            /* No refblock means every refcount is 0 */
-            for (refblock_index = 0; refblock_index < s->refcount_block_size;
-                 refblock_index++)
-            {
-                if (new_refblock_index >= new_refblock_size) {
-                    /* new_refblock is now complete */
-                    ret = operation(bs, new_reftable, *new_reftable_index,
-                                    new_reftable_size, new_refblock,
-                                    new_refblock_empty, allocated, errp);
-                    if (ret < 0) {
-                        return ret;
-                    }
-
-                    (*new_reftable_index)++;
-                    new_refblock_index = 0;
-                    new_refblock_empty = true;
-                }
-
-                if (new_set_refcount) {
-                    new_set_refcount(new_refblock, new_refblock_index++, 0);
-                } else {
-                    new_refblock_index++;
-                }
-            }
-        }
-    }
-
-    if (new_refblock_index > 0) {
-        /* Complete the potentially existing partially filled final refblock */
-        if (new_set_refcount) {
-            for (; new_refblock_index < new_refblock_size;
-                 new_refblock_index++)
-            {
-                new_set_refcount(new_refblock, new_refblock_index, 0);
-            }
-        }
-
-        ret = operation(bs, new_reftable, *new_reftable_index,
-                        new_reftable_size, new_refblock, new_refblock_empty,
-                        allocated, errp);
-        if (ret < 0) {
-            return ret;
-        }
-
-        (*new_reftable_index)++;
-    }
-
-    status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
-              (uint64_t)total * s->refcount_table_size, cb_opaque);
-
-    return 0;
-}
-
-/*
- * Converts the refcount structures of the image to a new refcount entry width
- * of (1 << refcount_order) bits.
- *
- * The work is done in phases:
- *  1. Walk over the existing reftable (possibly several times) with
- *     alloc_refblock until a walk finishes without any new allocation, and
- *     (re)allocate the on-disk area for the new reftable whenever a walk
- *     allocated something.
- *  2. Walk once more with flush_refblock to write the new refblocks.
- *  3. Write the new reftable, flush the refcount block cache and update the
- *     image header.
- *  4. Switch the in-memory state over to the new structures and free the
- *     clusters of the old ones.
- *
- * status_cb and cb_opaque are handed to walk_over_reftable() for progress
- * reporting.
- *
- * Asserts that the image is at least version 3 and that refcount_order is in
- * the range [0, 6].
- *
- * Returns a negative errno value on failure; in that case the structures
- * built so far are freed again and the in-memory state keeps describing the
- * old refcount structures. Otherwise the (non-negative) result of the last
- * step is returned.
- */
-int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
-                                BlockDriverAmendStatusCB *status_cb,
-                                void *cb_opaque, Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    Qcow2GetRefcountFunc *new_get_refcount;
-    Qcow2SetRefcountFunc *new_set_refcount;
-    void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
-    uint64_t *new_reftable = NULL, new_reftable_size = 0;
-    uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
-    uint64_t new_reftable_index = 0;
-    uint64_t i;
-    int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
-    int new_refblock_size, new_refcount_bits = 1 << refcount_order;
-    int old_refcount_order;
-    int walk_index = 0;
-    int ret;
-    bool new_allocation;
-
-    assert(s->qcow_version >= 3);
-    assert(refcount_order >= 0 && refcount_order <= 6);
-
-    /* see qcow2_open() */
-    new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));
-
-    new_get_refcount = get_refcount_funcs[refcount_order];
-    new_set_refcount = set_refcount_funcs[refcount_order];
-
-
-    /* Repeat the allocation walk until one pass completes without having
-     * allocated anything new */
-    do {
-        int total_walks;
-
-        new_allocation = false;
-
-        /* At least we have to do this walk and the one which writes the
-         * refblocks; also, at least we have to do this loop here at least
-         * twice (normally), first to do the allocations, and second to
-         * determine that everything is correctly allocated, this then makes
-         * three walks in total */
-        total_walks = MAX(walk_index + 2, 3);
-
-        /* First, allocate the structures so they are present in the refcount
-         * structures */
-        ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
-                                 &new_reftable_size, NULL, new_refblock_size,
-                                 new_refcount_bits, &alloc_refblock,
-                                 &new_allocation, NULL, status_cb, cb_opaque,
-                                 walk_index++, total_walks, errp);
-        if (ret < 0) {
-            goto done;
-        }
-
-        /* Rewind so that the next walk starts filling the new reftable from
-         * its first entry again */
-        new_reftable_index = 0;
-
-        if (new_allocation) {
-            /* Something was allocated during this walk: release the on-disk
-             * reftable area from a previous iteration (if any) and allocate
-             * one for the current new_reftable_size */
-            if (new_reftable_offset) {
-                qcow2_free_clusters(bs, new_reftable_offset,
-                                    allocated_reftable_size * sizeof(uint64_t),
-                                    QCOW2_DISCARD_NEVER);
-            }
-
-            new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
-                                                           sizeof(uint64_t));
-            if (new_reftable_offset < 0) {
-                error_setg_errno(errp, -new_reftable_offset,
-                                 "Failed to allocate the new reftable");
-                ret = new_reftable_offset;
-                goto done;
-            }
-            allocated_reftable_size = new_reftable_size;
-        }
-    } while (new_allocation);
-
-    /* Second, write the new refblocks */
-    ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
-                             &new_reftable_size, new_refblock,
-                             new_refblock_size, new_refcount_bits,
-                             &flush_refblock, &new_allocation, new_set_refcount,
-                             status_cb, cb_opaque, walk_index, walk_index + 1,
-                             errp);
-    if (ret < 0) {
-        goto done;
-    }
-    assert(!new_allocation);
-
-
-    /* Write the new reftable */
-    ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
-                                        new_reftable_size * sizeof(uint64_t));
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Overlap check failed");
-        goto done;
-    }
-
-    /* The table is converted to big endian in place for the write and
-     * converted back afterwards (also if the write failed), because it is
-     * still needed in host byte order below */
-    for (i = 0; i < new_reftable_size; i++) {
-        cpu_to_be64s(&new_reftable[i]);
-    }
-
-    ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable,
-                      new_reftable_size * sizeof(uint64_t));
-
-    for (i = 0; i < new_reftable_size; i++) {
-        be64_to_cpus(&new_reftable[i]);
-    }
-
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to write the new reftable");
-        goto done;
-    }
-
-
-    /* Empty the refcount cache */
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
-        goto done;
-    }
-
-    /* Update the image header to point to the new reftable; this only updates
-     * the fields which are relevant to qcow2_update_header(); other fields
-     * such as s->refcount_table or s->refcount_bits stay stale for now
-     * (because we have to restore everything if qcow2_update_header() fails) */
-    old_refcount_order  = s->refcount_order;
-    old_reftable_size   = s->refcount_table_size;
-    old_reftable_offset = s->refcount_table_offset;
-
-    s->refcount_order        = refcount_order;
-    s->refcount_table_size   = new_reftable_size;
-    s->refcount_table_offset = new_reftable_offset;
-
-    ret = qcow2_update_header(bs);
-    if (ret < 0) {
-        s->refcount_order        = old_refcount_order;
-        s->refcount_table_size   = old_reftable_size;
-        s->refcount_table_offset = old_reftable_offset;
-        error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
-        goto done;
-    }
-
-    /* Now update the rest of the in-memory information */
-    old_reftable = s->refcount_table;
-    s->refcount_table = new_reftable;
-
-    /* refcount_max becomes 2^refcount_bits - 1; it is computed in two steps
-     * so that no shift by the full 64 bits is needed when refcount_bits is
-     * 64 */
-    s->refcount_bits = 1 << refcount_order;
-    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
-    s->refcount_max += s->refcount_max - 1;
-
-    s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
-    s->refcount_block_size = 1 << s->refcount_block_bits;
-
-    s->get_refcount = new_get_refcount;
-    s->set_refcount = new_set_refcount;
-
-    /* For cleaning up all old refblocks and the old reftable below the "done"
-     * label */
-    new_reftable        = old_reftable;
-    new_reftable_size   = old_reftable_size;
-    new_reftable_offset = old_reftable_offset;
-
-done:
-    if (new_reftable) {
-        /* On success, new_reftable actually points to the old reftable (and
-         * new_reftable_size is the old reftable's size); but that is just
-         * fine */
-        for (i = 0; i < new_reftable_size; i++) {
-            uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
-            if (offset) {
-                qcow2_free_clusters(bs, offset, s->cluster_size,
-                                    QCOW2_DISCARD_OTHER);
-            }
-        }
-        g_free(new_reftable);
-
-        if (new_reftable_offset > 0) {
-            qcow2_free_clusters(bs, new_reftable_offset,
-                                new_reftable_size * sizeof(uint64_t),
-                                QCOW2_DISCARD_OTHER);
-        }
-    }
-
-    qemu_vfree(new_refblock);
-    return ret;
-}
diff --git a/qemu/block/qcow2-snapshot.c b/qemu/block/qcow2-snapshot.c
deleted file mode 100644
index 5f4a17e47..000000000
--- a/qemu/block/qcow2-snapshot.c
+++ /dev/null
@@ -1,738 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "block/qcow2.h"
-#include "qemu/error-report.h"
-#include "qemu/cutils.h"
-
-/*
- * Releases the in-memory snapshot table of bs: the name and ID strings of
- * every entry, then the array itself. Afterwards the state describes an image
- * without snapshots (s->snapshots is NULL, s->nb_snapshots is 0).
- */
-void qcow2_free_snapshots(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int idx = 0;
-
-    while (idx < s->nb_snapshots) {
-        QCowSnapshot *entry = &s->snapshots[idx];
-
-        g_free(entry->name);
-        g_free(entry->id_str);
-        idx++;
-    }
-
-    g_free(s->snapshots);
-    s->nb_snapshots = 0;
-    s->snapshots = NULL;
-}
-
-/*
- * Reads the snapshot table of the image (s->nb_snapshots entries starting at
- * s->snapshots_offset) into a newly allocated s->snapshots array and records
- * the number of bytes the table occupies in s->snapshots_size.
- *
- * Returns 0 on success (including the case of an image without snapshots).
- * On failure, the partially built table is released with
- * qcow2_free_snapshots() and a negative errno value is returned; -EFBIG
- * means that the table is larger than QCOW_MAX_SNAPSHOTS_SIZE.
- */
-int qcow2_read_snapshots(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowSnapshotHeader h;
-    QCowSnapshotExtraData extra;
-    QCowSnapshot *sn;
-    int i, id_str_size, name_size;
-    int64_t offset;
-    uint32_t extra_data_size;
-    int ret;
-
-    if (!s->nb_snapshots) {
-        s->snapshots = NULL;
-        s->snapshots_size = 0;
-        return 0;
-    }
-
-    offset = s->snapshots_offset;
-    /* Zero-initialised, so that the fail path can free the string pointers of
-     * entries which have not been filled in yet */
-    s->snapshots = g_new0(QCowSnapshot, s->nb_snapshots);
-
-    for(i = 0; i < s->nb_snapshots; i++) {
-        /* Read statically sized part of the snapshot header */
-        offset = align_offset(offset, 8);
-        ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
-        if (ret < 0) {
-            goto fail;
-        }
-
-        offset += sizeof(h);
-        sn = s->snapshots + i;
-        sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
-        sn->l1_size = be32_to_cpu(h.l1_size);
-        sn->vm_state_size = be32_to_cpu(h.vm_state_size);
-        sn->date_sec = be32_to_cpu(h.date_sec);
-        sn->date_nsec = be32_to_cpu(h.date_nsec);
-        sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
-        extra_data_size = be32_to_cpu(h.extra_data_size);
-
-        id_str_size = be16_to_cpu(h.id_str_size);
-        name_size = be16_to_cpu(h.name_size);
-
-        /* Read extra data */
-        /* Only as many bytes as fit into the known structure are read, but
-         * the offset is advanced past all of the extra data */
-        ret = bdrv_pread(bs->file->bs, offset, &extra,
-                         MIN(sizeof(extra), extra_data_size));
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += extra_data_size;
-
-        /* The 64 bit VM state size, if present, replaces the 32 bit value
-         * taken from the header above */
-        if (extra_data_size >= 8) {
-            sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
-        }
-
-        /* Without a stored disk size, fall back to the current image size */
-        if (extra_data_size >= 16) {
-            sn->disk_size = be64_to_cpu(extra.disk_size);
-        } else {
-            sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
-        }
-
-        /* Read snapshot ID */
-        sn->id_str = g_malloc(id_str_size + 1);
-        ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += id_str_size;
-        sn->id_str[id_str_size] = '\0';
-
-        /* Read snapshot name */
-        sn->name = g_malloc(name_size + 1);
-        ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += name_size;
-        sn->name[name_size] = '\0';
-
-        /* Checked after every entry, so an oversized table is rejected
-         * before any further entry is read */
-        if (offset - s->snapshots_offset > QCOW_MAX_SNAPSHOTS_SIZE) {
-            ret = -EFBIG;
-            goto fail;
-        }
-    }
-
-    assert(offset - s->snapshots_offset <= INT_MAX);
-    s->snapshots_size = offset - s->snapshots_offset;
-    return 0;
-
-fail:
-    /* This also resets s->nb_snapshots to 0 */
-    qcow2_free_snapshots(bs);
-    return ret;
-}
-
-/*
- * Writes the in-memory snapshot list (s->snapshots, s->nb_snapshots) to a
- * newly allocated area of the image file, then switches the image header
- * over to that new list and frees the clusters of the old one.
- *
- * Returns 0 on success. On failure, a negative errno value is returned and
- * the newly allocated clusters (if any) are freed again; s->snapshots_offset
- * and s->snapshots_size are only changed on success. -EFBIG means that the
- * list would be larger than QCOW_MAX_SNAPSHOTS_SIZE.
- */
-static int qcow2_write_snapshots(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowSnapshot *sn;
-    QCowSnapshotHeader h;
-    QCowSnapshotExtraData extra;
-    int i, name_size, id_str_size, snapshots_size;
-    /* Mirrors the two adjacent header fields that are rewritten in a single
-     * write below */
-    struct {
-        uint32_t nb_snapshots;
-        uint64_t snapshots_offset;
-    } QEMU_PACKED header_data;
-    int64_t offset, snapshots_offset = 0;
-    int ret;
-
-    /* compute the size of the snapshots */
-    offset = 0;
-    for(i = 0; i < s->nb_snapshots; i++) {
-        sn = s->snapshots + i;
-        offset = align_offset(offset, 8);
-        offset += sizeof(h);
-        offset += sizeof(extra);
-        offset += strlen(sn->id_str);
-        offset += strlen(sn->name);
-
-        if (offset > QCOW_MAX_SNAPSHOTS_SIZE) {
-            ret = -EFBIG;
-            goto fail;
-        }
-    }
-
-    assert(offset <= INT_MAX);
-    snapshots_size = offset;
-
-    /* Allocate space for the new snapshot list */
-    snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
-    offset = snapshots_offset;
-    if (offset < 0) {
-        ret = offset;
-        goto fail;
-    }
-    ret = bdrv_flush(bs);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* The snapshot list position has not yet been updated, so these clusters
-     * must indeed be completely free */
-    ret = qcow2_pre_write_overlap_check(bs, 0, offset, snapshots_size);
-    if (ret < 0) {
-        goto fail;
-    }
-
-
-    /* Write all snapshots to the new list */
-    for(i = 0; i < s->nb_snapshots; i++) {
-        sn = s->snapshots + i;
-        memset(&h, 0, sizeof(h));
-        h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
-        h.l1_size = cpu_to_be32(sn->l1_size);
-        /* If it doesn't fit in 32 bit, older implementations should treat it
-         * as a disk-only snapshot rather than truncate the VM state */
-        if (sn->vm_state_size <= 0xffffffff) {
-            h.vm_state_size = cpu_to_be32(sn->vm_state_size);
-        }
-        h.date_sec = cpu_to_be32(sn->date_sec);
-        h.date_nsec = cpu_to_be32(sn->date_nsec);
-        h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
-        h.extra_data_size = cpu_to_be32(sizeof(extra));
-
-        memset(&extra, 0, sizeof(extra));
-        extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
-        extra.disk_size = cpu_to_be64(sn->disk_size);
-
-        /* The strings are stored without their terminating NUL byte; their
-         * lengths go into 16 bit header fields */
-        id_str_size = strlen(sn->id_str);
-        name_size = strlen(sn->name);
-        assert(id_str_size <= UINT16_MAX && name_size <= UINT16_MAX);
-        h.id_str_size = cpu_to_be16(id_str_size);
-        h.name_size = cpu_to_be16(name_size);
-        offset = align_offset(offset, 8);
-
-        ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += sizeof(h);
-
-        ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += sizeof(extra);
-
-        ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += id_str_size;
-
-        ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += name_size;
-    }
-
-    /*
-     * Update the header to point to the new snapshot table. This requires the
-     * new table and its refcounts to be stable on disk.
-     */
-    ret = bdrv_flush(bs);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* header_data is written as one blob starting at nb_snapshots, so
-     * snapshots_offset has to follow nb_snapshots directly in QCowHeader */
-    QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
-        offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
-
-    header_data.nb_snapshots        = cpu_to_be32(s->nb_snapshots);
-    header_data.snapshots_offset    = cpu_to_be64(snapshots_offset);
-
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
-                           &header_data, sizeof(header_data));
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* free the old snapshot table */
-    qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
-                        QCOW2_DISCARD_SNAPSHOT);
-    s->snapshots_offset = snapshots_offset;
-    s->snapshots_size = snapshots_size;
-    return 0;
-
-fail:
-    /* snapshots_offset is 0 before the allocation and negative if the
-     * allocation itself failed; in both cases there is nothing to free */
-    if (snapshots_offset > 0) {
-        qcow2_free_clusters(bs, snapshots_offset, snapshots_size,
-                            QCOW2_DISCARD_ALWAYS);
-    }
-    return ret;
-}
-
-/*
- * Writes a new numeric snapshot ID into id_str (a buffer of id_str_size
- * bytes): one more than the largest value found when parsing the IDs of the
- * existing snapshots as decimal numbers.
- */
-static void find_new_snapshot_id(BlockDriverState *bs,
-                                 char *id_str, int id_str_size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    unsigned long highest = 0;
-    int idx;
-
-    for (idx = 0; idx < s->nb_snapshots; idx++) {
-        unsigned long value = strtoul(s->snapshots[idx].id_str, NULL, 10);
-
-        if (value > highest) {
-            highest = value;
-        }
-    }
-
-    snprintf(id_str, id_str_size, "%lu", highest + 1);
-}
-
-/*
- * Looks up a snapshot in the in-memory snapshot list.
- *
- * Each of id and name that is not NULL has to match the respective string of
- * the snapshot exactly; a NULL argument is not used for matching. If both are
- * NULL, nothing matches.
- *
- * Returns the index of the first matching snapshot, or -1 if there is none.
- */
-static int find_snapshot_by_id_and_name(BlockDriverState *bs,
-                                        const char *id,
-                                        const char *name)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int idx;
-
-    if (!id && !name) {
-        return -1;
-    }
-
-    for (idx = 0; idx < s->nb_snapshots; idx++) {
-        const QCowSnapshot *entry = &s->snapshots[idx];
-
-        if (id && strcmp(entry->id_str, id) != 0) {
-            continue;
-        }
-        if (name && strcmp(entry->name, name) != 0) {
-            continue;
-        }
-        return idx;
-    }
-
-    return -1;
-}
-
-/*
- * Looks up a snapshot by a string which is first tried as a snapshot ID and,
- * only if no snapshot has that ID, as a snapshot name.
- *
- * Returns the index of the snapshot, or -1 if neither lookup finds one.
- */
-static int find_snapshot_by_id_or_name(BlockDriverState *bs,
-                                       const char *id_or_name)
-{
-    int idx = find_snapshot_by_id_and_name(bs, id_or_name, NULL);
-
-    if (idx < 0) {
-        idx = find_snapshot_by_id_and_name(bs, NULL, id_or_name);
-    }
-    return idx;
-}
-
-/*
- * Creates a new internal snapshot of the current image state.
- *
- * A new ID is always generated and stored in sn_info->id_str, replacing
- * whatever the caller put there; the remaining fields of sn_info (name, VM
- * state size, date and VM clock) are copied into the new snapshot.
- *
- * The current L1 table is copied to newly allocated clusters, the refcounts
- * of everything it references are increased, and only then is the snapshot
- * appended to the snapshot list and the list written to the image.
- *
- * Returns 0 on success, -EFBIG if the image already has QCOW_MAX_SNAPSHOTS
- * snapshots, -EEXIST if the generated ID is already in use, or another
- * negative errno value on failure. If writing the snapshot list fails, the
- * previous in-memory list is restored.
- */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowSnapshot *new_snapshot_list = NULL;
-    QCowSnapshot *old_snapshot_list = NULL;
-    QCowSnapshot sn1, *sn = &sn1;
-    int i, ret;
-    uint64_t *l1_table = NULL;
-    int64_t l1_table_offset;
-
-    if (s->nb_snapshots >= QCOW_MAX_SNAPSHOTS) {
-        return -EFBIG;
-    }
-
-    memset(sn, 0, sizeof(*sn));
-
-    /* Generate an ID */
-    find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
-
-    /* Check that the ID is unique */
-    if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
-        return -EEXIST;
-    }
-
-    /* Populate sn with passed data */
-    sn->id_str = g_strdup(sn_info->id_str);
-    sn->name = g_strdup(sn_info->name);
-
-    sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
-    sn->vm_state_size = sn_info->vm_state_size;
-    sn->date_sec = sn_info->date_sec;
-    sn->date_nsec = sn_info->date_nsec;
-    sn->vm_clock_nsec = sn_info->vm_clock_nsec;
-
-    /* Allocate the L1 table of the snapshot and copy the current one there. */
-    l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
-    if (l1_table_offset < 0) {
-        ret = l1_table_offset;
-        goto fail;
-    }
-
-    sn->l1_table_offset = l1_table_offset;
-    sn->l1_size = s->l1_size;
-
-    /* A NULL result is only treated as an error if a non-empty table was
-     * requested */
-    l1_table = g_try_new(uint64_t, s->l1_size);
-    if (s->l1_size && l1_table == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    /* The temporary copy holds the table in big endian, as it is written to
-     * the image file */
-    for(i = 0; i < s->l1_size; i++) {
-        l1_table[i] = cpu_to_be64(s->l1_table[i]);
-    }
-
-    ret = qcow2_pre_write_overlap_check(bs, 0, sn->l1_table_offset,
-                                        s->l1_size * sizeof(uint64_t));
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
-                      s->l1_size * sizeof(uint64_t));
-    if (ret < 0) {
-        goto fail;
-    }
-
-    g_free(l1_table);
-    l1_table = NULL;
-
-    /*
-     * Increase the refcounts of all clusters and make sure everything is
-     * stable on disk before updating the snapshot table to contain a pointer
-     * to the new L1 table.
-     */
-    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Append the new snapshot to the snapshot list */
-    new_snapshot_list = g_new(QCowSnapshot, s->nb_snapshots + 1);
-    if (s->snapshots) {
-        memcpy(new_snapshot_list, s->snapshots,
-               s->nb_snapshots * sizeof(QCowSnapshot));
-        old_snapshot_list = s->snapshots;
-    }
-    s->snapshots = new_snapshot_list;
-    s->snapshots[s->nb_snapshots++] = *sn;
-
-    ret = qcow2_write_snapshots(bs);
-    if (ret < 0) {
-        /* Go back to the old list; the strings of the new entry are freed
-         * through sn in the fail path */
-        g_free(s->snapshots);
-        s->snapshots = old_snapshot_list;
-        s->nb_snapshots--;
-        goto fail;
-    }
-
-    g_free(old_snapshot_list);
-
-    /* The VM state isn't needed any more in the active L1 table; in fact, it
-     * hurts by causing expensive COW for the next snapshot. */
-    qcow2_discard_clusters(bs, qcow2_vm_state_offset(s),
-                           align_offset(sn->vm_state_size, s->cluster_size)
-                                >> BDRV_SECTOR_BITS,
-                           QCOW2_DISCARD_NEVER, false);
-
-#ifdef DEBUG_ALLOC
-    {
-      BdrvCheckResult result = {0};
-      qcow2_check_refcounts(bs, &result, 0);
-    }
-#endif
-    return 0;
-
-fail:
-    /* NOTE(review): the clusters allocated for the snapshot's L1 table are
-     * not freed here, and a refcount increase that already happened is not
-     * reverted - presumably an accepted cluster leak; confirm */
-    g_free(sn->id_str);
-    g_free(sn->name);
-    g_free(l1_table);
-
-    return ret;
-}
-
-/*
- * Reverts the active image state to the snapshot identified by snapshot_id,
- * which is looked up as a snapshot ID first and as a snapshot name second.
- *
- * The L1 table of the snapshot is copied over the active L1 table, both on
- * disk and in memory, with the refcounts being adjusted accordingly.
- *
- * Returns 0 on success, -ENOENT if there is no such snapshot, -ENOTSUP if the
- * disk size of the snapshot differs from the current image size, or another
- * negative errno value on failure.
- */
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowSnapshot *sn;
-    int i, snapshot_index;
-    int cur_l1_bytes, sn_l1_bytes;
-    int ret;
-    uint64_t *sn_l1_table = NULL;
-
-    /* Search the snapshot */
-    snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
-    if (snapshot_index < 0) {
-        return -ENOENT;
-    }
-    sn = &s->snapshots[snapshot_index];
-
-    if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
-        error_report("qcow2: Loading snapshots with different disk "
-            "size is not implemented");
-        ret = -ENOTSUP;
-        goto fail;
-    }
-
-    /*
-     * Make sure that the current L1 table is big enough to contain the whole
-     * L1 table of the snapshot. If the snapshot L1 table is smaller, the
-     * current one must be padded with zeros.
-     */
-    ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* NOTE(review): both byte counts are plain ints; presumably
-     * qcow2_grow_l1_table() has bounded the L1 sizes so that they cannot
-     * overflow - confirm */
-    cur_l1_bytes = s->l1_size * sizeof(uint64_t);
-    sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
-
-    /*
-     * Copy the snapshot L1 table to the current L1 table.
-     *
-     * Before overwriting the old current L1 table on disk, make sure to
-     * increase all refcounts for the clusters referenced by the new one.
-     * Decrease the refcount referenced by the old one only when the L1
-     * table is overwritten.
-     */
-    /* The buffer has the size of the current L1 table and is zeroed, so the
-     * part behind the snapshot's entries is the zero padding */
-    sn_l1_table = g_try_malloc0(cur_l1_bytes);
-    if (cur_l1_bytes && sn_l1_table == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
-                     sn_l1_table, sn_l1_bytes);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
-                                         sn->l1_size, 1);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
-                                        s->l1_table_offset, cur_l1_bytes);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
-                           cur_l1_bytes);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /*
-     * Decrease refcount of clusters of current L1 table.
-     *
-     * At this point, the in-memory s->l1_table points to the old L1 table,
-     * whereas on disk we already have the new one.
-     *
-     * qcow2_update_snapshot_refcount special cases the current L1 table to use
-     * the in-memory data instead of really using the offset to load a new one,
-     * which is why this works.
-     */
-    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
-                                         s->l1_size, -1);
-
-    /*
-     * Now update the in-memory L1 table to be in sync with the on-disk one. We
-     * need to do this even if updating refcounts failed.
-     */
-    for(i = 0;i < s->l1_size; i++) {
-        s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
-    }
-
-    /* This is the result of the refcount decrease above */
-    if (ret < 0) {
-        goto fail;
-    }
-
-    g_free(sn_l1_table);
-    sn_l1_table = NULL;
-
-    /*
-     * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
-     * when we decreased the refcount of the old snapshot).
-     */
-    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
-    if (ret < 0) {
-        goto fail;
-    }
-
-#ifdef DEBUG_ALLOC
-    {
-        BdrvCheckResult result = {0};
-        qcow2_check_refcounts(bs, &result, 0);
-    }
-#endif
-    return 0;
-
-fail:
-    g_free(sn_l1_table);
-    return ret;
-}
-
-/*
- * Deletes an internal snapshot.
- *
- * The snapshot is looked up with find_snapshot_by_id_and_name(): each of
- * snapshot_id and name that is not NULL has to match.
- *
- * The snapshot is first removed from the snapshot list and the list is
- * written to the image; only then are the refcounts of the clusters
- * referenced by the snapshot decreased and its L1 table freed.
- *
- * Returns 0 on success, -ENOENT if there is no such snapshot, or another
- * negative errno value on failure; errp is set in all error cases. If
- * writing the snapshot list fails, the in-memory list is restored, so the
- * snapshot stays visible and can be deleted again later. Failures after
- * that point only leak clusters.
- */
-int qcow2_snapshot_delete(BlockDriverState *bs,
-                          const char *snapshot_id,
-                          const char *name,
-                          Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowSnapshot sn;
-    int snapshot_index, ret;
-
-    /* Search the snapshot */
-    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
-    if (snapshot_index < 0) {
-        error_setg(errp, "Can't find the snapshot");
-        return -ENOENT;
-    }
-    sn = s->snapshots[snapshot_index];
-
-    /* Remove it from the snapshot list */
-    memmove(s->snapshots + snapshot_index,
-            s->snapshots + snapshot_index + 1,
-            (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
-    s->nb_snapshots--;
-    ret = qcow2_write_snapshots(bs);
-    if (ret < 0) {
-        /*
-         * qcow2_write_snapshots() has not switched over to a new snapshot
-         * table, so put the entry back where it was. Otherwise the in-memory
-         * list would no longer contain a snapshot that was not deleted, and
-         * its ID and name strings would be leaked. The array still has room
-         * for the entry, because it has only been shifted, not reallocated.
-         */
-        memmove(s->snapshots + snapshot_index + 1,
-                s->snapshots + snapshot_index,
-                (s->nb_snapshots - snapshot_index) * sizeof(sn));
-        s->snapshots[snapshot_index] = sn;
-        s->nb_snapshots++;
-
-        error_setg_errno(errp, -ret,
-                         "Failed to remove snapshot from snapshot list");
-        return ret;
-    }
-
-    /*
-     * The snapshot is now unused, clean up. If we fail after this point, we
-     * won't recover but just leak clusters.
-     */
-    g_free(sn.id_str);
-    g_free(sn.name);
-
-    /*
-     * Now decrease the refcounts of clusters referenced by the snapshot and
-     * free the L1 table.
-     */
-    ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
-                                         sn.l1_size, -1);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Failed to free the cluster and L1 table");
-        return ret;
-    }
-    qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
-                        QCOW2_DISCARD_SNAPSHOT);
-
-    /* must update the copied flag on the current cluster offsets */
-    ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "Failed to update snapshot status in disk");
-        return ret;
-    }
-
-#ifdef DEBUG_ALLOC
-    {
-        BdrvCheckResult result = {0};
-        qcow2_check_refcounts(bs, &result, 0);
-    }
-#endif
-    return 0;
-}
-
-/*
- * Returns the snapshots of the image as a newly allocated array of
- * QEMUSnapshotInfo in *psn_tab; ID, name, VM state size, date and VM clock
- * are filled in for every entry. If the image has no snapshots, *psn_tab is
- * set to NULL.
- *
- * The return value is the number of snapshots.
- */
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QEMUSnapshotInfo *table = NULL;
-    int idx;
-
-    if (s->nb_snapshots) {
-        table = g_new0(QEMUSnapshotInfo, s->nb_snapshots);
-
-        for (idx = 0; idx < s->nb_snapshots; idx++) {
-            const QCowSnapshot *src = &s->snapshots[idx];
-            QEMUSnapshotInfo *dst = &table[idx];
-
-            pstrcpy(dst->id_str, sizeof(dst->id_str), src->id_str);
-            pstrcpy(dst->name, sizeof(dst->name), src->name);
-            dst->vm_state_size = src->vm_state_size;
-            dst->date_sec = src->date_sec;
-            dst->date_nsec = src->date_nsec;
-            dst->vm_clock_nsec = src->vm_clock_nsec;
-        }
-    }
-
-    *psn_tab = table;
-    return s->nb_snapshots;
-}
-
-/*
- * Replaces the in-memory L1 table of a read-only image with the L1 table of
- * a snapshot. Only the in-memory state (s->l1_table, s->l1_size,
- * s->l1_table_offset) is changed; this function does not write to the image
- * file.
- *
- * The snapshot is looked up with find_snapshot_by_id_and_name(): each of
- * snapshot_id and name that is not NULL has to match.
- *
- * Asserts that bs is read-only.
- *
- * Returns 0 on success, -ENOENT if there is no such snapshot, -EFBIG if the
- * L1 table of the snapshot is larger than QCOW_MAX_L1_SIZE, -ENOMEM if the
- * buffer for the table cannot be allocated, or the negative errno value of
- * the failed read. errp is set in all error cases.
- */
-int qcow2_snapshot_load_tmp(BlockDriverState *bs,
-                            const char *snapshot_id,
-                            const char *name,
-                            Error **errp)
-{
-    int i, snapshot_index;
-    BDRVQcow2State *s = bs->opaque;
-    QCowSnapshot *sn;
-    uint64_t *new_l1_table;
-    int new_l1_bytes;
-    int ret;
-
-    assert(bs->read_only);
-
-    /* Search the snapshot */
-    snapshot_index = find_snapshot_by_id_and_name(bs, snapshot_id, name);
-    if (snapshot_index < 0) {
-        error_setg(errp,
-                   "Can't find snapshot");
-        return -ENOENT;
-    }
-    sn = &s->snapshots[snapshot_index];
-
-    /* Allocate and read in the snapshot's L1 table */
-    if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
-        error_setg(errp, "Snapshot L1 table too large");
-        return -EFBIG;
-    }
-    /* Cannot overflow thanks to the check against QCOW_MAX_L1_SIZE above */
-    new_l1_bytes = sn->l1_size * sizeof(uint64_t);
-    new_l1_table = qemu_try_blockalign(bs->file->bs,
-                                       align_offset(new_l1_bytes, 512));
-    if (new_l1_table == NULL) {
-        /* Like every other error path here, this one has to set errp, too:
-         * callers may rely on it being set whenever we fail */
-        error_setg(errp, "Failed to allocate memory for the snapshot's "
-                   "L1 table");
-        return -ENOMEM;
-    }
-
-    ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
-                     new_l1_table, new_l1_bytes);
-    if (ret < 0) {
-        error_setg(errp, "Failed to read l1 table for snapshot");
-        qemu_vfree(new_l1_table);
-        return ret;
-    }
-
-    /* Switch the L1 table */
-    qemu_vfree(s->l1_table);
-
-    s->l1_size = sn->l1_size;
-    s->l1_table_offset = sn->l1_table_offset;
-    s->l1_table = new_l1_table;
-
-    /* The table was read in its on-disk (big endian) byte order */
-    for(i = 0;i < s->l1_size; i++) {
-        be64_to_cpus(&s->l1_table[i]);
-    }
-
-    return 0;
-}
diff --git a/qemu/block/qcow2.c b/qemu/block/qcow2.c
deleted file mode 100644
index 470734be9..000000000
--- a/qemu/block/qcow2.c
+++ /dev/null
@@ -1,3373 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/module.h"
-#include <zlib.h>
-#include "block/qcow2.h"
-#include "qemu/error-report.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/util.h"
-#include "qapi/qmp/types.h"
-#include "qapi-event.h"
-#include "trace.h"
-#include "qemu/option_int.h"
-#include "qemu/cutils.h"
-
-/*
-  Differences with QCOW:
-
-  - Support for multiple incremental snapshots.
-  - Memory management by reference counts.
-  - Clusters which have a reference count of one have the bit
-    QCOW_OFLAG_COPIED to optimize write performance.
-  - Size of compressed clusters is stored in sectors to reduce bit usage
-    in the cluster offsets.
-  - Support for storing additional data (such as the VM state) in the
-    snapshots.
-  - If a backing store is used, the cluster size is not constrained
-    (could be backported to QCOW).
-  - L2 tables have always a size of one cluster.
-*/
-
-
-typedef struct {
-    uint32_t magic;
-    uint32_t len;
-} QEMU_PACKED QCowExtension;
-
-#define  QCOW2_EXT_MAGIC_END 0
-#define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
-#define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
-
-static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const QCowHeader *cow_header = (const void *)buf;
-
-    if (buf_size >= sizeof(QCowHeader) &&
-        be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
-        be32_to_cpu(cow_header->version) >= 2)
-        return 100;
-    else
-        return 0;
-}
-
-
-/* 
- * read qcow2 extension and fill bs
- * start reading from start_offset
- * finish reading upon magic of value 0 or when end_offset reached
- * unknown magic is skipped (future extension this version knows nothing about)
- * return 0 upon success, non-0 otherwise
- */
-static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
-                                 uint64_t end_offset, void **p_feature_table,
-                                 Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowExtension ext;
-    uint64_t offset;
-    int ret;
-
-#ifdef DEBUG_EXT
-    printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
-#endif
-    offset = start_offset;
-    while (offset < end_offset) {
-
-#ifdef DEBUG_EXT
-        /* Sanity check */
-        if (offset > s->cluster_size)
-            printf("qcow2_read_extension: suspicious offset %lu\n", offset);
-
-        printf("attempting to read extended header in offset %lu\n", offset);
-#endif
-
-        ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
-                             "pread fail from offset %" PRIu64, offset);
-            return 1;
-        }
-        be32_to_cpus(&ext.magic);
-        be32_to_cpus(&ext.len);
-        offset += sizeof(ext);
-#ifdef DEBUG_EXT
-        printf("ext.magic = 0x%x\n", ext.magic);
-#endif
-        if (offset > end_offset || ext.len > end_offset - offset) {
-            error_setg(errp, "Header extension too large");
-            return -EINVAL;
-        }
-
-        switch (ext.magic) {
-        case QCOW2_EXT_MAGIC_END:
-            return 0;
-
-        case QCOW2_EXT_MAGIC_BACKING_FORMAT:
-            if (ext.len >= sizeof(bs->backing_format)) {
-                error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
-                           " too large (>=%zu)", ext.len,
-                           sizeof(bs->backing_format));
-                return 2;
-            }
-            ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
-            if (ret < 0) {
-                error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
-                                 "Could not read format name");
-                return 3;
-            }
-            bs->backing_format[ext.len] = '\0';
-            s->image_backing_format = g_strdup(bs->backing_format);
-#ifdef DEBUG_EXT
-            printf("Qcow2: Got format extension %s\n", bs->backing_format);
-#endif
-            break;
-
-        case QCOW2_EXT_MAGIC_FEATURE_TABLE:
-            if (p_feature_table != NULL) {
-                void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
-                if (ret < 0) {
-                    error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
-                                     "Could not read table");
-                    return ret;
-                }
-
-                *p_feature_table = feature_table;
-            }
-            break;
-
-        default:
-            /* unknown magic - save it in case we need to rewrite the header */
-            {
-                Qcow2UnknownHeaderExtension *uext;
-
-                uext = g_malloc0(sizeof(*uext)  + ext.len);
-                uext->magic = ext.magic;
-                uext->len = ext.len;
-                QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
-
-                ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
-                if (ret < 0) {
-                    error_setg_errno(errp, -ret, "ERROR: unknown extension: "
-                                     "Could not read data");
-                    return ret;
-                }
-            }
-            break;
-        }
-
-        offset += ((ext.len + 7) & ~7);
-    }
-
-    return 0;
-}
-
-static void cleanup_unknown_header_ext(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    Qcow2UnknownHeaderExtension *uext, *next;
-
-    QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
-        QLIST_REMOVE(uext, next);
-        g_free(uext);
-    }
-}
-
-static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
-                                       uint64_t mask)
-{
-    char *features = g_strdup("");
-    char *old;
-
-    while (table && table->name[0] != '\0') {
-        if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
-            if (mask & (1ULL << table->bit)) {
-                old = features;
-                features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
-                                           table->name);
-                g_free(old);
-                mask &= ~(1ULL << table->bit);
-            }
-        }
-        table++;
-    }
-
-    if (mask) {
-        old = features;
-        features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
-                                   old, *old ? ", " : "", mask);
-        g_free(old);
-    }
-
-    error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
-    g_free(features);
-}
-
-/*
- * Sets the dirty bit and flushes afterwards if necessary.
- *
- * The incompatible_features bit is only set if the image file header was
- * updated successfully.  Therefore it is not required to check the return
- * value of this function.
- */
-int qcow2_mark_dirty(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t val;
-    int ret;
-
-    assert(s->qcow_version >= 3);
-
-    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
-        return 0; /* already dirty */
-    }
-
-    val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-    ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
-                      &val, sizeof(val));
-    if (ret < 0) {
-        return ret;
-    }
-    ret = bdrv_flush(bs->file->bs);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Only treat image as dirty if the header was updated successfully */
-    s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
-    return 0;
-}
-
-/*
- * Clears the dirty bit and flushes before if necessary.  Only call this
- * function when there are no pending requests, it does not guard against
- * concurrent requests dirtying the image.
- */
-static int qcow2_mark_clean(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-
-    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
-        int ret;
-
-        s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
-
-        ret = bdrv_flush(bs);
-        if (ret < 0) {
-            return ret;
-        }
-
-        return qcow2_update_header(bs);
-    }
-    return 0;
-}
-
-/*
- * Marks the image as corrupt.
- */
-int qcow2_mark_corrupt(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-
-    s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
-    return qcow2_update_header(bs);
-}
-
-/*
- * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
- * before if necessary.
- */
-int qcow2_mark_consistent(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-
-    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
-        int ret = bdrv_flush(bs);
-        if (ret < 0) {
-            return ret;
-        }
-
-        s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
-        return qcow2_update_header(bs);
-    }
-    return 0;
-}
-
-static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
-                       BdrvCheckMode fix)
-{
-    int ret = qcow2_check_refcounts(bs, result, fix);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (fix && result->check_errors == 0 && result->corruptions == 0) {
-        ret = qcow2_mark_clean(bs);
-        if (ret < 0) {
-            return ret;
-        }
-        return qcow2_mark_consistent(bs);
-    }
-    return ret;
-}
-
-static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
-                                 uint64_t entries, size_t entry_len)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t size;
-
-    /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
-     * because values will be passed to qemu functions taking int64_t. */
-    if (entries > INT64_MAX / entry_len) {
-        return -EINVAL;
-    }
-
-    size = entries * entry_len;
-
-    if (INT64_MAX - size < offset) {
-        return -EINVAL;
-    }
-
-    /* Tables must be cluster aligned */
-    if (offset & (s->cluster_size - 1)) {
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
-static QemuOptsList qcow2_runtime_opts = {
-    .name = "qcow2",
-    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
-    .desc = {
-        {
-            .name = QCOW2_OPT_LAZY_REFCOUNTS,
-            .type = QEMU_OPT_BOOL,
-            .help = "Postpone refcount updates",
-        },
-        {
-            .name = QCOW2_OPT_DISCARD_REQUEST,
-            .type = QEMU_OPT_BOOL,
-            .help = "Pass guest discard requests to the layer below",
-        },
-        {
-            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
-            .type = QEMU_OPT_BOOL,
-            .help = "Generate discard requests when snapshot related space "
-                    "is freed",
-        },
-        {
-            .name = QCOW2_OPT_DISCARD_OTHER,
-            .type = QEMU_OPT_BOOL,
-            .help = "Generate discard requests when other clusters are freed",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP,
-            .type = QEMU_OPT_STRING,
-            .help = "Selects which overlap checks to perform from a range of "
-                    "templates (none, constant, cached, all)",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_TEMPLATE,
-            .type = QEMU_OPT_STRING,
-            .help = "Selects which overlap checks to perform from a range of "
-                    "templates (none, constant, cached, all)",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into the main qcow2 header",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into the active L1 table",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into an active L2 table",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into the refcount table",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into a refcount block",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into the snapshot table",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into an inactive L1 table",
-        },
-        {
-            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
-            .type = QEMU_OPT_BOOL,
-            .help = "Check for unintended writes into an inactive L2 table",
-        },
-        {
-            .name = QCOW2_OPT_CACHE_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum combined metadata (L2 tables and refcount blocks) "
-                    "cache size",
-        },
-        {
-            .name = QCOW2_OPT_L2_CACHE_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum L2 table cache size",
-        },
-        {
-            .name = QCOW2_OPT_REFCOUNT_CACHE_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Maximum refcount block cache size",
-        },
-        {
-            .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Clean unused cache entries after this time (in seconds)",
-        },
-        { /* end of list */ }
-    },
-};
-
-static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
-    [QCOW2_OL_MAIN_HEADER_BITNR]    = QCOW2_OPT_OVERLAP_MAIN_HEADER,
-    [QCOW2_OL_ACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_ACTIVE_L1,
-    [QCOW2_OL_ACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_ACTIVE_L2,
-    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
-    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
-    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
-    [QCOW2_OL_INACTIVE_L1_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L1,
-    [QCOW2_OL_INACTIVE_L2_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L2,
-};
-
-static void cache_clean_timer_cb(void *opaque)
-{
-    BlockDriverState *bs = opaque;
-    BDRVQcow2State *s = bs->opaque;
-    qcow2_cache_clean_unused(bs, s->l2_table_cache);
-    qcow2_cache_clean_unused(bs, s->refcount_block_cache);
-    timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
-              (int64_t) s->cache_clean_interval * 1000);
-}
-
-static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
-{
-    BDRVQcow2State *s = bs->opaque;
-    if (s->cache_clean_interval > 0) {
-        s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
-                                             SCALE_MS, cache_clean_timer_cb,
-                                             bs);
-        timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
-                  (int64_t) s->cache_clean_interval * 1000);
-    }
-}
-
-static void cache_clean_timer_del(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    if (s->cache_clean_timer) {
-        timer_del(s->cache_clean_timer);
-        timer_free(s->cache_clean_timer);
-        s->cache_clean_timer = NULL;
-    }
-}
-
-static void qcow2_detach_aio_context(BlockDriverState *bs)
-{
-    cache_clean_timer_del(bs);
-}
-
-static void qcow2_attach_aio_context(BlockDriverState *bs,
-                                     AioContext *new_context)
-{
-    cache_clean_timer_init(bs, new_context);
-}
-
-static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
-                             uint64_t *l2_cache_size,
-                             uint64_t *refcount_cache_size, Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t combined_cache_size;
-    bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
-
-    combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
-    l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
-    refcount_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
-
-    combined_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_CACHE_SIZE, 0);
-    *l2_cache_size = qemu_opt_get_size(opts, QCOW2_OPT_L2_CACHE_SIZE, 0);
-    *refcount_cache_size = qemu_opt_get_size(opts,
-                                             QCOW2_OPT_REFCOUNT_CACHE_SIZE, 0);
-
-    if (combined_cache_size_set) {
-        if (l2_cache_size_set && refcount_cache_size_set) {
-            error_setg(errp, QCOW2_OPT_CACHE_SIZE ", " QCOW2_OPT_L2_CACHE_SIZE
-                       " and " QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not be set "
-                       "the same time");
-            return;
-        } else if (*l2_cache_size > combined_cache_size) {
-            error_setg(errp, QCOW2_OPT_L2_CACHE_SIZE " may not exceed "
-                       QCOW2_OPT_CACHE_SIZE);
-            return;
-        } else if (*refcount_cache_size > combined_cache_size) {
-            error_setg(errp, QCOW2_OPT_REFCOUNT_CACHE_SIZE " may not exceed "
-                       QCOW2_OPT_CACHE_SIZE);
-            return;
-        }
-
-        if (l2_cache_size_set) {
-            *refcount_cache_size = combined_cache_size - *l2_cache_size;
-        } else if (refcount_cache_size_set) {
-            *l2_cache_size = combined_cache_size - *refcount_cache_size;
-        } else {
-            *refcount_cache_size = combined_cache_size
-                                 / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
-            *l2_cache_size = combined_cache_size - *refcount_cache_size;
-        }
-    } else {
-        if (!l2_cache_size_set && !refcount_cache_size_set) {
-            *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
-                                 (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
-                                 * s->cluster_size);
-            *refcount_cache_size = *l2_cache_size
-                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        } else if (!l2_cache_size_set) {
-            *l2_cache_size = *refcount_cache_size
-                           * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        } else if (!refcount_cache_size_set) {
-            *refcount_cache_size = *l2_cache_size
-                                 / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
-        }
-    }
-}
-
-typedef struct Qcow2ReopenState {
-    Qcow2Cache *l2_table_cache;
-    Qcow2Cache *refcount_block_cache;
-    bool use_lazy_refcounts;
-    int overlap_check;
-    bool discard_passthrough[QCOW2_DISCARD_MAX];
-    uint64_t cache_clean_interval;
-} Qcow2ReopenState;
-
-static int qcow2_update_options_prepare(BlockDriverState *bs,
-                                        Qcow2ReopenState *r,
-                                        QDict *options, int flags,
-                                        Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QemuOpts *opts = NULL;
-    const char *opt_overlap_check, *opt_overlap_check_template;
-    int overlap_check_template = 0;
-    uint64_t l2_cache_size, refcount_cache_size;
-    int i;
-    Error *local_err = NULL;
-    int ret;
-
-    opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* get L2 table/refcount block cache size from command line options */
-    read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
-                     &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    l2_cache_size /= s->cluster_size;
-    if (l2_cache_size < MIN_L2_CACHE_SIZE) {
-        l2_cache_size = MIN_L2_CACHE_SIZE;
-    }
-    if (l2_cache_size > INT_MAX) {
-        error_setg(errp, "L2 cache size too big");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    refcount_cache_size /= s->cluster_size;
-    if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
-        refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
-    }
-    if (refcount_cache_size > INT_MAX) {
-        error_setg(errp, "Refcount cache size too big");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* alloc new L2 table/refcount block cache, flush old one */
-    if (s->l2_table_cache) {
-        ret = qcow2_cache_flush(bs, s->l2_table_cache);
-        if (ret) {
-            error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
-            goto fail;
-        }
-    }
-
-    if (s->refcount_block_cache) {
-        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-        if (ret) {
-            error_setg_errno(errp, -ret,
-                             "Failed to flush the refcount block cache");
-            goto fail;
-        }
-    }
-
-    r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
-    r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
-    if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
-        error_setg(errp, "Could not allocate metadata caches");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    /* New interval for cache cleanup timer */
-    r->cache_clean_interval =
-        qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
-                            s->cache_clean_interval);
-    if (r->cache_clean_interval > UINT_MAX) {
-        error_setg(errp, "Cache clean interval too big");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* lazy-refcounts; flush if going from enabled to disabled */
-    r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
-        (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-    if (r->use_lazy_refcounts && s->qcow_version < 3) {
-        error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
-                   "qemu 1.1 compatibility level");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
-        ret = qcow2_mark_clean(bs);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
-            goto fail;
-        }
-    }
-
-    /* Overlap check options */
-    opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
-    opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
-    if (opt_overlap_check_template && opt_overlap_check &&
-        strcmp(opt_overlap_check_template, opt_overlap_check))
-    {
-        error_setg(errp, "Conflicting values for qcow2 options '"
-                   QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
-                   "' ('%s')", opt_overlap_check, opt_overlap_check_template);
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (!opt_overlap_check) {
-        opt_overlap_check = opt_overlap_check_template ?: "cached";
-    }
-
-    if (!strcmp(opt_overlap_check, "none")) {
-        overlap_check_template = 0;
-    } else if (!strcmp(opt_overlap_check, "constant")) {
-        overlap_check_template = QCOW2_OL_CONSTANT;
-    } else if (!strcmp(opt_overlap_check, "cached")) {
-        overlap_check_template = QCOW2_OL_CACHED;
-    } else if (!strcmp(opt_overlap_check, "all")) {
-        overlap_check_template = QCOW2_OL_ALL;
-    } else {
-        error_setg(errp, "Unsupported value '%s' for qcow2 option "
-                   "'overlap-check'. Allowed are any of the following: "
-                   "none, constant, cached, all", opt_overlap_check);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    r->overlap_check = 0;
-    for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
-        /* overlap-check defines a template bitmask, but every flag may be
-         * overwritten through the associated boolean option */
-        r->overlap_check |=
-            qemu_opt_get_bool(opts, overlap_bool_option_names[i],
-                              overlap_check_template & (1 << i)) << i;
-    }
-
-    r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
-    r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
-    r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
-        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
-                          flags & BDRV_O_UNMAP);
-    r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
-        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
-    r->discard_passthrough[QCOW2_DISCARD_OTHER] =
-        qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
-    ret = 0;
-fail:
-    qemu_opts_del(opts);
-    opts = NULL;
-    return ret;
-}
-
-static void qcow2_update_options_commit(BlockDriverState *bs,
-                                        Qcow2ReopenState *r)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int i;
-
-    if (s->l2_table_cache) {
-        qcow2_cache_destroy(bs, s->l2_table_cache);
-    }
-    if (s->refcount_block_cache) {
-        qcow2_cache_destroy(bs, s->refcount_block_cache);
-    }
-    s->l2_table_cache = r->l2_table_cache;
-    s->refcount_block_cache = r->refcount_block_cache;
-
-    s->overlap_check = r->overlap_check;
-    s->use_lazy_refcounts = r->use_lazy_refcounts;
-
-    for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
-        s->discard_passthrough[i] = r->discard_passthrough[i];
-    }
-
-    if (s->cache_clean_interval != r->cache_clean_interval) {
-        cache_clean_timer_del(bs);
-        s->cache_clean_interval = r->cache_clean_interval;
-        cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
-    }
-}
-
-static void qcow2_update_options_abort(BlockDriverState *bs,
-                                       Qcow2ReopenState *r)
-{
-    if (r->l2_table_cache) {
-        qcow2_cache_destroy(bs, r->l2_table_cache);
-    }
-    if (r->refcount_block_cache) {
-        qcow2_cache_destroy(bs, r->refcount_block_cache);
-    }
-}
-
-static int qcow2_update_options(BlockDriverState *bs, QDict *options,
-                                int flags, Error **errp)
-{
-    Qcow2ReopenState r = {};
-    int ret;
-
-    ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
-    if (ret >= 0) {
-        qcow2_update_options_commit(bs, &r);
-    } else {
-        qcow2_update_options_abort(bs, &r);
-    }
-
-    return ret;
-}
-
-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    unsigned int len, i;
-    int ret = 0;
-    QCowHeader header;
-    Error *local_err = NULL;
-    uint64_t ext_end;
-    uint64_t l1_vm_state_index;
-
-    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not read qcow2 header");
-        goto fail;
-    }
-    be32_to_cpus(&header.magic);
-    be32_to_cpus(&header.version);
-    be64_to_cpus(&header.backing_file_offset);
-    be32_to_cpus(&header.backing_file_size);
-    be64_to_cpus(&header.size);
-    be32_to_cpus(&header.cluster_bits);
-    be32_to_cpus(&header.crypt_method);
-    be64_to_cpus(&header.l1_table_offset);
-    be32_to_cpus(&header.l1_size);
-    be64_to_cpus(&header.refcount_table_offset);
-    be32_to_cpus(&header.refcount_table_clusters);
-    be64_to_cpus(&header.snapshots_offset);
-    be32_to_cpus(&header.nb_snapshots);
-
-    if (header.magic != QCOW_MAGIC) {
-        error_setg(errp, "Image is not in qcow2 format");
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (header.version < 2 || header.version > 3) {
-        error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
-        ret = -ENOTSUP;
-        goto fail;
-    }
-
-    s->qcow_version = header.version;
-
-    /* Initialise cluster size */
-    if (header.cluster_bits < MIN_CLUSTER_BITS ||
-        header.cluster_bits > MAX_CLUSTER_BITS) {
-        error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
-                   header.cluster_bits);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    s->cluster_bits = header.cluster_bits;
-    s->cluster_size = 1 << s->cluster_bits;
-    s->cluster_sectors = 1 << (s->cluster_bits - 9);
-
-    /* Initialise version 3 header fields */
-    if (header.version == 2) {
-        header.incompatible_features    = 0;
-        header.compatible_features      = 0;
-        header.autoclear_features       = 0;
-        header.refcount_order           = 4;
-        header.header_length            = 72;
-    } else {
-        be64_to_cpus(&header.incompatible_features);
-        be64_to_cpus(&header.compatible_features);
-        be64_to_cpus(&header.autoclear_features);
-        be32_to_cpus(&header.refcount_order);
-        be32_to_cpus(&header.header_length);
-
-        if (header.header_length < 104) {
-            error_setg(errp, "qcow2 header too short");
-            ret = -EINVAL;
-            goto fail;
-        }
-    }
-
-    if (header.header_length > s->cluster_size) {
-        error_setg(errp, "qcow2 header exceeds cluster size");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    if (header.header_length > sizeof(header)) {
-        s->unknown_header_fields_size = header.header_length - sizeof(header);
-        s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
-        ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
-                         s->unknown_header_fields_size);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
-                             "fields");
-            goto fail;
-        }
-    }
-
-    if (header.backing_file_offset > s->cluster_size) {
-        error_setg(errp, "Invalid backing file offset");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    if (header.backing_file_offset) {
-        ext_end = header.backing_file_offset;
-    } else {
-        ext_end = 1 << header.cluster_bits;
-    }
-
-    /* Handle feature bits */
-    s->incompatible_features    = header.incompatible_features;
-    s->compatible_features      = header.compatible_features;
-    s->autoclear_features       = header.autoclear_features;
-
-    if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
-        void *feature_table = NULL;
-        qcow2_read_extensions(bs, header.header_length, ext_end,
-                              &feature_table, NULL);
-        report_unsupported_feature(errp, feature_table,
-                                   s->incompatible_features &
-                                   ~QCOW2_INCOMPAT_MASK);
-        ret = -ENOTSUP;
-        g_free(feature_table);
-        goto fail;
-    }
-
-    if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
-        /* Corrupt images may not be written to unless they are being repaired
-         */
-        if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
-            error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
-                       "read/write");
-            ret = -EACCES;
-            goto fail;
-        }
-    }
-
-    /* Check support for various header values */
-    if (header.refcount_order > 6) {
-        error_setg(errp, "Reference count entry width too large; may not "
-                   "exceed 64 bits");
-        ret = -EINVAL;
-        goto fail;
-    }
-    s->refcount_order = header.refcount_order;
-    s->refcount_bits = 1 << s->refcount_order;
-    s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
-    s->refcount_max += s->refcount_max - 1;
-
-    if (header.crypt_method > QCOW_CRYPT_AES) {
-        error_setg(errp, "Unsupported encryption method: %" PRIu32,
-                   header.crypt_method);
-        ret = -EINVAL;
-        goto fail;
-    }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
-        error_setg(errp, "AES cipher not available");
-        ret = -EINVAL;
-        goto fail;
-    }
-    s->crypt_method_header = header.crypt_method;
-    if (s->crypt_method_header) {
-        if (bdrv_uses_whitelist() &&
-            s->crypt_method_header == QCOW_CRYPT_AES) {
-            error_report("qcow2 built-in AES encryption is deprecated");
-            error_printf("Support for it will be removed in a future release.\n"
-                         "You can use 'qemu-img convert' to switch to an\n"
-                         "unencrypted qcow2 image, or a LUKS raw image.\n");
-        }
-
-        bs->encrypted = 1;
-    }
-
-    s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
-    s->l2_size = 1 << s->l2_bits;
-    /* 2^(s->refcount_order - 3) is the refcount width in bytes */
-    s->refcount_block_bits = s->cluster_bits - (s->refcount_order - 3);
-    s->refcount_block_size = 1 << s->refcount_block_bits;
-    bs->total_sectors = header.size / 512;
-    s->csize_shift = (62 - (s->cluster_bits - 8));
-    s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
-    s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
-
-    s->refcount_table_offset = header.refcount_table_offset;
-    s->refcount_table_size =
-        header.refcount_table_clusters << (s->cluster_bits - 3);
-
-    if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) {
-        error_setg(errp, "Reference count table too large");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = validate_table_offset(bs, s->refcount_table_offset,
-                                s->refcount_table_size, sizeof(uint64_t));
-    if (ret < 0) {
-        error_setg(errp, "Invalid reference count table offset");
-        goto fail;
-    }
-
-    /* Snapshot table offset/length */
-    if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) {
-        error_setg(errp, "Too many snapshots");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = validate_table_offset(bs, header.snapshots_offset,
-                                header.nb_snapshots,
-                                sizeof(QCowSnapshotHeader));
-    if (ret < 0) {
-        error_setg(errp, "Invalid snapshot table offset");
-        goto fail;
-    }
-
-    /* read the level 1 table */
-    if (header.l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
-        error_setg(errp, "Active L1 table too large");
-        ret = -EFBIG;
-        goto fail;
-    }
-    s->l1_size = header.l1_size;
-
-    l1_vm_state_index = size_to_l1(s, header.size);
-    if (l1_vm_state_index > INT_MAX) {
-        error_setg(errp, "Image is too big");
-        ret = -EFBIG;
-        goto fail;
-    }
-    s->l1_vm_state_index = l1_vm_state_index;
-
-    /* the L1 table must contain at least enough entries to put
-       header.size bytes */
-    if (s->l1_size < s->l1_vm_state_index) {
-        error_setg(errp, "L1 table is too small");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = validate_table_offset(bs, header.l1_table_offset,
-                                header.l1_size, sizeof(uint64_t));
-    if (ret < 0) {
-        error_setg(errp, "Invalid L1 table offset");
-        goto fail;
-    }
-    s->l1_table_offset = header.l1_table_offset;
-
-
-    if (s->l1_size > 0) {
-        s->l1_table = qemu_try_blockalign(bs->file->bs,
-            align_offset(s->l1_size * sizeof(uint64_t), 512));
-        if (s->l1_table == NULL) {
-            error_setg(errp, "Could not allocate L1 table");
-            ret = -ENOMEM;
-            goto fail;
-        }
-        ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
-                         s->l1_size * sizeof(uint64_t));
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not read L1 table");
-            goto fail;
-        }
-        for(i = 0;i < s->l1_size; i++) {
-            be64_to_cpus(&s->l1_table[i]);
-        }
-    }
-
-    /* Parse driver-specific options */
-    ret = qcow2_update_options(bs, options, flags, errp);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    s->cluster_cache = g_malloc(s->cluster_size);
-    /* one more sector for decompressed data alignment */
-    s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
-                                                    * s->cluster_size + 512);
-    if (s->cluster_data == NULL) {
-        error_setg(errp, "Could not allocate temporary cluster buffer");
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    s->cluster_cache_offset = -1;
-    s->flags = flags;
-
-    ret = qcow2_refcount_init(bs);
-    if (ret != 0) {
-        error_setg_errno(errp, -ret, "Could not initialize refcount handling");
-        goto fail;
-    }
-
-    QLIST_INIT(&s->cluster_allocs);
-    QTAILQ_INIT(&s->discards);
-
-    /* read qcow2 extensions */
-    if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
-        &local_err)) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* read the backing file name */
-    if (header.backing_file_offset != 0) {
-        len = header.backing_file_size;
-        if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
-            len >= sizeof(bs->backing_file)) {
-            error_setg(errp, "Backing file name too long");
-            ret = -EINVAL;
-            goto fail;
-        }
-        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
-                         bs->backing_file, len);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not read backing file name");
-            goto fail;
-        }
-        bs->backing_file[len] = '\0';
-        s->image_backing_file = g_strdup(bs->backing_file);
-    }
-
-    /* Internal snapshots */
-    s->snapshots_offset = header.snapshots_offset;
-    s->nb_snapshots = header.nb_snapshots;
-
-    ret = qcow2_read_snapshots(bs);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not read snapshots");
-        goto fail;
-    }
-
-    /* Clear unknown autoclear feature bits */
-    if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) {
-        s->autoclear_features = 0;
-        ret = qcow2_update_header(bs);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not update qcow2 header");
-            goto fail;
-        }
-    }
-
-    /* Initialise locks */
-    qemu_co_mutex_init(&s->lock);
-
-    /* Repair image if dirty */
-    if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
-        (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
-        BdrvCheckResult result = {0};
-
-        ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not repair dirty image");
-            goto fail;
-        }
-    }
-
-#ifdef DEBUG_ALLOC
-    {
-        BdrvCheckResult result = {0};
-        qcow2_check_refcounts(bs, &result, 0);
-    }
-#endif
-    return ret;
-
- fail:
-    g_free(s->unknown_header_fields);
-    cleanup_unknown_header_ext(bs);
-    qcow2_free_snapshots(bs);
-    qcow2_refcount_close(bs);
-    qemu_vfree(s->l1_table);
-    /* else pre-write overlap checks in cache_destroy may crash */
-    s->l1_table = NULL;
-    cache_clean_timer_del(bs);
-    if (s->l2_table_cache) {
-        qcow2_cache_destroy(bs, s->l2_table_cache);
-    }
-    if (s->refcount_block_cache) {
-        qcow2_cache_destroy(bs, s->refcount_block_cache);
-    }
-    g_free(s->cluster_cache);
-    qemu_vfree(s->cluster_data);
-    return ret;
-}
-
-static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-
-    bs->bl.write_zeroes_alignment = s->cluster_sectors;
-}
-
-static int qcow2_set_key(BlockDriverState *bs, const char *key)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint8_t keybuf[16];
-    int len, i;
-    Error *err = NULL;
-
-    memset(keybuf, 0, 16);
-    len = strlen(key);
-    if (len > 16)
-        len = 16;
-    /* XXX: we could compress the chars to 7 bits to increase
-       entropy */
-    for(i = 0;i < len;i++) {
-        keybuf[i] = key[i];
-    }
-    assert(bs->encrypted);
-
-    qcrypto_cipher_free(s->cipher);
-    s->cipher = qcrypto_cipher_new(
-        QCRYPTO_CIPHER_ALG_AES_128,
-        QCRYPTO_CIPHER_MODE_CBC,
-        keybuf, G_N_ELEMENTS(keybuf),
-        &err);
-
-    if (!s->cipher) {
-        /* XXX would be nice if errors in this method could
-         * be properly propagate to the caller. Would need
-         * the bdrv_set_key() API signature to be fixed. */
-        error_free(err);
-        return -1;
-    }
-    return 0;
-}
-
-static int qcow2_reopen_prepare(BDRVReopenState *state,
-                                BlockReopenQueue *queue, Error **errp)
-{
-    Qcow2ReopenState *r;
-    int ret;
-
-    r = g_new0(Qcow2ReopenState, 1);
-    state->opaque = r;
-
-    ret = qcow2_update_options_prepare(state->bs, r, state->options,
-                                       state->flags, errp);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* We need to write out any unwritten data if we reopen read-only. */
-    if ((state->flags & BDRV_O_RDWR) == 0) {
-        ret = bdrv_flush(state->bs);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        ret = qcow2_mark_clean(state->bs);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    return 0;
-
-fail:
-    qcow2_update_options_abort(state->bs, r);
-    g_free(r);
-    return ret;
-}
-
-static void qcow2_reopen_commit(BDRVReopenState *state)
-{
-    qcow2_update_options_commit(state->bs, state->opaque);
-    g_free(state->opaque);
-}
-
-static void qcow2_reopen_abort(BDRVReopenState *state)
-{
-    qcow2_update_options_abort(state->bs, state->opaque);
-    g_free(state->opaque);
-}
-
-static void qcow2_join_options(QDict *options, QDict *old_options)
-{
-    bool has_new_overlap_template =
-        qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
-        qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
-    bool has_new_total_cache_size =
-        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
-    bool has_all_cache_options;
-
-    /* New overlap template overrides all old overlap options */
-    if (has_new_overlap_template) {
-        qdict_del(old_options, QCOW2_OPT_OVERLAP);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
-        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
-    }
-
-    /* New total cache size overrides all old options */
-    if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
-        qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
-        qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
-    }
-
-    qdict_join(options, old_options, false);
-
-    /*
-     * If after merging all cache size options are set, an old total size is
-     * overwritten. Do keep all options, however, if all three are new. The
-     * resulting error message is what we want to happen.
-     */
-    has_all_cache_options =
-        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
-        qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
-        qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
-
-    if (has_all_cache_options && !has_new_total_cache_size) {
-        qdict_del(options, QCOW2_OPT_CACHE_SIZE);
-    }
-}
-
-static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    BDRVQcow2State *s = bs->opaque;
-    uint64_t cluster_offset;
-    int index_in_cluster, ret;
-    int64_t status = 0;
-
-    *pnum = nb_sectors;
-    qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
-    qemu_co_mutex_unlock(&s->lock);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
-        !s->cipher) {
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
-        *file = bs->file->bs;
-        status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
-    }
-    if (ret == QCOW2_CLUSTER_ZERO) {
-        status |= BDRV_BLOCK_ZERO;
-    } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
-        status |= BDRV_BLOCK_DATA;
-    }
-    return status;
-}
-
-/* handle reading after the end of the backing file */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                  int64_t sector_num, int nb_sectors)
-{
-    int n1;
-    if ((sector_num + nb_sectors) <= bs->total_sectors)
-        return nb_sectors;
-    if (sector_num >= bs->total_sectors)
-        n1 = 0;
-    else
-        n1 = bs->total_sectors - sector_num;
-
-    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
-
-    return n1;
-}
-
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
-                          int remaining_sectors, QEMUIOVector *qiov)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster, n1;
-    int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
-    uint64_t cluster_offset = 0;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-    uint8_t *cluster_data = NULL;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    qemu_co_mutex_lock(&s->lock);
-
-    while (remaining_sectors != 0) {
-
-        /* prepare next request */
-        cur_nr_sectors = remaining_sectors;
-        if (s->cipher) {
-            cur_nr_sectors = MIN(cur_nr_sectors,
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
-        }
-
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
-
-        switch (ret) {
-        case QCOW2_CLUSTER_UNALLOCATED:
-
-            if (bs->backing) {
-                /* read from the base image */
-                n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
-                    sector_num, cur_nr_sectors);
-                if (n1 > 0) {
-                    QEMUIOVector local_qiov;
-
-                    qemu_iovec_init(&local_qiov, hd_qiov.niov);
-                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
-                                      n1 * BDRV_SECTOR_SIZE);
-
-                    BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
-                    qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                        n1, &local_qiov);
-                    qemu_co_mutex_lock(&s->lock);
-
-                    qemu_iovec_destroy(&local_qiov);
-
-                    if (ret < 0) {
-                        goto fail;
-                    }
-                }
-            } else {
-                /* Note: in this case, no need to wait */
-                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
-            }
-            break;
-
-        case QCOW2_CLUSTER_ZERO:
-            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
-            break;
-
-        case QCOW2_CLUSTER_COMPRESSED:
-            /* add AIO support for compressed blocks ? */
-            ret = qcow2_decompress_cluster(bs, cluster_offset);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            qemu_iovec_from_buf(&hd_qiov, 0,
-                s->cluster_cache + index_in_cluster * 512,
-                512 * cur_nr_sectors);
-            break;
-
-        case QCOW2_CLUSTER_NORMAL:
-            if ((cluster_offset & 511) != 0) {
-                ret = -EIO;
-                goto fail;
-            }
-
-            if (bs->encrypted) {
-                assert(s->cipher);
-
-                /*
-                 * For encrypted images, read everything into a temporary
-                 * contiguous buffer on which the AES functions can work.
-                 */
-                if (!cluster_data) {
-                    cluster_data =
-                        qemu_try_blockalign(bs->file->bs,
-                                            QCOW_MAX_CRYPT_CLUSTERS
-                                            * s->cluster_size);
-                    if (cluster_data == NULL) {
-                        ret = -ENOMEM;
-                        goto fail;
-                    }
-                }
-
-                assert(cur_nr_sectors <=
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
-                qemu_iovec_reset(&hd_qiov);
-                qemu_iovec_add(&hd_qiov, cluster_data,
-                    512 * cur_nr_sectors);
-            }
-
-            BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-            qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
-                                (cluster_offset >> 9) + index_in_cluster,
-                                cur_nr_sectors, &hd_qiov);
-            qemu_co_mutex_lock(&s->lock);
-            if (ret < 0) {
-                goto fail;
-            }
-            if (bs->encrypted) {
-                assert(s->cipher);
-                Error *err = NULL;
-                if (qcow2_encrypt_sectors(s, sector_num,  cluster_data,
-                                          cluster_data, cur_nr_sectors, false,
-                                          &err) < 0) {
-                    error_free(err);
-                    ret = -EIO;
-                    goto fail;
-                }
-                qemu_iovec_from_buf(qiov, bytes_done,
-                    cluster_data, 512 * cur_nr_sectors);
-            }
-            break;
-
-        default:
-            g_assert_not_reached();
-            ret = -EIO;
-            goto fail;
-        }
-
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
-    }
-    ret = 0;
-
-fail:
-    qemu_co_mutex_unlock(&s->lock);
-
-    qemu_iovec_destroy(&hd_qiov);
-    qemu_vfree(cluster_data);
-
-    return ret;
-}
-
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
-                           int64_t sector_num,
-                           int remaining_sectors,
-                           QEMUIOVector *qiov)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster;
-    int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
-    uint64_t cluster_offset;
-    QEMUIOVector hd_qiov;
-    uint64_t bytes_done = 0;
-    uint8_t *cluster_data = NULL;
-    QCowL2Meta *l2meta = NULL;
-
-    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
-                                 remaining_sectors);
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    s->cluster_cache_offset = -1; /* disable compressed cache */
-
-    qemu_co_mutex_lock(&s->lock);
-
-    while (remaining_sectors != 0) {
-
-        l2meta = NULL;
-
-        trace_qcow2_writev_start_part(qemu_coroutine_self());
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        cur_nr_sectors = remaining_sectors;
-        if (bs->encrypted &&
-            cur_nr_sectors >
-            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
-            cur_nr_sectors =
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
-        }
-
-        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset, &l2meta);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        assert((cluster_offset & 511) == 0);
-
-        qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
-
-        if (bs->encrypted) {
-            Error *err = NULL;
-            assert(s->cipher);
-            if (!cluster_data) {
-                cluster_data = qemu_try_blockalign(bs->file->bs,
-                                                   QCOW_MAX_CRYPT_CLUSTERS
-                                                   * s->cluster_size);
-                if (cluster_data == NULL) {
-                    ret = -ENOMEM;
-                    goto fail;
-                }
-            }
-
-            assert(hd_qiov.size <=
-                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
-            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
-
-            if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
-                                      cluster_data, cur_nr_sectors,
-                                      true, &err) < 0) {
-                error_free(err);
-                ret = -EIO;
-                goto fail;
-            }
-
-            qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_add(&hd_qiov, cluster_data,
-                cur_nr_sectors * 512);
-        }
-
-        ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
-                cur_nr_sectors * BDRV_SECTOR_SIZE);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        qemu_co_mutex_unlock(&s->lock);
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-        trace_qcow2_writev_data(qemu_coroutine_self(),
-                                (cluster_offset >> 9) + index_in_cluster);
-        ret = bdrv_co_writev(bs->file->bs,
-                             (cluster_offset >> 9) + index_in_cluster,
-                             cur_nr_sectors, &hd_qiov);
-        qemu_co_mutex_lock(&s->lock);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        while (l2meta != NULL) {
-            QCowL2Meta *next;
-
-            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
-            if (ret < 0) {
-                goto fail;
-            }
-
-            /* Take the request off the list of running requests */
-            if (l2meta->nb_clusters != 0) {
-                QLIST_REMOVE(l2meta, next_in_flight);
-            }
-
-            qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
-            next = l2meta->next;
-            g_free(l2meta);
-            l2meta = next;
-        }
-
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
-        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
-    }
-    ret = 0;
-
-fail:
-    qemu_co_mutex_unlock(&s->lock);
-
-    while (l2meta != NULL) {
-        QCowL2Meta *next;
-
-        if (l2meta->nb_clusters != 0) {
-            QLIST_REMOVE(l2meta, next_in_flight);
-        }
-        qemu_co_queue_restart_all(&l2meta->dependent_requests);
-
-        next = l2meta->next;
-        g_free(l2meta);
-        l2meta = next;
-    }
-
-    qemu_iovec_destroy(&hd_qiov);
-    qemu_vfree(cluster_data);
-    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
-
-    return ret;
-}
-
-static int qcow2_inactivate(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret, result = 0;
-
-    ret = qcow2_cache_flush(bs, s->l2_table_cache);
-    if (ret) {
-        result = ret;
-        error_report("Failed to flush the L2 table cache: %s",
-                     strerror(-ret));
-    }
-
-    ret = qcow2_cache_flush(bs, s->refcount_block_cache);
-    if (ret) {
-        result = ret;
-        error_report("Failed to flush the refcount block cache: %s",
-                     strerror(-ret));
-    }
-
-    if (result == 0) {
-        qcow2_mark_clean(bs);
-    }
-
-    return result;
-}
-
-static void qcow2_close(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    qemu_vfree(s->l1_table);
-    /* else pre-write overlap checks in cache_destroy may crash */
-    s->l1_table = NULL;
-
-    if (!(s->flags & BDRV_O_INACTIVE)) {
-        qcow2_inactivate(bs);
-    }
-
-    cache_clean_timer_del(bs);
-    qcow2_cache_destroy(bs, s->l2_table_cache);
-    qcow2_cache_destroy(bs, s->refcount_block_cache);
-
-    qcrypto_cipher_free(s->cipher);
-    s->cipher = NULL;
-
-    g_free(s->unknown_header_fields);
-    cleanup_unknown_header_ext(bs);
-
-    g_free(s->image_backing_file);
-    g_free(s->image_backing_format);
-
-    g_free(s->cluster_cache);
-    qemu_vfree(s->cluster_data);
-    qcow2_refcount_close(bs);
-    qcow2_free_snapshots(bs);
-}
-
-static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int flags = s->flags;
-    QCryptoCipher *cipher = NULL;
-    QDict *options;
-    Error *local_err = NULL;
-    int ret;
-
-    /*
-     * Backing files are read-only which makes all of their metadata immutable,
-     * that means we don't have to worry about reopening them here.
-     */
-
-    cipher = s->cipher;
-    s->cipher = NULL;
-
-    qcow2_close(bs);
-
-    bdrv_invalidate_cache(bs->file->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        bs->drv = NULL;
-        return;
-    }
-
-    memset(s, 0, sizeof(BDRVQcow2State));
-    options = qdict_clone_shallow(bs->options);
-
-    flags &= ~BDRV_O_INACTIVE;
-    ret = qcow2_open(bs, options, flags, &local_err);
-    QDECREF(options);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "Could not reopen qcow2 layer: ");
-        bs->drv = NULL;
-        return;
-    } else if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
-        bs->drv = NULL;
-        return;
-    }
-
-    s->cipher = cipher;
-}
-
-static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
-    size_t len, size_t buflen)
-{
-    QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
-    size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
-
-    if (buflen < ext_len) {
-        return -ENOSPC;
-    }
-
-    *ext_backing_fmt = (QCowExtension) {
-        .magic  = cpu_to_be32(magic),
-        .len    = cpu_to_be32(len),
-    };
-    memcpy(buf + sizeof(QCowExtension), s, len);
-
-    return ext_len;
-}
-
-/*
- * Updates the qcow2 header, including the variable length parts of it, i.e.
- * the backing file name and all extensions. qcow2 was not designed to allow
- * such changes, so if we run out of space (we can only use the first cluster)
- * this function may fail.
- *
- * Returns 0 on success, -errno in error cases.
- */
-int qcow2_update_header(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    QCowHeader *header;
-    char *buf;
-    size_t buflen = s->cluster_size;
-    int ret;
-    uint64_t total_size;
-    uint32_t refcount_table_clusters;
-    size_t header_length;
-    Qcow2UnknownHeaderExtension *uext;
-
-    buf = qemu_blockalign(bs, buflen);
-
-    /* Header structure */
-    header = (QCowHeader*) buf;
-
-    if (buflen < sizeof(*header)) {
-        ret = -ENOSPC;
-        goto fail;
-    }
-
-    header_length = sizeof(*header) + s->unknown_header_fields_size;
-    total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
-    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
-
-    *header = (QCowHeader) {
-        /* Version 2 fields */
-        .magic                  = cpu_to_be32(QCOW_MAGIC),
-        .version                = cpu_to_be32(s->qcow_version),
-        .backing_file_offset    = 0,
-        .backing_file_size      = 0,
-        .cluster_bits           = cpu_to_be32(s->cluster_bits),
-        .size                   = cpu_to_be64(total_size),
-        .crypt_method           = cpu_to_be32(s->crypt_method_header),
-        .l1_size                = cpu_to_be32(s->l1_size),
-        .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
-        .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
-        .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
-        .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
-        .snapshots_offset       = cpu_to_be64(s->snapshots_offset),
-
-        /* Version 3 fields */
-        .incompatible_features  = cpu_to_be64(s->incompatible_features),
-        .compatible_features    = cpu_to_be64(s->compatible_features),
-        .autoclear_features     = cpu_to_be64(s->autoclear_features),
-        .refcount_order         = cpu_to_be32(s->refcount_order),
-        .header_length          = cpu_to_be32(header_length),
-    };
-
-    /* For older versions, write a shorter header */
-    switch (s->qcow_version) {
-    case 2:
-        ret = offsetof(QCowHeader, incompatible_features);
-        break;
-    case 3:
-        ret = sizeof(*header);
-        break;
-    default:
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    buf += ret;
-    buflen -= ret;
-    memset(buf, 0, buflen);
-
-    /* Preserve any unknown field in the header */
-    if (s->unknown_header_fields_size) {
-        if (buflen < s->unknown_header_fields_size) {
-            ret = -ENOSPC;
-            goto fail;
-        }
-
-        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
-        buf += s->unknown_header_fields_size;
-        buflen -= s->unknown_header_fields_size;
-    }
-
-    /* Backing file format header extension */
-    if (s->image_backing_format) {
-        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
-                             s->image_backing_format,
-                             strlen(s->image_backing_format),
-                             buflen);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        buf += ret;
-        buflen -= ret;
-    }
-
-    /* Feature table */
-    if (s->qcow_version >= 3) {
-        Qcow2Feature features[] = {
-            {
-                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
-                .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
-                .name = "dirty bit",
-            },
-            {
-                .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
-                .bit  = QCOW2_INCOMPAT_CORRUPT_BITNR,
-                .name = "corrupt bit",
-            },
-            {
-                .type = QCOW2_FEAT_TYPE_COMPATIBLE,
-                .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
-                .name = "lazy refcounts",
-            },
-        };
-
-        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
-                             features, sizeof(features), buflen);
-        if (ret < 0) {
-            goto fail;
-        }
-        buf += ret;
-        buflen -= ret;
-    }
-
-    /* Keep unknown header extensions */
-    QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
-        ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        buf += ret;
-        buflen -= ret;
-    }
-
-    /* End of header extensions */
-    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    buf += ret;
-    buflen -= ret;
-
-    /* Backing file name */
-    if (s->image_backing_file) {
-        size_t backing_file_len = strlen(s->image_backing_file);
-
-        if (buflen < backing_file_len) {
-            ret = -ENOSPC;
-            goto fail;
-        }
-
-        /* Using strncpy is ok here, since buf is not NUL-terminated. */
-        strncpy(buf, s->image_backing_file, buflen);
-
-        header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
-        header->backing_file_size   = cpu_to_be32(backing_file_len);
-    }
-
-    /* Write the new header */
-    ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = 0;
-fail:
-    qemu_vfree(header);
-    return ret;
-}
-
-/*
- * Record a new backing file name and format for the image, both in the
- * generic BlockDriverState and in the qcow2 driver state, and then rewrite
- * the image header through qcow2_update_header().
- *
- * A NULL @backing_file or @backing_fmt clears the respective setting (the
- * BlockDriverState copy becomes an empty string, the qcow2 copy NULL).
- *
- * Returns -EINVAL if @backing_file is longer than 1023 bytes; otherwise the
- * return value of qcow2_update_header().
- */
-static int qcow2_change_backing_file(BlockDriverState *bs,
-    const char *backing_file, const char *backing_fmt)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    const char *new_file = backing_file ? backing_file : "";
-    const char *new_fmt = backing_fmt ? backing_fmt : "";
-
-    if (backing_file != NULL && strlen(backing_file) > 1023) {
-        return -EINVAL;
-    }
-
-    pstrcpy(bs->backing_file, sizeof(bs->backing_file), new_file);
-    pstrcpy(bs->backing_format, sizeof(bs->backing_format), new_fmt);
-
-    /* Release the previous copies before installing the new ones */
-    g_free(qs->image_backing_file);
-    g_free(qs->image_backing_format);
-
-    if (backing_file != NULL) {
-        qs->image_backing_file = g_strdup(bs->backing_file);
-    } else {
-        qs->image_backing_file = NULL;
-    }
-
-    if (backing_fmt != NULL) {
-        qs->image_backing_format = g_strdup(bs->backing_format);
-    } else {
-        qs->image_backing_format = NULL;
-    }
-
-    return qcow2_update_header(bs);
-}
-
-/*
- * Allocate clusters, and the L2 entries mapping them, for the whole virtual
- * disk of @bs without writing guest data.
- *
- * The disk is walked from sector 0 in chunks of at most
- * INT_MAX >> BDRV_SECTOR_BITS sectors. Each chunk is handed to
- * qcow2_alloc_cluster_offset(), and every QCowL2Meta it hands back is linked
- * into the L2 tables and then freed. Finally one zeroed sector is written at
- * the end of the last allocation so that the image file is long enough to
- * contain every allocated cluster.
- *
- * The only caller visible here, qcow2_create2(), takes s->lock around the
- * call.
- *
- * Returns 0 on success, or the negative value returned by the failing helper.
- */
-static int preallocate(BlockDriverState *bs)
-{
-    uint64_t nb_sectors;
-    uint64_t offset;
-    uint64_t host_offset = 0;
-    int num;
-    int ret;
-    QCowL2Meta *meta;
-
-    nb_sectors = bdrv_nb_sectors(bs);
-    offset = 0;
-
-    while (nb_sectors) {
-        /* Cap the chunk so that num << BDRV_SECTOR_BITS still fits an int */
-        num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
-        /* num is passed by pointer - presumably updated to the number of
-         * sectors actually covered; confirm against the callee */
-        ret = qcow2_alloc_cluster_offset(bs, offset, &num,
-                                         &host_offset, &meta);
-        if (ret < 0) {
-            return ret;
-        }
-
-        /* Walk the list of allocation descriptors returned for this chunk */
-        while (meta) {
-            /* Remember the successor now; meta is freed below */
-            QCowL2Meta *next = meta->next;
-
-            ret = qcow2_alloc_cluster_link_l2(bs, meta);
-            if (ret < 0) {
-                /* Give back the clusters that could not be linked */
-                qcow2_free_any_clusters(bs, meta->alloc_offset,
-                                        meta->nb_clusters, QCOW2_DISCARD_NEVER);
-                return ret;
-            }
-
-            /* There are no dependent requests, but we need to remove our
-             * request from the list of in-flight requests */
-            QLIST_REMOVE(meta, next_in_flight);
-
-            g_free(meta);
-            meta = next;
-        }
-
-        /* TODO Preallocate data if requested */
-
-        nb_sectors -= num;
-        offset += num << BDRV_SECTOR_BITS;
-    }
-
-    /*
-     * It is expected that the image file is large enough to actually contain
-     * all of the allocated clusters (otherwise we get failing reads after
-     * EOF). Extend the image to the last allocated sector.
-     */
-    if (host_offset != 0) {
-        /* host_offset and num still hold the values of the last chunk; the
-         * sector written is the last one of that allocation */
-        uint8_t buf[BDRV_SECTOR_SIZE];
-        memset(buf, 0, BDRV_SECTOR_SIZE);
-        ret = bdrv_write(bs->file->bs,
-                         (host_offset >> BDRV_SECTOR_BITS) + num - 1,
-                         buf, 1);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Create a qcow2 image at @filename.
- *
- * The work happens in phases:
- *   1. validate @cluster_size and derive cluster_bits;
- *   2. for PREALLOC_MODE_FULL / PREALLOC_MODE_FALLOC, estimate the final
- *      file size (data plus metadata) and pass it to the protocol layer
- *      through @opts;
- *   3. create the file and write a minimal header plus a refcount table
- *      with one refcount block;
- *   4. reopen the file with the qcow2 driver, account for the first three
- *      clusters, write a full header, resize to @total_size, set the
- *      backing file and preallocate metadata if requested;
- *   5. reopen once more without BDRV_O_NO_FLUSH so the image gets flushed.
- *
- * @flags carries BLOCK_FLAG_ENCRYPT / BLOCK_FLAG_LAZY_REFCOUNTS, @version is
- * the qcow2 version written to the header, and @refcount_order is log2 of
- * the refcount width in bits.
- *
- * Returns 0 on success or a negative errno value, in which case @errp is
- * set.
- */
-static int qcow2_create2(const char *filename, int64_t total_size,
-                         const char *backing_file, const char *backing_format,
-                         int flags, size_t cluster_size, PreallocMode prealloc,
-                         QemuOpts *opts, int version, int refcount_order,
-                         Error **errp)
-{
-    int cluster_bits;
-    QDict *options;
-
-    /* Calculate cluster_bits */
-    cluster_bits = ctz32(cluster_size);
-    if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
-        (1 << cluster_bits) != cluster_size)
-    {
-        error_setg(errp, "Cluster size must be a power of two between %d and "
-                   "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
-        return -EINVAL;
-    }
-
-    /*
-     * Open the image file and write a minimal qcow2 header.
-     *
-     * We keep things simple and start with a zero-sized image. We also
-     * do without refcount blocks or a L1 table for now. We'll fix the
-     * inconsistency later.
-     *
-     * We do need a refcount table because growing the refcount table means
-     * allocating two new refcount blocks - the second of which would be at
-     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
-     * size for any qcow2 image.
-     */
-    BlockBackend *blk;
-    QCowHeader *header;
-    uint64_t* refcount_table;
-    Error *local_err = NULL;
-    int ret;
-
-    if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
-        /* Note: The following calculation does not need to be exact; if it is a
-         * bit off, either some bytes will be "leaked" (which is fine) or we
-         * will need to increase the file size by some bytes (which is fine,
-         * too, as long as the bulk is allocated here). Therefore, using
-         * floating point arithmetic is fine. */
-        int64_t meta_size = 0;
-        uint64_t nreftablee, nrefblocke, nl1e, nl2e;
-        int64_t aligned_total_size = align_offset(total_size, cluster_size);
-        int refblock_bits, refblock_size;
-        /* refcount entry size in bytes */
-        double rces = (1 << refcount_order) / 8.;
-
-        /* see qcow2_open() */
-        refblock_bits = cluster_bits - (refcount_order - 3);
-        refblock_size = 1 << refblock_bits;
-
-        /* header: 1 cluster */
-        meta_size += cluster_size;
-
-        /* total size of L2 tables */
-        nl2e = aligned_total_size / cluster_size;
-        nl2e = align_offset(nl2e, cluster_size / sizeof(uint64_t));
-        meta_size += nl2e * sizeof(uint64_t);
-
-        /* total size of L1 tables */
-        nl1e = nl2e * sizeof(uint64_t) / cluster_size;
-        nl1e = align_offset(nl1e, cluster_size / sizeof(uint64_t));
-        meta_size += nl1e * sizeof(uint64_t);
-
-        /* total size of refcount blocks
-         *
-         * note: every host cluster is reference-counted, including metadata
-         * (even refcount blocks are recursively included).
-         * Let:
-         *   a = total_size (this is the guest disk size)
-         *   m = meta size not including refcount blocks and refcount tables
-         *   c = cluster size
-         *   y1 = number of refcount blocks entries
-         *   y2 = meta size including everything
-         *   rces = refcount entry size in bytes
-         * then,
-         *   y1 = (y2 + a)/c
-         *   y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m
-         * we can get y1:
-         *   y1 = (a + m) / (c - rces - rces * sizeof(u64) / c)
-         */
-        nrefblocke = (aligned_total_size + meta_size + cluster_size)
-                   / (cluster_size - rces - rces * sizeof(uint64_t)
-                                                 / cluster_size);
-        meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size;
-
-        /* total size of refcount tables */
-        nreftablee = nrefblocke / refblock_size;
-        nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
-        meta_size += nreftablee * sizeof(uint64_t);
-
-        /* Hand the estimated size and the preallocation mode on to the
-         * bdrv_create_file() call below through opts */
-        qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
-                            aligned_total_size + meta_size, &error_abort);
-        qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc],
-                     &error_abort);
-    }
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return ret;
-    }
-
-    /* First open: protocol level only (BDRV_O_PROTOCOL), to write the raw
-     * initial metadata */
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    /* Write the header */
-    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
-    header = g_malloc0(cluster_size);
-    *header = (QCowHeader) {
-        .magic                      = cpu_to_be32(QCOW_MAGIC),
-        .version                    = cpu_to_be32(version),
-        .cluster_bits               = cpu_to_be32(cluster_bits),
-        .size                       = cpu_to_be64(0),
-        .l1_table_offset            = cpu_to_be64(0),
-        .l1_size                    = cpu_to_be32(0),
-        .refcount_table_offset      = cpu_to_be64(cluster_size),
-        .refcount_table_clusters    = cpu_to_be32(1),
-        .refcount_order             = cpu_to_be32(refcount_order),
-        .header_length              = cpu_to_be32(sizeof(*header)),
-    };
-
-    if (flags & BLOCK_FLAG_ENCRYPT) {
-        header->crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
-    } else {
-        header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
-    }
-
-    if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
-        header->compatible_features |=
-            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
-    }
-
-    /* The header occupies the whole first cluster; the remainder is zero */
-    ret = blk_pwrite(blk, 0, header, cluster_size);
-    g_free(header);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not write qcow2 header");
-        goto out;
-    }
-
-    /* Write a refcount table with one refcount block */
-    /* Two clusters are written at offset cluster_size: the table, whose
-     * first entry points at 2 * cluster_size, followed by a zeroed cluster
-     * at that very offset */
-    refcount_table = g_malloc0(2 * cluster_size);
-    refcount_table[0] = cpu_to_be64(2 * cluster_size);
-    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
-    g_free(refcount_table);
-
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not write refcount table");
-        goto out;
-    }
-
-    blk_unref(blk);
-    blk = NULL;
-
-    /*
-     * And now open the image and make it consistent first (i.e. increase the
-     * refcount of the cluster that is occupied by the header and the refcount
-     * table)
-     */
-    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow2"));
-    blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto out;
-    }
-
-    /* Header, refcount table and refcount block: three clusters, which must
-     * come back as offset 0 in a fresh image */
-    ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
-                         "header and refcount table");
-        goto out;
-
-    } else if (ret != 0) {
-        error_report("Huh, first cluster in empty image is already in use?");
-        abort();
-    }
-
-    /* Create a full header (including things like feature table) */
-    ret = qcow2_update_header(blk_bs(blk));
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not update qcow2 header");
-        goto out;
-    }
-
-    /* Okay, now that we have a valid image, let's give it the right size */
-    ret = blk_truncate(blk, total_size);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not resize image");
-        goto out;
-    }
-
-    /* Want a backing file? There you go.*/
-    if (backing_file) {
-        ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
-        if (ret < 0) {
-            /* NOTE(review): backing_format may be NULL here and is passed
-             * to a %s conversion - confirm this is acceptable */
-            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
-                             "with format '%s'", backing_file, backing_format);
-            goto out;
-        }
-    }
-
-    /* And if we're supposed to preallocate metadata, do that now */
-    if (prealloc != PREALLOC_MODE_OFF) {
-        BDRVQcow2State *s = blk_bs(blk)->opaque;
-        qemu_co_mutex_lock(&s->lock);
-        ret = preallocate(blk_bs(blk));
-        qemu_co_mutex_unlock(&s->lock);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not preallocate metadata");
-            goto out;
-        }
-    }
-
-    blk_unref(blk);
-    blk = NULL;
-
-    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
-    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow2"));
-    blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto out;
-    }
-
-    ret = 0;
-out:
-    /* blk is NULL if the failure happened while no backend was open */
-    if (blk) {
-        blk_unref(blk);
-    }
-    return ret;
-}
-
-/*
- * Image creation entry point: read the creation options out of @opts,
- * validate them, and delegate the actual work to qcow2_create2().
- *
- * The options are fetched with the qemu_opt_get_*_del() accessors, i.e.
- * they are removed from @opts as they are read. What is left in @opts is
- * passed on to qcow2_create2().
- *
- * Defaults when an option is absent: cluster size DEFAULT_CLUSTER_SIZE,
- * preallocation PREALLOC_MODE_OFF, version 3, 16 refcount bits, no
- * encryption, no lazy refcounts.
- *
- * Returns -EINVAL with @errp set for an invalid option or option
- * combination; otherwise the return value of qcow2_create2().
- */
-static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    char *backing_file = NULL;
-    char *backing_fmt = NULL;
-    char *buf = NULL;
-    uint64_t size = 0;
-    int flags = 0;
-    size_t cluster_size = DEFAULT_CLUSTER_SIZE;
-    PreallocMode prealloc;
-    int version = 3;
-    uint64_t refcount_bits = 16;
-    int refcount_order;
-    Error *local_err = NULL;
-    int ret;
-
-    /* Read out options */
-    size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                    BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
-        flags |= BLOCK_FLAG_ENCRYPT;
-    }
-    cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
-                                         DEFAULT_CLUSTER_SIZE);
-    /* buf is reused for each string option; it is freed before every reuse
-     * and once more at the finish label */
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
-                               PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
-                               &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto finish;
-    }
-    g_free(buf);
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
-    /* Map the compatibility level string to a qcow2 version number */
-    if (!buf) {
-        /* keep the default */
-    } else if (!strcmp(buf, "0.10")) {
-        version = 2;
-    } else if (!strcmp(buf, "1.1")) {
-        version = 3;
-    } else {
-        error_setg(errp, "Invalid compatibility level: '%s'", buf);
-        ret = -EINVAL;
-        goto finish;
-    }
-
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_LAZY_REFCOUNTS, false)) {
-        flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
-    }
-
-    /* Cross-option consistency checks */
-    if (backing_file && prealloc != PREALLOC_MODE_OFF) {
-        error_setg(errp, "Backing file and preallocation cannot be used at "
-                   "the same time");
-        ret = -EINVAL;
-        goto finish;
-    }
-
-    if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
-        error_setg(errp, "Lazy refcounts only supported with compatibility "
-                   "level 1.1 and above (use compat=1.1 or greater)");
-        ret = -EINVAL;
-        goto finish;
-    }
-
-    refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS,
-                                            refcount_bits);
-    if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
-        error_setg(errp, "Refcount width must be a power of two and may not "
-                   "exceed 64 bits");
-        ret = -EINVAL;
-        goto finish;
-    }
-
-    if (version < 3 && refcount_bits != 16) {
-        error_setg(errp, "Different refcount widths than 16 bits require "
-                   "compatibility level 1.1 or above (use compat=1.1 or "
-                   "greater)");
-        ret = -EINVAL;
-        goto finish;
-    }
-
-    /* refcount_bits is a power of two here, so ctz32() yields its log2 */
-    refcount_order = ctz32(refcount_bits);
-
-    ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
-                        cluster_size, prealloc, opts, version, refcount_order,
-                        &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-
-finish:
-    /* Common exit: release the option strings on success and on error */
-    g_free(backing_file);
-    g_free(backing_fmt);
-    g_free(buf);
-    return ret;
-}
-
-/*
- * Zero a range of the image by marking whole clusters as zero clusters.
- *
- * Only requests whose start sector and length are both multiples of the
- * cluster size (in sectors) are handled here; anything else is answered
- * with -ENOTSUP. Otherwise returns the result of qcow2_zero_clusters(),
- * which is called with s->lock held.
- */
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    bool start_misaligned = (sector_num % qs->cluster_sectors) != 0;
-    bool length_misaligned = (nb_sectors % qs->cluster_sectors) != 0;
-    int rc;
-
-    /* Misaligned zero writes are not handled by this function */
-    if (start_misaligned || length_misaligned) {
-        return -ENOTSUP;
-    }
-
-    /* Cluster-aligned request: real zero clusters can be used */
-    qemu_co_mutex_lock(&qs->lock);
-    rc = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS, nb_sectors);
-    qemu_co_mutex_unlock(&qs->lock);
-
-    return rc;
-}
-
-/*
- * Discard @nb_sectors sectors starting at @sector_num.
- *
- * Takes s->lock, forwards the request to qcow2_discard_clusters() with
- * QCOW2_DISCARD_REQUEST as the discard type, and returns its result.
- */
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    int rc;
-
-    qemu_co_mutex_lock(&qs->lock);
-    rc = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-                                nb_sectors, QCOW2_DISCARD_REQUEST, false);
-    qemu_co_mutex_unlock(&qs->lock);
-
-    return rc;
-}
-
-/*
- * Grow the virtual disk size of the image to @offset bytes.
- *
- * @offset must be a multiple of 512. Images with snapshots cannot be
- * resized, and shrinking is rejected. On success the L1 table has been
- * grown to cover the new size and the size field of the on-disk header has
- * been rewritten.
- *
- * Returns 0 on success, -EINVAL for an unaligned size, -ENOTSUP for
- * snapshots or shrinking, or the error of the failing helper.
- */
-static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    int64_t l1_entries;
-    uint64_t be_size;
-    int rc;
-
-    if ((offset & 511) != 0) {
-        error_report("The new size must be a multiple of 512");
-        return -EINVAL;
-    }
-
-    /* Resizing is refused as long as the image has snapshots */
-    if (qs->nb_snapshots) {
-        error_report("Can't resize an image which has snapshots");
-        return -ENOTSUP;
-    }
-
-    /* Only growing is implemented */
-    if (offset < bs->total_sectors * 512) {
-        error_report("qcow2 doesn't support shrinking images yet");
-        return -ENOTSUP;
-    }
-
-    l1_entries = size_to_l1(qs, offset);
-    rc = qcow2_grow_l1_table(bs, l1_entries, true);
-    if (rc < 0) {
-        return rc;
-    }
-
-    /* Store the new size, big-endian, in the header's size field */
-    be_size = cpu_to_be64(offset);
-    rc = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
-                          &be_size, sizeof(be_size));
-    if (rc < 0) {
-        return rc;
-    }
-
-    qs->l1_vm_state_index = l1_entries;
-    return 0;
-}
-
-/*
- * Write one cluster of guest data in compressed form.
- *
- * @sector_num / @nb_sectors select the guest range and @buf holds the data.
- * Three cases are distinguished:
- *   - @nb_sectors == 0: no data is written; the underlying file is
- *     truncated to its current length so that it ends on a sector boundary.
- *   - @nb_sectors != cluster size: rejected with -EINVAL, except for a short
- *     final write ending exactly at the end of the image, which is padded
- *     with zeroes to a full cluster and retried.
- *   - a full cluster: the data is deflated. If the result is not smaller
- *     than a cluster it is written uncompressed through bdrv_write();
- *     otherwise a compressed cluster is allocated and the deflated bytes
- *     are written to the image file.
- *
- * Returns 0 on success or a negative errno value.
- */
-/* XXX: put compressed sectors first, then all the cluster aligned
-   tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                  const uint8_t *buf, int nb_sectors)
-{
-    BDRVQcow2State *s = bs->opaque;
-    z_stream strm;
-    int ret, out_len;
-    uint8_t *out_buf;
-    uint64_t cluster_offset;
-
-    if (nb_sectors == 0) {
-        /* align end of file to a sector boundary to ease reading with
-           sector based I/Os */
-        int64_t length = bdrv_getlength(bs->file->bs);
-
-        /* Propagate a failure to query the length instead of handing a
-         * negative errno value to bdrv_truncate() as if it were a size */
-        if (length < 0) {
-            return length;
-        }
-        return bdrv_truncate(bs->file->bs, length);
-    }
-
-    if (nb_sectors != s->cluster_sectors) {
-        ret = -EINVAL;
-
-        /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
-            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-            memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            /* Retry with a full, zero-padded cluster */
-            ret = qcow2_write_compressed(bs, sector_num,
-                                         pad_buf, s->cluster_sectors);
-            qemu_vfree(pad_buf);
-        }
-        return ret;
-    }
-
-    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
-
-    /* default compression level, 4 KiB window (negative windowBits selects
-     * raw deflate, i.e. no zlib header), maximum memLevel */
-    memset(&strm, 0, sizeof(strm));
-    ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
-                       Z_DEFLATED, -12,
-                       9, Z_DEFAULT_STRATEGY);
-    if (ret != 0) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* avail_out is capped at one cluster: output that does not fit is of no
-     * use, because it would not save any space */
-    strm.avail_in = s->cluster_size;
-    strm.next_in = (uint8_t *)buf;
-    strm.avail_out = s->cluster_size;
-    strm.next_out = out_buf;
-
-    ret = deflate(&strm, Z_FINISH);
-    if (ret != Z_STREAM_END && ret != Z_OK) {
-        deflateEnd(&strm);
-        ret = -EINVAL;
-        goto fail;
-    }
-    out_len = strm.next_out - out_buf;
-
-    deflateEnd(&strm);
-
-    if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
-        /* could not compress: write normal cluster */
-        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
-        if (ret < 0) {
-            goto fail;
-        }
-    } else {
-        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
-            sector_num << 9, out_len);
-        if (!cluster_offset) {
-            ret = -EIO;
-            goto fail;
-        }
-        /* Keep only the offset bits of the returned value */
-        cluster_offset &= s->cluster_offset_mask;
-
-        ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    ret = 0;
-fail:
-    g_free(out_buf);
-    return ret;
-}
-
-/*
- * Empty the image by rebuilding its metadata from scratch rather than
- * discarding clusters one by one.
- *
- * Resulting layout, in clusters from the start of the file:
- *   0                image header
- *   1                refcount table (one cluster)
- *   2                first refcount block
- *   3 ..             L1 table (l1_clusters clusters, all zero)
- * The file is finally truncated to 3 + l1_clusters clusters.
- *
- * The dirty flag is set for the duration of the operation because on-disk
- * and in-memory refcounts are temporarily inconsistent. The caller visible
- * here, qcow2_make_empty(), only takes this path for version >= 3 images
- * without snapshots whose L1 table plus three clusters fit into one
- * refcount block.
- *
- * Returns 0 on success or a negative errno value. When a failure leaves the
- * refcount structures inconsistent, bs->drv is set to NULL.
- */
-static int make_completely_empty(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret, l1_clusters;
-    int64_t offset;
-    uint64_t *new_reftable = NULL;
-    uint64_t rt_entry, l1_size2;
-    /* Packed so that it can be written over the header in a single call,
-     * starting at the l1_table_offset field (see the bdrv_pwrite_sync()
-     * below) */
-    struct {
-        uint64_t l1_offset;
-        uint64_t reftable_offset;
-        uint32_t reftable_clusters;
-    } QEMU_PACKED l1_ofs_rt_ofs_cls;
-
-    ret = qcow2_cache_empty(bs, s->l2_table_cache);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = qcow2_cache_empty(bs, s->refcount_block_cache);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Refcounts will be broken utterly */
-    ret = qcow2_mark_dirty(bs);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-
-    /* L1 table size in clusters and in bytes */
-    l1_clusters = DIV_ROUND_UP(s->l1_size, s->cluster_size / sizeof(uint64_t));
-    l1_size2 = (uint64_t)s->l1_size * sizeof(uint64_t);
-
-    /* After this call, neither the in-memory nor the on-disk refcount
-     * information accurately describe the actual references */
-
-    ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
-                            l1_clusters * s->cluster_sectors, 0);
-    if (ret < 0) {
-        goto fail_broken_refcounts;
-    }
-    memset(s->l1_table, 0, l1_size2);
-
-    BLKDBG_EVENT(bs->file, BLKDBG_EMPTY_IMAGE_PREPARE);
-
-    /* Overwrite enough clusters at the beginning of the sectors to place
-     * the refcount table, a refcount block and the L1 table in; this may
-     * overwrite parts of the existing refcount and L1 table, which is not
-     * an issue because the dirty flag is set, complete data loss is in fact
-     * desired and partial data loss is consequently fine as well */
-    ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
-                            (2 + l1_clusters) * s->cluster_size /
-                            BDRV_SECTOR_SIZE, 0);
-    /* This call (even if it failed overall) may have overwritten on-disk
-     * refcount structures; in that case, the in-memory refcount information
-     * will probably differ from the on-disk information which makes the BDS
-     * unusable */
-    if (ret < 0) {
-        goto fail_broken_refcounts;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_UPDATE);
-
-    /* "Create" an empty reftable (one cluster) directly after the image
-     * header and an empty L1 table three clusters after the image header;
-     * the cluster between those two will be used as the first refblock */
-    cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
-    cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
-    cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
-                           &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
-    if (ret < 0) {
-        goto fail_broken_refcounts;
-    }
-
-    s->l1_table_offset = 3 * s->cluster_size;
-
-    new_reftable = g_try_new0(uint64_t, s->cluster_size / sizeof(uint64_t));
-    if (!new_reftable) {
-        ret = -ENOMEM;
-        goto fail_broken_refcounts;
-    }
-
-    s->refcount_table_offset = s->cluster_size;
-    s->refcount_table_size   = s->cluster_size / sizeof(uint64_t);
-
-    /* Ownership of new_reftable moves to s; clearing the local pointer
-     * keeps the g_free() at the fail label from releasing it */
-    g_free(s->refcount_table);
-    s->refcount_table = new_reftable;
-    new_reftable = NULL;
-
-    /* Now the in-memory refcount information again corresponds to the on-disk
-     * information (reftable is empty and no refblocks (the refblock cache is
-     * empty)); however, this means some clusters (e.g. the image header) are
-     * referenced, but not refcounted, but the normal qcow2 code assumes that
-     * the in-memory information is always correct */
-
-    BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC);
-
-    /* Enter the first refblock into the reftable */
-    rt_entry = cpu_to_be64(2 * s->cluster_size);
-    ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
-                           &rt_entry, sizeof(rt_entry));
-    if (ret < 0) {
-        goto fail_broken_refcounts;
-    }
-    s->refcount_table[0] = 2 * s->cluster_size;
-
-    /* Allocate header, reftable, refblock and L1 table in one request; with
-     * the free cluster index reset this is expected to return offset 0 */
-    s->free_cluster_index = 0;
-    assert(3 + l1_clusters <= s->refcount_block_size);
-    offset = qcow2_alloc_clusters(bs, 3 * s->cluster_size + l1_size2);
-    if (offset < 0) {
-        ret = offset;
-        goto fail_broken_refcounts;
-    } else if (offset > 0) {
-        error_report("First cluster in emptied image is in use");
-        abort();
-    }
-
-    /* Now finally the in-memory information corresponds to the on-disk
-     * structures and is correct */
-    ret = qcow2_mark_clean(bs);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Cut the file down to the clusters that are still in use */
-    ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    return 0;
-
-fail_broken_refcounts:
-    /* The BDS is unusable at this point. If we wanted to make it usable, we
-     * would have to call qcow2_refcount_close(), qcow2_refcount_init(),
-     * qcow2_check_refcounts(), qcow2_refcount_close() and qcow2_refcount_init()
-     * again. However, because the functions which could have caused this error
-     * path to be taken are used by those functions as well, it's very likely
-     * that that sequence will fail as well. Therefore, just eject the BDS. */
-    bs->drv = NULL;
-
-fail:
-    /* new_reftable is non-NULL only if it was never handed over to s */
-    g_free(new_reftable);
-    return ret;
-}
-
-/*
- * Drop all data from the image.
- *
- * For version >= 3 images without snapshots whose L1 table plus three
- * extra clusters (header, refcount table, one refcount block) fit into a
- * single refcount block, make_completely_empty() rebuilds the metadata in
- * one go. Every other image is emptied by discarding its clusters in
- * chunks of INT_MAX / BDRV_SECTOR_SIZE sectors.
- *
- * Returns 0 on success or the first negative value reported by a helper.
- */
-static int qcow2_make_empty(BlockDriverState *bs)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    int chunk_sectors = INT_MAX / BDRV_SECTOR_SIZE;
-    uint64_t sector = 0;
-    int l1_cluster_count;
-    int rc = 0;
-
-    l1_cluster_count = DIV_ROUND_UP(qs->l1_size,
-                                    qs->cluster_size / sizeof(uint64_t));
-
-    /* Fast path: it needs the dirty flag (v3) and wipes the image
-     * completely, so it must not be used when snapshots exist */
-    if (qs->qcow_version >= 3 && !qs->snapshots &&
-        3 + l1_cluster_count <= qs->refcount_block_size) {
-        return make_completely_empty(bs);
-    }
-
-    /* Slow but universal fallback: discard every active cluster.
-     *
-     * This function is generally used after committing an external
-     * snapshot, so QCOW2_DISCARD_SNAPSHOT seems the fitting discard type;
-     * its default action is to pass the discard on, which ideally shrinks
-     * the image file. */
-    while (sector < bs->total_sectors) {
-        rc = qcow2_discard_clusters(bs, sector * BDRV_SECTOR_SIZE,
-                                    MIN(chunk_sectors,
-                                        bs->total_sectors - sector),
-                                    QCOW2_DISCARD_SNAPSHOT, true);
-        if (rc < 0) {
-            break;
-        }
-        sector += chunk_sectors;
-    }
-
-    return rc;
-}
-
-/*
- * Flush the qcow2 metadata caches.
- *
- * With s->lock held, the L2 table cache is flushed first; the refcount
- * block cache is flushed as well if qcow2_need_accurate_refcounts() says
- * so. Returns 0 on success or the negative value of the first failing
- * flush.
- */
-static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    int rc;
-
-    qemu_co_mutex_lock(&qs->lock);
-
-    rc = qcow2_cache_flush(bs, qs->l2_table_cache);
-    if (rc >= 0 && qcow2_need_accurate_refcounts(qs)) {
-        rc = qcow2_cache_flush(bs, qs->refcount_block_cache);
-    }
-
-    qemu_co_mutex_unlock(&qs->lock);
-
-    /* Success is always reported as 0 */
-    return rc < 0 ? rc : 0;
-}
-
-/*
- * Fill @bdi with the driver information of a qcow2 image: cluster size,
- * VM state offset, whether unallocated blocks read as zero (always true)
- * and whether zero writes can unmap (version 3 and later). Always
- * returns 0.
- */
-static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVQcow2State *qs = bs->opaque;
-
-    bdi->cluster_size = qs->cluster_size;
-    bdi->vm_state_offset = qcow2_vm_state_offset(qs);
-    bdi->unallocated_blocks_are_zero = true;
-    bdi->can_write_zeroes_with_unmap = (qs->qcow_version >= 3);
-
-    return 0;
-}
-
-/*
- * Build the qcow2-specific part of the image information.
- *
- * Returns a newly allocated ImageInfoSpecific of kind
- * IMAGE_INFO_SPECIFIC_KIND_QCOW2. Version 2 images report compat "0.10"
- * and the refcount width; version 3 images report compat "1.1", the
- * refcount width and the lazy-refcounts and corrupt flags. Any other
- * version trips an assertion.
- */
-static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    ImageInfoSpecific *info = g_new(ImageInfoSpecific, 1);
-
-    *info = (ImageInfoSpecific){
-        .type  = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
-        .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
-    };
-
-    switch (qs->qcow_version) {
-    case 2:
-        *info->u.qcow2.data = (ImageInfoSpecificQCow2){
-            .compat             = g_strdup("0.10"),
-            .refcount_bits      = qs->refcount_bits,
-        };
-        break;
-    case 3:
-        *info->u.qcow2.data = (ImageInfoSpecificQCow2){
-            .compat             = g_strdup("1.1"),
-            .lazy_refcounts     = qs->compatible_features &
-                                  QCOW2_COMPAT_LAZY_REFCOUNTS,
-            .has_lazy_refcounts = true,
-            .corrupt            = qs->incompatible_features &
-                                  QCOW2_INCOMPAT_CORRUPT,
-            .has_corrupt        = true,
-            .refcount_bits      = qs->refcount_bits,
-        };
-        break;
-    default:
-        /* reaching this most likely means a new version was introduced
-         * without being handled here */
-        assert(false);
-    }
-
-    return info;
-}
-
-/*
- * Debugging aid, compiled out: print the refcounts of all clusters of the
- * image file, collapsing runs of clusters with equal refcount into a single
- * line.
- */
-#if 0
-static void dump_refcounts(BlockDriverState *bs)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int64_t nb_clusters, k, k1, size;
-    int refcount;
-
-    size = bdrv_getlength(bs->file->bs);
-    nb_clusters = size_to_clusters(s, size);
-    for(k = 0; k < nb_clusters;) {
-        /* k1 marks the first cluster of the current run */
-        k1 = k;
-        refcount = get_refcount(bs, k);
-        k++;
-        /* Extend the run while the refcount stays the same */
-        while (k < nb_clusters && get_refcount(bs, k) == refcount)
-            k++;
-        /* NOTE(review): the first value printed is k, the index just past
-         * the run, while nb is k - k1; printing k1 looks like the intent -
-         * confirm before enabling this code */
-        printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k, refcount,
-               k - k1);
-    }
-}
-#endif
-
-/*
- * Write VM state data from @qiov at position @pos of the VM state area,
- * which starts at qcow2_vm_state_offset().
- *
- * bs->zero_beyond_eof is switched off for the duration of the write, and
- * bs->total_sectors is put back to its previous value afterwards. Returns
- * the result of bdrv_pwritev().
- */
-static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
-                              int64_t pos)
-{
-    BDRVQcow2State *qs = bs->opaque;
-    const int64_t saved_total_sectors = bs->total_sectors;
-    const bool saved_zero_beyond_eof = bs->zero_beyond_eof;
-    int rc;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
-
-    bs->zero_beyond_eof = false;
-    rc = bdrv_pwritev(bs, qcow2_vm_state_offset(qs) + pos, qiov);
-    bs->zero_beyond_eof = saved_zero_beyond_eof;
-
-    /* The write path will have raised total_sectors to cover the VM state.
-     * The VM state is not part of the block device proper, so the former
-     * value is restored. */
-    bs->total_sectors = saved_total_sectors;
-
-    return rc;
-}
-
-/*
- * Read VM state data from the image.
- *
- * Reads size bytes into buf, starting at byte position pos relative to
- * qcow2_vm_state_offset().  zero_beyond_eof is switched off while the read
- * is issued and restored afterwards.
- *
- * Returns the result of bdrv_pread().
- */
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                              int64_t pos, int size)
-{
-    BDRVQcow2State *s = bs->opaque;
-    const bool saved_zero_beyond_eof = bs->zero_beyond_eof;
-    int status;
-
-    BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
-
-    bs->zero_beyond_eof = false;
-    status = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
-    bs->zero_beyond_eof = saved_zero_beyond_eof;
-
-    return status;
-}
-
-/*
- * Lower the format version of an image to target_version.
- *
- * Only a downgrade to version 2 is accepted; asking for the current version
- * is a successful no-op.  Before the header is rewritten, every feature the
- * older version cannot express is removed from the image.  status_cb and
- * cb_opaque are handed on to qcow2_expand_zero_clusters().
- *
- * Returns 0 on success, -EINVAL for an unsupported target version,
- * -ENOTSUP if the image cannot be represented in the target version, or the
- * negative result of a failing helper.
- */
-static int qcow2_downgrade(BlockDriverState *bs, int target_version,
-                           BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
-{
-    BDRVQcow2State *s = bs->opaque;
-    const int previous_version = s->qcow_version;
-    int ret;
-
-    if (target_version == previous_version) {
-        return 0;
-    }
-    if (target_version > previous_version || target_version != 2) {
-        return -EINVAL;
-    }
-
-    if (s->refcount_order != 4) {
-        error_report("compat=0.10 requires refcount_bits=16");
-        return -ENOTSUP;
-    }
-
-    /* Drop the dirty flag, the one incompatible feature we can clear */
-    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
-        ret = qcow2_mark_clean(bs);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    /* Getting here with QCOW2_INCOMPAT_CORRUPT set is pretty much
-     * impossible; should it happen anyway, -ENOTSUP is the best answer, and
-     * the same goes for any other incompatible bit still set */
-    if (s->incompatible_features) {
-        return -ENOTSUP;
-    }
-
-    /* Compatible features may be ignored, so they can simply be zeroed.
-     * If lazy refcounts were in use, clearing the dirty flag above has
-     * already repaired them. */
-    s->compatible_features = 0;
-
-    /* Autoclear features are trivially cleared */
-    s->autoclear_features = 0;
-
-    ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /* Write the header with the new version; undo the in-memory change if
-     * that fails */
-    s->qcow_version = target_version;
-    ret = qcow2_update_header(bs);
-    if (ret < 0) {
-        s->qcow_version = previous_version;
-        return ret;
-    }
-
-    return 0;
-}
-
-/*
- * Identifies the amend step that is currently reporting progress through
- * qcow2_amend_helper_cb().
- */
-typedef enum Qcow2AmendOperation {
-    /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
-     * statically initialized to so that the helper CB can discern the first
-     * invocation from an operation change */
-    QCOW2_NO_OPERATION = 0,
-
-    /* Set while qcow2_change_refcount_order() is running */
-    QCOW2_CHANGING_REFCOUNT_ORDER,
-    /* Set while qcow2_downgrade() is running */
-    QCOW2_DOWNGRADING,
-} Qcow2AmendOperation;
-
-/*
- * State shared between qcow2_amend_options() and qcow2_amend_helper_cb().
- * It lets the helper fold the progress of several consecutive operations
- * into one overall progress report for the caller's status callback.
- */
-typedef struct Qcow2AmendHelperCBInfo {
-    /* The code coordinating the amend operations should only modify
-     * these four fields; the rest will be managed by the CB */
-    BlockDriverAmendStatusCB *original_status_cb;
-    void *original_cb_opaque;
-
-    Qcow2AmendOperation current_operation;
-
-    /* Total number of operations to perform (only set once) */
-    int total_operations;
-
-    /* The following fields are managed by the CB */
-
-    /* Number of operations completed */
-    int operations_completed;
-
-    /* Cumulative offset of all completed operations */
-    int64_t offset_completed;
-
-    /* Operation seen on the previous callback invocation and the work size
-     * it reported then */
-    Qcow2AmendOperation last_operation;
-    int64_t last_work_size;
-} Qcow2AmendHelperCBInfo;
-
-/*
- * Progress callback handed to the individual amend operations.
- *
- * opaque is a Qcow2AmendHelperCBInfo.  The per-operation progress
- * (operation_offset out of operation_work_size) is converted into an overall
- * progress value spanning all operations and forwarded to the original
- * status callback stored in the info structure.
- */
-static void qcow2_amend_helper_cb(BlockDriverState *bs,
-                                  int64_t operation_offset,
-                                  int64_t operation_work_size, void *opaque)
-{
-    Qcow2AmendHelperCBInfo *info = opaque;
-    int64_t work_so_far;
-    int64_t work_projected;
-    int64_t overall_offset;
-
-    /* A change of operation means the previous one (if any) has finished */
-    if (info->last_operation != info->current_operation) {
-        if (info->last_operation != QCOW2_NO_OPERATION) {
-            info->operations_completed++;
-            info->offset_completed += info->last_work_size;
-        }
-        info->last_operation = info->current_operation;
-    }
-
-    assert(info->total_operations > 0);
-    assert(info->operations_completed < info->total_operations);
-
-    info->last_work_size = operation_work_size;
-
-    /* Total work of the (operations_completed + 1) operations known so far,
-     * the running one included */
-    work_so_far = info->offset_completed + operation_work_size;
-
-    /* Extrapolate to the operations not started yet: scale the known work by
-     * the number of operations not covered, then divide by the number of
-     * operations covered */
-    work_projected = work_so_far * (info->total_operations -
-                                    info->operations_completed - 1)
-                                 / (info->operations_completed + 1);
-
-    overall_offset = info->offset_completed + operation_offset;
-
-    info->original_status_cb(bs, overall_offset,
-                             work_so_far + work_projected,
-                             info->original_cb_opaque);
-}
-
-/*
- * Apply changed image options to an existing qcow2 image.
- *
- * The function first walks all option descriptors of opts and records the
- * values of those that were given explicitly, rejecting changes that are not
- * supported (preallocation mode, encryption flag, cluster size).  It then
- * applies the changes in a fixed order: version upgrade, refcount width
- * change, backing file/format, lazy refcounts, resize, and finally version
- * downgrade.
- *
- * status_cb/cb_opaque receive progress reports; they are wrapped by
- * qcow2_amend_helper_cb() so that the refcount width change and the
- * downgrade report one combined progress.
- *
- * Returns 0 on success or a negative errno value.  Steps that completed
- * before a failing step are not rolled back by this function.
- */
-static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
-                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int old_version = s->qcow_version, new_version = old_version;
-    uint64_t new_size = 0;
-    const char *backing_file = NULL, *backing_format = NULL;
-    bool lazy_refcounts = s->use_lazy_refcounts;
-    const char *compat = NULL;
-    uint64_t cluster_size = s->cluster_size;
-    bool encrypt;
-    int refcount_bits = s->refcount_bits;
-    int ret;
-    QemuOptDesc *desc = opts->list->desc;
-    Qcow2AmendHelperCBInfo helper_cb_info;
-
-    /* Collect the requested values; the descriptor list ends with an entry
-     * whose name is NULL */
-    while (desc && desc->name) {
-        if (!qemu_opt_find(opts, desc->name)) {
-            /* only change explicitly defined options */
-            desc++;
-            continue;
-        }
-
-        if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
-            compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
-            if (!compat) {
-                /* preserve default */
-            } else if (!strcmp(compat, "0.10")) {
-                new_version = 2;
-            } else if (!strcmp(compat, "1.1")) {
-                new_version = 3;
-            } else {
-                error_report("Unknown compatibility level %s", compat);
-                return -EINVAL;
-            }
-        } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
-            error_report("Cannot change preallocation mode");
-            return -ENOTSUP;
-        } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
-            new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
-        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
-            backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
-        } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
-            backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
-        } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
-            /* the image counts as encrypted when a cipher is set */
-            encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
-                                        !!s->cipher);
-
-            if (encrypt != !!s->cipher) {
-                error_report("Changing the encryption flag is not supported");
-                return -ENOTSUP;
-            }
-        } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
-            cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
-                                             cluster_size);
-            if (cluster_size != s->cluster_size) {
-                error_report("Changing the cluster size is not supported");
-                return -ENOTSUP;
-            }
-        } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
-            lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
-                                               lazy_refcounts);
-        } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
-            /* NOTE(review): the result is stored in an int; if
-             * qemu_opt_get_number() returns a wider type, large values are
-             * truncated before the range check below - verify */
-            refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
-                                                refcount_bits);
-
-            if (refcount_bits <= 0 || refcount_bits > 64 ||
-                !is_power_of_2(refcount_bits))
-            {
-                error_report("Refcount width must be a power of two and may "
-                             "not exceed 64 bits");
-                return -EINVAL;
-            }
-        } else {
-            /* if this point is reached, this probably means a new option was
-             * added without having it covered here */
-            abort();
-        }
-
-        desc++;
-    }
-
-    /* Only the downgrade and the refcount width change report progress via
-     * the helper callback, so only those two are counted; all fields not
-     * named here start out as zero */
-    helper_cb_info = (Qcow2AmendHelperCBInfo){
-        .original_status_cb = status_cb,
-        .original_cb_opaque = cb_opaque,
-        .total_operations = (new_version < old_version)
-                          + (s->refcount_bits != refcount_bits)
-    };
-
-    /* Upgrade first (some features may require compat=1.1) */
-    if (new_version > old_version) {
-        s->qcow_version = new_version;
-        ret = qcow2_update_header(bs);
-        if (ret < 0) {
-            s->qcow_version = old_version;
-            return ret;
-        }
-    }
-
-    if (s->refcount_bits != refcount_bits) {
-        /* refcount_bits is a power of two here, so counting its trailing
-         * zero bits yields the refcount order (log2 of the width) */
-        int refcount_order = ctz32(refcount_bits);
-        Error *local_error = NULL;
-
-        if (new_version < 3 && refcount_bits != 16) {
-            error_report("Different refcount widths than 16 bits require "
-                         "compatibility level 1.1 or above (use compat=1.1 or "
-                         "greater)");
-            return -EINVAL;
-        }
-
-        helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
-        ret = qcow2_change_refcount_order(bs, refcount_order,
-                                          &qcow2_amend_helper_cb,
-                                          &helper_cb_info, &local_error);
-        if (ret < 0) {
-            error_report_err(local_error);
-            return ret;
-        }
-    }
-
-    if (backing_file || backing_format) {
-        /* whichever of the two was not given keeps the value stored in the
-         * image */
-        ret = qcow2_change_backing_file(bs,
-                    backing_file ?: s->image_backing_file,
-                    backing_format ?: s->image_backing_format);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    if (s->use_lazy_refcounts != lazy_refcounts) {
-        if (lazy_refcounts) {
-            if (new_version < 3) {
-                error_report("Lazy refcounts only supported with compatibility "
-                             "level 1.1 and above (use compat=1.1 or greater)");
-                return -EINVAL;
-            }
-            /* set the feature bit, and take it back if the header cannot be
-             * written */
-            s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
-            ret = qcow2_update_header(bs);
-            if (ret < 0) {
-                s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
-                return ret;
-            }
-            s->use_lazy_refcounts = true;
-        } else {
-            /* make image clean first */
-            ret = qcow2_mark_clean(bs);
-            if (ret < 0) {
-                return ret;
-            }
-            /* now disallow lazy refcounts */
-            s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
-            ret = qcow2_update_header(bs);
-            if (ret < 0) {
-                s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
-                return ret;
-            }
-            s->use_lazy_refcounts = false;
-        }
-    }
-
-    /* new_size stays 0 unless a size option was given */
-    if (new_size) {
-        ret = bdrv_truncate(bs, new_size);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    /* Downgrade last (so unsupported features can be removed before) */
-    if (new_version < old_version) {
-        helper_cb_info.current_operation = QCOW2_DOWNGRADING;
-        ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
-                              &helper_cb_info);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Report that the image has been found to be corrupt.
- *
- * The printf-style message is written to stderr and sent out as a
- * BLOCK_IMAGE_CORRUPTED event.  A negative offset or size is left out of the
- * event.  When fatal is set, the image is additionally marked corrupt and
- * the BDS is made unusable; for a read-only BDS fatal is ignored, so
- * corruption found there is always treated as non-fatal.
- *
- * After the first report, further reports are suppressed, except for a
- * fatal one arriving while the corrupt flag is not yet set.
- */
-void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
-                             int64_t size, const char *message_format, ...)
-{
-    BDRVQcow2State *s = bs->opaque;
-    const bool has_offset = offset >= 0;
-    const bool has_size = size >= 0;
-    const char *node_name;
-    char *message;
-    va_list ap;
-
-    if (bs->read_only) {
-        fatal = false;
-    }
-
-    if (s->signaled_corruption) {
-        if (!fatal) {
-            return;
-        }
-        if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
-            return;
-        }
-    }
-
-    va_start(ap, message_format);
-    message = g_strdup_vprintf(message_format, ap);
-    va_end(ap);
-
-    if (!fatal) {
-        fprintf(stderr, "qcow2: Image is corrupt: %s; further non-fatal "
-                "corruption events will be suppressed\n", message);
-    } else {
-        fprintf(stderr, "qcow2: Marking image as corrupt: %s; further "
-                "corruption events will be suppressed\n", message);
-    }
-
-    node_name = bdrv_get_node_name(bs);
-    qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
-                                          *node_name != '\0', node_name,
-                                          message, has_offset, offset,
-                                          has_size, size,
-                                          fatal, &error_abort);
-    g_free(message);
-
-    if (fatal) {
-        qcow2_mark_corrupt(bs);
-        bs->drv = NULL; /* make BDS unusable */
-    }
-
-    s->signaled_corruption = true;
-}
-
-/*
- * Options accepted when creating a qcow2 image.  The same list is walked by
- * qcow2_amend_options(), which must handle every entry named here (it
- * aborts on an option it does not know).
- */
-static QemuOptsList qcow2_create_opts = {
-    .name = "qcow2-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_COMPAT_LEVEL,
-            .type = QEMU_OPT_STRING,
-            .help = "Compatibility level (0.10 or 1.1)"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FMT,
-            .type = QEMU_OPT_STRING,
-            .help = "Image format of the base image"
-        },
-        {
-            .name = BLOCK_OPT_ENCRYPT,
-            .type = QEMU_OPT_BOOL,
-            .help = "Encrypt the image",
-            .def_value_str = "off"
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "qcow2 cluster size",
-            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, metadata, "
-                    "falloc, full)"
-        },
-        {
-            .name = BLOCK_OPT_LAZY_REFCOUNTS,
-            .type = QEMU_OPT_BOOL,
-            .help = "Postpone refcount updates",
-            .def_value_str = "off"
-        },
-        {
-            .name = BLOCK_OPT_REFCOUNT_BITS,
-            .type = QEMU_OPT_NUMBER,
-            .help = "Width of a reference count entry in bits",
-            .def_value_str = "16"
-        },
-        { /* end of list */ }
-    }
-};
-
-/*
- * Block driver description for the "qcow2" format: it wires the generic
- * block layer callbacks to the qcow2_* implementations and is registered by
- * bdrv_qcow2_init().  Each BDS using this driver gets a BDRVQcow2State as
- * its private state (instance_size).
- */
-BlockDriver bdrv_qcow2 = {
-    .format_name        = "qcow2",
-    .instance_size      = sizeof(BDRVQcow2State),
-    .bdrv_probe         = qcow2_probe,
-    .bdrv_open          = qcow2_open,
-    .bdrv_close         = qcow2_close,
-    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
-    .bdrv_reopen_commit   = qcow2_reopen_commit,
-    .bdrv_reopen_abort    = qcow2_reopen_abort,
-    .bdrv_join_options    = qcow2_join_options,
-    .bdrv_create        = qcow2_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_co_get_block_status = qcow2_co_get_block_status,
-    .bdrv_set_key       = qcow2_set_key,
-
-    /* data path */
-    .bdrv_co_readv          = qcow2_co_readv,
-    .bdrv_co_writev         = qcow2_co_writev,
-    .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
-
-    .bdrv_co_write_zeroes   = qcow2_co_write_zeroes,
-    .bdrv_co_discard        = qcow2_co_discard,
-    .bdrv_truncate          = qcow2_truncate,
-    .bdrv_write_compressed  = qcow2_write_compressed,
-    .bdrv_make_empty        = qcow2_make_empty,
-
-    /* snapshots and image information */
-    .bdrv_snapshot_create   = qcow2_snapshot_create,
-    .bdrv_snapshot_goto     = qcow2_snapshot_goto,
-    .bdrv_snapshot_delete   = qcow2_snapshot_delete,
-    .bdrv_snapshot_list     = qcow2_snapshot_list,
-    .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
-    .bdrv_get_info          = qcow2_get_info,
-    .bdrv_get_specific_info = qcow2_get_specific_info,
-
-    /* VM state storage */
-    .bdrv_save_vmstate    = qcow2_save_vmstate,
-    .bdrv_load_vmstate    = qcow2_load_vmstate,
-
-    /* backing file handling */
-    .supports_backing           = true,
-    .bdrv_change_backing_file   = qcow2_change_backing_file,
-
-    .bdrv_refresh_limits        = qcow2_refresh_limits,
-    .bdrv_invalidate_cache      = qcow2_invalidate_cache,
-    .bdrv_inactivate            = qcow2_inactivate,
-
-    /* image creation, checking and amending */
-    .create_opts         = &qcow2_create_opts,
-    .bdrv_check          = qcow2_check,
-    .bdrv_amend_options  = qcow2_amend_options,
-
-    .bdrv_detach_aio_context  = qcow2_detach_aio_context,
-    .bdrv_attach_aio_context  = qcow2_attach_aio_context,
-};
-
-/* Register the qcow2 driver with the block layer. */
-static void bdrv_qcow2_init(void)
-{
-    bdrv_register(&bdrv_qcow2);
-}
-
-/* Hook the registration function into block layer initialization */
-block_init(bdrv_qcow2_init);
diff --git a/qemu/block/qcow2.h b/qemu/block/qcow2.h
deleted file mode 100644
index a063a3c1a..000000000
--- a/qemu/block/qcow2.h
+++ /dev/null
@@ -1,599 +0,0 @@
-/*
- * Block driver for the QCOW version 2 format
- *
- * Copyright (c) 2004-2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef BLOCK_QCOW2_H
-#define BLOCK_QCOW2_H
-
-#include "crypto/cipher.h"
-#include "qemu/coroutine.h"
-
-//#define DEBUG_ALLOC
-//#define DEBUG_ALLOC2
-//#define DEBUG_EXT
-
-/* Magic number: the bytes 'Q', 'F', 'I', 0xfb packed into 32 bits, with 'Q'
- * in the most significant byte */
-#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xfb)
-
-/* Encryption method identifiers */
-#define QCOW_CRYPT_NONE 0
-#define QCOW_CRYPT_AES  1
-
-#define QCOW_MAX_CRYPT_CLUSTERS 32
-/* Upper bound on the number of snapshots */
-#define QCOW_MAX_SNAPSHOTS 65536
-
-/* 8 MB refcount table is enough for 2 PB images at 64k cluster size
- * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
-#define QCOW_MAX_REFTABLE_SIZE 0x800000
-
-/* 32 MB L1 table is enough for 2 PB images at 64k cluster size
- * (128 GB for 512 byte clusters, 2 EB for 2 MB clusters) */
-#define QCOW_MAX_L1_SIZE 0x2000000
-
-/* Allow for an average of 1k per snapshot table entry, should be plenty of
- * space for snapshot names and IDs */
-#define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
-
-/* indicate that the refcount of the referenced cluster is exactly one. */
-#define QCOW_OFLAG_COPIED     (1ULL << 63)
-/* indicate that the cluster is compressed (they never have the copied flag) */
-#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
-/* The cluster reads as all zeros */
-#define QCOW_OFLAG_ZERO (1ULL << 0)
-
-/* Allowed range of log2(cluster size): 512 bytes up to 2 MB */
-#define MIN_CLUSTER_BITS 9
-#define MAX_CLUSTER_BITS 21
-
-/* Must be at least 2 to cover COW */
-#define MIN_L2_CACHE_SIZE 2 /* clusters */
-
-/* Must be at least 4 to cover all cases of refcount table growth */
-#define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
-
-/* Whichever is more */
-#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
-#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
-
-/* The refblock cache needs only a fourth of the L2 cache size to cover as many
- * clusters */
-#define DEFAULT_L2_REFCOUNT_SIZE_RATIO 4
-
-/* Default cluster size in bytes (64 KB) */
-#define DEFAULT_CLUSTER_SIZE 65536
-
-
-/* Names of the qcow2 driver options.  NOTE(review): presumably the runtime
- * (open-time) option keys, as opposed to the BLOCK_OPT_* creation options -
- * confirm against the option parsing code in qcow2.c */
-#define QCOW2_OPT_LAZY_REFCOUNTS "lazy-refcounts"
-#define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
-#define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
-#define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
-/* Metadata overlap check selection: a general setting, a template, and one
- * key per metadata structure */
-#define QCOW2_OPT_OVERLAP "overlap-check"
-#define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
-#define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
-#define QCOW2_OPT_OVERLAP_ACTIVE_L1 "overlap-check.active-l1"
-#define QCOW2_OPT_OVERLAP_ACTIVE_L2 "overlap-check.active-l2"
-#define QCOW2_OPT_OVERLAP_REFCOUNT_TABLE "overlap-check.refcount-table"
-#define QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK "overlap-check.refcount-block"
-#define QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE "overlap-check.snapshot-table"
-#define QCOW2_OPT_OVERLAP_INACTIVE_L1 "overlap-check.inactive-l1"
-#define QCOW2_OPT_OVERLAP_INACTIVE_L2 "overlap-check.inactive-l2"
-/* Cache sizing and cleaning */
-#define QCOW2_OPT_CACHE_SIZE "cache-size"
-#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
-#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
-#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
-
-/*
- * Packed layout of the qcow2 image header.
- * NOTE(review): presumably mirrors the on-disk header field for field; byte
- * order conversion is not visible in this header - confirm in qcow2.c.
- */
-typedef struct QCowHeader {
-    uint32_t magic;
-    uint32_t version;
-    uint64_t backing_file_offset;
-    uint32_t backing_file_size;
-    uint32_t cluster_bits;
-    uint64_t size; /* in bytes */
-    uint32_t crypt_method;
-    uint32_t l1_size; /* XXX: save number of clusters instead ? */
-    uint64_t l1_table_offset;
-    uint64_t refcount_table_offset;
-    uint32_t refcount_table_clusters;
-    uint32_t nb_snapshots;
-    uint64_t snapshots_offset;
-
-    /* The following fields are only valid for version >= 3 */
-    uint64_t incompatible_features;
-    uint64_t compatible_features;
-    uint64_t autoclear_features;
-
-    uint32_t refcount_order;
-    uint32_t header_length;
-} QEMU_PACKED QCowHeader;
-
-/*
- * Packed fixed-size part of a snapshot table entry.  It is followed by
- * variable-length data in this order: extra data (extra_data_size bytes),
- * the ID string (id_str_size bytes) and the name (name_size bytes).
- */
-typedef struct QEMU_PACKED QCowSnapshotHeader {
-    /* header is 8 byte aligned */
-    uint64_t l1_table_offset;
-
-    uint32_t l1_size;
-    uint16_t id_str_size;
-    uint16_t name_size;
-
-    uint32_t date_sec;
-    uint32_t date_nsec;
-
-    uint64_t vm_clock_nsec;
-
-    uint32_t vm_state_size;
-    uint32_t extra_data_size; /* for extension */
-    /* extra data follows */
-    /* id_str follows */
-    /* name follows  */
-} QCowSnapshotHeader;
-
-/*
- * Packed extra data of a snapshot table entry.
- * NOTE(review): presumably the "extra data" that follows QCowSnapshotHeader,
- * with vm_state_size_large being a 64-bit counterpart of the header's 32-bit
- * vm_state_size - confirm in qcow2-snapshot.c.
- */
-typedef struct QEMU_PACKED QCowSnapshotExtraData {
-    uint64_t vm_state_size_large;
-    uint64_t disk_size;
-} QCowSnapshotExtraData;
-
-
-/*
- * In-memory description of one snapshot (not packed).  Unlike the snapshot
- * header layout, the ID and name are held as pointers and the VM state size
- * is a single 64-bit value.
- */
-typedef struct QCowSnapshot {
-    uint64_t l1_table_offset;
-    uint32_t l1_size;
-    char *id_str;
-    char *name;
-    uint64_t disk_size;
-    uint64_t vm_state_size;
-    uint32_t date_sec;
-    uint32_t date_nsec;
-    uint64_t vm_clock_nsec;
-} QCowSnapshot;
-
-/* Forward declaration only: no definition is visible at this point, and the
- * driver state below refers to caches through pointers */
-struct Qcow2Cache;
-typedef struct Qcow2Cache Qcow2Cache;
-
-/*
- * A header extension that is kept as raw data: its magic, its length and the
- * payload in a flexible array member.  Instances are chained into the
- * unknown_header_ext list of BDRVQcow2State.
- */
-typedef struct Qcow2UnknownHeaderExtension {
-    uint32_t magic;
-    uint32_t len;
-    QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
-    uint8_t data[];
-} Qcow2UnknownHeaderExtension;
-
-/* Feature classes; NOTE(review): presumably the values stored in
- * Qcow2Feature::type - confirm against the feature name table code */
-enum {
-    QCOW2_FEAT_TYPE_INCOMPATIBLE    = 0,
-    QCOW2_FEAT_TYPE_COMPATIBLE      = 1,
-    QCOW2_FEAT_TYPE_AUTOCLEAR       = 2,
-};
-
-/* Incompatible feature bits: for each feature the bit number (*_BITNR) and
- * the corresponding mask, plus a mask covering all bits defined here */
-enum {
-    QCOW2_INCOMPAT_DIRTY_BITNR   = 0,
-    QCOW2_INCOMPAT_CORRUPT_BITNR = 1,
-    QCOW2_INCOMPAT_DIRTY         = 1 << QCOW2_INCOMPAT_DIRTY_BITNR,
-    QCOW2_INCOMPAT_CORRUPT       = 1 << QCOW2_INCOMPAT_CORRUPT_BITNR,
-
-    QCOW2_INCOMPAT_MASK          = QCOW2_INCOMPAT_DIRTY
-                                 | QCOW2_INCOMPAT_CORRUPT,
-};
-
-/* Compatible feature bits: bit number and mask of the lazy refcounts
- * feature, plus a mask covering all bits defined here */
-enum {
-    QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR = 0,
-    QCOW2_COMPAT_LAZY_REFCOUNTS       = 1 << QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
-
-    QCOW2_COMPAT_FEAT_MASK            = QCOW2_COMPAT_LAZY_REFCOUNTS,
-};
-
-/* Kinds of discard; used as the index into
- * BDRVQcow2State::discard_passthrough[] */
-enum qcow2_discard_type {
-    QCOW2_DISCARD_NEVER = 0,
-    QCOW2_DISCARD_ALWAYS,
-    QCOW2_DISCARD_REQUEST,
-    QCOW2_DISCARD_SNAPSHOT,
-    QCOW2_DISCARD_OTHER,
-    /* number of discard types, i.e. the array size; not a type itself */
-    QCOW2_DISCARD_MAX
-};
-
-/* Packed 48-byte record naming one feature bit: a type byte, the bit number
- * and a 46-byte name field */
-typedef struct Qcow2Feature {
-    uint8_t type;
-    uint8_t bit;
-    char    name[46];
-} QEMU_PACKED Qcow2Feature;
-
-/* One byte range (offset, bytes) of a BDS recorded for discarding; entries
- * are queued on the discards list of BDRVQcow2State */
-typedef struct Qcow2DiscardRegion {
-    BlockDriverState *bs;
-    uint64_t offset;
-    uint64_t bytes;
-    QTAILQ_ENTRY(Qcow2DiscardRegion) next;
-} Qcow2DiscardRegion;
-
-/* Accessor function types for reading and writing the entry at index in a
- * refcount array; BDRVQcow2State holds one pointer of each type */
-typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
-                                      uint64_t index);
-typedef void Qcow2SetRefcountFunc(void *refcount_array,
-                                  uint64_t index, uint64_t value);
-
-/*
- * Per-image state of the qcow2 driver; this is what bs->opaque points to
- * for a BDS opened with the qcow2 driver.
- */
-typedef struct BDRVQcow2State {
-    /* Image geometry */
-    int cluster_bits;
-    int cluster_size;
-    int cluster_sectors;
-    int l2_bits;
-    int l2_size;
-    int l1_size;
-    /* L1 index where the VM state area begins (see qcow2_vm_state_offset) */
-    int l1_vm_state_index;
-    int refcount_block_bits;
-    int refcount_block_size;
-    int csize_shift;
-    int csize_mask;
-    uint64_t cluster_offset_mask;
-    uint64_t l1_table_offset;
-    uint64_t *l1_table;
-
-    /* Metadata caches and their periodic cleaning */
-    Qcow2Cache* l2_table_cache;
-    Qcow2Cache* refcount_block_cache;
-    QEMUTimer *cache_clean_timer;
-    unsigned cache_clean_interval;
-
-    uint8_t *cluster_cache;
-    uint8_t *cluster_data;
-    uint64_t cluster_cache_offset;
-    /* List of QCowL2Meta entries */
-    QLIST_HEAD(QCowClusterAlloc, QCowL2Meta) cluster_allocs;
-
-    /* Refcount table and allocation hints */
-    uint64_t *refcount_table;
-    uint64_t refcount_table_offset;
-    uint32_t refcount_table_size;
-    uint64_t free_cluster_index;
-    uint64_t free_byte_offset;
-
-    CoMutex lock;
-
-    QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
-    uint32_t crypt_method_header;
-    /* Snapshot table */
-    uint64_t snapshots_offset;
-    int snapshots_size;
-    unsigned int nb_snapshots;
-    QCowSnapshot *snapshots;
-
-    int flags;
-    int qcow_version;
-    bool use_lazy_refcounts;
-    /* refcount_bits is the width of a refcount entry in bits; refcount_order
-     * is its base-2 logarithm (order 4 <=> 16 bits) */
-    int refcount_order;
-    int refcount_bits;
-    uint64_t refcount_max;
-
-    Qcow2GetRefcountFunc *get_refcount;
-    Qcow2SetRefcountFunc *set_refcount;
-
-    /* Indexed by enum qcow2_discard_type */
-    bool discard_passthrough[QCOW2_DISCARD_MAX];
-
-    int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
-    /* Set once qcow2_signal_corruption() has reported a corruption */
-    bool signaled_corruption;
-
-    /* Feature bit fields of the image */
-    uint64_t incompatible_features;
-    uint64_t compatible_features;
-    uint64_t autoclear_features;
-
-    size_t unknown_header_fields_size;
-    void* unknown_header_fields;
-    QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
-    QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
-    bool cache_discards;
-
-    /* Backing file path and format as stored in the image (this is not the
-     * effective path/format, which may be the result of a runtime option
-     * override) */
-    char *image_backing_file;
-    char *image_backing_format;
-} BDRVQcow2State;
-
-/** Describes one area that has to be copied for copy-on-write. */
-typedef struct Qcow2COWRegion {
-    /**
-     * Offset of the COW region in bytes from the start of the first cluster
-     * touched by the request.
-     */
-    uint64_t    offset;
-
-    /** Number of sectors to copy */
-    int         nb_sectors;
-} Qcow2COWRegion;
-
-/**
- * Describes an in-flight (part of a) write request that writes to clusters
- * that are not referenced in their L2 table yet.
- */
-typedef struct QCowL2Meta
-{
-    /** Guest offset of the first newly allocated cluster */
-    uint64_t offset;
-
-    /** Host offset of the first newly allocated cluster */
-    uint64_t alloc_offset;
-
-    /**
-     * Number of sectors from the start of the first allocated cluster to
-     * the end of the (possibly shortened) request
-     */
-    int nb_available;
-
-    /** Number of newly allocated clusters */
-    int nb_clusters;
-
-    /**
-     * Requests that overlap with this allocation and wait to be restarted
-     * when the allocating request has completed.
-     */
-    CoQueue dependent_requests;
-
-    /**
-     * The COW Region between the start of the first allocated cluster and the
-     * area the guest actually writes to.
-     */
-    Qcow2COWRegion cow_start;
-
-    /**
-     * The COW Region between the area the guest actually writes to and the
-     * end of the last allocated cluster.
-     */
-    Qcow2COWRegion cow_end;
-
-    /** Pointer to next L2Meta of the same write request */
-    struct QCowL2Meta *next;
-
-    /**
-     * List linkage. NOTE(review): presumably for the cluster_allocs list in
-     * BDRVQcow2State, which holds QCowL2Meta entries - confirm.
-     */
-    QLIST_ENTRY(QCowL2Meta) next_in_flight;
-} QCowL2Meta;
-
-/* Cluster types as returned by qcow2_get_cluster_type() */
-enum {
-    QCOW2_CLUSTER_UNALLOCATED,
-    QCOW2_CLUSTER_NORMAL,
-    QCOW2_CLUSTER_COMPRESSED,
-    QCOW2_CLUSTER_ZERO
-};
-
-/*
- * Metadata structures that can be selected for overlap checking.  Each one
- * has a bit number (*_BITNR) and a matching mask; the masks are combined in
- * BDRVQcow2State::overlap_check.
- */
-typedef enum QCow2MetadataOverlap {
-    QCOW2_OL_MAIN_HEADER_BITNR    = 0,
-    QCOW2_OL_ACTIVE_L1_BITNR      = 1,
-    QCOW2_OL_ACTIVE_L2_BITNR      = 2,
-    QCOW2_OL_REFCOUNT_TABLE_BITNR = 3,
-    QCOW2_OL_REFCOUNT_BLOCK_BITNR = 4,
-    QCOW2_OL_SNAPSHOT_TABLE_BITNR = 5,
-    QCOW2_OL_INACTIVE_L1_BITNR    = 6,
-    QCOW2_OL_INACTIVE_L2_BITNR    = 7,
-
-    /* number of bit numbers defined above */
-    QCOW2_OL_MAX_BITNR            = 8,
-
-    QCOW2_OL_NONE           = 0,
-    QCOW2_OL_MAIN_HEADER    = (1 << QCOW2_OL_MAIN_HEADER_BITNR),
-    QCOW2_OL_ACTIVE_L1      = (1 << QCOW2_OL_ACTIVE_L1_BITNR),
-    QCOW2_OL_ACTIVE_L2      = (1 << QCOW2_OL_ACTIVE_L2_BITNR),
-    QCOW2_OL_REFCOUNT_TABLE = (1 << QCOW2_OL_REFCOUNT_TABLE_BITNR),
-    QCOW2_OL_REFCOUNT_BLOCK = (1 << QCOW2_OL_REFCOUNT_BLOCK_BITNR),
-    QCOW2_OL_SNAPSHOT_TABLE = (1 << QCOW2_OL_SNAPSHOT_TABLE_BITNR),
-    QCOW2_OL_INACTIVE_L1    = (1 << QCOW2_OL_INACTIVE_L1_BITNR),
-    /* NOTE: Checking overlaps with inactive L2 tables will result in bdrv
-     * reads. */
-    QCOW2_OL_INACTIVE_L2    = (1 << QCOW2_OL_INACTIVE_L2_BITNR),
-} QCow2MetadataOverlap;
-
-/* Perform all overlap checks which can be done in constant time */
-#define QCOW2_OL_CONSTANT \
-    (QCOW2_OL_MAIN_HEADER | QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE | \
-     QCOW2_OL_SNAPSHOT_TABLE)
-
-/* Perform all overlap checks which don't require disk access */
-#define QCOW2_OL_CACHED \
-    (QCOW2_OL_CONSTANT | QCOW2_OL_ACTIVE_L2 | QCOW2_OL_REFCOUNT_BLOCK | \
-     QCOW2_OL_INACTIVE_L1)
-
-/* Perform all overlap checks */
-#define QCOW2_OL_ALL \
-    (QCOW2_OL_CACHED | QCOW2_OL_INACTIVE_L2)
-
-/* Masks selecting the offset part of L1 and L2 table entries (bits 9 to 55);
- * L2E_OFFSET_MASK is what qcow2_get_cluster_type() tests */
-#define L1E_OFFSET_MASK 0x00fffffffffffe00ULL
-#define L2E_OFFSET_MASK 0x00fffffffffffe00ULL
-/* Low 62 bits of an L2 entry, i.e. everything below the COPIED and
- * COMPRESSED flag bits */
-#define L2E_COMPRESSED_OFFSET_SIZE_MASK 0x3fffffffffffffffULL
-
-/* Mask clearing the low 9 bits of a refcount table entry */
-#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
-
-/* Clear the bits of offset selected by (cluster_size - 1); with a
- * power-of-two cluster size this is the start of the cluster containing
- * offset. */
-static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset)
-{
-    int in_cluster_mask = s->cluster_size - 1;
-
-    return offset & ~in_cluster_mask;
-}
-
-/* Keep only the bits of offset selected by (cluster_size - 1); with a
- * power-of-two cluster size this is the position of offset inside its
- * cluster. */
-static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset)
-{
-    int in_cluster_mask = s->cluster_size - 1;
-
-    return offset & in_cluster_mask;
-}
-
-/* Number of clusters needed to hold size bytes, rounding up. */
-static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size)
-{
-    int round_up = s->cluster_size - 1;
-
-    return (size + round_up) >> s->cluster_bits;
-}
-
-/* Number of L1 entries needed to cover size bytes, rounding up; one L1
- * entry spans 2^(cluster_bits + l2_bits) bytes. */
-static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
-{
-    int shift = s->cluster_bits + s->l2_bits;
-    uint64_t bytes_per_l1_entry = 1ULL << shift;
-    uint64_t rounded = size + bytes_per_l1_entry - 1;
-
-    return rounded >> shift;
-}
-
-/* Index within an L2 table of the entry for offset: the cluster number
- * masked with (l2_size - 1). */
-static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
-{
-    int64_t cluster_number = offset >> s->cluster_bits;
-
-    return cluster_number & (s->l2_size - 1);
-}
-
-/* Round @offset up to a multiple of @n; @n is expected to be a power of 2 */
-static inline int64_t align_offset(int64_t offset, int n)
-{
-    const int low_mask = n - 1;
-
-    return (offset + n - 1) & ~low_mask;
-}
-
-/* Byte offset obtained by scaling l1_vm_state_index by one L1 entry's span */
-static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
-{
-    int64_t l1_index = (int64_t)s->l1_vm_state_index;
-
-    return l1_index << (s->cluster_bits + s->l2_bits);
-}
-
-/* QCOW_MAX_REFTABLE_SIZE expressed in clusters (rounded down) */
-static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
-{
-    uint64_t clusters = QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
-
-    return clusters;
-}
-
-/*
- * Classify an L2 entry.  The compressed flag is tested first, then the zero
- * flag; only then does the offset field decide between normal and
- * unallocated.
- */
-static inline int qcow2_get_cluster_type(uint64_t l2_entry)
-{
-    if (l2_entry & QCOW_OFLAG_COMPRESSED) {
-        return QCOW2_CLUSTER_COMPRESSED;
-    }
-
-    if (l2_entry & QCOW_OFLAG_ZERO) {
-        return QCOW2_CLUSTER_ZERO;
-    }
-
-    return (l2_entry & L2E_OFFSET_MASK) ? QCOW2_CLUSTER_NORMAL
-                                        : QCOW2_CLUSTER_UNALLOCATED;
-}
-
-/* Refcounts are eager (true) unless the dirty incompatible feature is set */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s)
-{
-    if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
-        return false;
-    }
-    return true;
-}
-
-/* Start of the leading COW region: m->offset plus cow_start.offset */
-static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
-{
-    uint64_t start = m->offset + m->cow_start.offset;
-
-    return start;
-}
-
-/* End of the trailing COW region: its start plus its length in bytes */
-static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
-{
-    uint64_t end = m->offset + m->cow_end.offset
-        + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
-
-    return end;
-}
-
-/* Absolute difference of two refcounts, computed without wrapping */
-static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
-{
-    if (r1 > r2) {
-        return r1 - r2;
-    }
-    return r2 - r1;
-}
-
-// FIXME Need qcow2_ prefix to global functions
-
-/* qcow2.c functions */
-int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                  int64_t sector_num, int nb_sectors);
-
-int qcow2_mark_dirty(BlockDriverState *bs);
-int qcow2_mark_corrupt(BlockDriverState *bs);
-int qcow2_mark_consistent(BlockDriverState *bs);
-int qcow2_update_header(BlockDriverState *bs);
-
-void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
-                             int64_t size, const char *message_format, ...)
-                             GCC_FMT_ATTR(5, 6);
-
-/* qcow2-refcount.c functions */
-int qcow2_refcount_init(BlockDriverState *bs);
-void qcow2_refcount_close(BlockDriverState *bs);
-
-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
-                       uint64_t *refcount);
-
-int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
-                                  uint64_t addend, bool decrease,
-                                  enum qcow2_discard_type type);
-
-int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
-                                int64_t nb_clusters);
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
-void qcow2_free_clusters(BlockDriverState *bs,
-                          int64_t offset, int64_t size,
-                          enum qcow2_discard_type type);
-void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
-                             int nb_clusters, enum qcow2_discard_type type);
-
-int qcow2_update_snapshot_refcount(BlockDriverState *bs,
-    int64_t l1_table_offset, int l1_size, int addend);
-
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
-                          BdrvCheckMode fix);
-
-void qcow2_process_discards(BlockDriverState *bs, int ret);
-
-int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
-                                 int64_t size);
-int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
-                                  int64_t size);
-
-int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
-                                BlockDriverAmendStatusCB *status_cb,
-                                void *cb_opaque, Error **errp);
-
-/* qcow2-cluster.c functions */
-int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
-                        bool exact_size);
-int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
-void qcow2_l2_cache_reset(BlockDriverState *bs);
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
-                          uint8_t *out_buf, const uint8_t *in_buf,
-                          int nb_sectors, bool enc, Error **errp);
-
-int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset);
-int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *host_offset, QCowL2Meta **m);
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
-                                         uint64_t offset,
-                                         int compressed_size);
-
-int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type, bool full_discard);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
-
-int qcow2_expand_zero_clusters(BlockDriverState *bs,
-                               BlockDriverAmendStatusCB *status_cb,
-                               void *cb_opaque);
-
-/* qcow2-snapshot.c functions */
-int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
-int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id);
-int qcow2_snapshot_delete(BlockDriverState *bs,
-                          const char *snapshot_id,
-                          const char *name,
-                          Error **errp);
-int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab);
-int qcow2_snapshot_load_tmp(BlockDriverState *bs,
-                            const char *snapshot_id,
-                            const char *name,
-                            Error **errp);
-
-void qcow2_free_snapshots(BlockDriverState *bs);
-int qcow2_read_snapshots(BlockDriverState *bs);
-
-/* qcow2-cache.c functions */
-Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
-
-void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
-     void *table);
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
-    Qcow2Cache *dependency);
-void qcow2_cache_depends_on_flush(Qcow2Cache *c);
-
-void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
-int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
-
-int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-    void **table);
-int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
-    void **table);
-void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
-
-#endif
diff --git a/qemu/block/qed-check.c b/qemu/block/qed-check.c
deleted file mode 100644
index 622f30897..000000000
--- a/qemu/block/qed-check.c
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Consistency Check
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qed.h"
-
-typedef struct {
-    BDRVQEDState *s;                    /* image being checked */
-    BdrvCheckResult *result;            /* counters updated by the check */
-    bool fix;                           /* whether to fix invalid offsets */
-
-    uint64_t nclusters;                 /* s->file_size in clusters */
-    uint32_t *used_clusters;            /* referenced cluster bitmap */
-
-    QEDRequest request;                 /* holds the L2 table being checked */
-} QEDCheck;
-
-/**
- * Test a bit in a bitmap made of 32-bit words
- *
- * @bitmap:         Bitmap; bit @n lives in word @n / 32
- * @n:              Bit index
- * @ret:            true if the bit is set
- */
-static bool qed_test_bit(uint32_t *bitmap, uint64_t n) {
-    /* Shift an unsigned 1: 1 << 31 on a signed int is undefined behaviour */
-    return !!(bitmap[n / 32] & (1U << (n % 32)));
-}
-
-/**
- * Set a bit in a bitmap made of 32-bit words
- *
- * @bitmap:         Bitmap; bit @n lives in word @n / 32
- * @n:              Bit index
- */
-static void qed_set_bit(uint32_t *bitmap, uint64_t n) {
-    /* Shift an unsigned 1: 1 << 31 on a signed int is undefined behaviour */
-    bitmap[n / 32] |= 1U << (n % 32);
-}
-
-/**
- * Mark a run of clusters as referenced in the bitmap
- *
- * @check:          Check structure
- * @offset:         Starting offset in bytes
- * @n:              Number of clusters
- * @ret:            true if none of the clusters had been marked before
- *
- * Every cluster that was already marked adds one to the corruption count.
- */
-static bool qed_set_used_clusters(QEDCheck *check, uint64_t offset,
-                                  unsigned int n)
-{
-    uint64_t cluster = qed_bytes_to_clusters(check->s, offset);
-    unsigned int already_used = 0;
-    unsigned int i;
-
-    for (i = 0; i < n; i++, cluster++) {
-        /* Clusters should only be referenced once */
-        if (qed_test_bit(check->used_clusters, cluster)) {
-            already_used++;
-        }
-        qed_set_bit(check->used_clusters, cluster);
-    }
-
-    check->result->corruptions += already_used;
-    return already_used == 0;
-}
-
-/**
- * Check an L2 table
- *
- * Entries that are neither unallocated nor zero clusters are counted as
- * allocated (and as fragmented when they do not directly follow the previous
- * such entry).  Valid offsets are marked in the used-cluster bitmap; invalid
- * ones are zeroed when fixing, otherwise counted as corruptions.
- *
- * @ret:            Number of invalid cluster offsets
- */
-static unsigned int qed_check_l2_table(QEDCheck *check, QEDTable *table)
-{
-    BDRVQEDState *s = check->s;
-    uint64_t prev_offset = 0;
-    unsigned int invalid = 0;
-    unsigned int i;
-
-    for (i = 0; i < s->table_nelems; i++) {
-        uint64_t offset = table->offsets[i];
-        bool has_data = !qed_offset_is_unalloc_cluster(offset) &&
-                        !qed_offset_is_zero_cluster(offset);
-
-        if (!has_data) {
-            continue;
-        }
-
-        check->result->bfi.allocated_clusters++;
-        if (prev_offset && (prev_offset + s->header.cluster_size != offset)) {
-            check->result->bfi.fragmented_clusters++;
-        }
-        prev_offset = offset;
-
-        if (qed_check_cluster_offset(s, offset)) {
-            qed_set_used_clusters(check, offset, 1);
-            continue;
-        }
-
-        /* Invalid cluster offset: repair or report */
-        if (check->fix) {
-            table->offsets[i] = 0;
-            check->result->corruptions_fixed++;
-        } else {
-            check->result->corruptions++;
-        }
-        invalid++;
-    }
-
-    return invalid;
-}
-
-/**
- * Descend tables and check each cluster is referenced once only
- *
- * @check:          Check structure
- * @table:          L1 table to walk
- * @ret:            0 on success, otherwise the last error returned by an L2
- *                  table read or by a table write
- *
- * Invalid L1 entries are zeroed when @check->fix is set and the L1 table is
- * written back once the walk is done.  Each L2 table is written back as soon
- * as it has had entries fixed.  I/O failures are counted in check_errors and
- * the walk continues with the next entry.
- */
-static int qed_check_l1_table(QEDCheck *check, QEDTable *table)
-{
-    BDRVQEDState *s = check->s;
-    unsigned int i, num_invalid_l1 = 0;
-    int ret, last_error = 0;
-
-    /* Mark L1 table clusters used */
-    qed_set_used_clusters(check, s->header.l1_table_offset,
-                          s->header.table_size);
-
-    for (i = 0; i < s->table_nelems; i++) {
-        unsigned int num_invalid_l2;
-        uint64_t offset = table->offsets[i];
-
-        if (qed_offset_is_unalloc_cluster(offset)) {
-            continue;
-        }
-
-        /* Detect invalid L2 offset */
-        if (!qed_check_table_offset(s, offset)) {
-            /* Clear invalid offset */
-            if (check->fix) {
-                table->offsets[i] = 0;
-                check->result->corruptions_fixed++;
-            } else {
-                check->result->corruptions++;
-            }
-
-            num_invalid_l1++;
-            continue;
-        }
-
-        if (!qed_set_used_clusters(check, offset, s->header.table_size)) {
-            continue; /* skip an invalid table */
-        }
-
-        ret = qed_read_l2_table_sync(s, &check->request, offset);
-        if (ret) {
-            check->result->check_errors++;
-            last_error = ret;
-            continue;
-        }
-
-        num_invalid_l2 = qed_check_l2_table(check,
-                                            check->request.l2_table->table);
-
-        /* Write out fixed L2 table */
-        if (num_invalid_l2 > 0 && check->fix) {
-            ret = qed_write_l2_table_sync(s, &check->request, 0,
-                                          s->table_nelems, false);
-            if (ret) {
-                check->result->check_errors++;
-                last_error = ret;
-                continue;
-            }
-        }
-    }
-
-    /* Drop reference to final table.  qed_unref_l2_cache_entry() ignores
-     * NULL, so this is safe when no L2 table is held. */
-    qed_unref_l2_cache_entry(check->request.l2_table);
-    check->request.l2_table = NULL;
-
-    /* Write out fixed L1 table */
-    if (num_invalid_l1 > 0 && check->fix) {
-        ret = qed_write_l1_table_sync(s, 0, s->table_nelems);
-        if (ret) {
-            check->result->check_errors++;
-            last_error = ret;
-        }
-    }
-
-    return last_error;
-}
-
-/**
- * Check for unreferenced (leaked) clusters
- *
- * Scans the used-cluster bitmap from cluster index header.header_size up to
- * the end of the image file and counts every cluster whose bit is clear.
- */
-static void qed_check_for_leaks(QEDCheck *check)
-{
-    BDRVQEDState *s = check->s;
-    uint64_t cluster = s->header.header_size;
-
-    while (cluster < check->nclusters) {
-        if (!qed_test_bit(check->used_clusters, cluster)) {
-            check->result->leaks++;
-        }
-        cluster++;
-    }
-}
-
-/**
- * Mark an image clean once it passes check or has been repaired
- *
- * @s:              QED state
- * @result:         Check result; nothing is done if it reports corruptions
- *                  or check errors
- *
- * QED_F_NEED_CHECK is only cleared after the preceding flush succeeded.  A
- * failed flush or header write is counted in @result->check_errors and leaves
- * the bit set in the in-memory header.
- */
-static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
-{
-    /* Skip if there were unfixable corruptions or I/O errors */
-    if (result->corruptions > 0 || result->check_errors > 0) {
-        return;
-    }
-
-    /* Skip if image is already marked clean */
-    if (!(s->header.features & QED_F_NEED_CHECK)) {
-        return;
-    }
-
-    /* Ensure fixes reach storage before clearing check bit */
-    if (bdrv_flush(s->bs) < 0) {
-        result->check_errors++;
-        return;
-    }
-
-    s->header.features &= ~QED_F_NEED_CHECK;
-    if (qed_write_header_sync(s) != 0) {
-        /* The clean header was not written; keep the image marked dirty */
-        s->header.features |= QED_F_NEED_CHECK;
-        result->check_errors++;
-    }
-}
-
-/**
- * Run a consistency check on a QED image
- *
- * @s:              QED state
- * @result:         Filled with corruption, leak and error counters
- * @fix:            Whether invalid offsets should be repaired
- * @ret:            0 on success, -ENOMEM if the cluster bitmap cannot be
- *                  allocated, otherwise the error from the L1 table walk
- */
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
-{
-    QEDCheck check = {
-        .s = s,
-        .result = result,
-        .fix = fix,
-        .nclusters = qed_bytes_to_clusters(s, s->file_size),
-        .request = { .l2_table = NULL },
-    };
-    int ret;
-
-    /* One bit per cluster of the image file, packed into 32-bit words */
-    check.used_clusters = g_try_new0(uint32_t, (check.nclusters + 31) / 32);
-    if (check.nclusters && check.used_clusters == NULL) {
-        return -ENOMEM;
-    }
-
-    check.result->bfi.total_clusters =
-        (s->header.image_size + s->header.cluster_size - 1) /
-            s->header.cluster_size;
-
-    ret = qed_check_l1_table(&check, s->l1_table);
-    if (ret != 0) {
-        goto out;
-    }
-
-    /* Only check for leaks if entire image was scanned successfully */
-    qed_check_for_leaks(&check);
-    if (fix) {
-        qed_check_mark_clean(s, result);
-    }
-
-out:
-    g_free(check.used_clusters);
-    return ret;
-}
diff --git a/qemu/block/qed-cluster.c b/qemu/block/qed-cluster.c
deleted file mode 100644
index c24e75616..000000000
--- a/qemu/block/qed-cluster.c
+++ /dev/null
@@ -1,166 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Cluster functions
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qed.h"
-
-/**
- * Count the number of contiguous data clusters
- *
- * @s:              QED state
- * @table:          L2 table
- * @index:          First cluster index
- * @n:              Maximum number of clusters
- * @offset:         Set to first cluster offset
- * @ret:            Length of the run starting at @index, at least 1
- *
- * A run is a sequence of entries that are all unallocated, all zero
- * clusters, or allocated at consecutive cluster offsets.  The scan never
- * goes past the end of the table.
- */
-static unsigned int qed_count_contiguous_clusters(BDRVQEDState *s,
-                                                  QEDTable *table,
-                                                  unsigned int index,
-                                                  unsigned int n,
-                                                  uint64_t *offset)
-{
-    unsigned int end = MIN(index + n, s->table_nelems);
-    uint64_t last = table->offsets[index];
-    unsigned int i;
-
-    *offset = last;
-
-    for (i = index + 1; i < end; i++) {
-        uint64_t cur = table->offsets[i];
-        bool allocated_run = false;
-        bool same_run;
-
-        if (qed_offset_is_unalloc_cluster(last)) {
-            /* Counting unallocated clusters */
-            same_run = qed_offset_is_unalloc_cluster(cur);
-        } else if (qed_offset_is_zero_cluster(last)) {
-            /* Counting zero clusters */
-            same_run = qed_offset_is_zero_cluster(cur);
-        } else {
-            /* Counting allocated clusters */
-            same_run = (cur == last + s->header.cluster_size);
-            allocated_run = true;
-        }
-
-        if (!same_run) {
-            break;
-        }
-        if (allocated_run) {
-            last = cur;
-        }
-    }
-    return i - index;
-}
-
-typedef struct {
-    BDRVQEDState *s;
-    uint64_t pos;           /* byte position in device being looked up */
-    size_t len;             /* number of bytes, limited to one L2 table */
-
-    QEDRequest *request;    /* its l2_table is read by the completion */
-
-    /* User callback */
-    QEDFindClusterFunc *cb;
-    void *opaque;
-} QEDFindClusterCB;
-
-/**
- * Completion of the L2 table read started by qed_find_cluster()
- *
- * On success the L2 table is searched for the run of clusters starting at
- * the requested position and the user callback receives the cluster type,
- * the first cluster offset and the usable length.  On failure the error is
- * passed on with offset and length set to 0.  The lookup state is freed
- * after the user callback returns.
- */
-static void qed_find_cluster_cb(void *opaque, int ret)
-{
-    QEDFindClusterCB *lookup = opaque;
-    uint64_t offset = 0;
-    size_t len = 0;
-
-    if (ret == 0) {
-        BDRVQEDState *s = lookup->s;
-        QEDRequest *request = lookup->request;
-        unsigned int index = qed_l2_index(s, lookup->pos);
-        unsigned int n;
-
-        n = qed_bytes_to_clusters(s,
-                                  qed_offset_into_cluster(s, lookup->pos) +
-                                  lookup->len);
-        n = qed_count_contiguous_clusters(s, request->l2_table->table,
-                                          index, n, &offset);
-
-        if (qed_offset_is_unalloc_cluster(offset)) {
-            ret = QED_CLUSTER_L2;
-        } else if (qed_offset_is_zero_cluster(offset)) {
-            ret = QED_CLUSTER_ZERO;
-        } else if (qed_check_cluster_offset(s, offset)) {
-            ret = QED_CLUSTER_FOUND;
-        } else {
-            ret = -EINVAL;
-        }
-
-        len = MIN(lookup->len, n * s->header.cluster_size -
-                  qed_offset_into_cluster(s, lookup->pos));
-    }
-
-    lookup->cb(lookup->opaque, ret, offset, len);
-    g_free(lookup);
-}
-
-/**
- * Find the offset of a data cluster
- *
- * @s:          QED state
- * @request:    L2 cache entry
- * @pos:        Byte position in device
- * @len:        Number of bytes
- * @cb:         Completion function
- * @opaque:     User data for completion function
- *
- * Translates a position in the block device into an offset in the image
- * file and reports the translated offset, or the unallocated range, through
- * the cb completion callback.
- *
- * If the L2 table exists, request->l2_table points to the L2 table cache entry
- * and the caller must free the reference when they are finished.  The cache
- * entry is exposed in this way to avoid callers having to read the L2 table
- * again later during request processing.  If request->l2_table is non-NULL it
- * will be unreferenced before taking on the new cache entry.
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque)
-{
-    QEDFindClusterCB *lookup;
-    uint64_t l2_offset;
-    uint64_t l2_end;
-
-    /* Requests are broken up at the L2 boundary so that a request acts on
-     * one L2 table at a time; clamp the length accordingly.
-     */
-    l2_end = ((pos >> s->l1_shift) + 1) << s->l1_shift;
-    len = MIN(len, l2_end - pos);
-
-    l2_offset = s->l1_table->offsets[qed_l1_index(s, pos)];
-    if (qed_offset_is_unalloc_cluster(l2_offset)) {
-        cb(opaque, QED_CLUSTER_L1, 0, len);
-        return;
-    }
-    if (!qed_check_table_offset(s, l2_offset)) {
-        cb(opaque, -EINVAL, 0, 0);
-        return;
-    }
-
-    lookup = g_malloc(sizeof(*lookup));
-    lookup->s = s;
-    lookup->request = request;
-    lookup->pos = pos;
-    lookup->len = len;
-    lookup->cb = cb;
-    lookup->opaque = opaque;
-
-    qed_read_l2_table(s, request, l2_offset, qed_find_cluster_cb, lookup);
-}
diff --git a/qemu/block/qed-gencb.c b/qemu/block/qed-gencb.c
deleted file mode 100644
index faf8ecc84..000000000
--- a/qemu/block/qed-gencb.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qed.h"
-
-/**
- * Allocate @len bytes and record the completion callback at the start
- *
- * @len:        Allocation size; the block is accessed as a GenericCB
- * @cb:         Completion function to remember
- * @opaque:     Argument for the completion function
- */
-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
-{
-    GenericCB *header = g_malloc(len);
-
-    header->opaque = opaque;
-    header->cb = cb;
-    return header;
-}
-
-/**
- * Free a GenericCB and invoke the callback it recorded
- *
- * The callback and its argument are copied out first because the block is
- * released before the callback runs.
- */
-void gencb_complete(void *opaque, int ret)
-{
-    GenericCB *gencb = opaque;
-    void *user_opaque = gencb->opaque;
-    BlockCompletionFunc *user_cb = gencb->cb;
-
-    g_free(gencb);
-    user_cb(user_opaque, ret);
-}
diff --git a/qemu/block/qed-l2-cache.c b/qemu/block/qed-l2-cache.c
deleted file mode 100644
index 5cba79465..000000000
--- a/qemu/block/qed-l2-cache.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * QEMU Enhanced Disk Format L2 Cache
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-/*
- * L2 table cache usage is as follows:
- *
- * An open image has one L2 table cache that is used to avoid accessing the
- * image file for recently referenced L2 tables.
- *
- * Cluster offset lookup translates the logical offset within the block device
- * to a cluster offset within the image file.  This is done by indexing into
- * the L1 and L2 tables which store cluster offsets.  It is here where the L2
- * table cache serves up recently referenced L2 tables.
- *
- * If there is a cache miss, that L2 table is read from the image file and
- * committed to the cache.  Subsequent accesses to that L2 table will be served
- * from the cache until the table is evicted from the cache.
- *
- * L2 tables are also committed to the cache when new L2 tables are allocated
- * in the image file.  Since the L2 table cache is write-through, the new L2
- * table is first written out to the image file and then committed to the
- * cache.
- *
- * Multiple I/O requests may be using an L2 table cache entry at any given
- * time.  That means an entry may be in use across several requests and
- * reference counting is needed to free the entry at the correct time.  In
- * particular, an entry evicted from the cache will only be freed once all
- * references are dropped.
- *
- * An in-flight I/O request will hold a reference to a L2 table cache entry for
- * the period during which it needs to access the L2 table.  This includes
- * cluster offset lookup, L2 table allocation, and L2 table update when a new
- * data cluster has been allocated.
- *
- * An interesting case occurs when two requests need to access an L2 table that
- * is not in the cache.  Since the operation to read the table from the image
- * file takes some time to complete, both requests may see a cache miss and
- * start reading the L2 table from the image file.  The first to finish will
- * commit its L2 table into the cache.  When the second tries to commit, its
- * table will be deleted in favor of the existing cache entry.
- */
-
-#include "qemu/osdep.h"
-#include "trace.h"
-#include "qed.h"
-
-/* Each L2 holds 2GB so this lets us fully cache a 100GB disk */
-#define MAX_L2_CACHE_SIZE 50
-
-/**
- * Initialize the L2 cache
- *
- * The cache starts out with an empty entry list and a count of zero.
- */
-void qed_init_l2_cache(L2TableCache *l2_cache)
-{
-    l2_cache->n_entries = 0;
-    QTAILQ_INIT(&l2_cache->entries);
-}
-
-/**
- * Free the L2 cache
- *
- * Every entry on the list is freed regardless of its reference count.  The
- * cache is left empty afterwards so that the list head and the entry count
- * do not refer to freed memory.
- */
-void qed_free_l2_cache(L2TableCache *l2_cache)
-{
-    CachedL2Table *entry, *next_entry;
-
-    QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next_entry) {
-        qemu_vfree(entry->table);
-        g_free(entry);
-    }
-
-    /* Reset to the state set up by qed_init_l2_cache() */
-    QTAILQ_INIT(&l2_cache->entries);
-    l2_cache->n_entries = 0;
-}
-
-/**
- * Allocate an uninitialized entry from the cache
- *
- * The new entry is zero-filled, carries a reference count of 1 and belongs
- * to the caller.  It is not linked into @l2_cache here.  The caller must
- * allocate the actual table field for this entry and it must be freeable
- * using qemu_vfree().
- */
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache)
-{
-    CachedL2Table *fresh = g_malloc0(sizeof(*fresh));
-
-    fresh->ref++;
-    trace_qed_alloc_l2_cache_entry(l2_cache, fresh);
-    return fresh;
-}
-
-/**
- * Drop one reference to an entry; the entry and its table are freed when the
- * count reaches zero.  A NULL entry is ignored.
- */
-void qed_unref_l2_cache_entry(CachedL2Table *entry)
-{
-    if (entry == NULL) {
-        return;
-    }
-
-    entry->ref--;
-    trace_qed_unref_l2_cache_entry(entry, entry->ref);
-
-    if (entry->ref != 0) {
-        return;
-    }
-
-    qemu_vfree(entry->table);
-    g_free(entry);
-}
-
-/**
- * Look up the cached L2 table stored at @offset.
- *
- * On a hit the entry's reference count is increased and the entry is
- * returned.  On a miss NULL is returned and the caller has to satisfy the
- * cache miss itself.
- */
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset)
-{
-    CachedL2Table *candidate;
-
-    QTAILQ_FOREACH(candidate, &l2_cache->entries, node) {
-        if (candidate->offset != offset) {
-            continue;
-        }
-
-        trace_qed_find_l2_cache_entry(l2_cache, candidate, offset,
-                                      candidate->ref);
-        candidate->ref++;
-        return candidate;
-    }
-
-    return NULL;
-}
-
-/**
- * Commit an L2 cache entry into the cache.  This is meant to be used as part of
- * the process to satisfy a cache miss.  A caller would allocate an entry which
- * is not actually in the L2 cache and then once the entry was valid and
- * present on disk, the entry can be committed into the cache.
- *
- * Since the cache is write-through, it's important that this function is not
- * called until the entry is present on disk and the L1 has been updated to
- * point to the entry.
- *
- * N.B. This function steals a reference to the l2_table from the caller so the
- * caller must obtain a new reference by issuing a call to
- * qed_find_l2_cache_entry().
- *
- * If an entry with the same offset is already cached, @l2_table is not
- * inserted: the caller's reference to it is dropped and the existing entry
- * stays in the cache.
- */
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table)
-{
-    CachedL2Table *entry;
-
-    entry = qed_find_l2_cache_entry(l2_cache, l2_table->offset);
-    if (entry) {
-        qed_unref_l2_cache_entry(entry);    /* reference taken by the lookup */
-        qed_unref_l2_cache_entry(l2_table); /* caller's ref to the duplicate */
-        return;
-    }
-
-    /* Evict an unused cache entry so we have space.  If all entries are in use
-     * we can grow the cache temporarily and we try to shrink back down later.
-     * Entries with a reference count above 1 are skipped as still in use.
-     */
-    if (l2_cache->n_entries >= MAX_L2_CACHE_SIZE) {
-        CachedL2Table *next;
-        QTAILQ_FOREACH_SAFE(entry, &l2_cache->entries, node, next) {
-            if (entry->ref > 1) {
-                continue;
-            }
-
-            QTAILQ_REMOVE(&l2_cache->entries, entry, node);
-            l2_cache->n_entries--;
-            qed_unref_l2_cache_entry(entry);
-
-            /* Stop evicting when we've shrunk back to max size */
-            if (l2_cache->n_entries < MAX_L2_CACHE_SIZE) {
-                break;
-            }
-        }
-    }
-
-    l2_cache->n_entries++;
-    QTAILQ_INSERT_TAIL(&l2_cache->entries, l2_table, node);
-}
diff --git a/qemu/block/qed-table.c b/qemu/block/qed-table.c
deleted file mode 100644
index 802945f5e..000000000
--- a/qemu/block/qed-table.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * QEMU Enhanced Disk Format Table I/O
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "trace.h"
-#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
-#include "qed.h"
-
-typedef struct {
-    GenericCB gencb;        /* filled in by gencb_alloc() */
-    BDRVQEDState *s;
-    QEDTable *table;        /* destination; byteswapped in place when read */
-
-    struct iovec iov;       /* single element covering table->offsets */
-    QEMUIOVector qiov;      /* wraps iov for the read request */
-} QEDReadTableCB;
-
-/**
- * Completion of a table read
- *
- * After a successful read every offset in the table is converted from
- * little-endian to CPU byte order.  The result is then traced and passed on
- * through gencb_complete(), which also releases the callback state.
- */
-static void qed_read_table_cb(void *opaque, int ret)
-{
-    QEDReadTableCB *rd = opaque;
-
-    if (ret == 0) {
-        QEDTable *table = rd->table;
-        int noffsets = rd->qiov.size / sizeof(uint64_t);
-        int i;
-
-        /* Byteswap offsets */
-        for (i = 0; i < noffsets; i++) {
-            table->offsets[i] = le64_to_cpu(table->offsets[i]);
-        }
-    }
-
-    /* Completion */
-    trace_qed_read_table_cb(rd->s, rd->table, ret);
-    gencb_complete(&rd->gencb, ret);
-}
-
-/**
- * Start an asynchronous read of a whole table
- *
- * @s:          QED state
- * @offset:     Offset of table in image file, in bytes
- * @table:      Table to read into
- * @cb:         Completion function
- * @opaque:     Argument for completion function
- *
- * The read covers cluster_size * table_size bytes.  Offsets are byteswapped
- * by qed_read_table_cb() before @cb is invoked.
- */
-static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                           BlockCompletionFunc *cb, void *opaque)
-{
-    QEDReadTableCB *read_table_cb = gencb_alloc(sizeof(*read_table_cb),
-                                                cb, opaque);
-    QEMUIOVector *qiov = &read_table_cb->qiov;
-
-    trace_qed_read_table(s, offset, table);
-
-    read_table_cb->s = s;
-    read_table_cb->table = table;
-    /* Separate statements; these two lines used to end in a comma */
-    read_table_cb->iov.iov_base = table->offsets;
-    read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size;
-
-    qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
-    bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
-                   qiov->size / BDRV_SECTOR_SIZE,
-                   qed_read_table_cb, read_table_cb);
-}
-
-typedef struct {
-    GenericCB gencb;        /* filled in by gencb_alloc() */
-    BDRVQEDState *s;
-    QEDTable *orig_table;   /* table passed by the caller; used for tracing */
-    QEDTable *table;        /* little-endian copy, freed on completion */
-    bool flush;             /* flush after write? */
-
-    struct iovec iov;       /* single element covering table->offsets */
-    QEMUIOVector qiov;      /* wraps iov for the write request */
-} QEDWriteTableCB;
-
-/**
- * Completion of a table write, and of the optional flush that follows it
- *
- * When the write succeeded and a flush was requested, the flush is issued
- * with this same function as its completion and the flag is cleared so the
- * second invocation finishes the request.  Otherwise the little-endian copy
- * of the table is freed and the result is passed on via gencb_complete().
- */
-static void qed_write_table_cb(void *opaque, int ret)
-{
-    QEDWriteTableCB *wr = opaque;
-
-    trace_qed_write_table_cb(wr->s, wr->orig_table, wr->flush, ret);
-
-    if (ret == 0 && wr->flush) {
-        /* We still need to flush first */
-        wr->flush = false;
-        bdrv_aio_flush(wr->s->bs, qed_write_table_cb, wr);
-        return;
-    }
-
-    qemu_vfree(wr->table);
-    gencb_complete(&wr->gencb, ret);
-}
-
-/**
- * Write out an updated part or all of a table
- *
- * @s:          QED state
- * @offset:     Offset of table in image file, in bytes
- * @table:      Table
- * @index:      Index of first element
- * @n:          Number of elements
- * @flush:      Whether or not to sync to disk
- * @cb:         Completion function
- * @opaque:     Argument for completion function
- *
- * The element range is widened to whole sectors before it is written.  The
- * data is written from a little-endian copy of those elements; the copy is
- * allocated here and freed by qed_write_table_cb().
- */
-static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
-                            unsigned int index, unsigned int n, bool flush,
-                            BlockCompletionFunc *cb, void *opaque)
-{
-    QEDWriteTableCB *write_table_cb;
-    unsigned int sector_mask = BDRV_SECTOR_SIZE / sizeof(uint64_t) - 1;
-    unsigned int start, end, i;
-    size_t len_bytes;
-
-    trace_qed_write_table(s, offset, table, index, n);
-
-    /* Calculate indices of the first and one after last elements, rounded
-     * outwards to a multiple of the number of elements per sector */
-    start = index & ~sector_mask;
-    end = (index + n + sector_mask) & ~sector_mask;
-
-    len_bytes = (end - start) * sizeof(uint64_t);
-
-    write_table_cb = gencb_alloc(sizeof(*write_table_cb), cb, opaque);
-    write_table_cb->s = s;
-    write_table_cb->orig_table = table;
-    write_table_cb->flush = flush;
-    write_table_cb->table = qemu_blockalign(s->bs, len_bytes);
-    write_table_cb->iov.iov_base = write_table_cb->table->offsets;
-    write_table_cb->iov.iov_len = len_bytes;
-    qemu_iovec_init_external(&write_table_cb->qiov, &write_table_cb->iov, 1);
-
-    /* Byteswap table; the copy is indexed relative to start */
-    for (i = start; i < end; i++) {
-        uint64_t le_offset = cpu_to_le64(table->offsets[i]);
-        write_table_cb->table->offsets[i - start] = le_offset;
-    }
-
-    /* Adjust for offset into table */
-    offset += start * sizeof(uint64_t);
-
-    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
-                    &write_table_cb->qiov,
-                    write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
-                    qed_write_table_cb, write_table_cb);
-}
-
-/**
- * Propagate return value from async callback
- */
-static void qed_sync_cb(void *opaque, int ret)
-{
-    *(int *)opaque = ret;
-}
-
-int qed_read_l1_table_sync(BDRVQEDState *s)
-{
-    int ret = -EINPROGRESS;
-
-    qed_read_table(s, s->header.l1_table_offset,
-                   s->l1_table, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
-
-    return ret;
-}
-
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque)
-{
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
-    qed_write_table(s, s->header.l1_table_offset,
-                    s->l1_table, index, n, false, cb, opaque);
-}
-
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
-                            unsigned int n)
-{
-    int ret = -EINPROGRESS;
-
-    qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
-
-    return ret;
-}
-
-typedef struct {
-    GenericCB gencb;
-    BDRVQEDState *s;
-    uint64_t l2_offset;
-    QEDRequest *request;
-} QEDReadL2TableCB;
-
-static void qed_read_l2_table_cb(void *opaque, int ret)
-{
-    QEDReadL2TableCB *read_l2_table_cb = opaque;
-    QEDRequest *request = read_l2_table_cb->request;
-    BDRVQEDState *s = read_l2_table_cb->s;
-    CachedL2Table *l2_table = request->l2_table;
-    uint64_t l2_offset = read_l2_table_cb->l2_offset;
-
-    if (ret) {
-        /* can't trust loaded L2 table anymore */
-        qed_unref_l2_cache_entry(l2_table);
-        request->l2_table = NULL;
-    } else {
-        l2_table->offset = l2_offset;
-
-        qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
-        /* This is guaranteed to succeed because we just committed the entry
-         * to the cache.
-         */
-        request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
-        assert(request->l2_table != NULL);
-    }
-
-    gencb_complete(&read_l2_table_cb->gencb, ret);
-}
-
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque)
-{
-    QEDReadL2TableCB *read_l2_table_cb;
-
-    qed_unref_l2_cache_entry(request->l2_table);
-
-    /* Check for cached L2 entry */
-    request->l2_table = qed_find_l2_cache_entry(&s->l2_cache, offset);
-    if (request->l2_table) {
-        cb(opaque, 0);
-        return;
-    }
-
-    request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
-    request->l2_table->table = qed_alloc_table(s);
-
-    read_l2_table_cb = gencb_alloc(sizeof(*read_l2_table_cb), cb, opaque);
-    read_l2_table_cb->s = s;
-    read_l2_table_cb->l2_offset = offset;
-    read_l2_table_cb->request = request;
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
-    qed_read_table(s, offset, request->l2_table->table,
-                   qed_read_l2_table_cb, read_l2_table_cb);
-}
-
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset)
-{
-    int ret = -EINPROGRESS;
-
-    qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
-
-    return ret;
-}
-
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque)
-{
-    BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
-    qed_write_table(s, request->l2_table->offset,
-                    request->l2_table->table, index, n, flush, cb, opaque);
-}
-
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                            unsigned int index, unsigned int n, bool flush)
-{
-    int ret = -EINPROGRESS;
-
-    qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
-    while (ret == -EINPROGRESS) {
-        aio_poll(bdrv_get_aio_context(s->bs), true);
-    }
-
-    return ret;
-}
diff --git a/qemu/block/qed.c b/qemu/block/qed.c
deleted file mode 100644
index 0af52741d..000000000
--- a/qemu/block/qed.c
+++ /dev/null
@@ -1,1689 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/timer.h"
-#include "trace.h"
-#include "qed.h"
-#include "qapi/qmp/qerror.h"
-#include "migration/migration.h"
-#include "sysemu/block-backend.h"
-
-static const AIOCBInfo qed_aiocb_info = {
-    .aiocb_size         = sizeof(QEDAIOCB),
-};
-
-static int bdrv_qed_probe(const uint8_t *buf, int buf_size,
-                          const char *filename)
-{
-    const QEDHeader *header = (const QEDHeader *)buf;
-
-    if (buf_size < sizeof(*header)) {
-        return 0;
-    }
-    if (le32_to_cpu(header->magic) != QED_MAGIC) {
-        return 0;
-    }
-    return 100;
-}
-
-/**
- * Check whether an image format is raw
- *
- * @fmt:    Backing file format, may be NULL
- */
-static bool qed_fmt_is_raw(const char *fmt)
-{
-    return fmt && strcmp(fmt, "raw") == 0;
-}
-
-static void qed_header_le_to_cpu(const QEDHeader *le, QEDHeader *cpu)
-{
-    cpu->magic = le32_to_cpu(le->magic);
-    cpu->cluster_size = le32_to_cpu(le->cluster_size);
-    cpu->table_size = le32_to_cpu(le->table_size);
-    cpu->header_size = le32_to_cpu(le->header_size);
-    cpu->features = le64_to_cpu(le->features);
-    cpu->compat_features = le64_to_cpu(le->compat_features);
-    cpu->autoclear_features = le64_to_cpu(le->autoclear_features);
-    cpu->l1_table_offset = le64_to_cpu(le->l1_table_offset);
-    cpu->image_size = le64_to_cpu(le->image_size);
-    cpu->backing_filename_offset = le32_to_cpu(le->backing_filename_offset);
-    cpu->backing_filename_size = le32_to_cpu(le->backing_filename_size);
-}
-
-static void qed_header_cpu_to_le(const QEDHeader *cpu, QEDHeader *le)
-{
-    le->magic = cpu_to_le32(cpu->magic);
-    le->cluster_size = cpu_to_le32(cpu->cluster_size);
-    le->table_size = cpu_to_le32(cpu->table_size);
-    le->header_size = cpu_to_le32(cpu->header_size);
-    le->features = cpu_to_le64(cpu->features);
-    le->compat_features = cpu_to_le64(cpu->compat_features);
-    le->autoclear_features = cpu_to_le64(cpu->autoclear_features);
-    le->l1_table_offset = cpu_to_le64(cpu->l1_table_offset);
-    le->image_size = cpu_to_le64(cpu->image_size);
-    le->backing_filename_offset = cpu_to_le32(cpu->backing_filename_offset);
-    le->backing_filename_size = cpu_to_le32(cpu->backing_filename_size);
-}
-
-int qed_write_header_sync(BDRVQEDState *s)
-{
-    QEDHeader le;
-    int ret;
-
-    qed_header_cpu_to_le(&s->header, &le);
-    ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
-    if (ret != sizeof(le)) {
-        return ret;
-    }
-    return 0;
-}
-
-typedef struct {
-    GenericCB gencb;
-    BDRVQEDState *s;
-    struct iovec iov;
-    QEMUIOVector qiov;
-    int nsectors;
-    uint8_t *buf;
-} QEDWriteHeaderCB;
-
-static void qed_write_header_cb(void *opaque, int ret)
-{
-    QEDWriteHeaderCB *write_header_cb = opaque;
-
-    qemu_vfree(write_header_cb->buf);
-    gencb_complete(write_header_cb, ret);
-}
-
-static void qed_write_header_read_cb(void *opaque, int ret)
-{
-    QEDWriteHeaderCB *write_header_cb = opaque;
-    BDRVQEDState *s = write_header_cb->s;
-
-    if (ret) {
-        qed_write_header_cb(write_header_cb, ret);
-        return;
-    }
-
-    /* Update header */
-    qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
-
-    bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
-                    write_header_cb->nsectors, qed_write_header_cb,
-                    write_header_cb);
-}
-
-/**
- * Update header in-place (does not rewrite backing filename or other strings)
- *
- * This function only updates known header fields in-place and does not affect
- * extra data after the QED header.
- */
-static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
-                             void *opaque)
-{
-    /* We must write full sectors for O_DIRECT but cannot necessarily generate
-     * the data following the header if an unrecognized compat feature is
-     * active.  Therefore, first read the sectors containing the header, update
-     * them, and write back.
-     */
-
-    int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
-                   BDRV_SECTOR_SIZE;
-    size_t len = nsectors * BDRV_SECTOR_SIZE;
-    QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
-                                                    cb, opaque);
-
-    write_header_cb->s = s;
-    write_header_cb->nsectors = nsectors;
-    write_header_cb->buf = qemu_blockalign(s->bs, len);
-    write_header_cb->iov.iov_base = write_header_cb->buf;
-    write_header_cb->iov.iov_len = len;
-    qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
-
-    bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
-                   qed_write_header_read_cb, write_header_cb);
-}
-
-static uint64_t qed_max_image_size(uint32_t cluster_size, uint32_t table_size)
-{
-    uint64_t table_entries;
-    uint64_t l2_size;
-
-    table_entries = (table_size * cluster_size) / sizeof(uint64_t);
-    l2_size = table_entries * cluster_size;
-
-    return l2_size * table_entries;
-}
-
-static bool qed_is_cluster_size_valid(uint32_t cluster_size)
-{
-    if (cluster_size < QED_MIN_CLUSTER_SIZE ||
-        cluster_size > QED_MAX_CLUSTER_SIZE) {
-        return false;
-    }
-    if (cluster_size & (cluster_size - 1)) {
-        return false; /* not power of 2 */
-    }
-    return true;
-}
-
-static bool qed_is_table_size_valid(uint32_t table_size)
-{
-    if (table_size < QED_MIN_TABLE_SIZE ||
-        table_size > QED_MAX_TABLE_SIZE) {
-        return false;
-    }
-    if (table_size & (table_size - 1)) {
-        return false; /* not power of 2 */
-    }
-    return true;
-}
-
-static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
-                                    uint32_t table_size)
-{
-    if (image_size % BDRV_SECTOR_SIZE != 0) {
-        return false; /* not multiple of sector size */
-    }
-    if (image_size > qed_max_image_size(cluster_size, table_size)) {
-        return false; /* image is too large */
-    }
-    return true;
-}
-
-/**
- * Read a string of known length from the image file
- *
- * @file:       Image file
- * @offset:     File offset to start of string, in bytes
- * @n:          String length in bytes
- * @buf:        Destination buffer
- * @buflen:     Destination buffer length in bytes
- * @ret:        0 on success, -errno on failure
- *
- * The string is NUL-terminated.
- */
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
-                           char *buf, size_t buflen)
-{
-    int ret;
-    if (n >= buflen) {
-        return -EINVAL;
-    }
-    ret = bdrv_pread(file, offset, buf, n);
-    if (ret < 0) {
-        return ret;
-    }
-    buf[n] = '\0';
-    return 0;
-}
-
-/**
- * Allocate new clusters
- *
- * @s:          QED state
- * @n:          Number of contiguous clusters to allocate
- * @ret:        Offset of first allocated cluster
- *
- * This function only produces the offset where the new clusters should be
- * written.  It updates BDRVQEDState but does not make any changes to the image
- * file.
- */
-static uint64_t qed_alloc_clusters(BDRVQEDState *s, unsigned int n)
-{
-    uint64_t offset = s->file_size;
-    s->file_size += n * s->header.cluster_size;
-    return offset;
-}
-
-QEDTable *qed_alloc_table(BDRVQEDState *s)
-{
-    /* Honor O_DIRECT memory alignment requirements */
-    return qemu_blockalign(s->bs,
-                           s->header.cluster_size * s->header.table_size);
-}
-
-/**
- * Allocate a new zeroed L2 table
- */
-static CachedL2Table *qed_new_l2_table(BDRVQEDState *s)
-{
-    CachedL2Table *l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
-
-    l2_table->table = qed_alloc_table(s);
-    l2_table->offset = qed_alloc_clusters(s, s->header.table_size);
-
-    memset(l2_table->table->offsets, 0,
-           s->header.cluster_size * s->header.table_size);
-    return l2_table;
-}
-
-static void qed_aio_next_io(void *opaque, int ret);
-
-static void qed_plug_allocating_write_reqs(BDRVQEDState *s)
-{
-    assert(!s->allocating_write_reqs_plugged);
-
-    s->allocating_write_reqs_plugged = true;
-}
-
-static void qed_unplug_allocating_write_reqs(BDRVQEDState *s)
-{
-    QEDAIOCB *acb;
-
-    assert(s->allocating_write_reqs_plugged);
-
-    s->allocating_write_reqs_plugged = false;
-
-    acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
-    if (acb) {
-        qed_aio_next_io(acb, 0);
-    }
-}
-
-static void qed_finish_clear_need_check(void *opaque, int ret)
-{
-    /* Do nothing */
-}
-
-static void qed_flush_after_clear_need_check(void *opaque, int ret)
-{
-    BDRVQEDState *s = opaque;
-
-    bdrv_aio_flush(s->bs, qed_finish_clear_need_check, s);
-
-    /* No need to wait until flush completes */
-    qed_unplug_allocating_write_reqs(s);
-}
-
-static void qed_clear_need_check(void *opaque, int ret)
-{
-    BDRVQEDState *s = opaque;
-
-    if (ret) {
-        qed_unplug_allocating_write_reqs(s);
-        return;
-    }
-
-    s->header.features &= ~QED_F_NEED_CHECK;
-    qed_write_header(s, qed_flush_after_clear_need_check, s);
-}
-
-static void qed_need_check_timer_cb(void *opaque)
-{
-    BDRVQEDState *s = opaque;
-
-    /* The timer should only fire when allocating writes have drained */
-    assert(!QSIMPLEQ_FIRST(&s->allocating_write_reqs));
-
-    trace_qed_need_check_timer_cb(s);
-
-    qed_plug_allocating_write_reqs(s);
-
-    /* Ensure writes are on disk before clearing flag */
-    bdrv_aio_flush(s->bs, qed_clear_need_check, s);
-}
-
-static void qed_start_need_check_timer(BDRVQEDState *s)
-{
-    trace_qed_start_need_check_timer(s);
-
-    /* Use QEMU_CLOCK_VIRTUAL so we don't alter the image file while suspended for
-     * migration.
-     */
-    timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                   NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT);
-}
-
-/* It's okay to call this multiple times or when no timer is started */
-static void qed_cancel_need_check_timer(BDRVQEDState *s)
-{
-    trace_qed_cancel_need_check_timer(s);
-    timer_del(s->need_check_timer);
-}
-
-static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    qed_cancel_need_check_timer(s);
-    timer_free(s->need_check_timer);
-}
-
-static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
-                                        AioContext *new_context)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    s->need_check_timer = aio_timer_new(new_context,
-                                        QEMU_CLOCK_VIRTUAL, SCALE_NS,
-                                        qed_need_check_timer_cb, s);
-    if (s->header.features & QED_F_NEED_CHECK) {
-        qed_start_need_check_timer(s);
-    }
-}
-
-static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
-{
-    BDRVQEDState *s = bs->opaque;
-    QEDHeader le_header;
-    int64_t file_size;
-    int ret;
-
-    s->bs = bs;
-    QSIMPLEQ_INIT(&s->allocating_write_reqs);
-
-    ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
-    if (ret < 0) {
-        return ret;
-    }
-    qed_header_le_to_cpu(&le_header, &s->header);
-
-    if (s->header.magic != QED_MAGIC) {
-        error_setg(errp, "Image not in QED format");
-        return -EINVAL;
-    }
-    if (s->header.features & ~QED_FEATURE_MASK) {
-        /* image uses unsupported feature bits */
-        error_setg(errp, "Unsupported QED features: %" PRIx64,
-                   s->header.features & ~QED_FEATURE_MASK);
-        return -ENOTSUP;
-    }
-    if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
-        return -EINVAL;
-    }
-
-    /* Round down file size to the last cluster */
-    file_size = bdrv_getlength(bs->file->bs);
-    if (file_size < 0) {
-        return file_size;
-    }
-    s->file_size = qed_start_of_cluster(s, file_size);
-
-    if (!qed_is_table_size_valid(s->header.table_size)) {
-        return -EINVAL;
-    }
-    if (!qed_is_image_size_valid(s->header.image_size,
-                                 s->header.cluster_size,
-                                 s->header.table_size)) {
-        return -EINVAL;
-    }
-    if (!qed_check_table_offset(s, s->header.l1_table_offset)) {
-        return -EINVAL;
-    }
-
-    s->table_nelems = (s->header.cluster_size * s->header.table_size) /
-                      sizeof(uint64_t);
-    s->l2_shift = ctz32(s->header.cluster_size);
-    s->l2_mask = s->table_nelems - 1;
-    s->l1_shift = s->l2_shift + ctz32(s->table_nelems);
-
-    /* Header size calculation must not overflow uint32_t */
-    if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
-        return -EINVAL;
-    }
-
-    if ((s->header.features & QED_F_BACKING_FILE)) {
-        if ((uint64_t)s->header.backing_filename_offset +
-            s->header.backing_filename_size >
-            s->header.cluster_size * s->header.header_size) {
-            return -EINVAL;
-        }
-
-        ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
-                              s->header.backing_filename_size, bs->backing_file,
-                              sizeof(bs->backing_file));
-        if (ret < 0) {
-            return ret;
-        }
-
-        if (s->header.features & QED_F_BACKING_FORMAT_NO_PROBE) {
-            pstrcpy(bs->backing_format, sizeof(bs->backing_format), "raw");
-        }
-    }
-
-    /* Reset unknown autoclear feature bits.  This is a backwards
-     * compatibility mechanism that allows images to be opened by older
-     * programs, which "knock out" unknown feature bits.  When an image is
-     * opened by a newer program again it can detect that the autoclear
-     * feature is no longer valid.
-     */
-    if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
-        !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) {
-        s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
-
-        ret = qed_write_header_sync(s);
-        if (ret) {
-            return ret;
-        }
-
-        /* From here on only known autoclear feature bits are valid */
-        bdrv_flush(bs->file->bs);
-    }
-
-    s->l1_table = qed_alloc_table(s);
-    qed_init_l2_cache(&s->l2_cache);
-
-    ret = qed_read_l1_table_sync(s);
-    if (ret) {
-        goto out;
-    }
-
-    /* If image was not closed cleanly, check consistency */
-    if (!(flags & BDRV_O_CHECK) && (s->header.features & QED_F_NEED_CHECK)) {
-        /* Read-only images cannot be fixed.  There is no risk of corruption
-         * since write operations are not possible.  Therefore, allow
-         * potentially inconsistent images to be opened read-only.  This can
-         * aid data recovery from an otherwise inconsistent image.
-         */
-        if (!bdrv_is_read_only(bs->file->bs) &&
-            !(flags & BDRV_O_INACTIVE)) {
-            BdrvCheckResult result = {0};
-
-            ret = qed_check(s, &result, true);
-            if (ret) {
-                goto out;
-            }
-        }
-    }
-
-    bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
-out:
-    if (ret) {
-        qed_free_l2_cache(&s->l2_cache);
-        qemu_vfree(s->l1_table);
-    }
-    return ret;
-}
-
-static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
-}
-
-/* We have nothing to do for QED reopen, stubs just return
- * success */
-static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
-                                   BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static void bdrv_qed_close(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    bdrv_qed_detach_aio_context(bs);
-
-    /* Ensure writes reach stable storage */
-    bdrv_flush(bs->file->bs);
-
-    /* Clean shutdown, no check required on next open */
-    if (s->header.features & QED_F_NEED_CHECK) {
-        s->header.features &= ~QED_F_NEED_CHECK;
-        qed_write_header_sync(s);
-    }
-
-    qed_free_l2_cache(&s->l2_cache);
-    qemu_vfree(s->l1_table);
-}
-
-static int qed_create(const char *filename, uint32_t cluster_size,
-                      uint64_t image_size, uint32_t table_size,
-                      const char *backing_file, const char *backing_fmt,
-                      QemuOpts *opts, Error **errp)
-{
-    QEDHeader header = {
-        .magic = QED_MAGIC,
-        .cluster_size = cluster_size,
-        .table_size = table_size,
-        .header_size = 1,
-        .features = 0,
-        .compat_features = 0,
-        .l1_table_offset = cluster_size,
-        .image_size = image_size,
-    };
-    QEDHeader le_header;
-    uint8_t *l1_table = NULL;
-    size_t l1_size = header.cluster_size * header.table_size;
-    Error *local_err = NULL;
-    int ret = 0;
-    BlockBackend *blk;
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        return ret;
-    }
-
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    /* File must start empty and grow, check truncate is supported */
-    ret = blk_truncate(blk, 0);
-    if (ret < 0) {
-        goto out;
-    }
-
-    if (backing_file) {
-        header.features |= QED_F_BACKING_FILE;
-        header.backing_filename_offset = sizeof(le_header);
-        header.backing_filename_size = strlen(backing_file);
-
-        if (qed_fmt_is_raw(backing_fmt)) {
-            header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
-        }
-    }
-
-    qed_header_cpu_to_le(&header, &le_header);
-    ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
-    if (ret < 0) {
-        goto out;
-    }
-    ret = blk_pwrite(blk, sizeof(le_header), backing_file,
-                     header.backing_filename_size);
-    if (ret < 0) {
-        goto out;
-    }
-
-    l1_table = g_malloc0(l1_size);
-    ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
-    if (ret < 0) {
-        goto out;
-    }
-
-    ret = 0; /* success */
-out:
-    g_free(l1_table);
-    blk_unref(blk);
-    return ret;
-}
-
-static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    uint64_t image_size = 0;
-    uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
-    uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
-    char *backing_file = NULL;
-    char *backing_fmt = NULL;
-    int ret;
-
-    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
-    cluster_size = qemu_opt_get_size_del(opts,
-                                         BLOCK_OPT_CLUSTER_SIZE,
-                                         QED_DEFAULT_CLUSTER_SIZE);
-    table_size = qemu_opt_get_size_del(opts, BLOCK_OPT_TABLE_SIZE,
-                                       QED_DEFAULT_TABLE_SIZE);
-
-    if (!qed_is_cluster_size_valid(cluster_size)) {
-        error_setg(errp, "QED cluster size must be within range [%u, %u] "
-                         "and power of 2",
-                   QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
-        ret = -EINVAL;
-        goto finish;
-    }
-    if (!qed_is_table_size_valid(table_size)) {
-        error_setg(errp, "QED table size must be within range [%u, %u] "
-                         "and power of 2",
-                   QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
-        ret = -EINVAL;
-        goto finish;
-    }
-    if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
-        error_setg(errp, "QED image size must be a non-zero multiple of "
-                         "cluster size and less than %" PRIu64 " bytes",
-                   qed_max_image_size(cluster_size, table_size));
-        ret = -EINVAL;
-        goto finish;
-    }
-
-    ret = qed_create(filename, cluster_size, image_size, table_size,
-                     backing_file, backing_fmt, opts, errp);
-
-finish:
-    g_free(backing_file);
-    g_free(backing_fmt);
-    return ret;
-}
-
-typedef struct {
-    BlockDriverState *bs;
-    Coroutine *co;
-    uint64_t pos;
-    int64_t status;
-    int *pnum;
-    BlockDriverState **file;
-} QEDIsAllocatedCB;
-
-static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
-{
-    QEDIsAllocatedCB *cb = opaque;
-    BDRVQEDState *s = cb->bs->opaque;
-    *cb->pnum = len / BDRV_SECTOR_SIZE;
-    switch (ret) {
-    case QED_CLUSTER_FOUND:
-        offset |= qed_offset_into_cluster(s, cb->pos);
-        cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
-        *cb->file = cb->bs->file->bs;
-        break;
-    case QED_CLUSTER_ZERO:
-        cb->status = BDRV_BLOCK_ZERO;
-        break;
-    case QED_CLUSTER_L2:
-    case QED_CLUSTER_L1:
-        cb->status = 0;
-        break;
-    default:
-        assert(ret < 0);
-        cb->status = ret;
-        break;
-    }
-
-    if (cb->co) {
-        qemu_coroutine_enter(cb->co, NULL);
-    }
-}
-
-static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
-                                                 int64_t sector_num,
-                                                 int nb_sectors, int *pnum,
-                                                 BlockDriverState **file)
-{
-    BDRVQEDState *s = bs->opaque;
-    size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
-    QEDIsAllocatedCB cb = {
-        .bs = bs,
-        .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
-        .status = BDRV_BLOCK_OFFSET_MASK,
-        .pnum = pnum,
-        .file = file,
-    };
-    QEDRequest request = { .l2_table = NULL };
-
-    qed_find_cluster(s, &request, cb.pos, len, qed_is_allocated_cb, &cb);
-
-    /* Now sleep if the callback wasn't invoked immediately */
-    while (cb.status == BDRV_BLOCK_OFFSET_MASK) {
-        cb.co = qemu_coroutine_self();
-        qemu_coroutine_yield();
-    }
-
-    qed_unref_l2_cache_entry(request.l2_table);
-
-    return cb.status;
-}
-
-static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
-{
-    return acb->common.bs->opaque;
-}
-
-/**
- * Read from the backing file or zero-fill if no backing file
- *
- * @s:              QED state
- * @pos:            Byte position in device
- * @qiov:           Destination I/O vector
- * @backing_qiov:   Possibly shortened copy of qiov, to be allocated here
- * @cb:             Completion function
- * @opaque:         User data for completion function
- *
- * This function reads qiov->size bytes starting at pos from the backing file.
- * If there is no backing file then zeroes are read.
- */
-static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
-                                  QEMUIOVector *qiov,
-                                  QEMUIOVector **backing_qiov,
-                                  BlockCompletionFunc *cb, void *opaque)
-{
-    uint64_t backing_length = 0;
-    size_t size;
-
-    /* If there is a backing file, get its length.  Treat the absence of a
-     * backing file like a zero length backing file.
-     */
-    if (s->bs->backing) {
-        int64_t l = bdrv_getlength(s->bs->backing->bs);
-        if (l < 0) {
-            cb(opaque, l);
-            return;
-        }
-        backing_length = l;
-    }
-
-    /* Zero all sectors if reading beyond the end of the backing file */
-    if (pos >= backing_length ||
-        pos + qiov->size > backing_length) {
-        qemu_iovec_memset(qiov, 0, 0, qiov->size);
-    }
-
-    /* Complete now if there are no backing file sectors to read */
-    if (pos >= backing_length) {
-        cb(opaque, 0);
-        return;
-    }
-
-    /* If the read straddles the end of the backing file, shorten it */
-    size = MIN((uint64_t)backing_length - pos, qiov->size);
-
-    assert(*backing_qiov == NULL);
-    *backing_qiov = g_new(QEMUIOVector, 1);
-    qemu_iovec_init(*backing_qiov, qiov->niov);
-    qemu_iovec_concat(*backing_qiov, qiov, 0, size);
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
-    bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
-                   *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
-}
-
-typedef struct {
-    GenericCB gencb;
-    BDRVQEDState *s;
-    QEMUIOVector qiov;
-    QEMUIOVector *backing_qiov;
-    struct iovec iov;
-    uint64_t offset;
-} CopyFromBackingFileCB;
-
-static void qed_copy_from_backing_file_cb(void *opaque, int ret)
-{
-    CopyFromBackingFileCB *copy_cb = opaque;
-    qemu_vfree(copy_cb->iov.iov_base);
-    gencb_complete(&copy_cb->gencb, ret);
-}
-
-static void qed_copy_from_backing_file_write(void *opaque, int ret)
-{
-    CopyFromBackingFileCB *copy_cb = opaque;
-    BDRVQEDState *s = copy_cb->s;
-
-    if (copy_cb->backing_qiov) {
-        qemu_iovec_destroy(copy_cb->backing_qiov);
-        g_free(copy_cb->backing_qiov);
-        copy_cb->backing_qiov = NULL;
-    }
-
-    if (ret) {
-        qed_copy_from_backing_file_cb(copy_cb, ret);
-        return;
-    }
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
-    bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
-                    &copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
-                    qed_copy_from_backing_file_cb, copy_cb);
-}
-
-/**
- * Copy data from backing file into the image
- *
- * @s:          QED state
- * @pos:        Byte position in device
- * @len:        Number of bytes
- * @offset:     Byte offset in image file
- * @cb:         Completion function
- * @opaque:     User data for completion function
- */
-static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
-                                       uint64_t len, uint64_t offset,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
-{
-    CopyFromBackingFileCB *copy_cb;
-
-    /* Skip copy entirely if there is no work to do */
-    if (len == 0) {
-        cb(opaque, 0);
-        return;
-    }
-
-    copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
-    copy_cb->s = s;
-    copy_cb->offset = offset;
-    copy_cb->backing_qiov = NULL;
-    copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
-    copy_cb->iov.iov_len = len;
-    qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
-
-    qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
-                          qed_copy_from_backing_file_write, copy_cb);
-}
-
-/**
- * Link one or more contiguous clusters into a table
- *
- * @s:              QED state
- * @table:          L2 table
- * @index:          First cluster index
- * @n:              Number of contiguous clusters
- * @cluster:        First cluster offset
- *
- * The cluster offset may be an allocated byte offset in the image file, the
- * zero cluster marker, or the unallocated cluster marker.
- */
-static void qed_update_l2_table(BDRVQEDState *s, QEDTable *table, int index,
-                                unsigned int n, uint64_t cluster)
-{
-    int i;
-    for (i = index; i < index + n; i++) {
-        table->offsets[i] = cluster;
-        if (!qed_offset_is_unalloc_cluster(cluster) &&
-            !qed_offset_is_zero_cluster(cluster)) {
-            cluster += s->header.cluster_size;
-        }
-    }
-}
-
-static void qed_aio_complete_bh(void *opaque)
-{
-    QEDAIOCB *acb = opaque;
-    BlockCompletionFunc *cb = acb->common.cb;
-    void *user_opaque = acb->common.opaque;
-    int ret = acb->bh_ret;
-
-    qemu_bh_delete(acb->bh);
-    qemu_aio_unref(acb);
-
-    /* Invoke callback */
-    cb(user_opaque, ret);
-}
-
-static void qed_aio_complete(QEDAIOCB *acb, int ret)
-{
-    BDRVQEDState *s = acb_to_s(acb);
-
-    trace_qed_aio_complete(s, acb, ret);
-
-    /* Free resources */
-    qemu_iovec_destroy(&acb->cur_qiov);
-    qed_unref_l2_cache_entry(acb->request.l2_table);
-
-    /* Free the buffer we may have allocated for zero writes */
-    if (acb->flags & QED_AIOCB_ZERO) {
-        qemu_vfree(acb->qiov->iov[0].iov_base);
-        acb->qiov->iov[0].iov_base = NULL;
-    }
-
-    /* Arrange for a bh to invoke the completion function */
-    acb->bh_ret = ret;
-    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                         qed_aio_complete_bh, acb);
-    qemu_bh_schedule(acb->bh);
-
-    /* Start next allocating write request waiting behind this one.  Note that
-     * requests enqueue themselves when they first hit an unallocated cluster
-     * but they wait until the entire request is finished before waking up the
-     * next request in the queue.  This ensures that we don't cycle through
-     * requests multiple times but rather finish one at a time completely.
-     */
-    if (acb == QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
-        QSIMPLEQ_REMOVE_HEAD(&s->allocating_write_reqs, next);
-        acb = QSIMPLEQ_FIRST(&s->allocating_write_reqs);
-        if (acb) {
-            qed_aio_next_io(acb, 0);
-        } else if (s->header.features & QED_F_NEED_CHECK) {
-            qed_start_need_check_timer(s);
-        }
-    }
-}
-
-/**
- * Commit the current L2 table to the cache
- */
-static void qed_commit_l2_update(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    CachedL2Table *l2_table = acb->request.l2_table;
-    uint64_t l2_offset = l2_table->offset;
-
-    qed_commit_l2_cache_entry(&s->l2_cache, l2_table);
-
-    /* This is guaranteed to succeed because we just committed the entry to the
-     * cache.
-     */
-    acb->request.l2_table = qed_find_l2_cache_entry(&s->l2_cache, l2_offset);
-    assert(acb->request.l2_table != NULL);
-
-    qed_aio_next_io(opaque, ret);
-}
-
-/**
- * Update L1 table with new L2 table offset and write it out
- */
-static void qed_aio_write_l1_update(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    int index;
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    index = qed_l1_index(s, acb->cur_pos);
-    s->l1_table->offsets[index] = acb->request.l2_table->offset;
-
-    qed_write_l1_table(s, index, 1, qed_commit_l2_update, acb);
-}
-
-/**
- * Update L2 table with new cluster offsets and write them out
- */
-static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
-{
-    BDRVQEDState *s = acb_to_s(acb);
-    bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
-    int index;
-
-    if (ret) {
-        goto err;
-    }
-
-    if (need_alloc) {
-        qed_unref_l2_cache_entry(acb->request.l2_table);
-        acb->request.l2_table = qed_new_l2_table(s);
-    }
-
-    index = qed_l2_index(s, acb->cur_pos);
-    qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
-                         offset);
-
-    if (need_alloc) {
-        /* Write out the whole new L2 table */
-        qed_write_l2_table(s, &acb->request, 0, s->table_nelems, true,
-                            qed_aio_write_l1_update, acb);
-    } else {
-        /* Write out only the updated part of the L2 table */
-        qed_write_l2_table(s, &acb->request, index, acb->cur_nclusters, false,
-                            qed_aio_next_io, acb);
-    }
-    return;
-
-err:
-    qed_aio_complete(acb, ret);
-}
-
-static void qed_aio_write_l2_update_cb(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
-}
-
-/**
- * Flush new data clusters before updating the L2 table
- *
- * This flush is necessary when a backing file is in use.  A crash during an
- * allocating write could result in empty clusters in the image.  If the write
- * only touched a subregion of the cluster, then backing image sectors have
- * been lost in the untouched region.  The solution is to flush after writing a
- * new data cluster and before updating the L2 table.
- */
-static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-
-    if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
-        qed_aio_complete(acb, -EIO);
-    }
-}
-
-/**
- * Write data to the image file
- */
-static void qed_aio_write_main(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    uint64_t offset = acb->cur_cluster +
-                      qed_offset_into_cluster(s, acb->cur_pos);
-    BlockCompletionFunc *next_fn;
-
-    trace_qed_aio_write_main(s, acb, ret, offset, acb->cur_qiov.size);
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
-        next_fn = qed_aio_next_io;
-    } else {
-        if (s->bs->backing) {
-            next_fn = qed_aio_write_flush_before_l2_update;
-        } else {
-            next_fn = qed_aio_write_l2_update_cb;
-        }
-    }
-
-    BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
-    bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
-                    &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-                    next_fn, acb);
-}
-
-/**
- * Populate back untouched region of new data cluster
- */
-static void qed_aio_write_postfill(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    uint64_t start = acb->cur_pos + acb->cur_qiov.size;
-    uint64_t len =
-        qed_start_of_cluster(s, start + s->header.cluster_size - 1) - start;
-    uint64_t offset = acb->cur_cluster +
-                      qed_offset_into_cluster(s, acb->cur_pos) +
-                      acb->cur_qiov.size;
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    trace_qed_aio_write_postfill(s, acb, start, len, offset);
-    qed_copy_from_backing_file(s, start, len, offset,
-                                qed_aio_write_main, acb);
-}
-
-/**
- * Populate front untouched region of new data cluster
- */
-static void qed_aio_write_prefill(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    uint64_t start = qed_start_of_cluster(s, acb->cur_pos);
-    uint64_t len = qed_offset_into_cluster(s, acb->cur_pos);
-
-    trace_qed_aio_write_prefill(s, acb, start, len, acb->cur_cluster);
-    qed_copy_from_backing_file(s, start, len, acb->cur_cluster,
-                                qed_aio_write_postfill, acb);
-}
-
-/**
- * Check if the QED_F_NEED_CHECK bit should be set during allocating write
- */
-static bool qed_should_set_need_check(BDRVQEDState *s)
-{
-    /* The flush before L2 update path ensures consistency */
-    if (s->bs->backing) {
-        return false;
-    }
-
-    return !(s->header.features & QED_F_NEED_CHECK);
-}
-
-static void qed_aio_write_zero_cluster(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    qed_aio_write_l2_update(acb, 0, 1);
-}
-
-/**
- * Write new data cluster
- *
- * @acb:        Write request
- * @len:        Length in bytes
- *
- * This path is taken when writing to previously unallocated clusters.
- */
-static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
-{
-    BDRVQEDState *s = acb_to_s(acb);
-    BlockCompletionFunc *cb;
-
-    /* Cancel timer when the first allocating request comes in */
-    if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
-        qed_cancel_need_check_timer(s);
-    }
-
-    /* Freeze this request if another allocating write is in progress */
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs)) {
-        QSIMPLEQ_INSERT_TAIL(&s->allocating_write_reqs, acb, next);
-    }
-    if (acb != QSIMPLEQ_FIRST(&s->allocating_write_reqs) ||
-        s->allocating_write_reqs_plugged) {
-        return; /* wait for existing request to finish */
-    }
-
-    acb->cur_nclusters = qed_bytes_to_clusters(s,
-            qed_offset_into_cluster(s, acb->cur_pos) + len);
-    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
-    if (acb->flags & QED_AIOCB_ZERO) {
-        /* Skip ahead if the clusters are already zero */
-        if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
-            qed_aio_next_io(acb, 0);
-            return;
-        }
-
-        cb = qed_aio_write_zero_cluster;
-    } else {
-        cb = qed_aio_write_prefill;
-        acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
-    }
-
-    if (qed_should_set_need_check(s)) {
-        s->header.features |= QED_F_NEED_CHECK;
-        qed_write_header(s, cb, acb);
-    } else {
-        cb(acb, 0);
-    }
-}
-
-/**
- * Write data cluster in place
- *
- * @acb:        Write request
- * @offset:     Cluster offset in bytes
- * @len:        Length in bytes
- *
- * This path is taken when writing to already allocated clusters.
- */
-static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
-{
-    /* Allocate buffer for zero writes */
-    if (acb->flags & QED_AIOCB_ZERO) {
-        struct iovec *iov = acb->qiov->iov;
-
-        if (!iov->iov_base) {
-            iov->iov_base = qemu_try_blockalign(acb->common.bs, iov->iov_len);
-            if (iov->iov_base == NULL) {
-                qed_aio_complete(acb, -ENOMEM);
-                return;
-            }
-            memset(iov->iov_base, 0, iov->iov_len);
-        }
-    }
-
-    /* Calculate the I/O vector */
-    acb->cur_cluster = offset;
-    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
-    /* Do the actual write */
-    qed_aio_write_main(acb, 0);
-}
-
-/**
- * Write data cluster
- *
- * @opaque:     Write request
- * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- *              or -errno
- * @offset:     Cluster offset in bytes
- * @len:        Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_write_data(void *opaque, int ret,
-                               uint64_t offset, size_t len)
-{
-    QEDAIOCB *acb = opaque;
-
-    trace_qed_aio_write_data(acb_to_s(acb), acb, ret, offset, len);
-
-    acb->find_cluster_ret = ret;
-
-    switch (ret) {
-    case QED_CLUSTER_FOUND:
-        qed_aio_write_inplace(acb, offset, len);
-        break;
-
-    case QED_CLUSTER_L2:
-    case QED_CLUSTER_L1:
-    case QED_CLUSTER_ZERO:
-        qed_aio_write_alloc(acb, len);
-        break;
-
-    default:
-        qed_aio_complete(acb, ret);
-        break;
-    }
-}
-
-/**
- * Read data cluster
- *
- * @opaque:     Read request
- * @ret:        QED_CLUSTER_FOUND, QED_CLUSTER_L2, QED_CLUSTER_L1,
- *              or -errno
- * @offset:     Cluster offset in bytes
- * @len:        Length in bytes
- *
- * Callback from qed_find_cluster().
- */
-static void qed_aio_read_data(void *opaque, int ret,
-                              uint64_t offset, size_t len)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    BlockDriverState *bs = acb->common.bs;
-
-    /* Adjust offset into cluster */
-    offset += qed_offset_into_cluster(s, acb->cur_pos);
-
-    trace_qed_aio_read_data(s, acb, ret, offset, len);
-
-    if (ret < 0) {
-        goto err;
-    }
-
-    qemu_iovec_concat(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
-
-    /* Handle zero cluster and backing file reads */
-    if (ret == QED_CLUSTER_ZERO) {
-        qemu_iovec_memset(&acb->cur_qiov, 0, 0, acb->cur_qiov.size);
-        qed_aio_next_io(acb, 0);
-        return;
-    } else if (ret != QED_CLUSTER_FOUND) {
-        qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
-                              &acb->backing_qiov, qed_aio_next_io, acb);
-        return;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
-                   &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
-                   qed_aio_next_io, acb);
-    return;
-
-err:
-    qed_aio_complete(acb, ret);
-}
-
-/**
- * Begin next I/O or complete the request
- */
-static void qed_aio_next_io(void *opaque, int ret)
-{
-    QEDAIOCB *acb = opaque;
-    BDRVQEDState *s = acb_to_s(acb);
-    QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
-                                qed_aio_write_data : qed_aio_read_data;
-
-    trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
-
-    if (acb->backing_qiov) {
-        qemu_iovec_destroy(acb->backing_qiov);
-        g_free(acb->backing_qiov);
-        acb->backing_qiov = NULL;
-    }
-
-    /* Handle I/O error */
-    if (ret) {
-        qed_aio_complete(acb, ret);
-        return;
-    }
-
-    acb->qiov_offset += acb->cur_qiov.size;
-    acb->cur_pos += acb->cur_qiov.size;
-    qemu_iovec_reset(&acb->cur_qiov);
-
-    /* Complete request */
-    if (acb->cur_pos >= acb->end_pos) {
-        qed_aio_complete(acb, 0);
-        return;
-    }
-
-    /* Find next cluster and start I/O */
-    qed_find_cluster(s, &acb->request,
-                      acb->cur_pos, acb->end_pos - acb->cur_pos,
-                      io_fn, acb);
-}
-
-static BlockAIOCB *qed_aio_setup(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov, int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque, int flags)
-{
-    QEDAIOCB *acb = qemu_aio_get(&qed_aiocb_info, bs, cb, opaque);
-
-    trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
-                        opaque, flags);
-
-    acb->flags = flags;
-    acb->qiov = qiov;
-    acb->qiov_offset = 0;
-    acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
-    acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
-    acb->backing_qiov = NULL;
-    acb->request.l2_table = NULL;
-    qemu_iovec_init(&acb->cur_qiov, qiov->niov);
-
-    /* Start request */
-    qed_aio_next_io(acb, 0);
-    return &acb->common;
-}
-
-static BlockAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov, int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
-{
-    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov, int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
-{
-    return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
-                         opaque, QED_AIOCB_WRITE);
-}
-
-typedef struct {
-    Coroutine *co;
-    int ret;
-    bool done;
-} QEDWriteZeroesCB;
-
-static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
-{
-    QEDWriteZeroesCB *cb = opaque;
-
-    cb->done = true;
-    cb->ret = ret;
-    if (cb->co) {
-        qemu_coroutine_enter(cb->co, NULL);
-    }
-}
-
-static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
-                                                 int64_t sector_num,
-                                                 int nb_sectors,
-                                                 BdrvRequestFlags flags)
-{
-    BlockAIOCB *blockacb;
-    BDRVQEDState *s = bs->opaque;
-    QEDWriteZeroesCB cb = { .done = false };
-    QEMUIOVector qiov;
-    struct iovec iov;
-
-    /* Refuse if there are untouched backing file sectors */
-    if (bs->backing) {
-        if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
-            return -ENOTSUP;
-        }
-        if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
-            return -ENOTSUP;
-        }
-    }
-
-    /* Zero writes start without an I/O buffer.  If a buffer becomes necessary
-     * then it will be allocated during request processing.
-     */
-    iov.iov_base = NULL,
-    iov.iov_len  = nb_sectors * BDRV_SECTOR_SIZE,
-
-    qemu_iovec_init_external(&qiov, &iov, 1);
-    blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
-                             qed_co_write_zeroes_cb, &cb,
-                             QED_AIOCB_WRITE | QED_AIOCB_ZERO);
-    if (!blockacb) {
-        return -EIO;
-    }
-    if (!cb.done) {
-        cb.co = qemu_coroutine_self();
-        qemu_coroutine_yield();
-    }
-    assert(cb.done);
-    return cb.ret;
-}
-
-static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVQEDState *s = bs->opaque;
-    uint64_t old_image_size;
-    int ret;
-
-    if (!qed_is_image_size_valid(offset, s->header.cluster_size,
-                                 s->header.table_size)) {
-        return -EINVAL;
-    }
-
-    /* Shrinking is currently not supported */
-    if ((uint64_t)offset < s->header.image_size) {
-        return -ENOTSUP;
-    }
-
-    old_image_size = s->header.image_size;
-    s->header.image_size = offset;
-    ret = qed_write_header_sync(s);
-    if (ret < 0) {
-        s->header.image_size = old_image_size;
-    }
-    return ret;
-}
-
-static int64_t bdrv_qed_getlength(BlockDriverState *bs)
-{
-    BDRVQEDState *s = bs->opaque;
-    return s->header.image_size;
-}
-
-static int bdrv_qed_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    memset(bdi, 0, sizeof(*bdi));
-    bdi->cluster_size = s->header.cluster_size;
-    bdi->is_dirty = s->header.features & QED_F_NEED_CHECK;
-    bdi->unallocated_blocks_are_zero = true;
-    bdi->can_write_zeroes_with_unmap = true;
-    return 0;
-}
-
-static int bdrv_qed_change_backing_file(BlockDriverState *bs,
-                                        const char *backing_file,
-                                        const char *backing_fmt)
-{
-    BDRVQEDState *s = bs->opaque;
-    QEDHeader new_header, le_header;
-    void *buffer;
-    size_t buffer_len, backing_file_len;
-    int ret;
-
-    /* Refuse to set backing filename if unknown compat feature bits are
-     * active.  If the image uses an unknown compat feature then we may not
-     * know the layout of data following the header structure and cannot safely
-     * add a new string.
-     */
-    if (backing_file && (s->header.compat_features &
-                         ~QED_COMPAT_FEATURE_MASK)) {
-        return -ENOTSUP;
-    }
-
-    memcpy(&new_header, &s->header, sizeof(new_header));
-
-    new_header.features &= ~(QED_F_BACKING_FILE |
-                             QED_F_BACKING_FORMAT_NO_PROBE);
-
-    /* Adjust feature flags */
-    if (backing_file) {
-        new_header.features |= QED_F_BACKING_FILE;
-
-        if (qed_fmt_is_raw(backing_fmt)) {
-            new_header.features |= QED_F_BACKING_FORMAT_NO_PROBE;
-        }
-    }
-
-    /* Calculate new header size */
-    backing_file_len = 0;
-
-    if (backing_file) {
-        backing_file_len = strlen(backing_file);
-    }
-
-    buffer_len = sizeof(new_header);
-    new_header.backing_filename_offset = buffer_len;
-    new_header.backing_filename_size = backing_file_len;
-    buffer_len += backing_file_len;
-
-    /* Make sure we can rewrite header without failing */
-    if (buffer_len > new_header.header_size * new_header.cluster_size) {
-        return -ENOSPC;
-    }
-
-    /* Prepare new header */
-    buffer = g_malloc(buffer_len);
-
-    qed_header_cpu_to_le(&new_header, &le_header);
-    memcpy(buffer, &le_header, sizeof(le_header));
-    buffer_len = sizeof(le_header);
-
-    if (backing_file) {
-        memcpy(buffer + buffer_len, backing_file, backing_file_len);
-        buffer_len += backing_file_len;
-    }
-
-    /* Write new header */
-    ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
-    g_free(buffer);
-    if (ret == 0) {
-        memcpy(&s->header, &new_header, sizeof(new_header));
-    }
-    return ret;
-}
-
-static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
-    BDRVQEDState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    bdrv_qed_close(bs);
-
-    bdrv_invalidate_cache(bs->file->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    memset(s, 0, sizeof(BDRVQEDState));
-    ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        error_prepend(errp, "Could not reopen qed layer: ");
-        return;
-    } else if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not reopen qed layer");
-        return;
-    }
-}
-
-static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
-                          BdrvCheckMode fix)
-{
-    BDRVQEDState *s = bs->opaque;
-
-    return qed_check(s, result, !!fix);
-}
-
-static QemuOptsList qed_create_opts = {
-    .name = "qed-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qed_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FMT,
-            .type = QEMU_OPT_STRING,
-            .help = "Image format of the base image"
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Cluster size (in bytes)",
-            .def_value_str = stringify(QED_DEFAULT_CLUSTER_SIZE)
-        },
-        {
-            .name = BLOCK_OPT_TABLE_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "L1/L2 table size (in clusters)"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_qed = {
-    .format_name              = "qed",
-    .instance_size            = sizeof(BDRVQEDState),
-    .create_opts              = &qed_create_opts,
-    .supports_backing         = true,
-
-    .bdrv_probe               = bdrv_qed_probe,
-    .bdrv_open                = bdrv_qed_open,
-    .bdrv_close               = bdrv_qed_close,
-    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
-    .bdrv_create              = bdrv_qed_create,
-    .bdrv_has_zero_init       = bdrv_has_zero_init_1,
-    .bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
-    .bdrv_aio_readv           = bdrv_qed_aio_readv,
-    .bdrv_aio_writev          = bdrv_qed_aio_writev,
-    .bdrv_co_write_zeroes     = bdrv_qed_co_write_zeroes,
-    .bdrv_truncate            = bdrv_qed_truncate,
-    .bdrv_getlength           = bdrv_qed_getlength,
-    .bdrv_get_info            = bdrv_qed_get_info,
-    .bdrv_refresh_limits      = bdrv_qed_refresh_limits,
-    .bdrv_change_backing_file = bdrv_qed_change_backing_file,
-    .bdrv_invalidate_cache    = bdrv_qed_invalidate_cache,
-    .bdrv_check               = bdrv_qed_check,
-    .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
-    .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
-};
-
-static void bdrv_qed_init(void)
-{
-    bdrv_register(&bdrv_qed);
-}
-
-block_init(bdrv_qed_init);
diff --git a/qemu/block/qed.h b/qemu/block/qed.h
deleted file mode 100644
index 22b319875..000000000
--- a/qemu/block/qed.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * QEMU Enhanced Disk Format
- *
- * Copyright IBM, Corp. 2010
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef BLOCK_QED_H
-#define BLOCK_QED_H
-
-#include "block/block_int.h"
-#include "qemu/cutils.h"
-
-/* The layout of a QED file is as follows:
- *
- * +--------+----------+----------+----------+-----+
- * | header | L1 table | cluster0 | cluster1 | ... |
- * +--------+----------+----------+----------+-----+
- *
- * There is a 2-level pagetable for cluster allocation:
- *
- *                     +----------+
- *                     | L1 table |
- *                     +----------+
- *                ,------'  |  '------.
- *           +----------+   |    +----------+
- *           | L2 table |  ...   | L2 table |
- *           +----------+        +----------+
- *       ,------'  |  '------.
- *  +----------+   |    +----------+
- *  |   Data   |  ...   |   Data   |
- *  +----------+        +----------+
- *
- * The L1 table is fixed size and always present.  L2 tables are allocated on
- * demand.  The L1 table size determines the maximum possible image size; it
- * can be influenced using the cluster_size and table_size values.
- *
- * All fields are little-endian on disk.
- */
-#define  QED_DEFAULT_CLUSTER_SIZE  65536
-enum {
-    QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
-
-    /* The image supports a backing file */
-    QED_F_BACKING_FILE = 0x01,
-
-    /* The image needs a consistency check before use */
-    QED_F_NEED_CHECK = 0x02,
-
-    /* The backing file format must not be probed, treat as raw image */
-    QED_F_BACKING_FORMAT_NO_PROBE = 0x04,
-
-    /* Feature bits must be used when the on-disk format changes */
-    QED_FEATURE_MASK = QED_F_BACKING_FILE | /* supported feature bits */
-                       QED_F_NEED_CHECK |
-                       QED_F_BACKING_FORMAT_NO_PROBE,
-    QED_COMPAT_FEATURE_MASK = 0,            /* supported compat feature bits */
-    QED_AUTOCLEAR_FEATURE_MASK = 0,         /* supported autoclear feature bits */
-
-    /* Data is stored in groups of sectors called clusters.  Cluster size must
-     * be large to avoid keeping too much metadata.  I/O requests that have
-     * sub-cluster size will require read-modify-write.
-     */
-    QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
-    QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
-
-    /* Allocated clusters are tracked using a 2-level pagetable.  Table size is
-     * a multiple of clusters so large maximum image sizes can be supported
-     * without jacking up the cluster size too much.
-     */
-    QED_MIN_TABLE_SIZE = 1,        /* in clusters */
-    QED_MAX_TABLE_SIZE = 16,
-    QED_DEFAULT_TABLE_SIZE = 4,
-
-    /* Delay to flush and clean image after last allocating write completes */
-    QED_NEED_CHECK_TIMEOUT = 5,    /* in seconds */
-};
-
-typedef struct {
-    uint32_t magic;                 /* QED\0 */
-
-    uint32_t cluster_size;          /* in bytes */
-    uint32_t table_size;            /* for L1 and L2 tables, in clusters */
-    uint32_t header_size;           /* in clusters */
-
-    uint64_t features;              /* format feature bits */
-    uint64_t compat_features;       /* compatible feature bits */
-    uint64_t autoclear_features;    /* self-resetting feature bits */
-
-    uint64_t l1_table_offset;       /* in bytes */
-    uint64_t image_size;            /* total logical image size, in bytes */
-
-    /* if (features & QED_F_BACKING_FILE) */
-    uint32_t backing_filename_offset; /* in bytes from start of header */
-    uint32_t backing_filename_size;   /* in bytes */
-} QEMU_PACKED QEDHeader;
-
-typedef struct {
-    uint64_t offsets[0];            /* in bytes */
-} QEDTable;
-
-/* The L2 cache is a simple write-through cache for L2 structures */
-typedef struct CachedL2Table {
-    QEDTable *table;
-    uint64_t offset;    /* offset=0 indicates an invalidate entry */
-    QTAILQ_ENTRY(CachedL2Table) node;
-    int ref;
-} CachedL2Table;
-
-typedef struct {
-    QTAILQ_HEAD(, CachedL2Table) entries;
-    unsigned int n_entries;
-} L2TableCache;
-
-typedef struct QEDRequest {
-    CachedL2Table *l2_table;
-} QEDRequest;
-
-enum {
-    QED_AIOCB_WRITE = 0x0001,       /* read or write? */
-    QED_AIOCB_ZERO  = 0x0002,       /* zero write, used with QED_AIOCB_WRITE */
-};
-
-typedef struct QEDAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    int bh_ret;                     /* final return status for completion bh */
-    QSIMPLEQ_ENTRY(QEDAIOCB) next;  /* next request */
-    int flags;                      /* QED_AIOCB_* bits ORed together */
-    uint64_t end_pos;               /* request end on block device, in bytes */
-
-    /* User scatter-gather list */
-    QEMUIOVector *qiov;
-    size_t qiov_offset;             /* byte count already processed */
-
-    /* Current cluster scatter-gather list */
-    QEMUIOVector cur_qiov;
-    QEMUIOVector *backing_qiov;
-    uint64_t cur_pos;               /* position on block device, in bytes */
-    uint64_t cur_cluster;           /* cluster offset in image file */
-    unsigned int cur_nclusters;     /* number of clusters being accessed */
-    int find_cluster_ret;           /* used for L1/L2 update */
-
-    QEDRequest request;
-} QEDAIOCB;
-
-typedef struct {
-    BlockDriverState *bs;           /* device */
-    uint64_t file_size;             /* length of image file, in bytes */
-
-    QEDHeader header;               /* always cpu-endian */
-    QEDTable *l1_table;
-    L2TableCache l2_cache;          /* l2 table cache */
-    uint32_t table_nelems;
-    uint32_t l1_shift;
-    uint32_t l2_shift;
-    uint32_t l2_mask;
-
-    /* Allocating write request queue */
-    QSIMPLEQ_HEAD(, QEDAIOCB) allocating_write_reqs;
-    bool allocating_write_reqs_plugged;
-
-    /* Periodic flush and clear need check flag */
-    QEMUTimer *need_check_timer;
-} BDRVQEDState;
-
-enum {
-    QED_CLUSTER_FOUND,         /* cluster found */
-    QED_CLUSTER_ZERO,          /* zero cluster found */
-    QED_CLUSTER_L2,            /* cluster missing in L2 */
-    QED_CLUSTER_L1,            /* cluster missing in L1 */
-};
-
-/**
- * qed_find_cluster() completion callback
- *
- * @opaque:     User data for completion callback
- * @ret:        QED_CLUSTER_FOUND   Success
- *              QED_CLUSTER_L2      Data cluster unallocated in L2
- *              QED_CLUSTER_L1      L2 unallocated in L1
- *              -errno              POSIX error occurred
- * @offset:     Data cluster offset
- * @len:        Contiguous bytes starting from cluster offset
- *
- * This function is invoked when qed_find_cluster() completes.
- *
- * On success ret is QED_CLUSTER_FOUND and offset/len are a contiguous range
- * in the image file.
- *
- * On failure ret is QED_CLUSTER_L2 or QED_CLUSTER_L1 for missing L2 or L1
- * table offset, respectively.  len is number of contiguous unallocated bytes.
- */
-typedef void QEDFindClusterFunc(void *opaque, int ret, uint64_t offset, size_t len);
-
-/**
- * Generic callback for chaining async callbacks
- */
-typedef struct {
-    BlockCompletionFunc *cb;
-    void *opaque;
-} GenericCB;
-
-void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque);
-void gencb_complete(void *opaque, int ret);
-
-/**
- * Header functions
- */
-int qed_write_header_sync(BDRVQEDState *s);
-
-/**
- * L2 cache functions
- */
-void qed_init_l2_cache(L2TableCache *l2_cache);
-void qed_free_l2_cache(L2TableCache *l2_cache);
-CachedL2Table *qed_alloc_l2_cache_entry(L2TableCache *l2_cache);
-void qed_unref_l2_cache_entry(CachedL2Table *entry);
-CachedL2Table *qed_find_l2_cache_entry(L2TableCache *l2_cache, uint64_t offset);
-void qed_commit_l2_cache_entry(L2TableCache *l2_cache, CachedL2Table *l2_table);
-
-/**
- * Table I/O functions
- */
-int qed_read_l1_table_sync(BDRVQEDState *s);
-void qed_write_l1_table(BDRVQEDState *s, unsigned int index, unsigned int n,
-                        BlockCompletionFunc *cb, void *opaque);
-int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
-                            unsigned int n);
-int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                           uint64_t offset);
-void qed_read_l2_table(BDRVQEDState *s, QEDRequest *request, uint64_t offset,
-                       BlockCompletionFunc *cb, void *opaque);
-void qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
-                        unsigned int index, unsigned int n, bool flush,
-                        BlockCompletionFunc *cb, void *opaque);
-int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
-                            unsigned int index, unsigned int n, bool flush);
-
-/**
- * Cluster functions
- */
-void qed_find_cluster(BDRVQEDState *s, QEDRequest *request, uint64_t pos,
-                      size_t len, QEDFindClusterFunc *cb, void *opaque);
-
-/**
- * Consistency check
- */
-int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix);
-
-QEDTable *qed_alloc_table(BDRVQEDState *s);
-
-/**
- * Round down to the start of a cluster
- */
-static inline uint64_t qed_start_of_cluster(BDRVQEDState *s, uint64_t offset)
-{
-    return offset & ~(uint64_t)(s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_offset_into_cluster(BDRVQEDState *s, uint64_t offset)
-{
-    return offset & (s->header.cluster_size - 1);
-}
-
-static inline uint64_t qed_bytes_to_clusters(BDRVQEDState *s, uint64_t bytes)
-{
-    return qed_start_of_cluster(s, bytes + (s->header.cluster_size - 1)) /
-           (s->header.cluster_size - 1);
-}
-
-static inline unsigned int qed_l1_index(BDRVQEDState *s, uint64_t pos)
-{
-    return pos >> s->l1_shift;
-}
-
-static inline unsigned int qed_l2_index(BDRVQEDState *s, uint64_t pos)
-{
-    return (pos >> s->l2_shift) & s->l2_mask;
-}
-
-/**
- * Test if a cluster offset is valid
- */
-static inline bool qed_check_cluster_offset(BDRVQEDState *s, uint64_t offset)
-{
-    uint64_t header_size = (uint64_t)s->header.header_size *
-                           s->header.cluster_size;
-
-    if (offset & (s->header.cluster_size - 1)) {
-        return false;
-    }
-    return offset >= header_size && offset < s->file_size;
-}
-
-/**
- * Test if a table offset is valid
- */
-static inline bool qed_check_table_offset(BDRVQEDState *s, uint64_t offset)
-{
-    uint64_t end_offset = offset + (s->header.table_size - 1) *
-                          s->header.cluster_size;
-
-    /* Overflow check */
-    if (end_offset <= offset) {
-        return false;
-    }
-
-    return qed_check_cluster_offset(s, offset) &&
-           qed_check_cluster_offset(s, end_offset);
-}
-
-static inline bool qed_offset_is_cluster_aligned(BDRVQEDState *s,
-                                                 uint64_t offset)
-{
-    if (qed_offset_into_cluster(s, offset)) {
-        return false;
-    }
-    return true;
-}
-
-static inline bool qed_offset_is_unalloc_cluster(uint64_t offset)
-{
-    if (offset == 0) {
-        return true;
-    }
-    return false;
-}
-
-static inline bool qed_offset_is_zero_cluster(uint64_t offset)
-{
-    if (offset == 1) {
-        return true;
-    }
-    return false;
-}
-
-#endif /* BLOCK_QED_H */
diff --git a/qemu/block/quorum.c b/qemu/block/quorum.c
deleted file mode 100644
index da15465a9..000000000
--- a/qemu/block/quorum.c
+++ /dev/null
@@ -1,1091 +0,0 @@
-/*
- * Quorum Block filter
- *
- * Copyright (C) 2012-2014 Nodalink, EURL.
- *
- * Author:
- *   Benoît Canet <benoit.canet@irqsave.net>
- *
- * Based on the design and code of blkverify.c (Copyright (C) 2010 IBM, Corp)
- * and blkmirror.c (Copyright (C) 2011 Red Hat, Inc).
- *
- * This work is licensed under the terms of the GNU GPL, version 2 or later.
- * See the COPYING file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "block/block_int.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qdict.h"
-#include "qapi/qmp/qerror.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qjson.h"
-#include "qapi/qmp/qlist.h"
-#include "qapi/qmp/qstring.h"
-#include "qapi-event.h"
-#include "crypto/hash.h"
-
-#define HASH_LENGTH 32
-
-#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
-#define QUORUM_OPT_BLKVERIFY      "blkverify"
-#define QUORUM_OPT_REWRITE        "rewrite-corrupted"
-#define QUORUM_OPT_READ_PATTERN   "read-pattern"
-
-/* This union holds a vote hash value */
-typedef union QuorumVoteValue {
-    uint8_t h[HASH_LENGTH];    /* SHA-256 hash */
-    int64_t l;                 /* simpler 64 bits hash */
-} QuorumVoteValue;
-
-/* A vote item */
-typedef struct QuorumVoteItem {
-    int index;
-    QLIST_ENTRY(QuorumVoteItem) next;
-} QuorumVoteItem;
-
-/* this structure is a vote version. A version is the set of votes sharing the
- * same vote value.
- * The set of votes will be tracked with the items field and its cardinality is
- * vote_count.
- */
-typedef struct QuorumVoteVersion {
-    QuorumVoteValue value;
-    int index;
-    int vote_count;
-    QLIST_HEAD(, QuorumVoteItem) items;
-    QLIST_ENTRY(QuorumVoteVersion) next;
-} QuorumVoteVersion;
-
-/* this structure holds a group of vote versions together */
-typedef struct QuorumVotes {
-    QLIST_HEAD(, QuorumVoteVersion) vote_list;
-    bool (*compare)(QuorumVoteValue *a, QuorumVoteValue *b);
-} QuorumVotes;
-
-/* the following structure holds the state of one quorum instance */
-typedef struct BDRVQuorumState {
-    BdrvChild **children;  /* children BlockDriverStates */
-    int num_children;      /* children count */
-    int threshold;         /* if less than threshold children reads gave the
-                            * same result a quorum error occurs.
-                            */
-    bool is_blkverify;     /* true if the driver is in blkverify mode
-                            * Writes are mirrored on two children devices.
-                            * On reads the two children devices' contents are
-                            * compared and if a difference is spotted its
-                            * location is printed and the code aborts.
-                            * It is useful to debug other block drivers by
-                            * comparing them with a reference one.
-                            */
-    bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
-                            * block if Quorum is reached.
-                            */
-
-    QuorumReadPattern read_pattern;
-} BDRVQuorumState;
-
-typedef struct QuorumAIOCB QuorumAIOCB;
-
-/* Quorum will create one instance of the following structure per operation it
- * performs on its children.
- * So for each read/write operation coming from the upper layer there will be
- * $children_count QuorumChildRequest.
- */
-typedef struct QuorumChildRequest {
-    BlockAIOCB *aiocb;
-    QEMUIOVector qiov;
-    uint8_t *buf;
-    int ret;
-    QuorumAIOCB *parent;
-} QuorumChildRequest;
-
-/* Quorum will use the following structure to track progress of each read/write
- * operation received by the upper layer.
- * This structure hold pointers to the QuorumChildRequest structures instances
- * used to do operations on each children and track overall progress.
- */
-struct QuorumAIOCB {
-    BlockAIOCB common;
-
-    /* Request metadata */
-    uint64_t sector_num;
-    int nb_sectors;
-
-    QEMUIOVector *qiov;         /* calling IOV */
-
-    QuorumChildRequest *qcrs;   /* individual child requests */
-    int count;                  /* number of completed AIOCB */
-    int success_count;          /* number of successfully completed AIOCB */
-
-    int rewrite_count;          /* number of replica to rewrite: count down to
-                                 * zero once writes are fired
-                                 */
-
-    QuorumVotes votes;
-
-    bool is_read;
-    int vote_ret;
-    int child_iter;             /* which child to read in fifo pattern */
-};
-
-static bool quorum_vote(QuorumAIOCB *acb);
-
-static void quorum_aio_cancel(BlockAIOCB *blockacb)
-{
-    QuorumAIOCB *acb = container_of(blockacb, QuorumAIOCB, common);
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
-
-    /* cancel all callbacks */
-    for (i = 0; i < s->num_children; i++) {
-        if (acb->qcrs[i].aiocb) {
-            bdrv_aio_cancel_async(acb->qcrs[i].aiocb);
-        }
-    }
-}
-
-static AIOCBInfo quorum_aiocb_info = {
-    .aiocb_size         = sizeof(QuorumAIOCB),
-    .cancel_async       = quorum_aio_cancel,
-};
-
-static void quorum_aio_finalize(QuorumAIOCB *acb)
-{
-    int i, ret = 0;
-
-    if (acb->vote_ret) {
-        ret = acb->vote_ret;
-    }
-
-    acb->common.cb(acb->common.opaque, ret);
-
-    if (acb->is_read) {
-        /* on the quorum case acb->child_iter == s->num_children - 1 */
-        for (i = 0; i <= acb->child_iter; i++) {
-            qemu_vfree(acb->qcrs[i].buf);
-            qemu_iovec_destroy(&acb->qcrs[i].qiov);
-        }
-    }
-
-    g_free(acb->qcrs);
-    qemu_aio_unref(acb);
-}
-
-static bool quorum_sha256_compare(QuorumVoteValue *a, QuorumVoteValue *b)
-{
-    return !memcmp(a->h, b->h, HASH_LENGTH);
-}
-
-static bool quorum_64bits_compare(QuorumVoteValue *a, QuorumVoteValue *b)
-{
-    return a->l == b->l;
-}
-
-static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
-                                   BlockDriverState *bs,
-                                   QEMUIOVector *qiov,
-                                   uint64_t sector_num,
-                                   int nb_sectors,
-                                   BlockCompletionFunc *cb,
-                                   void *opaque)
-{
-    QuorumAIOCB *acb = qemu_aio_get(&quorum_aiocb_info, bs, cb, opaque);
-    int i;
-
-    acb->common.bs->opaque = s;
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-    acb->qiov = qiov;
-    acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
-    acb->count = 0;
-    acb->success_count = 0;
-    acb->rewrite_count = 0;
-    acb->votes.compare = quorum_sha256_compare;
-    QLIST_INIT(&acb->votes.vote_list);
-    acb->is_read = false;
-    acb->vote_ret = 0;
-
-    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = NULL;
-        acb->qcrs[i].ret = 0;
-        acb->qcrs[i].parent = acb;
-    }
-
-    return acb;
-}
-
-static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
-                              int nb_sectors, char *node_name, int ret)
-{
-    const char *msg = NULL;
-    if (ret < 0) {
-        msg = strerror(-ret);
-    }
-
-    qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
-                                      sector_num, nb_sectors, &error_abort);
-}
-
-static void quorum_report_failure(QuorumAIOCB *acb)
-{
-    const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
-    qapi_event_send_quorum_failure(reference, acb->sector_num,
-                                   acb->nb_sectors, &error_abort);
-}
-
-static int quorum_vote_error(QuorumAIOCB *acb);
-
-static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-
-    if (acb->success_count < s->threshold) {
-        acb->vote_ret = quorum_vote_error(acb);
-        quorum_report_failure(acb);
-        return true;
-    }
-
-    return false;
-}
-
-static void quorum_rewrite_aio_cb(void *opaque, int ret)
-{
-    QuorumAIOCB *acb = opaque;
-
-    /* one less rewrite to do */
-    acb->rewrite_count--;
-
-    /* wait until all rewrite callbacks have completed */
-    if (acb->rewrite_count) {
-        return;
-    }
-
-    quorum_aio_finalize(acb);
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb);
-
-static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
-{
-    int i;
-    assert(dest->niov == source->niov);
-    assert(dest->size == source->size);
-    for (i = 0; i < source->niov; i++) {
-        assert(dest->iov[i].iov_len == source->iov[i].iov_len);
-        memcpy(dest->iov[i].iov_base,
-               source->iov[i].iov_base,
-               source->iov[i].iov_len);
-    }
-}
-
-static void quorum_aio_cb(void *opaque, int ret)
-{
-    QuorumChildRequest *sacb = opaque;
-    QuorumAIOCB *acb = sacb->parent;
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    bool rewrite = false;
-
-    if (ret == 0) {
-        acb->success_count++;
-    } else {
-        QuorumOpType type;
-        type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
-        quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
-                          sacb->aiocb->bs->node_name, ret);
-    }
-
-    if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
-        /* We try to read next child in FIFO order if we fail to read */
-        if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
-            acb->child_iter++;
-            read_fifo_child(acb);
-            return;
-        }
-
-        if (ret == 0) {
-            quorum_copy_qiov(acb->qiov, &acb->qcrs[acb->child_iter].qiov);
-        }
-        acb->vote_ret = ret;
-        quorum_aio_finalize(acb);
-        return;
-    }
-
-    sacb->ret = ret;
-    acb->count++;
-    assert(acb->count <= s->num_children);
-    assert(acb->success_count <= s->num_children);
-    if (acb->count < s->num_children) {
-        return;
-    }
-
-    /* Do the vote on read */
-    if (acb->is_read) {
-        rewrite = quorum_vote(acb);
-    } else {
-        quorum_has_too_much_io_failed(acb);
-    }
-
-    /* if no rewrite is done the code will finish right away */
-    if (!rewrite) {
-        quorum_aio_finalize(acb);
-    }
-}
-
-static void quorum_report_bad_versions(BDRVQuorumState *s,
-                                       QuorumAIOCB *acb,
-                                       QuorumVoteValue *value)
-{
-    QuorumVoteVersion *version;
-    QuorumVoteItem *item;
-
-    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
-        if (acb->votes.compare(&version->value, value)) {
-            continue;
-        }
-        QLIST_FOREACH(item, &version->items, next) {
-            quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
-                              acb->nb_sectors,
-                              s->children[item->index]->bs->node_name, 0);
-        }
-    }
-}
-
-static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
-                                        QuorumVoteValue *value)
-{
-    QuorumVoteVersion *version;
-    QuorumVoteItem *item;
-    int count = 0;
-
-    /* first count the number of bad versions: done first to avoid concurrency
-     * issues.
-     */
-    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
-        if (acb->votes.compare(&version->value, value)) {
-            continue;
-        }
-        QLIST_FOREACH(item, &version->items, next) {
-            count++;
-        }
-    }
-
-    /* quorum_rewrite_aio_cb will count down this to zero */
-    acb->rewrite_count = count;
-
-    /* now fire the correcting rewrites */
-    QLIST_FOREACH(version, &acb->votes.vote_list, next) {
-        if (acb->votes.compare(&version->value, value)) {
-            continue;
-        }
-        QLIST_FOREACH(item, &version->items, next) {
-            bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
-                            acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
-                            acb);
-        }
-    }
-
-    /* return true if any rewrite is done else false */
-    return count;
-}
-
-static void quorum_count_vote(QuorumVotes *votes,
-                              QuorumVoteValue *value,
-                              int index)
-{
-    QuorumVoteVersion *v = NULL, *version = NULL;
-    QuorumVoteItem *item;
-
-    /* look if we have something with this hash */
-    QLIST_FOREACH(v, &votes->vote_list, next) {
-        if (votes->compare(&v->value, value)) {
-            version = v;
-            break;
-        }
-    }
-
-    /* It's a version not yet in the list add it */
-    if (!version) {
-        version = g_new0(QuorumVoteVersion, 1);
-        QLIST_INIT(&version->items);
-        memcpy(&version->value, value, sizeof(version->value));
-        version->index = index;
-        version->vote_count = 0;
-        QLIST_INSERT_HEAD(&votes->vote_list, version, next);
-    }
-
-    version->vote_count++;
-
-    item = g_new0(QuorumVoteItem, 1);
-    item->index = index;
-    QLIST_INSERT_HEAD(&version->items, item, next);
-}
-
-static void quorum_free_vote_list(QuorumVotes *votes)
-{
-    QuorumVoteVersion *version, *next_version;
-    QuorumVoteItem *item, *next_item;
-
-    QLIST_FOREACH_SAFE(version, &votes->vote_list, next, next_version) {
-        QLIST_REMOVE(version, next);
-        QLIST_FOREACH_SAFE(item, &version->items, next, next_item) {
-            QLIST_REMOVE(item, next);
-            g_free(item);
-        }
-        g_free(version);
-    }
-}
-
-static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
-{
-    QEMUIOVector *qiov = &acb->qcrs[i].qiov;
-    size_t len = sizeof(hash->h);
-    uint8_t *data = hash->h;
-
-    /* XXX - would be nice if we could pass in the Error **
-     * and propagate that back, but this quorum code is
-     * restricted to just errno values currently */
-    if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256,
-                            qiov->iov, qiov->niov,
-                            &data, &len,
-                            NULL) < 0) {
-        return -EINVAL;
-    }
-
-    return 0;
-}
-
-static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
-{
-    int max = 0;
-    QuorumVoteVersion *candidate, *winner = NULL;
-
-    QLIST_FOREACH(candidate, &votes->vote_list, next) {
-        if (candidate->vote_count > max) {
-            max = candidate->vote_count;
-            winner = candidate;
-        }
-    }
-
-    return winner;
-}
-
-/* qemu_iovec_compare is handy for blkverify mode because it returns the first
- * differing byte location. Yet it is handcoded to compare vectors one byte
- * after another so it does not benefit from the libc SIMD optimizations.
- * quorum_iovec_compare is written for speed and should be used in the non
- * blkverify mode of quorum.
- */
-static bool quorum_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
-{
-    int i;
-    int result;
-
-    assert(a->niov == b->niov);
-    for (i = 0; i < a->niov; i++) {
-        assert(a->iov[i].iov_len == b->iov[i].iov_len);
-        result = memcmp(a->iov[i].iov_base,
-                        b->iov[i].iov_base,
-                        a->iov[i].iov_len);
-        if (result) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-static void GCC_FMT_ATTR(2, 3) quorum_err(QuorumAIOCB *acb,
-                                          const char *fmt, ...)
-{
-    va_list ap;
-
-    va_start(ap, fmt);
-    fprintf(stderr, "quorum: sector_num=%" PRId64 " nb_sectors=%d ",
-            acb->sector_num, acb->nb_sectors);
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n");
-    va_end(ap);
-    exit(1);
-}
-
-static bool quorum_compare(QuorumAIOCB *acb,
-                           QEMUIOVector *a,
-                           QEMUIOVector *b)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    ssize_t offset;
-
-    /* This driver will replace blkverify in this particular case */
-    if (s->is_blkverify) {
-        offset = qemu_iovec_compare(a, b);
-        if (offset != -1) {
-            quorum_err(acb, "contents mismatch in sector %" PRId64,
-                       acb->sector_num +
-                       (uint64_t)(offset / BDRV_SECTOR_SIZE));
-        }
-        return true;
-    }
-
-    return quorum_iovec_compare(a, b);
-}
-
-/* Do a vote to get the error code */
-static int quorum_vote_error(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    QuorumVoteVersion *winner = NULL;
-    QuorumVotes error_votes;
-    QuorumVoteValue result_value;
-    int i, ret = 0;
-    bool error = false;
-
-    QLIST_INIT(&error_votes.vote_list);
-    error_votes.compare = quorum_64bits_compare;
-
-    for (i = 0; i < s->num_children; i++) {
-        ret = acb->qcrs[i].ret;
-        if (ret) {
-            error = true;
-            result_value.l = ret;
-            quorum_count_vote(&error_votes, &result_value, i);
-        }
-    }
-
-    if (error) {
-        winner = quorum_get_vote_winner(&error_votes);
-        ret = winner->value.l;
-    }
-
-    quorum_free_vote_list(&error_votes);
-
-    return ret;
-}
-
-static bool quorum_vote(QuorumAIOCB *acb)
-{
-    bool quorum = true;
-    bool rewrite = false;
-    int i, j, ret;
-    QuorumVoteValue hash;
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    QuorumVoteVersion *winner;
-
-    if (quorum_has_too_much_io_failed(acb)) {
-        return false;
-    }
-
-    /* get the index of the first successful read */
-    for (i = 0; i < s->num_children; i++) {
-        if (!acb->qcrs[i].ret) {
-            break;
-        }
-    }
-
-    assert(i < s->num_children);
-
-    /* compare this read with all other successful reads stopping at quorum
-     * failure
-     */
-    for (j = i + 1; j < s->num_children; j++) {
-        if (acb->qcrs[j].ret) {
-            continue;
-        }
-        quorum = quorum_compare(acb, &acb->qcrs[i].qiov, &acb->qcrs[j].qiov);
-        if (!quorum) {
-            break;
-       }
-    }
-
-    /* Every successful read agrees */
-    if (quorum) {
-        quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
-        return false;
-    }
-
-    /* compute hashes for each successful read, also store indexes */
-    for (i = 0; i < s->num_children; i++) {
-        if (acb->qcrs[i].ret) {
-            continue;
-        }
-        ret = quorum_compute_hash(acb, i, &hash);
-        /* if ever the hash computation failed */
-        if (ret < 0) {
-            acb->vote_ret = ret;
-            goto free_exit;
-        }
-        quorum_count_vote(&acb->votes, &hash, i);
-    }
-
-    /* vote to select the most represented version */
-    winner = quorum_get_vote_winner(&acb->votes);
-
-    /* if the winner count is smaller than threshold the read fails */
-    if (winner->vote_count < s->threshold) {
-        quorum_report_failure(acb);
-        acb->vote_ret = -EIO;
-        goto free_exit;
-    }
-
-    /* we have a winner: copy it */
-    quorum_copy_qiov(acb->qiov, &acb->qcrs[winner->index].qiov);
-
-    /* some versions are bad print them */
-    quorum_report_bad_versions(s, acb, &winner->value);
-
-    /* corruption correction is enabled */
-    if (s->rewrite_corrupted) {
-        rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
-    }
-
-free_exit:
-    /* free lists */
-    quorum_free_vote_list(&acb->votes);
-    return rewrite;
-}
-
-static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
-        qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
-        qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
-    }
-
-    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
-                                            &acb->qcrs[i].qiov, acb->nb_sectors,
-                                            quorum_aio_cb, &acb->qcrs[i]);
-    }
-
-    return &acb->common;
-}
-
-static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
-{
-    BDRVQuorumState *s = acb->common.bs->opaque;
-
-    acb->qcrs[acb->child_iter].buf =
-        qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
-    qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
-    qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
-                     acb->qcrs[acb->child_iter].buf);
-    acb->qcrs[acb->child_iter].aiocb =
-        bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
-                       &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
-                       quorum_aio_cb, &acb->qcrs[acb->child_iter]);
-
-    return &acb->common;
-}
-
-static BlockAIOCB *quorum_aio_readv(BlockDriverState *bs,
-                                    int64_t sector_num,
-                                    QEMUIOVector *qiov,
-                                    int nb_sectors,
-                                    BlockCompletionFunc *cb,
-                                    void *opaque)
-{
-    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num,
-                                      nb_sectors, cb, opaque);
-    acb->is_read = true;
-
-    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        acb->child_iter = s->num_children - 1;
-        return read_quorum_children(acb);
-    }
-
-    acb->child_iter = 0;
-    return read_fifo_child(acb);
-}
-
-static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
-                                     int64_t sector_num,
-                                     QEMUIOVector *qiov,
-                                     int nb_sectors,
-                                     BlockCompletionFunc *cb,
-                                     void *opaque)
-{
-    BDRVQuorumState *s = bs->opaque;
-    QuorumAIOCB *acb = quorum_aio_get(s, bs, qiov, sector_num, nb_sectors,
-                                      cb, opaque);
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
-                                             qiov, nb_sectors, &quorum_aio_cb,
-                                             &acb->qcrs[i]);
-    }
-
-    return &acb->common;
-}
-
-static int64_t quorum_getlength(BlockDriverState *bs)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int64_t result;
-    int i;
-
-    /* check that all file have the same length */
-    result = bdrv_getlength(s->children[0]->bs);
-    if (result < 0) {
-        return result;
-    }
-    for (i = 1; i < s->num_children; i++) {
-        int64_t value = bdrv_getlength(s->children[i]->bs);
-        if (value < 0) {
-            return value;
-        }
-        if (value != result) {
-            return -EIO;
-        }
-    }
-
-    return result;
-}
-
-static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
-    BDRVQuorumState *s = bs->opaque;
-    Error *local_err = NULL;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_invalidate_cache(s->children[i]->bs, &local_err);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return;
-        }
-    }
-}
-
-static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
-{
-    BDRVQuorumState *s = bs->opaque;
-    QuorumVoteVersion *winner = NULL;
-    QuorumVotes error_votes;
-    QuorumVoteValue result_value;
-    int i;
-    int result = 0;
-    int success_count = 0;
-
-    QLIST_INIT(&error_votes.vote_list);
-    error_votes.compare = quorum_64bits_compare;
-
-    for (i = 0; i < s->num_children; i++) {
-        result = bdrv_co_flush(s->children[i]->bs);
-        if (result) {
-            quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
-                              bdrv_nb_sectors(s->children[i]->bs),
-                              s->children[i]->bs->node_name, result);
-            result_value.l = result;
-            quorum_count_vote(&error_votes, &result_value, i);
-        } else {
-            success_count++;
-        }
-    }
-
-    if (success_count >= s->threshold) {
-        result = 0;
-    } else {
-        winner = quorum_get_vote_winner(&error_votes);
-        result = winner->value.l;
-    }
-    quorum_free_vote_list(&error_votes);
-
-    return result;
-}
-
-static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
-                                               BlockDriverState *candidate)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
-                                                     candidate);
-        if (perm) {
-            return true;
-        }
-    }
-
-    return false;
-}
-
-static int quorum_valid_threshold(int threshold, int num_children, Error **errp)
-{
-
-    if (threshold < 1) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                   "vote-threshold", "value >= 1");
-        return -ERANGE;
-    }
-
-    if (threshold > num_children) {
-        error_setg(errp, "threshold may not exceed children count");
-        return -ERANGE;
-    }
-
-    return 0;
-}
-
-static QemuOptsList quorum_runtime_opts = {
-    .name = "quorum",
-    .head = QTAILQ_HEAD_INITIALIZER(quorum_runtime_opts.head),
-    .desc = {
-        {
-            .name = QUORUM_OPT_VOTE_THRESHOLD,
-            .type = QEMU_OPT_NUMBER,
-            .help = "The number of vote needed for reaching quorum",
-        },
-        {
-            .name = QUORUM_OPT_BLKVERIFY,
-            .type = QEMU_OPT_BOOL,
-            .help = "Trigger block verify mode if set",
-        },
-        {
-            .name = QUORUM_OPT_REWRITE,
-            .type = QEMU_OPT_BOOL,
-            .help = "Rewrite corrupted block on read quorum",
-        },
-        {
-            .name = QUORUM_OPT_READ_PATTERN,
-            .type = QEMU_OPT_STRING,
-            .help = "Allowed pattern: quorum, fifo. Quorum is default",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int parse_read_pattern(const char *opt)
-{
-    int i;
-
-    if (!opt) {
-        /* Set quorum as default */
-        return QUORUM_READ_PATTERN_QUORUM;
-    }
-
-    for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) {
-        if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
-            return i;
-        }
-    }
-
-    return -EINVAL;
-}
-
-static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
-                       Error **errp)
-{
-    BDRVQuorumState *s = bs->opaque;
-    Error *local_err = NULL;
-    QemuOpts *opts = NULL;
-    bool *opened;
-    int i;
-    int ret = 0;
-
-    qdict_flatten(options);
-
-    /* count how many different children are present */
-    s->num_children = qdict_array_entries(options, "children.");
-    if (s->num_children < 0) {
-        error_setg(&local_err, "Option children is not a valid array");
-        ret = -EINVAL;
-        goto exit;
-    }
-    if (s->num_children < 2) {
-        error_setg(&local_err,
-                   "Number of provided children must be greater than 1");
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
-    /* and validate it against s->num_children */
-    ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
-    if (ret < 0) {
-        error_setg(&local_err, "Please set read-pattern as fifo or quorum");
-        goto exit;
-    }
-    s->read_pattern = ret;
-
-    if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
-        /* is the driver in blkverify mode */
-        if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
-            s->num_children == 2 && s->threshold == 2) {
-            s->is_blkverify = true;
-        } else if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false)) {
-            fprintf(stderr, "blkverify mode is set by setting blkverify=on "
-                    "and using two files with vote_threshold=2\n");
-        }
-
-        s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE,
-                                                 false);
-        if (s->rewrite_corrupted && s->is_blkverify) {
-            error_setg(&local_err,
-                       "rewrite-corrupted=on cannot be used with blkverify=on");
-            ret = -EINVAL;
-            goto exit;
-        }
-    }
-
-    /* allocate the children array */
-    s->children = g_new0(BdrvChild *, s->num_children);
-    opened = g_new0(bool, s->num_children);
-
-    for (i = 0; i < s->num_children; i++) {
-        char indexstr[32];
-        ret = snprintf(indexstr, 32, "children.%d", i);
-        assert(ret < 32);
-
-        s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
-                                         &child_format, false, &local_err);
-        if (local_err) {
-            ret = -EINVAL;
-            goto close_exit;
-        }
-
-        opened[i] = true;
-    }
-
-    g_free(opened);
-    goto exit;
-
-close_exit:
-    /* cleanup on error */
-    for (i = 0; i < s->num_children; i++) {
-        if (!opened[i]) {
-            continue;
-        }
-        bdrv_unref_child(bs, s->children[i]);
-    }
-    g_free(s->children);
-    g_free(opened);
-exit:
-    qemu_opts_del(opts);
-    /* propagate error */
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-    return ret;
-}
-
-static void quorum_close(BlockDriverState *bs)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_unref_child(bs, s->children[i]);
-    }
-
-    g_free(s->children);
-}
-
-static void quorum_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_detach_aio_context(s->children[i]->bs);
-    }
-}
-
-static void quorum_attach_aio_context(BlockDriverState *bs,
-                                      AioContext *new_context)
-{
-    BDRVQuorumState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_attach_aio_context(s->children[i]->bs, new_context);
-    }
-}
-
-static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
-{
-    BDRVQuorumState *s = bs->opaque;
-    QDict *opts;
-    QList *children;
-    int i;
-
-    for (i = 0; i < s->num_children; i++) {
-        bdrv_refresh_filename(s->children[i]->bs);
-        if (!s->children[i]->bs->full_open_options) {
-            return;
-        }
-    }
-
-    children = qlist_new();
-    for (i = 0; i < s->num_children; i++) {
-        QINCREF(s->children[i]->bs->full_open_options);
-        qlist_append_obj(children,
-                         QOBJECT(s->children[i]->bs->full_open_options));
-    }
-
-    opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("quorum")));
-    qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
-                  QOBJECT(qint_from_int(s->threshold)));
-    qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
-                  QOBJECT(qbool_from_bool(s->is_blkverify)));
-    qdict_put_obj(opts, QUORUM_OPT_REWRITE,
-                  QOBJECT(qbool_from_bool(s->rewrite_corrupted)));
-    qdict_put_obj(opts, "children", QOBJECT(children));
-
-    bs->full_open_options = opts;
-}
-
-static BlockDriver bdrv_quorum = {
-    .format_name                        = "quorum",
-    .protocol_name                      = "quorum",
-
-    .instance_size                      = sizeof(BDRVQuorumState),
-
-    .bdrv_file_open                     = quorum_open,
-    .bdrv_close                         = quorum_close,
-    .bdrv_refresh_filename              = quorum_refresh_filename,
-
-    .bdrv_co_flush_to_disk              = quorum_co_flush,
-
-    .bdrv_getlength                     = quorum_getlength,
-
-    .bdrv_aio_readv                     = quorum_aio_readv,
-    .bdrv_aio_writev                    = quorum_aio_writev,
-    .bdrv_invalidate_cache              = quorum_invalidate_cache,
-
-    .bdrv_detach_aio_context            = quorum_detach_aio_context,
-    .bdrv_attach_aio_context            = quorum_attach_aio_context,
-
-    .is_filter                          = true,
-    .bdrv_recurse_is_first_non_filter   = quorum_recurse_is_first_non_filter,
-};
-
-static void bdrv_quorum_init(void)
-{
-    if (!qcrypto_hash_supports(QCRYPTO_HASH_ALG_SHA256)) {
-        /* SHA256 hash support is required for quorum device */
-        return;
-    }
-    bdrv_register(&bdrv_quorum);
-}
-
-block_init(bdrv_quorum_init);
diff --git a/qemu/block/raw-aio.h b/qemu/block/raw-aio.h
deleted file mode 100644
index 811e37501..000000000
--- a/qemu/block/raw-aio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Declarations for AIO in the raw protocol
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- *  Anthony Liguori   <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-#ifndef QEMU_RAW_AIO_H
-#define QEMU_RAW_AIO_H
-
-#include "qemu/iov.h"
-
-/* AIO request types */
-#define QEMU_AIO_READ         0x0001
-#define QEMU_AIO_WRITE        0x0002
-#define QEMU_AIO_IOCTL        0x0004
-#define QEMU_AIO_FLUSH        0x0008
-#define QEMU_AIO_DISCARD      0x0010
-#define QEMU_AIO_WRITE_ZEROES 0x0020
-#define QEMU_AIO_TYPE_MASK \
-        (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
-         QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
-
-/* AIO flags */
-#define QEMU_AIO_MISALIGNED   0x1000
-#define QEMU_AIO_BLKDEV       0x2000
-
-
-/* linux-aio.c - Linux native implementation */
-#ifdef CONFIG_LINUX_AIO
-void *laio_init(void);
-void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
-void laio_detach_aio_context(void *s, AioContext *old_context);
-void laio_attach_aio_context(void *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
-#endif
-
-#ifdef _WIN32
-typedef struct QEMUWin32AIOState QEMUWin32AIOState;
-QEMUWin32AIOState *win32_aio_init(void);
-void win32_aio_cleanup(QEMUWin32AIOState *aio);
-int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
-        QEMUWin32AIOState *aio, HANDLE hfile,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type);
-void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
-                                  AioContext *old_context);
-void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
-                                  AioContext *new_context);
-#endif
-
-#endif /* QEMU_RAW_AIO_H */
diff --git a/qemu/block/raw-posix.c b/qemu/block/raw-posix.c
deleted file mode 100644
index 906d5c941..000000000
--- a/qemu/block/raw-posix.c
+++ /dev/null
@@ -1,2701 +0,0 @@
-/*
- * Block driver for RAW files (posix)
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/cutils.h"
-#include "qemu/error-report.h"
-#include "qemu/timer.h"
-#include "qemu/log.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include "trace.h"
-#include "block/thread-pool.h"
-#include "qemu/iov.h"
-#include "raw-aio.h"
-#include "qapi/util.h"
-#include "qapi/qmp/qstring.h"
-
-#if defined(__APPLE__) && (__MACH__)
-#include <paths.h>
-#include <sys/param.h>
-#include <IOKit/IOKitLib.h>
-#include <IOKit/IOBSD.h>
-#include <IOKit/storage/IOMediaBSDClient.h>
-#include <IOKit/storage/IOMedia.h>
-#include <IOKit/storage/IOCDMedia.h>
-//#include <IOKit/storage/IOCDTypes.h>
-#include <IOKit/storage/IODVDMedia.h>
-#include <CoreFoundation/CoreFoundation.h>
-#endif
-
-#ifdef __sun__
-#define _POSIX_PTHREAD_SEMANTICS 1
-#include <sys/dkio.h>
-#endif
-#ifdef __linux__
-#include <sys/ioctl.h>
-#include <sys/param.h>
-#include <linux/cdrom.h>
-#include <linux/fd.h>
-#include <linux/fs.h>
-#include <linux/hdreg.h>
-#include <scsi/sg.h>
-#ifdef __s390__
-#include <asm/dasd.h>
-#endif
-#ifndef FS_NOCOW_FL
-#define FS_NOCOW_FL                     0x00800000 /* Do not cow file */
-#endif
-#endif
-#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE)
-#include <linux/falloc.h>
-#endif
-#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
-#include <sys/disk.h>
-#include <sys/cdio.h>
-#endif
-
-#ifdef __OpenBSD__
-#include <sys/ioctl.h>
-#include <sys/disklabel.h>
-#include <sys/dkio.h>
-#endif
-
-#ifdef __NetBSD__
-#include <sys/ioctl.h>
-#include <sys/disklabel.h>
-#include <sys/dkio.h>
-#include <sys/disk.h>
-#endif
-
-#ifdef __DragonFly__
-#include <sys/ioctl.h>
-#include <sys/diskslice.h>
-#endif
-
-#ifdef CONFIG_XFS
-#include <xfs/xfs.h>
-#endif
-
-//#define DEBUG_BLOCK
-
-#ifdef DEBUG_BLOCK
-# define DEBUG_BLOCK_PRINT 1
-#else
-# define DEBUG_BLOCK_PRINT 0
-#endif
-#define DPRINTF(fmt, ...) \
-do { \
-    if (DEBUG_BLOCK_PRINT) { \
-        printf(fmt, ## __VA_ARGS__); \
-    } \
-} while (0)
-
-/* OS X does not have O_DSYNC */
-#ifndef O_DSYNC
-#ifdef O_SYNC
-#define O_DSYNC O_SYNC
-#elif defined(O_FSYNC)
-#define O_DSYNC O_FSYNC
-#endif
-#endif
-
-/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
-#ifndef O_DIRECT
-#define O_DIRECT O_DSYNC
-#endif
-
-#define FTYPE_FILE   0
-#define FTYPE_CD     1
-
-#define MAX_BLOCKSIZE	4096
-
-typedef struct BDRVRawState {
-    int fd;
-    int type;
-    int open_flags;
-    size_t buf_align;
-
-#ifdef CONFIG_LINUX_AIO
-    int use_aio;
-    void *aio_ctx;
-#endif
-#ifdef CONFIG_XFS
-    bool is_xfs:1;
-#endif
-    bool has_discard:1;
-    bool has_write_zeroes:1;
-    bool discard_zeroes:1;
-    bool has_fallocate;
-    bool needs_alignment;
-} BDRVRawState;
-
-typedef struct BDRVRawReopenState {
-    int fd;
-    int open_flags;
-#ifdef CONFIG_LINUX_AIO
-    int use_aio;
-#endif
-} BDRVRawReopenState;
-
-static int fd_open(BlockDriverState *bs);
-static int64_t raw_getlength(BlockDriverState *bs);
-
-typedef struct RawPosixAIOData {
-    BlockDriverState *bs;
-    int aio_fildes;
-    union {
-        struct iovec *aio_iov;
-        void *aio_ioctl_buf;
-    };
-    int aio_niov;
-    uint64_t aio_nbytes;
-#define aio_ioctl_cmd   aio_nbytes /* for QEMU_AIO_IOCTL */
-    off_t aio_offset;
-    int aio_type;
-} RawPosixAIOData;
-
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-static int cdrom_reopen(BlockDriverState *bs);
-#endif
-
-#if defined(__NetBSD__)
-static int raw_normalize_devicepath(const char **filename)
-{
-    static char namebuf[PATH_MAX];
-    const char *dp, *fname;
-    struct stat sb;
-
-    fname = *filename;
-    dp = strrchr(fname, '/');
-    if (lstat(fname, &sb) < 0) {
-        fprintf(stderr, "%s: stat failed: %s\n",
-            fname, strerror(errno));
-        return -errno;
-    }
-
-    if (!S_ISBLK(sb.st_mode)) {
-        return 0;
-    }
-
-    if (dp == NULL) {
-        snprintf(namebuf, PATH_MAX, "r%s", fname);
-    } else {
-        snprintf(namebuf, PATH_MAX, "%.*s/r%s",
-            (int)(dp - fname), fname, dp + 1);
-    }
-    fprintf(stderr, "%s is a block device", fname);
-    *filename = namebuf;
-    fprintf(stderr, ", using %s\n", *filename);
-
-    return 0;
-}
-#else
-static int raw_normalize_devicepath(const char **filename)
-{
-    return 0;
-}
-#endif
-
-/*
- * Get logical block size via ioctl. On success store it in @sector_size_p.
- */
-static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
-{
-    unsigned int sector_size;
-    bool success = false;
-
-    errno = ENOTSUP;
-
-    /* Try a few ioctls to get the right size */
-#ifdef BLKSSZGET
-    if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
-#endif
-#ifdef DKIOCGETBLOCKSIZE
-    if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
-#endif
-#ifdef DIOCGSECTORSIZE
-    if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
-        *sector_size_p = sector_size;
-        success = true;
-    }
-#endif
-
-    return success ? 0 : -errno;
-}
-
-/**
- * Get physical block size of @fd.
- * On success, store it in @blk_size and return 0.
- * On failure, return -errno.
- */
-static int probe_physical_blocksize(int fd, unsigned int *blk_size)
-{
-#ifdef BLKPBSZGET
-    if (ioctl(fd, BLKPBSZGET, blk_size) < 0) {
-        return -errno;
-    }
-    return 0;
-#else
-    return -ENOTSUP;
-#endif
-}
-
-/* Check if read is allowed with given memory buffer and length.
- *
- * This function is used to check O_DIRECT memory buffer and request alignment.
- */
-static bool raw_is_io_aligned(int fd, void *buf, size_t len)
-{
-    ssize_t ret = pread(fd, buf, len, 0);
-
-    if (ret >= 0) {
-        return true;
-    }
-
-#ifdef __linux__
-    /* The Linux kernel returns EINVAL for misaligned O_DIRECT reads.  Ignore
-     * other errors (e.g. real I/O error), which could happen on a failed
-     * drive, since we only care about probing alignment.
-     */
-    if (errno != EINVAL) {
-        return true;
-    }
-#endif
-
-    return false;
-}
-
-static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    char *buf;
-    size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
-
-    /* For SCSI generic devices the alignment is not really used.
-       With buffered I/O, we don't have any restrictions. */
-    if (bdrv_is_sg(bs) || !s->needs_alignment) {
-        bs->request_alignment = 1;
-        s->buf_align = 1;
-        return;
-    }
-
-    bs->request_alignment = 0;
-    s->buf_align = 0;
-    /* Let's try to use the logical blocksize for the alignment. */
-    if (probe_logical_blocksize(fd, &bs->request_alignment) < 0) {
-        bs->request_alignment = 0;
-    }
-#ifdef CONFIG_XFS
-    if (s->is_xfs) {
-        struct dioattr da;
-        if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
-            bs->request_alignment = da.d_miniosz;
-            /* The kernel returns wrong information for d_mem */
-            /* s->buf_align = da.d_mem; */
-        }
-    }
-#endif
-
-    /* If we could not get the sizes so far, we can only guess them */
-    if (!s->buf_align) {
-        size_t align;
-        buf = qemu_memalign(max_align, 2 * max_align);
-        for (align = 512; align <= max_align; align <<= 1) {
-            if (raw_is_io_aligned(fd, buf + align, max_align)) {
-                s->buf_align = align;
-                break;
-            }
-        }
-        qemu_vfree(buf);
-    }
-
-    if (!bs->request_alignment) {
-        size_t align;
-        buf = qemu_memalign(s->buf_align, max_align);
-        for (align = 512; align <= max_align; align <<= 1) {
-            if (raw_is_io_aligned(fd, buf, align)) {
-                bs->request_alignment = align;
-                break;
-            }
-        }
-        qemu_vfree(buf);
-    }
-
-    if (!s->buf_align || !bs->request_alignment) {
-        error_setg(errp, "Could not find working O_DIRECT alignment. "
-                         "Try cache.direct=off.");
-    }
-}
-
-static void raw_parse_flags(int bdrv_flags, int *open_flags)
-{
-    assert(open_flags != NULL);
-
-    *open_flags |= O_BINARY;
-    *open_flags &= ~O_ACCMODE;
-    if (bdrv_flags & BDRV_O_RDWR) {
-        *open_flags |= O_RDWR;
-    } else {
-        *open_flags |= O_RDONLY;
-    }
-
-    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
-     * and O_DIRECT for no caching. */
-    if ((bdrv_flags & BDRV_O_NOCACHE)) {
-        *open_flags |= O_DIRECT;
-    }
-}
-
-static void raw_detach_aio_context(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-
-    if (s->use_aio) {
-        laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
-    }
-#endif
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-
-    if (s->use_aio) {
-        laio_attach_aio_context(s->aio_ctx, new_context);
-    }
-#endif
-}
-
-#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
-{
-    int ret = -1;
-    assert(aio_ctx != NULL);
-    assert(use_aio != NULL);
-    /*
-     * Currently Linux do AIO only for files opened with O_DIRECT
-     * specified so check NOCACHE flag too
-     */
-    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
-                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
-        /* if non-NULL, laio_init() has already been run */
-        if (*aio_ctx == NULL) {
-            *aio_ctx = laio_init();
-            if (!*aio_ctx) {
-                goto error;
-            }
-        }
-        *use_aio = 1;
-    } else {
-        *use_aio = 0;
-    }
-
-    ret = 0;
-
-error:
-    return ret;
-}
-#endif
-
-static void raw_parse_filename(const char *filename, QDict *options,
-                               Error **errp)
-{
-    /* The filename does not have to be prefixed by the protocol name, since
-     * "file" is the default protocol; therefore, the return value of this
-     * function call can be ignored. */
-    strstart(filename, "file:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static QemuOptsList raw_runtime_opts = {
-    .name = "raw",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "File name of the image",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int raw_open_common(BlockDriverState *bs, QDict *options,
-                           int bdrv_flags, int open_flags, Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *filename = NULL;
-    int fd, ret;
-    struct stat st;
-
-    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-
-    ret = raw_normalize_devicepath(&filename);
-    if (ret != 0) {
-        error_setg_errno(errp, -ret, "Could not normalize device path");
-        goto fail;
-    }
-
-    s->open_flags = open_flags;
-    raw_parse_flags(bdrv_flags, &s->open_flags);
-
-    s->fd = -1;
-    fd = qemu_open(filename, s->open_flags, 0644);
-    if (fd < 0) {
-        ret = -errno;
-        if (ret == -EROFS) {
-            ret = -EACCES;
-        }
-        goto fail;
-    }
-    s->fd = fd;
-
-#ifdef CONFIG_LINUX_AIO
-    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
-        qemu_close(fd);
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Could not set AIO state");
-        goto fail;
-    }
-    if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
-        error_setg(errp, "aio=native was specified, but it requires "
-                         "cache.direct=on, which was not specified.");
-        ret = -EINVAL;
-        goto fail;
-    }
-#else
-    if (bdrv_flags & BDRV_O_NATIVE_AIO) {
-        error_setg(errp, "aio=native was specified, but is not supported "
-                         "in this build.");
-        ret = -EINVAL;
-        goto fail;
-    }
-#endif /* !defined(CONFIG_LINUX_AIO) */
-
-    s->has_discard = true;
-    s->has_write_zeroes = true;
-    if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
-        s->needs_alignment = true;
-    }
-
-    if (fstat(s->fd, &st) < 0) {
-        ret = -errno;
-        error_setg_errno(errp, errno, "Could not stat file");
-        goto fail;
-    }
-    if (S_ISREG(st.st_mode)) {
-        s->discard_zeroes = true;
-        s->has_fallocate = true;
-    }
-    if (S_ISBLK(st.st_mode)) {
-#ifdef BLKDISCARDZEROES
-        unsigned int arg;
-        if (ioctl(s->fd, BLKDISCARDZEROES, &arg) == 0 && arg) {
-            s->discard_zeroes = true;
-        }
-#endif
-#ifdef __linux__
-        /* On Linux 3.10, BLKDISCARD leaves stale data in the page cache.  Do
-         * not rely on the contents of discarded blocks unless using O_DIRECT.
-         * Same for BLKZEROOUT.
-         */
-        if (!(bs->open_flags & BDRV_O_NOCACHE)) {
-            s->discard_zeroes = false;
-            s->has_write_zeroes = false;
-        }
-#endif
-    }
-#ifdef __FreeBSD__
-    if (S_ISCHR(st.st_mode)) {
-        /*
-         * The file is a char device (disk), which on FreeBSD isn't behind
-         * a pager, so force all requests to be aligned. This is needed
-         * so QEMU makes sure all IO operations on the device are aligned
-         * to sector size, or else FreeBSD will reject them with EINVAL.
-         */
-        s->needs_alignment = true;
-    }
-#endif
-
-#ifdef CONFIG_XFS
-    if (platform_test_xfs_fd(s->fd)) {
-        s->is_xfs = true;
-    }
-#endif
-
-    raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
-    ret = 0;
-fail:
-    if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
-        unlink(filename);
-    }
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static int raw_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    s->type = FTYPE_FILE;
-    ret = raw_open_common(bs, options, flags, 0, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-    return ret;
-}
-
-static int raw_reopen_prepare(BDRVReopenState *state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    BDRVRawState *s;
-    BDRVRawReopenState *raw_s;
-    int ret = 0;
-    Error *local_err = NULL;
-
-    assert(state != NULL);
-    assert(state->bs != NULL);
-
-    s = state->bs->opaque;
-
-    state->opaque = g_new0(BDRVRawReopenState, 1);
-    raw_s = state->opaque;
-
-#ifdef CONFIG_LINUX_AIO
-    raw_s->use_aio = s->use_aio;
-
-    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
-     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
-     * won't override aio_ctx if aio_ctx is non-NULL */
-    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
-        error_setg(errp, "Could not set AIO state");
-        return -1;
-    }
-#endif
-
-    if (s->type == FTYPE_CD) {
-        raw_s->open_flags |= O_NONBLOCK;
-    }
-
-    raw_parse_flags(state->flags, &raw_s->open_flags);
-
-    raw_s->fd = -1;
-
-    int fcntl_flags = O_APPEND | O_NONBLOCK;
-#ifdef O_NOATIME
-    fcntl_flags |= O_NOATIME;
-#endif
-
-#ifdef O_ASYNC
-    /* Not all operating systems have O_ASYNC, and those that don't
-     * will not let us track the state into raw_s->open_flags (typically
-     * you achieve the same effect with an ioctl, for example I_SETSIG
-     * on Solaris). But we do not use O_ASYNC, so that's fine.
-     */
-    assert((s->open_flags & O_ASYNC) == 0);
-#endif
-
-    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
-        /* dup the original fd */
-        /* TODO: use qemu fcntl wrapper */
-#ifdef F_DUPFD_CLOEXEC
-        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
-#else
-        raw_s->fd = dup(s->fd);
-        if (raw_s->fd != -1) {
-            qemu_set_cloexec(raw_s->fd);
-        }
-#endif
-        if (raw_s->fd >= 0) {
-            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
-            if (ret) {
-                qemu_close(raw_s->fd);
-                raw_s->fd = -1;
-            }
-        }
-    }
-
-    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
-    if (raw_s->fd == -1) {
-        const char *normalized_filename = state->bs->filename;
-        ret = raw_normalize_devicepath(&normalized_filename);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not normalize device path");
-        } else {
-            assert(!(raw_s->open_flags & O_CREAT));
-            raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags);
-            if (raw_s->fd == -1) {
-                error_setg_errno(errp, errno, "Could not reopen file");
-                ret = -1;
-            }
-        }
-    }
-
-    /* Fail already reopen_prepare() if we can't get a working O_DIRECT
-     * alignment with the new fd. */
-    if (raw_s->fd != -1) {
-        raw_probe_alignment(state->bs, raw_s->fd, &local_err);
-        if (local_err) {
-            qemu_close(raw_s->fd);
-            raw_s->fd = -1;
-            error_propagate(errp, local_err);
-            ret = -EINVAL;
-        }
-    }
-
-    return ret;
-}
-
-static void raw_reopen_commit(BDRVReopenState *state)
-{
-    BDRVRawReopenState *raw_s = state->opaque;
-    BDRVRawState *s = state->bs->opaque;
-
-    s->open_flags = raw_s->open_flags;
-
-    qemu_close(s->fd);
-    s->fd = raw_s->fd;
-#ifdef CONFIG_LINUX_AIO
-    s->use_aio = raw_s->use_aio;
-#endif
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-}
-
-
-static void raw_reopen_abort(BDRVReopenState *state)
-{
-    BDRVRawReopenState *raw_s = state->opaque;
-
-     /* nothing to do if NULL, we didn't get far enough */
-    if (raw_s == NULL) {
-        return;
-    }
-
-    if (raw_s->fd >= 0) {
-        qemu_close(raw_s->fd);
-        raw_s->fd = -1;
-    }
-    g_free(state->opaque);
-    state->opaque = NULL;
-}
-
-static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-
-    raw_probe_alignment(bs, s->fd, errp);
-    bs->bl.min_mem_alignment = s->buf_align;
-    bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize());
-}
-
-static int check_for_dasd(int fd)
-{
-#ifdef BIODASDINFO2
-    struct dasd_information2_t info = {0};
-
-    return ioctl(fd, BIODASDINFO2, &info);
-#else
-    return -1;
-#endif
-}
-
-/**
- * Try to get @bs's logical and physical block size.
- * On success, store them in @bsz and return zero.
- * On failure, return negative errno.
- */
-static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    /* If DASD, get blocksizes */
-    if (check_for_dasd(s->fd) < 0) {
-        return -ENOTSUP;
-    }
-    ret = probe_logical_blocksize(s->fd, &bsz->log);
-    if (ret < 0) {
-        return ret;
-    }
-    return probe_physical_blocksize(s->fd, &bsz->phys);
-}
-
-/**
- * Try to get @bs's geometry: cyls, heads, sectors.
- * On success, store them in @geo and return 0.
- * On failure return -errno.
- * (Allows block driver to assign default geometry values that guest sees)
- */
-#ifdef __linux__
-static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
-{
-    BDRVRawState *s = bs->opaque;
-    struct hd_geometry ioctl_geo = {0};
-
-    /* If DASD, get its geometry */
-    if (check_for_dasd(s->fd) < 0) {
-        return -ENOTSUP;
-    }
-    if (ioctl(s->fd, HDIO_GETGEO, &ioctl_geo) < 0) {
-        return -errno;
-    }
-    /* HDIO_GETGEO may return success even though geo contains zeros
-       (e.g. certain multipath setups) */
-    if (!ioctl_geo.heads || !ioctl_geo.sectors || !ioctl_geo.cylinders) {
-        return -ENOTSUP;
-    }
-    /* Do not return a geometry for partition */
-    if (ioctl_geo.start != 0) {
-        return -ENOTSUP;
-    }
-    geo->heads = ioctl_geo.heads;
-    geo->sectors = ioctl_geo.sectors;
-    geo->cylinders = ioctl_geo.cylinders;
-
-    return 0;
-}
-#else /* __linux__ */
-static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
-{
-    return -ENOTSUP;
-}
-#endif
-
-static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
-{
-    int ret;
-
-    ret = ioctl(aiocb->aio_fildes, aiocb->aio_ioctl_cmd, aiocb->aio_ioctl_buf);
-    if (ret == -1) {
-        return -errno;
-    }
-
-    return 0;
-}
-
-static ssize_t handle_aiocb_flush(RawPosixAIOData *aiocb)
-{
-    int ret;
-
-    ret = qemu_fdatasync(aiocb->aio_fildes);
-    if (ret == -1) {
-        return -errno;
-    }
-    return 0;
-}
-
-#ifdef CONFIG_PREADV
-
-static bool preadv_present = true;
-
-static ssize_t
-qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return preadv(fd, iov, nr_iov, offset);
-}
-
-static ssize_t
-qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return pwritev(fd, iov, nr_iov, offset);
-}
-
-#else
-
-static bool preadv_present = false;
-
-static ssize_t
-qemu_preadv(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return -ENOSYS;
-}
-
-static ssize_t
-qemu_pwritev(int fd, const struct iovec *iov, int nr_iov, off_t offset)
-{
-    return -ENOSYS;
-}
-
-#endif
-
-static ssize_t handle_aiocb_rw_vector(RawPosixAIOData *aiocb)
-{
-    ssize_t len;
-
-    do {
-        if (aiocb->aio_type & QEMU_AIO_WRITE)
-            len = qemu_pwritev(aiocb->aio_fildes,
-                               aiocb->aio_iov,
-                               aiocb->aio_niov,
-                               aiocb->aio_offset);
-         else
-            len = qemu_preadv(aiocb->aio_fildes,
-                              aiocb->aio_iov,
-                              aiocb->aio_niov,
-                              aiocb->aio_offset);
-    } while (len == -1 && errno == EINTR);
-
-    if (len == -1) {
-        return -errno;
-    }
-    return len;
-}
-
-/*
- * Read/writes the data to/from a given linear buffer.
- *
- * Returns the number of bytes handles or -errno in case of an error. Short
- * reads are only returned if the end of the file is reached.
- */
-static ssize_t handle_aiocb_rw_linear(RawPosixAIOData *aiocb, char *buf)
-{
-    ssize_t offset = 0;
-    ssize_t len;
-
-    while (offset < aiocb->aio_nbytes) {
-        if (aiocb->aio_type & QEMU_AIO_WRITE) {
-            len = pwrite(aiocb->aio_fildes,
-                         (const char *)buf + offset,
-                         aiocb->aio_nbytes - offset,
-                         aiocb->aio_offset + offset);
-        } else {
-            len = pread(aiocb->aio_fildes,
-                        buf + offset,
-                        aiocb->aio_nbytes - offset,
-                        aiocb->aio_offset + offset);
-        }
-        if (len == -1 && errno == EINTR) {
-            continue;
-        } else if (len == -1 && errno == EINVAL &&
-                   (aiocb->bs->open_flags & BDRV_O_NOCACHE) &&
-                   !(aiocb->aio_type & QEMU_AIO_WRITE) &&
-                   offset > 0) {
-            /* O_DIRECT pread() may fail with EINVAL when offset is unaligned
-             * after a short read.  Assume that O_DIRECT short reads only occur
-             * at EOF.  Therefore this is a short read, not an I/O error.
-             */
-            break;
-        } else if (len == -1) {
-            offset = -errno;
-            break;
-        } else if (len == 0) {
-            break;
-        }
-        offset += len;
-    }
-
-    return offset;
-}
-
-static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
-{
-    ssize_t nbytes;
-    char *buf;
-
-    if (!(aiocb->aio_type & QEMU_AIO_MISALIGNED)) {
-        /*
-         * If there is just a single buffer, and it is properly aligned
-         * we can just use plain pread/pwrite without any problems.
-         */
-        if (aiocb->aio_niov == 1) {
-             return handle_aiocb_rw_linear(aiocb, aiocb->aio_iov->iov_base);
-        }
-        /*
-         * We have more than one iovec, and all are properly aligned.
-         *
-         * Try preadv/pwritev first and fall back to linearizing the
-         * buffer if it's not supported.
-         */
-        if (preadv_present) {
-            nbytes = handle_aiocb_rw_vector(aiocb);
-            if (nbytes == aiocb->aio_nbytes ||
-                (nbytes < 0 && nbytes != -ENOSYS)) {
-                return nbytes;
-            }
-            preadv_present = false;
-        }
-
-        /*
-         * XXX(hch): short read/write.  no easy way to handle the reminder
-         * using these interfaces.  For now retry using plain
-         * pread/pwrite?
-         */
-    }
-
-    /*
-     * Ok, we have to do it the hard way, copy all segments into
-     * a single aligned buffer.
-     */
-    buf = qemu_try_blockalign(aiocb->bs, aiocb->aio_nbytes);
-    if (buf == NULL) {
-        return -ENOMEM;
-    }
-
-    if (aiocb->aio_type & QEMU_AIO_WRITE) {
-        char *p = buf;
-        int i;
-
-        for (i = 0; i < aiocb->aio_niov; ++i) {
-            memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
-            p += aiocb->aio_iov[i].iov_len;
-        }
-        assert(p - buf == aiocb->aio_nbytes);
-    }
-
-    nbytes = handle_aiocb_rw_linear(aiocb, buf);
-    if (!(aiocb->aio_type & QEMU_AIO_WRITE)) {
-        char *p = buf;
-        size_t count = aiocb->aio_nbytes, copy;
-        int i;
-
-        for (i = 0; i < aiocb->aio_niov && count; ++i) {
-            copy = count;
-            if (copy > aiocb->aio_iov[i].iov_len) {
-                copy = aiocb->aio_iov[i].iov_len;
-            }
-            memcpy(aiocb->aio_iov[i].iov_base, p, copy);
-            assert(count >= copy);
-            p     += copy;
-            count -= copy;
-        }
-        assert(count == 0);
-    }
-    qemu_vfree(buf);
-
-    return nbytes;
-}
-
-#ifdef CONFIG_XFS
-static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
-{
-    struct xfs_flock64 fl;
-    int err;
-
-    memset(&fl, 0, sizeof(fl));
-    fl.l_whence = SEEK_SET;
-    fl.l_start = offset;
-    fl.l_len = bytes;
-
-    if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
-        err = errno;
-        DPRINTF("cannot write zero range (%s)\n", strerror(errno));
-        return -err;
-    }
-
-    return 0;
-}
-
-static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
-{
-    struct xfs_flock64 fl;
-    int err;
-
-    memset(&fl, 0, sizeof(fl));
-    fl.l_whence = SEEK_SET;
-    fl.l_start = offset;
-    fl.l_len = bytes;
-
-    if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
-        err = errno;
-        DPRINTF("cannot punch hole (%s)\n", strerror(errno));
-        return -err;
-    }
-
-    return 0;
-}
-#endif
-
-static int translate_err(int err)
-{
-    if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
-        err == -ENOTTY) {
-        err = -ENOTSUP;
-    }
-    return err;
-}
-
-#ifdef CONFIG_FALLOCATE
-static int do_fallocate(int fd, int mode, off_t offset, off_t len)
-{
-    do {
-        if (fallocate(fd, mode, offset, len) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-    return translate_err(-errno);
-}
-#endif
-
-static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
-{
-    int ret = -ENOTSUP;
-    BDRVRawState *s = aiocb->bs->opaque;
-
-    if (!s->has_write_zeroes) {
-        return -ENOTSUP;
-    }
-
-#ifdef BLKZEROOUT
-    do {
-        uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
-        if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
-            return 0;
-        }
-    } while (errno == EINTR);
-
-    ret = translate_err(-errno);
-#endif
-
-    if (ret == -ENOTSUP) {
-        s->has_write_zeroes = false;
-    }
-    return ret;
-}
-
-static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
-{
-#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
-    BDRVRawState *s = aiocb->bs->opaque;
-#endif
-
-    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
-        return handle_aiocb_write_zeroes_block(aiocb);
-    }
-
-#ifdef CONFIG_XFS
-    if (s->is_xfs) {
-        return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
-    }
-#endif
-
-#ifdef CONFIG_FALLOCATE_ZERO_RANGE
-    if (s->has_write_zeroes) {
-        int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
-                               aiocb->aio_offset, aiocb->aio_nbytes);
-        if (ret == 0 || ret != -ENOTSUP) {
-            return ret;
-        }
-        s->has_write_zeroes = false;
-    }
-#endif
-
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-    if (s->has_discard && s->has_fallocate) {
-        int ret = do_fallocate(s->fd,
-                               FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                               aiocb->aio_offset, aiocb->aio_nbytes);
-        if (ret == 0) {
-            ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
-            if (ret == 0 || ret != -ENOTSUP) {
-                return ret;
-            }
-            s->has_fallocate = false;
-        } else if (ret != -ENOTSUP) {
-            return ret;
-        } else {
-            s->has_discard = false;
-        }
-    }
-#endif
-
-#ifdef CONFIG_FALLOCATE
-    if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) {
-        int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
-        if (ret == 0 || ret != -ENOTSUP) {
-            return ret;
-        }
-        s->has_fallocate = false;
-    }
-#endif
-
-    return -ENOTSUP;
-}
-
-static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
-{
-    int ret = -EOPNOTSUPP;
-    BDRVRawState *s = aiocb->bs->opaque;
-
-    if (!s->has_discard) {
-        return -ENOTSUP;
-    }
-
-    if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
-#ifdef BLKDISCARD
-        do {
-            uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
-            if (ioctl(aiocb->aio_fildes, BLKDISCARD, range) == 0) {
-                return 0;
-            }
-        } while (errno == EINTR);
-
-        ret = -errno;
-#endif
-    } else {
-#ifdef CONFIG_XFS
-        if (s->is_xfs) {
-            return xfs_discard(s, aiocb->aio_offset, aiocb->aio_nbytes);
-        }
-#endif
-
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
-        ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
-                           aiocb->aio_offset, aiocb->aio_nbytes);
-#endif
-    }
-
-    ret = translate_err(ret);
-    if (ret == -ENOTSUP) {
-        s->has_discard = false;
-    }
-    return ret;
-}
-
-static int aio_worker(void *arg)
-{
-    RawPosixAIOData *aiocb = arg;
-    ssize_t ret = 0;
-
-    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
-    case QEMU_AIO_READ:
-        ret = handle_aiocb_rw(aiocb);
-        if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
-                      0, aiocb->aio_nbytes - ret);
-
-            ret = aiocb->aio_nbytes;
-        }
-        if (ret == aiocb->aio_nbytes) {
-            ret = 0;
-        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_WRITE:
-        ret = handle_aiocb_rw(aiocb);
-        if (ret == aiocb->aio_nbytes) {
-            ret = 0;
-        } else if (ret >= 0 && ret < aiocb->aio_nbytes) {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_FLUSH:
-        ret = handle_aiocb_flush(aiocb);
-        break;
-    case QEMU_AIO_IOCTL:
-        ret = handle_aiocb_ioctl(aiocb);
-        break;
-    case QEMU_AIO_DISCARD:
-        ret = handle_aiocb_discard(aiocb);
-        break;
-    case QEMU_AIO_WRITE_ZEROES:
-        ret = handle_aiocb_write_zeroes(aiocb);
-        break;
-    default:
-        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
-        ret = -EINVAL;
-        break;
-    }
-
-    g_free(aiocb);
-    return ret;
-}
-
-static int paio_submit_co(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        int type)
-{
-    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
-    ThreadPool *pool;
-
-    acb->bs = bs;
-    acb->aio_type = type;
-    acb->aio_fildes = fd;
-
-    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
-    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
-
-    if (qiov) {
-        acb->aio_iov = qiov->iov;
-        acb->aio_niov = qiov->niov;
-        assert(qiov->size == acb->aio_nbytes);
-    }
-
-    trace_paio_submit_co(sector_num, nb_sectors, type);
-    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    return thread_pool_submit_co(pool, aio_worker, acb);
-}
-
-static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
-    ThreadPool *pool;
-
-    acb->bs = bs;
-    acb->aio_type = type;
-    acb->aio_fildes = fd;
-
-    acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
-    acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
-
-    if (qiov) {
-        acb->aio_iov = qiov->iov;
-        acb->aio_niov = qiov->niov;
-        assert(qiov->size == acb->aio_nbytes);
-    }
-
-    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
-    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
-}
-
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (fd_open(bs) < 0)
-        return NULL;
-
-    /*
-     * Check if the underlying device requires requests to be aligned,
-     * and if the request we are trying to submit is aligned or not.
-     * If this is the case tell the low-level driver that it needs
-     * to copy the buffer.
-     */
-    if (s->needs_alignment) {
-        if (!bdrv_qiov_is_aligned(bs, qiov)) {
-            type |= QEMU_AIO_MISALIGNED;
-#ifdef CONFIG_LINUX_AIO
-        } else if (s->use_aio) {
-            return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
-                               nb_sectors, cb, opaque, type);
-#endif
-        }
-    }
-
-    return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
-                       cb, opaque, type);
-}
-
-static void raw_aio_plug(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_plug(bs, s->aio_ctx);
-    }
-#endif
-}
-
-static void raw_aio_unplug(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_unplug(bs, s->aio_ctx, true);
-    }
-#endif
-}
-
-static void raw_aio_flush_io_queue(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
-    BDRVRawState *s = bs->opaque;
-    if (s->use_aio) {
-        laio_io_unplug(bs, s->aio_ctx, false);
-    }
-#endif
-}
-
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
-                          cb, opaque, QEMU_AIO_WRITE);
-}
-
-static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (fd_open(bs) < 0)
-        return NULL;
-
-    return paio_submit(bs, s->fd, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
-}
-
-static void raw_close(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    raw_detach_aio_context(bs);
-
-#ifdef CONFIG_LINUX_AIO
-    if (s->use_aio) {
-        laio_cleanup(s->aio_ctx);
-    }
-#endif
-    if (s->fd >= 0) {
-        qemu_close(s->fd);
-        s->fd = -1;
-    }
-}
-
-static int raw_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVRawState *s = bs->opaque;
-    struct stat st;
-
-    if (fstat(s->fd, &st)) {
-        return -errno;
-    }
-
-    if (S_ISREG(st.st_mode)) {
-        if (ftruncate(s->fd, offset) < 0) {
-            return -errno;
-        }
-    } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-       if (offset > raw_getlength(bs)) {
-           return -EINVAL;
-       }
-    } else {
-        return -ENOTSUP;
-    }
-
-    return 0;
-}
-
-#ifdef __OpenBSD__
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
-    struct stat st;
-
-    if (fstat(fd, &st))
-        return -errno;
-    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-        struct disklabel dl;
-
-        if (ioctl(fd, DIOCGDINFO, &dl))
-            return -errno;
-        return (uint64_t)dl.d_secsize *
-            dl.d_partitions[DISKPART(st.st_rdev)].p_size;
-    } else
-        return st.st_size;
-}
-#elif defined(__NetBSD__)
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
-    struct stat st;
-
-    if (fstat(fd, &st))
-        return -errno;
-    if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
-        struct dkwedge_info dkw;
-
-        if (ioctl(fd, DIOCGWEDGEINFO, &dkw) != -1) {
-            return dkw.dkw_size * 512;
-        } else {
-            struct disklabel dl;
-
-            if (ioctl(fd, DIOCGDINFO, &dl))
-                return -errno;
-            return (uint64_t)dl.d_secsize *
-                dl.d_partitions[DISKPART(st.st_rdev)].p_size;
-        }
-    } else
-        return st.st_size;
-}
-#elif defined(__sun__)
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    struct dk_minfo minfo;
-    int ret;
-    int64_t size;
-
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
-
-    /*
-     * Use the DKIOCGMEDIAINFO ioctl to read the size.
-     */
-    ret = ioctl(s->fd, DKIOCGMEDIAINFO, &minfo);
-    if (ret != -1) {
-        return minfo.dki_lbsize * minfo.dki_capacity;
-    }
-
-    /*
-     * There are reports that lseek on some devices fails, but
-     * irc discussion said that contingency on contingency was overkill.
-     */
-    size = lseek(s->fd, 0, SEEK_END);
-    if (size < 0) {
-        return -errno;
-    }
-    return size;
-}
-#elif defined(CONFIG_BSD)
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd = s->fd;
-    int64_t size;
-    struct stat sb;
-#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
-    int reopened = 0;
-#endif
-    int ret;
-
-    ret = fd_open(bs);
-    if (ret < 0)
-        return ret;
-
-#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
-again:
-#endif
-    if (!fstat(fd, &sb) && (S_IFCHR & sb.st_mode)) {
-#ifdef DIOCGMEDIASIZE
-	if (ioctl(fd, DIOCGMEDIASIZE, (off_t *)&size))
-#elif defined(DIOCGPART)
-        {
-                struct partinfo pi;
-                if (ioctl(fd, DIOCGPART, &pi) == 0)
-                        size = pi.media_size;
-                else
-                        size = 0;
-        }
-        if (size == 0)
-#endif
-#if defined(__APPLE__) && defined(__MACH__)
-        {
-            uint64_t sectors = 0;
-            uint32_t sector_size = 0;
-
-            if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0
-               && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) {
-                size = sectors * sector_size;
-            } else {
-                size = lseek(fd, 0LL, SEEK_END);
-                if (size < 0) {
-                    return -errno;
-                }
-            }
-        }
-#else
-        size = lseek(fd, 0LL, SEEK_END);
-        if (size < 0) {
-            return -errno;
-        }
-#endif
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-        switch(s->type) {
-        case FTYPE_CD:
-            /* XXX FreeBSD acd returns UINT_MAX sectors for an empty drive */
-            if (size == 2048LL * (unsigned)-1)
-                size = 0;
-            /* XXX no disc?  maybe we need to reopen... */
-            if (size <= 0 && !reopened && cdrom_reopen(bs) >= 0) {
-                reopened = 1;
-                goto again;
-            }
-        }
-#endif
-    } else {
-        size = lseek(fd, 0, SEEK_END);
-        if (size < 0) {
-            return -errno;
-        }
-    }
-    return size;
-}
-#else
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-    int64_t size;
-
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
-
-    size = lseek(s->fd, 0, SEEK_END);
-    if (size < 0) {
-        return -errno;
-    }
-    return size;
-}
-#endif
-
-static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
-{
-    struct stat st;
-    BDRVRawState *s = bs->opaque;
-
-    if (fstat(s->fd, &st) < 0) {
-        return -errno;
-    }
-    return (int64_t)st.st_blocks * 512;
-}
-
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int fd;
-    int result = 0;
-    int64_t total_size = 0;
-    bool nocow = false;
-    PreallocMode prealloc;
-    char *buf = NULL;
-    Error *local_err = NULL;
-
-    strstart(filename, "file:", &filename);
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
-                               PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
-                               &local_err);
-    g_free(buf);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        result = -EINVAL;
-        goto out;
-    }
-
-    fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
-                   0644);
-    if (fd < 0) {
-        result = -errno;
-        error_setg_errno(errp, -result, "Could not create file");
-        goto out;
-    }
-
-    if (nocow) {
-#ifdef __linux__
-        /* Set NOCOW flag to solve performance issue on fs like btrfs.
-         * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
-         * will be ignored since any failure of this operation should not
-         * block the left work.
-         */
-        int attr;
-        if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
-            attr |= FS_NOCOW_FL;
-            ioctl(fd, FS_IOC_SETFLAGS, &attr);
-        }
-#endif
-    }
-
-    if (ftruncate(fd, total_size) != 0) {
-        result = -errno;
-        error_setg_errno(errp, -result, "Could not resize file");
-        goto out_close;
-    }
-
-    switch (prealloc) {
-#ifdef CONFIG_POSIX_FALLOCATE
-    case PREALLOC_MODE_FALLOC:
-        /* posix_fallocate() doesn't set errno. */
-        result = -posix_fallocate(fd, 0, total_size);
-        if (result != 0) {
-            error_setg_errno(errp, -result,
-                             "Could not preallocate data for the new file");
-        }
-        break;
-#endif
-    case PREALLOC_MODE_FULL:
-    {
-        int64_t num = 0, left = total_size;
-        buf = g_malloc0(65536);
-
-        while (left > 0) {
-            num = MIN(left, 65536);
-            result = write(fd, buf, num);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not write to the new file");
-                break;
-            }
-            left -= result;
-        }
-        if (result >= 0) {
-            result = fsync(fd);
-            if (result < 0) {
-                result = -errno;
-                error_setg_errno(errp, -result,
-                                 "Could not flush new file to disk");
-            }
-        }
-        g_free(buf);
-        break;
-    }
-    case PREALLOC_MODE_OFF:
-        break;
-    default:
-        result = -EINVAL;
-        error_setg(errp, "Unsupported preallocation mode: %s",
-                   PreallocMode_lookup[prealloc]);
-        break;
-    }
-
-out_close:
-    if (qemu_close(fd) != 0 && result == 0) {
-        result = -errno;
-        error_setg_errno(errp, -result, "Could not close the new file");
-    }
-out:
-    return result;
-}
-
-/*
- * Find allocation range in @bs around offset @start.
- * May change underlying file descriptor's file offset.
- * If @start is not in a hole, store @start in @data, and the
- * beginning of the next hole in @hole, and return 0.
- * If @start is in a non-trailing hole, store @start in @hole and the
- * beginning of the next non-hole in @data, and return 0.
- * If @start is in a trailing hole or beyond EOF, return -ENXIO.
- * If we can't find out, return a negative errno other than -ENXIO.
- */
-static int find_allocation(BlockDriverState *bs, off_t start,
-                           off_t *data, off_t *hole)
-{
-#if defined SEEK_HOLE && defined SEEK_DATA
-    BDRVRawState *s = bs->opaque;
-    off_t offs;
-
-    /*
-     * SEEK_DATA cases:
-     * D1. offs == start: start is in data
-     * D2. offs > start: start is in a hole, next data at offs
-     * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
-     *                              or start is beyond EOF
-     *     If the latter happens, the file has been truncated behind
-     *     our back since we opened it.  All bets are off then.
-     *     Treating like a trailing hole is simplest.
-     * D4. offs < 0, errno != ENXIO: we learned nothing
-     */
-    offs = lseek(s->fd, start, SEEK_DATA);
-    if (offs < 0) {
-        return -errno;          /* D3 or D4 */
-    }
-    assert(offs >= start);
-
-    if (offs > start) {
-        /* D2: in hole, next data at offs */
-        *hole = start;
-        *data = offs;
-        return 0;
-    }
-
-    /* D1: in data, end not yet known */
-
-    /*
-     * SEEK_HOLE cases:
-     * H1. offs == start: start is in a hole
-     *     If this happens here, a hole has been dug behind our back
-     *     since the previous lseek().
-     * H2. offs > start: either start is in data, next hole at offs,
-     *                   or start is in trailing hole, EOF at offs
-     *     Linux treats trailing holes like any other hole: offs ==
-     *     start.  Solaris seeks to EOF instead: offs > start (blech).
-     *     If that happens here, a hole has been dug behind our back
-     *     since the previous lseek().
-     * H3. offs < 0, errno = ENXIO: start is beyond EOF
-     *     If this happens, the file has been truncated behind our
-     *     back since we opened it.  Treat it like a trailing hole.
-     * H4. offs < 0, errno != ENXIO: we learned nothing
-     *     Pretend we know nothing at all, i.e. "forget" about D1.
-     */
-    offs = lseek(s->fd, start, SEEK_HOLE);
-    if (offs < 0) {
-        return -errno;          /* D1 and (H3 or H4) */
-    }
-    assert(offs >= start);
-
-    if (offs > start) {
-        /*
-         * D1 and H2: either in data, next hole at offs, or it was in
-         * data but is now in a trailing hole.  In the latter case,
-         * all bets are off.  Treating it as if it there was data all
-         * the way to EOF is safe, so simply do that.
-         */
-        *data = start;
-        *hole = offs;
-        return 0;
-    }
-
-    /* D1 and H1 */
-    return -EBUSY;
-#else
-    return -ENOTSUP;
-#endif
-}
-
-/*
- * Returns the allocation status of the specified sectors.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
-                                                    int64_t sector_num,
-                                                    int nb_sectors, int *pnum,
-                                                    BlockDriverState **file)
-{
-    off_t start, data = 0, hole = 0;
-    int64_t total_size;
-    int ret;
-
-    ret = fd_open(bs);
-    if (ret < 0) {
-        return ret;
-    }
-
-    start = sector_num * BDRV_SECTOR_SIZE;
-    total_size = bdrv_getlength(bs);
-    if (total_size < 0) {
-        return total_size;
-    } else if (start >= total_size) {
-        *pnum = 0;
-        return 0;
-    } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
-        nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
-    }
-
-    ret = find_allocation(bs, start, &data, &hole);
-    if (ret == -ENXIO) {
-        /* Trailing hole */
-        *pnum = nb_sectors;
-        ret = BDRV_BLOCK_ZERO;
-    } else if (ret < 0) {
-        /* No info available, so pretend there are no holes */
-        *pnum = nb_sectors;
-        ret = BDRV_BLOCK_DATA;
-    } else if (data == start) {
-        /* On a data extent, compute sectors to the end of the extent,
-         * possibly including a partial sector at EOF. */
-        *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
-        ret = BDRV_BLOCK_DATA;
-    } else {
-        /* On a hole, compute sectors to the beginning of the next extent.  */
-        assert(hole == start);
-        *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
-        ret = BDRV_BLOCK_ZERO;
-    }
-    *file = bs;
-    return ret | BDRV_BLOCK_OFFSET_VALID | start;
-}
-
-static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-
-    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
-                       cb, opaque, QEMU_AIO_DISCARD);
-}
-
-static int coroutine_fn raw_co_write_zeroes(
-    BlockDriverState *bs, int64_t sector_num,
-    int nb_sectors, BdrvRequestFlags flags)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
-                              QEMU_AIO_WRITE_ZEROES);
-    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
-                              QEMU_AIO_DISCARD);
-    }
-    return -ENOTSUP;
-}
-
-static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVRawState *s = bs->opaque;
-
-    bdi->unallocated_blocks_are_zero = s->discard_zeroes;
-    bdi->can_write_zeroes_with_unmap = s->discard_zeroes;
-    return 0;
-}
-
-static QemuOptsList raw_create_opts = {
-    .name = "raw-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_NOCOW,
-            .type = QEMU_OPT_BOOL,
-            .help = "Turn off copy-on-write (valid only on btrfs)"
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, falloc, full)"
-        },
-        { /* end of list */ }
-    }
-};
-
-BlockDriver bdrv_file = {
-    .format_name = "file",
-    .protocol_name = "file",
-    .instance_size = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe = NULL, /* no probe for protocols */
-    .bdrv_parse_filename = raw_parse_filename,
-    .bdrv_file_open = raw_open,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit = raw_reopen_commit,
-    .bdrv_reopen_abort = raw_reopen_abort,
-    .bdrv_close = raw_close,
-    .bdrv_create = raw_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_co_get_block_status = raw_co_get_block_status,
-    .bdrv_co_write_zeroes = raw_co_write_zeroes,
-
-    .bdrv_aio_readv = raw_aio_readv,
-    .bdrv_aio_writev = raw_aio_writev,
-    .bdrv_aio_flush = raw_aio_flush,
-    .bdrv_aio_discard = raw_aio_discard,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate = raw_truncate,
-    .bdrv_getlength = raw_getlength,
-    .bdrv_get_info = raw_get_info,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    .create_opts = &raw_create_opts,
-};
-
-/***********************************************/
-/* host device */
-
-#if defined(__APPLE__) && defined(__MACH__)
-static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
-                                CFIndex maxPathSize, int flags);
-static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
-{
-    kern_return_t kernResult = KERN_FAILURE;
-    mach_port_t     masterPort;
-    CFMutableDictionaryRef  classesToMatch;
-    const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
-    char *mediaType = NULL;
-
-    kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
-    if ( KERN_SUCCESS != kernResult ) {
-        printf( "IOMasterPort returned %d\n", kernResult );
-    }
-
-    int index;
-    for (index = 0; index < ARRAY_SIZE(matching_array); index++) {
-        classesToMatch = IOServiceMatching(matching_array[index]);
-        if (classesToMatch == NULL) {
-            error_report("IOServiceMatching returned NULL for %s",
-                         matching_array[index]);
-            continue;
-        }
-        CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
-                             kCFBooleanTrue);
-        kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
-                                                  mediaIterator);
-        if (kernResult != KERN_SUCCESS) {
-            error_report("Note: IOServiceGetMatchingServices returned %d",
-                         kernResult);
-            continue;
-        }
-
-        /* If a match was found, leave the loop */
-        if (*mediaIterator != 0) {
-            DPRINTF("Matching using %s\n", matching_array[index]);
-            mediaType = g_strdup(matching_array[index]);
-            break;
-        }
-    }
-    return mediaType;
-}
-
-kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
-                         CFIndex maxPathSize, int flags)
-{
-    io_object_t     nextMedia;
-    kern_return_t   kernResult = KERN_FAILURE;
-    *bsdPath = '\0';
-    nextMedia = IOIteratorNext( mediaIterator );
-    if ( nextMedia )
-    {
-        CFTypeRef   bsdPathAsCFString;
-    bsdPathAsCFString = IORegistryEntryCreateCFProperty( nextMedia, CFSTR( kIOBSDNameKey ), kCFAllocatorDefault, 0 );
-        if ( bsdPathAsCFString ) {
-            size_t devPathLength;
-            strcpy( bsdPath, _PATH_DEV );
-            if (flags & BDRV_O_NOCACHE) {
-                strcat(bsdPath, "r");
-            }
-            devPathLength = strlen( bsdPath );
-            if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
-                kernResult = KERN_SUCCESS;
-            }
-            CFRelease( bsdPathAsCFString );
-        }
-        IOObjectRelease( nextMedia );
-    }
-
-    return kernResult;
-}
-
-/* Sets up a real cdrom for use in QEMU */
-static bool setup_cdrom(char *bsd_path, Error **errp)
-{
-    int index, num_of_test_partitions = 2, fd;
-    char test_partition[MAXPATHLEN];
-    bool partition_found = false;
-
-    /* look for a working partition */
-    for (index = 0; index < num_of_test_partitions; index++) {
-        snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path,
-                 index);
-        fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE);
-        if (fd >= 0) {
-            partition_found = true;
-            qemu_close(fd);
-            break;
-        }
-    }
-
-    /* if a working partition on the device was not found */
-    if (partition_found == false) {
-        error_setg(errp, "Failed to find a working partition on disc");
-    } else {
-        DPRINTF("Using %s as optical disc\n", test_partition);
-        pstrcpy(bsd_path, MAXPATHLEN, test_partition);
-    }
-    return partition_found;
-}
-
-/* Prints directions on mounting and unmounting a device */
-static void print_unmounting_directions(const char *file_name)
-{
-    error_report("If device %s is mounted on the desktop, unmount"
-                 " it first before using it in QEMU", file_name);
-    error_report("Command to unmount device: diskutil unmountDisk %s",
-                 file_name);
-    error_report("Command to mount device: diskutil mountDisk %s", file_name);
-}
-
-#endif /* defined(__APPLE__) && defined(__MACH__) */
-
-static int hdev_probe_device(const char *filename)
-{
-    struct stat st;
-
-    /* allow a dedicated CD-ROM driver to match with a higher priority */
-    if (strstart(filename, "/dev/cdrom", NULL))
-        return 50;
-
-    if (stat(filename, &st) >= 0 &&
-            (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
-        return 100;
-    }
-
-    return 0;
-}
-
-static int check_hdev_writable(BDRVRawState *s)
-{
-#if defined(BLKROGET)
-    /* Linux block devices can be configured "read-only" using blockdev(8).
-     * This is independent of device node permissions and therefore open(2)
-     * with O_RDWR succeeds.  Actual writes fail with EPERM.
-     *
-     * bdrv_open() is supposed to fail if the disk is read-only.  Explicitly
-     * check for read-only block devices so that Linux block devices behave
-     * properly.
-     */
-    struct stat st;
-    int readonly = 0;
-
-    if (fstat(s->fd, &st)) {
-        return -errno;
-    }
-
-    if (!S_ISBLK(st.st_mode)) {
-        return 0;
-    }
-
-    if (ioctl(s->fd, BLKROGET, &readonly) < 0) {
-        return -errno;
-    }
-
-    if (readonly) {
-        return -EACCES;
-    }
-#endif /* defined(BLKROGET) */
-    return 0;
-}
-
-static void hdev_parse_filename(const char *filename, QDict *options,
-                                Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_device:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static bool hdev_is_sg(BlockDriverState *bs)
-{
-
-#if defined(__linux__)
-
-    struct stat st;
-    struct sg_scsi_id scsiid;
-    int sg_version;
-
-    if (stat(bs->filename, &st) >= 0 && S_ISCHR(st.st_mode) &&
-        !bdrv_ioctl(bs, SG_GET_VERSION_NUM, &sg_version) &&
-        !bdrv_ioctl(bs, SG_GET_SCSI_ID, &scsiid)) {
-        DPRINTF("SG device found: type=%d, version=%d\n",
-            scsiid.scsi_type, sg_version);
-        return true;
-    }
-
-#endif
-
-    return false;
-}
-
-static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-#if defined(__APPLE__) && defined(__MACH__)
-    const char *filename = qdict_get_str(options, "filename");
-    char bsd_path[MAXPATHLEN] = "";
-    bool error_occurred = false;
-
-    /* If using a real cdrom */
-    if (strcmp(filename, "/dev/cdrom") == 0) {
-        char *mediaType = NULL;
-        kern_return_t ret_val;
-        io_iterator_t mediaIterator = 0;
-
-        mediaType = FindEjectableOpticalMedia(&mediaIterator);
-        if (mediaType == NULL) {
-            error_setg(errp, "Please make sure your CD/DVD is in the optical"
-                       " drive");
-            error_occurred = true;
-            goto hdev_open_Mac_error;
-        }
-
-        ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags);
-        if (ret_val != KERN_SUCCESS) {
-            error_setg(errp, "Could not get BSD path for optical drive");
-            error_occurred = true;
-            goto hdev_open_Mac_error;
-        }
-
-        /* If a real optical drive was not found */
-        if (bsd_path[0] == '\0') {
-            error_setg(errp, "Failed to obtain bsd path for optical drive");
-            error_occurred = true;
-            goto hdev_open_Mac_error;
-        }
-
-        /* If using a cdrom disc and finding a partition on the disc failed */
-        if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 &&
-            setup_cdrom(bsd_path, errp) == false) {
-            print_unmounting_directions(bsd_path);
-            error_occurred = true;
-            goto hdev_open_Mac_error;
-        }
-
-        qdict_put(options, "filename", qstring_from_str(bsd_path));
-
-hdev_open_Mac_error:
-        g_free(mediaType);
-        if (mediaIterator) {
-            IOObjectRelease(mediaIterator);
-        }
-        if (error_occurred) {
-            return -ENOENT;
-        }
-    }
-#endif /* defined(__APPLE__) && defined(__MACH__) */
-
-    s->type = FTYPE_FILE;
-
-    ret = raw_open_common(bs, options, flags, 0, &local_err);
-    if (ret < 0) {
-        if (local_err) {
-            error_propagate(errp, local_err);
-        }
-#if defined(__APPLE__) && defined(__MACH__)
-        if (*bsd_path) {
-            filename = bsd_path;
-        }
-        /* if a physical device experienced an error while being opened */
-        if (strncmp(filename, "/dev/", 5) == 0) {
-            print_unmounting_directions(filename);
-        }
-#endif /* defined(__APPLE__) && defined(__MACH__) */
-        return ret;
-    }
-
-    /* Since this does ioctl the device must be already opened */
-    bs->sg = hdev_is_sg(bs);
-
-    if (flags & BDRV_O_RDWR) {
-        ret = check_hdev_writable(s);
-        if (ret < 0) {
-            raw_close(bs);
-            error_setg_errno(errp, -ret, "The device is not writable");
-            return ret;
-        }
-    }
-
-    return ret;
-}
-
-#if defined(__linux__)
-
-static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
-        unsigned long int req, void *buf,
-        BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-    RawPosixAIOData *acb;
-    ThreadPool *pool;
-
-    if (fd_open(bs) < 0)
-        return NULL;
-
-    acb = g_new(RawPosixAIOData, 1);
-    acb->bs = bs;
-    acb->aio_type = QEMU_AIO_IOCTL;
-    acb->aio_fildes = s->fd;
-    acb->aio_offset = 0;
-    acb->aio_ioctl_buf = buf;
-    acb->aio_ioctl_cmd = req;
-    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
-}
-#endif /* linux */
-
-static int fd_open(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    /* this is just to ensure s->fd is sane (its called by io ops) */
-    if (s->fd >= 0)
-        return 0;
-    return -EIO;
-}
-
-static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (fd_open(bs) < 0) {
-        return NULL;
-    }
-    return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
-                       cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
-}
-
-static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
-    BDRVRawState *s = bs->opaque;
-    int rc;
-
-    rc = fd_open(bs);
-    if (rc < 0) {
-        return rc;
-    }
-    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
-        return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
-                              QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
-    } else if (s->discard_zeroes) {
-        return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
-                              QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
-    }
-    return -ENOTSUP;
-}
-
-static int hdev_create(const char *filename, QemuOpts *opts,
-                       Error **errp)
-{
-    int fd;
-    int ret = 0;
-    struct stat stat_buf;
-    int64_t total_size = 0;
-    bool has_prefix;
-
-    /* This function is used by both protocol block drivers and therefore either
-     * of these prefixes may be given.
-     * The return value has to be stored somewhere, otherwise this is an error
-     * due to -Werror=unused-value. */
-    has_prefix =
-        strstart(filename, "host_device:", &filename) ||
-        strstart(filename, "host_cdrom:" , &filename);
-
-    (void)has_prefix;
-
-    ret = raw_normalize_devicepath(&filename);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not normalize device path");
-        return ret;
-    }
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    fd = qemu_open(filename, O_WRONLY | O_BINARY);
-    if (fd < 0) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Could not open device");
-        return ret;
-    }
-
-    if (fstat(fd, &stat_buf) < 0) {
-        ret = -errno;
-        error_setg_errno(errp, -ret, "Could not stat device");
-    } else if (!S_ISBLK(stat_buf.st_mode) && !S_ISCHR(stat_buf.st_mode)) {
-        error_setg(errp,
-                   "The given file is neither a block nor a character device");
-        ret = -ENODEV;
-    } else if (lseek(fd, 0, SEEK_END) < total_size) {
-        error_setg(errp, "Device is too small");
-        ret = -ENOSPC;
-    }
-
-    qemu_close(fd);
-    return ret;
-}
-
-static BlockDriver bdrv_host_device = {
-    .format_name        = "host_device",
-    .protocol_name        = "host_device",
-    .instance_size      = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe_device  = hdev_probe_device,
-    .bdrv_parse_filename = hdev_parse_filename,
-    .bdrv_file_open     = hdev_open,
-    .bdrv_close         = raw_close,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-    .bdrv_create         = hdev_create,
-    .create_opts         = &raw_create_opts,
-    .bdrv_co_write_zeroes = hdev_co_write_zeroes,
-
-    .bdrv_aio_readv	= raw_aio_readv,
-    .bdrv_aio_writev	= raw_aio_writev,
-    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_aio_discard   = hdev_aio_discard,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength	= raw_getlength,
-    .bdrv_get_info = raw_get_info,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-    .bdrv_probe_blocksizes = hdev_probe_blocksizes,
-    .bdrv_probe_geometry = hdev_probe_geometry,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    /* generic scsi device */
-#ifdef __linux__
-    .bdrv_aio_ioctl     = hdev_aio_ioctl,
-#endif
-};
-
-#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-static void cdrom_parse_filename(const char *filename, QDict *options,
-                                 Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_cdrom:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-#endif
-
-#ifdef __linux__
-static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    s->type = FTYPE_CD;
-
-    /* open will not fail even if no CD is inserted, so add O_NONBLOCK */
-    ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-    return ret;
-}
-
-static int cdrom_probe_device(const char *filename)
-{
-    int fd, ret;
-    int prio = 0;
-    struct stat st;
-
-    fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
-    if (fd < 0) {
-        goto out;
-    }
-    ret = fstat(fd, &st);
-    if (ret == -1 || !S_ISBLK(st.st_mode)) {
-        goto outc;
-    }
-
-    /* Attempt to detect via a CDROM specific ioctl */
-    ret = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-    if (ret >= 0)
-        prio = 100;
-
-outc:
-    qemu_close(fd);
-out:
-    return prio;
-}
-
-static bool cdrom_is_inserted(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int ret;
-
-    ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
-    return ret == CDS_DISC_OK;
-}
-
-static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (eject_flag) {
-        if (ioctl(s->fd, CDROMEJECT, NULL) < 0)
-            perror("CDROMEJECT");
-    } else {
-        if (ioctl(s->fd, CDROMCLOSETRAY, NULL) < 0)
-            perror("CDROMEJECT");
-    }
-}
-
-static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (ioctl(s->fd, CDROM_LOCKDOOR, locked) < 0) {
-        /*
-         * Note: an error can happen if the distribution automatically
-         * mounts the CD-ROM
-         */
-        /* perror("CDROM_LOCKDOOR"); */
-    }
-}
-
-static BlockDriver bdrv_host_cdrom = {
-    .format_name        = "host_cdrom",
-    .protocol_name      = "host_cdrom",
-    .instance_size      = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe_device	= cdrom_probe_device,
-    .bdrv_parse_filename = cdrom_parse_filename,
-    .bdrv_file_open     = cdrom_open,
-    .bdrv_close         = raw_close,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-    .bdrv_create         = hdev_create,
-    .create_opts         = &raw_create_opts,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    /* removable device support */
-    .bdrv_is_inserted   = cdrom_is_inserted,
-    .bdrv_eject         = cdrom_eject,
-    .bdrv_lock_medium   = cdrom_lock_medium,
-
-    /* generic scsi device */
-    .bdrv_aio_ioctl     = hdev_aio_ioctl,
-};
-#endif /* __linux__ */
-
-#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
-static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    Error *local_err = NULL;
-    int ret;
-
-    s->type = FTYPE_CD;
-
-    ret = raw_open_common(bs, options, flags, 0, &local_err);
-    if (ret) {
-        if (local_err) {
-            error_propagate(errp, local_err);
-        }
-        return ret;
-    }
-
-    /* make sure the door isn't locked at this time */
-    ioctl(s->fd, CDIOCALLOW);
-    return 0;
-}
-
-static int cdrom_probe_device(const char *filename)
-{
-    if (strstart(filename, "/dev/cd", NULL) ||
-            strstart(filename, "/dev/acd", NULL))
-        return 100;
-    return 0;
-}
-
-static int cdrom_reopen(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    int fd;
-
-    /*
-     * Force reread of possibly changed/newly loaded disc,
-     * FreeBSD seems to not notice sometimes...
-     */
-    if (s->fd >= 0)
-        qemu_close(s->fd);
-    fd = qemu_open(bs->filename, s->open_flags, 0644);
-    if (fd < 0) {
-        s->fd = -1;
-        return -EIO;
-    }
-    s->fd = fd;
-
-    /* make sure the door isn't locked at this time */
-    ioctl(s->fd, CDIOCALLOW);
-    return 0;
-}
-
-static bool cdrom_is_inserted(BlockDriverState *bs)
-{
-    return raw_getlength(bs) > 0;
-}
-
-static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->fd < 0)
-        return;
-
-    (void) ioctl(s->fd, CDIOCALLOW);
-
-    if (eject_flag) {
-        if (ioctl(s->fd, CDIOCEJECT) < 0)
-            perror("CDIOCEJECT");
-    } else {
-        if (ioctl(s->fd, CDIOCCLOSE) < 0)
-            perror("CDIOCCLOSE");
-    }
-
-    cdrom_reopen(bs);
-}
-
-static void cdrom_lock_medium(BlockDriverState *bs, bool locked)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->fd < 0)
-        return;
-    if (ioctl(s->fd, (locked ? CDIOCPREVENT : CDIOCALLOW)) < 0) {
-        /*
-         * Note: an error can happen if the distribution automatically
-         * mounts the CD-ROM
-         */
-        /* perror("CDROM_LOCKDOOR"); */
-    }
-}
-
-static BlockDriver bdrv_host_cdrom = {
-    .format_name        = "host_cdrom",
-    .protocol_name      = "host_cdrom",
-    .instance_size      = sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_probe_device	= cdrom_probe_device,
-    .bdrv_parse_filename = cdrom_parse_filename,
-    .bdrv_file_open     = cdrom_open,
-    .bdrv_close         = raw_close,
-    .bdrv_reopen_prepare = raw_reopen_prepare,
-    .bdrv_reopen_commit  = raw_reopen_commit,
-    .bdrv_reopen_abort   = raw_reopen_abort,
-    .bdrv_create        = hdev_create,
-    .create_opts        = &raw_create_opts,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush	= raw_aio_flush,
-    .bdrv_refresh_limits = raw_refresh_limits,
-    .bdrv_io_plug = raw_aio_plug,
-    .bdrv_io_unplug = raw_aio_unplug,
-    .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
-    .bdrv_truncate      = raw_truncate,
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    /* removable device support */
-    .bdrv_is_inserted   = cdrom_is_inserted,
-    .bdrv_eject         = cdrom_eject,
-    .bdrv_lock_medium   = cdrom_lock_medium,
-};
-#endif /* __FreeBSD__ */
-
-static void bdrv_file_init(void)
-{
-    /*
-     * Register all the drivers.  Note that order is important, the driver
-     * registered last will get probed first.
-     */
-    bdrv_register(&bdrv_file);
-    bdrv_register(&bdrv_host_device);
-#ifdef __linux__
-    bdrv_register(&bdrv_host_cdrom);
-#endif
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-    bdrv_register(&bdrv_host_cdrom);
-#endif
-}
-
-block_init(bdrv_file_init);
diff --git a/qemu/block/raw-win32.c b/qemu/block/raw-win32.c
deleted file mode 100644
index fd2389153..000000000
--- a/qemu/block/raw-win32.c
+++ /dev/null
@@ -1,731 +0,0 @@
-/*
- * Block driver for RAW files (win32)
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/cutils.h"
-#include "qemu/timer.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include "raw-aio.h"
-#include "trace.h"
-#include "block/thread-pool.h"
-#include "qemu/iov.h"
-#include "qapi/qmp/qstring.h"
-#include <windows.h>
-#include <winioctl.h>
-
-#define FTYPE_FILE 0
-#define FTYPE_CD     1
-#define FTYPE_HARDDISK 2
-
-typedef struct RawWin32AIOData {
-    BlockDriverState *bs;
-    HANDLE hfile;
-    struct iovec *aio_iov;
-    int aio_niov;
-    size_t aio_nbytes;
-    off64_t aio_offset;
-    int aio_type;
-} RawWin32AIOData;
-
-typedef struct BDRVRawState {
-    HANDLE hfile;
-    int type;
-    char drive_path[16]; /* format: "d:\" */
-    QEMUWin32AIOState *aio;
-} BDRVRawState;
-
-/*
- * Read/writes the data to/from a given linear buffer.
- *
- * Returns the number of bytes handles or -errno in case of an error. Short
- * reads are only returned if the end of the file is reached.
- */
-static size_t handle_aiocb_rw(RawWin32AIOData *aiocb)
-{
-    size_t offset = 0;
-    int i;
-
-    for (i = 0; i < aiocb->aio_niov; i++) {
-        OVERLAPPED ov;
-        DWORD ret, ret_count, len;
-
-        memset(&ov, 0, sizeof(ov));
-        ov.Offset = (aiocb->aio_offset + offset);
-        ov.OffsetHigh = (aiocb->aio_offset + offset) >> 32;
-        len = aiocb->aio_iov[i].iov_len;
-        if (aiocb->aio_type & QEMU_AIO_WRITE) {
-            ret = WriteFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
-                            len, &ret_count, &ov);
-        } else {
-            ret = ReadFile(aiocb->hfile, aiocb->aio_iov[i].iov_base,
-                           len, &ret_count, &ov);
-        }
-        if (!ret) {
-            ret_count = 0;
-        }
-        if (ret_count != len) {
-            offset += ret_count;
-            break;
-        }
-        offset += len;
-    }
-
-    return offset;
-}
-
-static int aio_worker(void *arg)
-{
-    RawWin32AIOData *aiocb = arg;
-    ssize_t ret = 0;
-    size_t count;
-
-    switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
-    case QEMU_AIO_READ:
-        count = handle_aiocb_rw(aiocb);
-        if (count < aiocb->aio_nbytes) {
-            /* A short read means that we have reached EOF. Pad the buffer
-             * with zeros for bytes after EOF. */
-            iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
-                      0, aiocb->aio_nbytes - count);
-
-            count = aiocb->aio_nbytes;
-        }
-        if (count == aiocb->aio_nbytes) {
-            ret = 0;
-        } else {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_WRITE:
-        count = handle_aiocb_rw(aiocb);
-        if (count == aiocb->aio_nbytes) {
-            ret = 0;
-        } else {
-            ret = -EINVAL;
-        }
-        break;
-    case QEMU_AIO_FLUSH:
-        if (!FlushFileBuffers(aiocb->hfile)) {
-            return -EIO;
-        }
-        break;
-    default:
-        fprintf(stderr, "invalid aio request (0x%x)\n", aiocb->aio_type);
-        ret = -EINVAL;
-        break;
-    }
-
-    g_free(aiocb);
-    return ret;
-}
-
-static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
-    ThreadPool *pool;
-
-    acb->bs = bs;
-    acb->hfile = hfile;
-    acb->aio_type = type;
-
-    if (qiov) {
-        acb->aio_iov = qiov->iov;
-        acb->aio_niov = qiov->niov;
-    }
-    acb->aio_nbytes = nb_sectors * 512;
-    acb->aio_offset = sector_num * 512;
-
-    trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
-    pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
-    return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
-}
-
-int qemu_ftruncate64(int fd, int64_t length)
-{
-    LARGE_INTEGER li;
-    DWORD dw;
-    LONG high;
-    HANDLE h;
-    BOOL res;
-
-    if ((GetVersion() & 0x80000000UL) && (length >> 32) != 0)
-	return -1;
-
-    h = (HANDLE)_get_osfhandle(fd);
-
-    /* get current position, ftruncate do not change position */
-    li.HighPart = 0;
-    li.LowPart = SetFilePointer (h, 0, &li.HighPart, FILE_CURRENT);
-    if (li.LowPart == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-	return -1;
-    }
-
-    high = length >> 32;
-    dw = SetFilePointer(h, (DWORD) length, &high, FILE_BEGIN);
-    if (dw == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-	return -1;
-    }
-    res = SetEndOfFile(h);
-
-    /* back to old position */
-    SetFilePointer(h, li.LowPart, &li.HighPart, FILE_BEGIN);
-    return res ? 0 : -1;
-}
-
-static int set_sparse(int fd)
-{
-    DWORD returned;
-    return (int) DeviceIoControl((HANDLE)_get_osfhandle(fd), FSCTL_SET_SPARSE,
-				 NULL, 0, NULL, 0, &returned, NULL);
-}
-
-static void raw_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->aio) {
-        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
-    }
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
-                                   AioContext *new_context)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->aio) {
-        win32_aio_attach_aio_context(s->aio, new_context);
-    }
-}
-
-static void raw_probe_alignment(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    DWORD sectorsPerCluster, freeClusters, totalClusters, count;
-    DISK_GEOMETRY_EX dg;
-    BOOL status;
-
-    if (s->type == FTYPE_CD) {
-        bs->request_alignment = 2048;
-        return;
-    }
-    if (s->type == FTYPE_HARDDISK) {
-        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
-                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
-        if (status != 0) {
-            bs->request_alignment = dg.Geometry.BytesPerSector;
-            return;
-        }
-        /* try GetDiskFreeSpace too */
-    }
-
-    if (s->drive_path[0]) {
-        GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
-                         &dg.Geometry.BytesPerSector,
-                         &freeClusters, &totalClusters);
-        bs->request_alignment = dg.Geometry.BytesPerSector;
-    }
-}
-
-static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
-{
-    assert(access_flags != NULL);
-    assert(overlapped != NULL);
-
-    if (flags & BDRV_O_RDWR) {
-        *access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        *access_flags = GENERIC_READ;
-    }
-
-    *overlapped = FILE_ATTRIBUTE_NORMAL;
-    if (flags & BDRV_O_NATIVE_AIO) {
-        *overlapped |= FILE_FLAG_OVERLAPPED;
-    }
-    if (flags & BDRV_O_NOCACHE) {
-        *overlapped |= FILE_FLAG_NO_BUFFERING;
-    }
-}
-
-static void raw_parse_filename(const char *filename, QDict *options,
-                               Error **errp)
-{
-    /* The filename does not have to be prefixed by the protocol name, since
-     * "file" is the default protocol; therefore, the return value of this
-     * function call can be ignored. */
-    strstart(filename, "file:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static QemuOptsList raw_runtime_opts = {
-    .name = "raw",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "File name of the image",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int raw_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    int access_flags;
-    DWORD overlapped;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *filename;
-    int ret;
-
-    s->type = FTYPE_FILE;
-
-    opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-
-    raw_parse_flags(flags, &access_flags, &overlapped);
-
-    if (filename[0] && filename[1] == ':') {
-        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
-    } else if (filename[0] == '\\' && filename[1] == '\\') {
-        s->drive_path[0] = 0;
-    } else {
-        /* Relative path.  */
-        char buf[MAX_PATH];
-        GetCurrentDirectory(MAX_PATH, buf);
-        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
-    }
-
-    s->hfile = CreateFile(filename, access_flags,
-                          FILE_SHARE_READ, NULL,
-                          OPEN_EXISTING, overlapped, NULL);
-    if (s->hfile == INVALID_HANDLE_VALUE) {
-        int err = GetLastError();
-
-        if (err == ERROR_ACCESS_DENIED) {
-            ret = -EACCES;
-        } else {
-            ret = -EINVAL;
-        }
-        goto fail;
-    }
-
-    if (flags & BDRV_O_NATIVE_AIO) {
-        s->aio = win32_aio_init();
-        if (s->aio == NULL) {
-            CloseHandle(s->hfile);
-            error_setg(errp, "Could not initialize AIO");
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        ret = win32_aio_attach(s->aio, s->hfile);
-        if (ret < 0) {
-            win32_aio_cleanup(s->aio);
-            CloseHandle(s->hfile);
-            error_setg_errno(errp, -ret, "Could not enable AIO");
-            goto fail;
-        }
-
-        win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
-    }
-
-    raw_probe_alignment(bs);
-    ret = 0;
-fail:
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
-                         int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                         BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-    if (s->aio) {
-        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
-                                nb_sectors, cb, opaque, QEMU_AIO_READ); 
-    } else {
-        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
-                           cb, opaque, QEMU_AIO_READ);
-    }
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
-                          int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-                          BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-    if (s->aio) {
-        return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
-                                nb_sectors, cb, opaque, QEMU_AIO_WRITE); 
-    } else {
-        return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
-                           cb, opaque, QEMU_AIO_WRITE);
-    }
-}
-
-static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
-                         BlockCompletionFunc *cb, void *opaque)
-{
-    BDRVRawState *s = bs->opaque;
-    return paio_submit(bs, s->hfile, 0, NULL, 0, cb, opaque, QEMU_AIO_FLUSH);
-}
-
-static void raw_close(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-
-    if (s->aio) {
-        win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
-        win32_aio_cleanup(s->aio);
-        s->aio = NULL;
-    }
-
-    CloseHandle(s->hfile);
-    if (bs->open_flags & BDRV_O_TEMPORARY) {
-        unlink(bs->filename);
-    }
-}
-
-static int raw_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVRawState *s = bs->opaque;
-    LONG low, high;
-    DWORD dwPtrLow;
-
-    low = offset;
-    high = offset >> 32;
-
-    /*
-     * An error has occurred if the return value is INVALID_SET_FILE_POINTER
-     * and GetLastError doesn't return NO_ERROR.
-     */
-    dwPtrLow = SetFilePointer(s->hfile, low, &high, FILE_BEGIN);
-    if (dwPtrLow == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
-        fprintf(stderr, "SetFilePointer error: %lu\n", GetLastError());
-        return -EIO;
-    }
-    if (SetEndOfFile(s->hfile) == 0) {
-        fprintf(stderr, "SetEndOfFile error: %lu\n", GetLastError());
-        return -EIO;
-    }
-    return 0;
-}
-
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    BDRVRawState *s = bs->opaque;
-    LARGE_INTEGER l;
-    ULARGE_INTEGER available, total, total_free;
-    DISK_GEOMETRY_EX dg;
-    DWORD count;
-    BOOL status;
-
-    switch(s->type) {
-    case FTYPE_FILE:
-        l.LowPart = GetFileSize(s->hfile, (PDWORD)&l.HighPart);
-        if (l.LowPart == 0xffffffffUL && GetLastError() != NO_ERROR)
-            return -EIO;
-        break;
-    case FTYPE_CD:
-        if (!GetDiskFreeSpaceEx(s->drive_path, &available, &total, &total_free))
-            return -EIO;
-        l.QuadPart = total.QuadPart;
-        break;
-    case FTYPE_HARDDISK:
-        status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
-                                 NULL, 0, &dg, sizeof(dg), &count, NULL);
-        if (status != 0) {
-            l = dg.DiskSize;
-        }
-        break;
-    default:
-        return -EIO;
-    }
-    return l.QuadPart;
-}
-
-static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
-{
-    typedef DWORD (WINAPI * get_compressed_t)(const char *filename,
-                                              DWORD * high);
-    get_compressed_t get_compressed;
-    struct _stati64 st;
-    const char *filename = bs->filename;
-    /* WinNT support GetCompressedFileSize to determine allocate size */
-    get_compressed =
-        (get_compressed_t) GetProcAddress(GetModuleHandle("kernel32"),
-                                            "GetCompressedFileSizeA");
-    if (get_compressed) {
-        DWORD high, low;
-        low = get_compressed(filename, &high);
-        if (low != 0xFFFFFFFFlu || GetLastError() == NO_ERROR) {
-            return (((int64_t) high) << 32) + low;
-        }
-    }
-
-    if (_stati64(filename, &st) < 0) {
-        return -1;
-    }
-    return st.st_size;
-}
-
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int fd;
-    int64_t total_size = 0;
-
-    strstart(filename, "file:", &filename);
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-
-    fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
-                   0644);
-    if (fd < 0) {
-        error_setg_errno(errp, errno, "Could not create file");
-        return -EIO;
-    }
-    set_sparse(fd);
-    ftruncate(fd, total_size);
-    qemu_close(fd);
-    return 0;
-}
-
-
-static QemuOptsList raw_create_opts = {
-    .name = "raw-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
-};
-
-BlockDriver bdrv_file = {
-    .format_name	= "file",
-    .protocol_name	= "file",
-    .instance_size	= sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_parse_filename = raw_parse_filename,
-    .bdrv_file_open     = raw_open,
-    .bdrv_close         = raw_close,
-    .bdrv_create        = raw_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush     = raw_aio_flush,
-
-    .bdrv_truncate	= raw_truncate,
-    .bdrv_getlength	= raw_getlength,
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-
-    .create_opts        = &raw_create_opts,
-};
-
-/***********************************************/
-/* host device */
-
-static int find_cdrom(char *cdrom_name, int cdrom_name_size)
-{
-    char drives[256], *pdrv = drives;
-    UINT type;
-
-    memset(drives, 0, sizeof(drives));
-    GetLogicalDriveStrings(sizeof(drives), drives);
-    while(pdrv[0] != '\0') {
-        type = GetDriveType(pdrv);
-        switch(type) {
-        case DRIVE_CDROM:
-            snprintf(cdrom_name, cdrom_name_size, "\\\\.\\%c:", pdrv[0]);
-            return 0;
-            break;
-        }
-        pdrv += lstrlen(pdrv) + 1;
-    }
-    return -1;
-}
-
-static int find_device_type(BlockDriverState *bs, const char *filename)
-{
-    BDRVRawState *s = bs->opaque;
-    UINT type;
-    const char *p;
-
-    if (strstart(filename, "\\\\.\\", &p) ||
-        strstart(filename, "//./", &p)) {
-        if (stristart(p, "PhysicalDrive", NULL))
-            return FTYPE_HARDDISK;
-        snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", p[0]);
-        type = GetDriveType(s->drive_path);
-        switch (type) {
-        case DRIVE_REMOVABLE:
-        case DRIVE_FIXED:
-            return FTYPE_HARDDISK;
-        case DRIVE_CDROM:
-            return FTYPE_CD;
-        default:
-            return FTYPE_FILE;
-        }
-    } else {
-        return FTYPE_FILE;
-    }
-}
-
-static int hdev_probe_device(const char *filename)
-{
-    if (strstart(filename, "/dev/cdrom", NULL))
-        return 100;
-    if (is_windows_drive(filename))
-        return 100;
-    return 0;
-}
-
-static void hdev_parse_filename(const char *filename, QDict *options,
-                                Error **errp)
-{
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_device:", &filename);
-
-    qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVRawState *s = bs->opaque;
-    int access_flags, create_flags;
-    int ret = 0;
-    DWORD overlapped;
-    char device_name[64];
-
-    Error *local_err = NULL;
-    const char *filename;
-
-    QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
-                                      &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto done;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-
-    if (strstart(filename, "/dev/cdrom", NULL)) {
-        if (find_cdrom(device_name, sizeof(device_name)) < 0) {
-            error_setg(errp, "Could not open CD-ROM drive");
-            ret = -ENOENT;
-            goto done;
-        }
-        filename = device_name;
-    } else {
-        /* transform drive letters into device name */
-        if (((filename[0] >= 'a' && filename[0] <= 'z') ||
-             (filename[0] >= 'A' && filename[0] <= 'Z')) &&
-            filename[1] == ':' && filename[2] == '\0') {
-            snprintf(device_name, sizeof(device_name), "\\\\.\\%c:", filename[0]);
-            filename = device_name;
-        }
-    }
-    s->type = find_device_type(bs, filename);
-
-    raw_parse_flags(flags, &access_flags, &overlapped);
-
-    create_flags = OPEN_EXISTING;
-
-    s->hfile = CreateFile(filename, access_flags,
-                          FILE_SHARE_READ, NULL,
-                          create_flags, overlapped, NULL);
-    if (s->hfile == INVALID_HANDLE_VALUE) {
-        int err = GetLastError();
-
-        if (err == ERROR_ACCESS_DENIED) {
-            ret = -EACCES;
-        } else {
-            ret = -EINVAL;
-        }
-        error_setg_errno(errp, -ret, "Could not open device");
-        goto done;
-    }
-
-done:
-    qemu_opts_del(opts);
-    return ret;
-}
-
-static BlockDriver bdrv_host_device = {
-    .format_name	= "host_device",
-    .protocol_name	= "host_device",
-    .instance_size	= sizeof(BDRVRawState),
-    .bdrv_needs_filename = true,
-    .bdrv_parse_filename = hdev_parse_filename,
-    .bdrv_probe_device	= hdev_probe_device,
-    .bdrv_file_open	= hdev_open,
-    .bdrv_close		= raw_close,
-
-    .bdrv_aio_readv     = raw_aio_readv,
-    .bdrv_aio_writev    = raw_aio_writev,
-    .bdrv_aio_flush     = raw_aio_flush,
-
-    .bdrv_detach_aio_context = raw_detach_aio_context,
-    .bdrv_attach_aio_context = raw_attach_aio_context,
-
-    .bdrv_getlength      = raw_getlength,
-    .has_variable_length = true,
-
-    .bdrv_get_allocated_file_size
-                        = raw_get_allocated_file_size,
-};
-
-static void bdrv_file_init(void)
-{
-    bdrv_register(&bdrv_file);
-    bdrv_register(&bdrv_host_device);
-}
-
-block_init(bdrv_file_init);
diff --git a/qemu/block/raw_bsd.c b/qemu/block/raw_bsd.c
deleted file mode 100644
index a6cc7e991..000000000
--- a/qemu/block/raw_bsd.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/* BlockDriver implementation for "raw"
- *
- * Copyright (C) 2010, 2013, Red Hat, Inc.
- * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
- * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
- *
- * Author:
- *   Laszlo Ersek <lersek@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "block/block_int.h"
-#include "qapi/error.h"
-#include "qemu/option.h"
-
-static QemuOptsList raw_create_opts = {
-    .name = "raw-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        { /* end of list */ }
-    }
-};
-
-static int raw_reopen_prepare(BDRVReopenState *reopen_state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
-                                     int nb_sectors, QEMUIOVector *qiov)
-{
-    BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
-    return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
-}
-
-static int coroutine_fn
-raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                    QEMUIOVector *qiov, int flags)
-{
-    void *buf = NULL;
-    BlockDriver *drv;
-    QEMUIOVector local_qiov;
-    int ret;
-
-    if (bs->probed && sector_num == 0) {
-        /* As long as these conditions are true, we can't get partial writes to
-         * the probe buffer and can just directly check the request. */
-        QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
-        QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
-
-        if (nb_sectors == 0) {
-            /* qemu_iovec_to_buf() would fail, but we want to return success
-             * instead of -EINVAL in this case. */
-            return 0;
-        }
-
-        buf = qemu_try_blockalign(bs->file->bs, 512);
-        if (!buf) {
-            ret = -ENOMEM;
-            goto fail;
-        }
-
-        ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
-        if (ret != 512) {
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        drv = bdrv_probe_all(buf, 512, NULL);
-        if (drv != bs->drv) {
-            ret = -EPERM;
-            goto fail;
-        }
-
-        /* Use the checked buffer, a malicious guest might be overwriting its
-         * original buffer in the background. */
-        qemu_iovec_init(&local_qiov, qiov->niov + 1);
-        qemu_iovec_add(&local_qiov, buf, 512);
-        qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
-        qiov = &local_qiov;
-    }
-
-    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
-    ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
-                             nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
-
-fail:
-    if (qiov == &local_qiov) {
-        qemu_iovec_destroy(&local_qiov);
-    }
-    qemu_vfree(buf);
-    return ret;
-}
-
-static int coroutine_fn
-raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-              QEMUIOVector *qiov)
-{
-    return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
-                                            int64_t sector_num,
-                                            int nb_sectors, int *pnum,
-                                            BlockDriverState **file)
-{
-    *pnum = nb_sectors;
-    *file = bs->file->bs;
-    return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
-           (sector_num << BDRV_SECTOR_BITS);
-}
-
-static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
-                                            int64_t sector_num, int nb_sectors,
-                                            BdrvRequestFlags flags)
-{
-    return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
-}
-
-static int coroutine_fn raw_co_discard(BlockDriverState *bs,
-                                       int64_t sector_num, int nb_sectors)
-{
-    return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
-}
-
-static int64_t raw_getlength(BlockDriverState *bs)
-{
-    return bdrv_getlength(bs->file->bs);
-}
-
-static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    return bdrv_get_info(bs->file->bs, bdi);
-}
-
-static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    bs->bl = bs->file->bs->bl;
-}
-
-static int raw_truncate(BlockDriverState *bs, int64_t offset)
-{
-    return bdrv_truncate(bs->file->bs, offset);
-}
-
-static int raw_media_changed(BlockDriverState *bs)
-{
-    return bdrv_media_changed(bs->file->bs);
-}
-
-static void raw_eject(BlockDriverState *bs, bool eject_flag)
-{
-    bdrv_eject(bs->file->bs, eject_flag);
-}
-
-static void raw_lock_medium(BlockDriverState *bs, bool locked)
-{
-    bdrv_lock_medium(bs->file->bs, locked);
-}
-
-static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
-                                 unsigned long int req, void *buf,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque)
-{
-    return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
-}
-
-static int raw_has_zero_init(BlockDriverState *bs)
-{
-    return bdrv_has_zero_init(bs->file->bs);
-}
-
-static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    Error *local_err = NULL;
-    int ret;
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-    return ret;
-}
-
-static int raw_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    bs->sg = bs->file->bs->sg;
-
-    if (bs->probed && !bdrv_is_read_only(bs)) {
-        fprintf(stderr,
-                "WARNING: Image format was not specified for '%s' and probing "
-                "guessed raw.\n"
-                "         Automatically detecting the format is dangerous for "
-                "raw images, write operations on block 0 will be restricted.\n"
-                "         Specify the 'raw' format explicitly to remove the "
-                "restrictions.\n",
-                bs->file->bs->filename);
-    }
-
-    return 0;
-}
-
-static void raw_close(BlockDriverState *bs)
-{
-}
-
-static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    /* smallest possible positive score so that raw is used if and only if no
-     * other block driver works
-     */
-    return 1;
-}
-
-static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
-{
-    return bdrv_probe_blocksizes(bs->file->bs, bsz);
-}
-
-static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
-{
-    return bdrv_probe_geometry(bs->file->bs, geo);
-}
-
-BlockDriver bdrv_raw = {
-    .format_name          = "raw",
-    .bdrv_probe           = &raw_probe,
-    .bdrv_reopen_prepare  = &raw_reopen_prepare,
-    .bdrv_open            = &raw_open,
-    .bdrv_close           = &raw_close,
-    .bdrv_create          = &raw_create,
-    .bdrv_co_readv        = &raw_co_readv,
-    .bdrv_co_writev       = &raw_co_writev,
-    .bdrv_co_writev_flags = &raw_co_writev_flags,
-    .supported_write_flags = BDRV_REQ_FUA,
-    .bdrv_co_write_zeroes = &raw_co_write_zeroes,
-    .bdrv_co_discard      = &raw_co_discard,
-    .bdrv_co_get_block_status = &raw_co_get_block_status,
-    .bdrv_truncate        = &raw_truncate,
-    .bdrv_getlength       = &raw_getlength,
-    .has_variable_length  = true,
-    .bdrv_get_info        = &raw_get_info,
-    .bdrv_refresh_limits  = &raw_refresh_limits,
-    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
-    .bdrv_probe_geometry  = &raw_probe_geometry,
-    .bdrv_media_changed   = &raw_media_changed,
-    .bdrv_eject           = &raw_eject,
-    .bdrv_lock_medium     = &raw_lock_medium,
-    .bdrv_aio_ioctl       = &raw_aio_ioctl,
-    .create_opts          = &raw_create_opts,
-    .bdrv_has_zero_init   = &raw_has_zero_init
-};
-
-static void bdrv_raw_init(void)
-{
-    bdrv_register(&bdrv_raw);
-}
-
-block_init(bdrv_raw_init);
diff --git a/qemu/block/rbd.c b/qemu/block/rbd.c
deleted file mode 100644
index 5bc5b3253..000000000
--- a/qemu/block/rbd.c
+++ /dev/null
@@ -1,1015 +0,0 @@
-/*
- * QEMU Block driver for RADOS (Ceph)
- *
- * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
- *                         Josh Durgin <josh.durgin@dreamhost.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "block/block_int.h"
-#include "crypto/secret.h"
-#include "qemu/cutils.h"
-
-#include <rbd/librbd.h>
-
-/*
- * When specifying the image filename use:
- *
- * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
- *
- * poolname must be the name of an existing rados pool.
- *
- * devicename is the name of the rbd image.
- *
- * Each option given is used to configure rados, and may be any valid
- * Ceph option, "id", or "conf".
- *
- * The "id" option indicates what user we should authenticate as to
- * the Ceph cluster.  If it is excluded we will use the Ceph default
- * (normally 'admin').
- *
- * The "conf" option specifies a Ceph configuration file to read.  If
- * it is not specified, we will read from the default Ceph locations
- * (e.g., /etc/ceph/ceph.conf).  To avoid reading _any_ configuration
- * file, specify conf=/dev/null.
- *
- * Configuration values containing :, @, or = can be escaped with a
- * leading "\".
- */
-
-/* rbd_aio_discard added in 0.1.2 */
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
-#define LIBRBD_SUPPORTS_DISCARD
-#else
-#undef LIBRBD_SUPPORTS_DISCARD
-#endif
-
-#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
-
-#define RBD_MAX_CONF_NAME_SIZE 128
-#define RBD_MAX_CONF_VAL_SIZE 512
-#define RBD_MAX_CONF_SIZE 1024
-#define RBD_MAX_POOL_NAME_SIZE 128
-#define RBD_MAX_SNAP_NAME_SIZE 128
-#define RBD_MAX_SNAPS 100
-
-typedef enum {
-    RBD_AIO_READ,
-    RBD_AIO_WRITE,
-    RBD_AIO_DISCARD,
-    RBD_AIO_FLUSH
-} RBDAIOCmd;
-
-typedef struct RBDAIOCB {
-    BlockAIOCB common;
-    QEMUBH *bh;
-    int64_t ret;
-    QEMUIOVector *qiov;
-    char *bounce;
-    RBDAIOCmd cmd;
-    int error;
-    struct BDRVRBDState *s;
-} RBDAIOCB;
-
-typedef struct RADOSCB {
-    RBDAIOCB *acb;
-    struct BDRVRBDState *s;
-    int64_t size;
-    char *buf;
-    int64_t ret;
-} RADOSCB;
-
-typedef struct BDRVRBDState {
-    rados_t cluster;
-    rados_ioctx_t io_ctx;
-    rbd_image_t image;
-    char name[RBD_MAX_IMAGE_NAME_SIZE];
-    char *snap;
-} BDRVRBDState;
-
-static int qemu_rbd_next_tok(char *dst, int dst_len,
-                             char *src, char delim,
-                             const char *name,
-                             char **p, Error **errp)
-{
-    int l;
-    char *end;
-
-    *p = NULL;
-
-    if (delim != '\0') {
-        for (end = src; *end; ++end) {
-            if (*end == delim) {
-                break;
-            }
-            if (*end == '\\' && end[1] != '\0') {
-                end++;
-            }
-        }
-        if (*end == delim) {
-            *p = end + 1;
-            *end = '\0';
-        }
-    }
-    l = strlen(src);
-    if (l >= dst_len) {
-        error_setg(errp, "%s too long", name);
-        return -EINVAL;
-    } else if (l == 0) {
-        error_setg(errp, "%s too short", name);
-        return -EINVAL;
-    }
-
-    pstrcpy(dst, dst_len, src);
-
-    return 0;
-}
-
-static void qemu_rbd_unescape(char *src)
-{
-    char *p;
-
-    for (p = src; *src; ++src, ++p) {
-        if (*src == '\\' && src[1] != '\0') {
-            src++;
-        }
-        *p = *src;
-    }
-    *p = '\0';
-}
-
-static int qemu_rbd_parsename(const char *filename,
-                              char *pool, int pool_len,
-                              char *snap, int snap_len,
-                              char *name, int name_len,
-                              char *conf, int conf_len,
-                              Error **errp)
-{
-    const char *start;
-    char *p, *buf;
-    int ret;
-
-    if (!strstart(filename, "rbd:", &start)) {
-        error_setg(errp, "File name must start with 'rbd:'");
-        return -EINVAL;
-    }
-
-    buf = g_strdup(start);
-    p = buf;
-    *snap = '\0';
-    *conf = '\0';
-
-    ret = qemu_rbd_next_tok(pool, pool_len, p,
-                            '/', "pool name", &p, errp);
-    if (ret < 0 || !p) {
-        ret = -EINVAL;
-        goto done;
-    }
-    qemu_rbd_unescape(pool);
-
-    if (strchr(p, '@')) {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                '@', "object name", &p, errp);
-        if (ret < 0) {
-            goto done;
-        }
-        ret = qemu_rbd_next_tok(snap, snap_len, p,
-                                ':', "snap name", &p, errp);
-        qemu_rbd_unescape(snap);
-    } else {
-        ret = qemu_rbd_next_tok(name, name_len, p,
-                                ':', "object name", &p, errp);
-    }
-    qemu_rbd_unescape(name);
-    if (ret < 0 || !p) {
-        goto done;
-    }
-
-    ret = qemu_rbd_next_tok(conf, conf_len, p,
-                            '\0', "configuration", &p, errp);
-
-done:
-    g_free(buf);
-    return ret;
-}
-
-static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
-{
-    const char *p = conf;
-
-    while (*p) {
-        int len;
-        const char *end = strchr(p, ':');
-
-        if (end) {
-            len = end - p;
-        } else {
-            len = strlen(p);
-        }
-
-        if (strncmp(p, "id=", 3) == 0) {
-            len -= 3;
-            strncpy(clientname, p + 3, len);
-            clientname[len] = '\0';
-            return clientname;
-        }
-        if (end == NULL) {
-            break;
-        }
-        p = end + 1;
-    }
-    return NULL;
-}
-
-
-static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
-                             Error **errp)
-{
-    if (secretid == 0) {
-        return 0;
-    }
-
-    gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
-                                                    errp);
-    if (!secret) {
-        return -1;
-    }
-
-    rados_conf_set(cluster, "key", secret);
-    g_free(secret);
-
-    return 0;
-}
-
-
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
-                             bool only_read_conf_file,
-                             Error **errp)
-{
-    char *p, *buf;
-    char name[RBD_MAX_CONF_NAME_SIZE];
-    char value[RBD_MAX_CONF_VAL_SIZE];
-    int ret = 0;
-
-    buf = g_strdup(conf);
-    p = buf;
-
-    while (p) {
-        ret = qemu_rbd_next_tok(name, sizeof(name), p,
-                                '=', "conf option name", &p, errp);
-        if (ret < 0) {
-            break;
-        }
-        qemu_rbd_unescape(name);
-
-        if (!p) {
-            error_setg(errp, "conf option %s has no value", name);
-            ret = -EINVAL;
-            break;
-        }
-
-        ret = qemu_rbd_next_tok(value, sizeof(value), p,
-                                ':', "conf option value", &p, errp);
-        if (ret < 0) {
-            break;
-        }
-        qemu_rbd_unescape(value);
-
-        if (strcmp(name, "conf") == 0) {
-            /* read the conf file alone, so it doesn't override more
-               specific settings for a particular device */
-            if (only_read_conf_file) {
-                ret = rados_conf_read_file(cluster, value);
-                if (ret < 0) {
-                    error_setg(errp, "error reading conf file %s", value);
-                    break;
-                }
-            }
-        } else if (strcmp(name, "id") == 0) {
-            /* ignore, this is parsed by qemu_rbd_parse_clientname() */
-        } else if (!only_read_conf_file) {
-            ret = rados_conf_set(cluster, name, value);
-            if (ret < 0) {
-                error_setg(errp, "invalid conf option %s", name);
-                ret = -EINVAL;
-                break;
-            }
-        }
-    }
-
-    g_free(buf);
-    return ret;
-}
-
-static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    Error *local_err = NULL;
-    int64_t bytes = 0;
-    int64_t objsize;
-    int obj_order = 0;
-    char pool[RBD_MAX_POOL_NAME_SIZE];
-    char name[RBD_MAX_IMAGE_NAME_SIZE];
-    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
-    char conf[RBD_MAX_CONF_SIZE];
-    char clientname_buf[RBD_MAX_CONF_SIZE];
-    char *clientname;
-    const char *secretid;
-    rados_t cluster;
-    rados_ioctx_t io_ctx;
-    int ret;
-
-    secretid = qemu_opt_get(opts, "password-secret");
-
-    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
-                           snap_buf, sizeof(snap_buf),
-                           name, sizeof(name),
-                           conf, sizeof(conf), &local_err) < 0) {
-        error_propagate(errp, local_err);
-        return -EINVAL;
-    }
-
-    /* Read out options */
-    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                     BDRV_SECTOR_SIZE);
-    objsize = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 0);
-    if (objsize) {
-        if ((objsize - 1) & objsize) {    /* not a power of 2? */
-            error_setg(errp, "obj size needs to be power of 2");
-            return -EINVAL;
-        }
-        if (objsize < 4096) {
-            error_setg(errp, "obj size too small");
-            return -EINVAL;
-        }
-        obj_order = ctz32(objsize);
-    }
-
-    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
-    if (rados_create(&cluster, clientname) < 0) {
-        error_setg(errp, "error initializing");
-        return -EIO;
-    }
-
-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(cluster, NULL);
-    } else if (conf[0] != '\0' &&
-               qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
-        rados_shutdown(cluster);
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    if (conf[0] != '\0' &&
-        qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
-        rados_shutdown(cluster);
-        error_propagate(errp, local_err);
-        return -EIO;
-    }
-
-    if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
-        rados_shutdown(cluster);
-        return -EIO;
-    }
-
-    if (rados_connect(cluster) < 0) {
-        error_setg(errp, "error connecting");
-        rados_shutdown(cluster);
-        return -EIO;
-    }
-
-    if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
-        error_setg(errp, "error opening pool %s", pool);
-        rados_shutdown(cluster);
-        return -EIO;
-    }
-
-    ret = rbd_create(io_ctx, name, bytes, &obj_order);
-    rados_ioctx_destroy(io_ctx);
-    rados_shutdown(cluster);
-
-    return ret;
-}
-
-/*
- * This aio completion is being called from rbd_finish_bh() and runs in qemu
- * BH context.
- */
-static void qemu_rbd_complete_aio(RADOSCB *rcb)
-{
-    RBDAIOCB *acb = rcb->acb;
-    int64_t r;
-
-    r = rcb->ret;
-
-    if (acb->cmd != RBD_AIO_READ) {
-        if (r < 0) {
-            acb->ret = r;
-            acb->error = 1;
-        } else if (!acb->error) {
-            acb->ret = rcb->size;
-        }
-    } else {
-        if (r < 0) {
-            memset(rcb->buf, 0, rcb->size);
-            acb->ret = r;
-            acb->error = 1;
-        } else if (r < rcb->size) {
-            memset(rcb->buf + r, 0, rcb->size - r);
-            if (!acb->error) {
-                acb->ret = rcb->size;
-            }
-        } else if (!acb->error) {
-            acb->ret = r;
-        }
-    }
-
-    g_free(rcb);
-
-    if (acb->cmd == RBD_AIO_READ) {
-        qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
-    }
-    qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
-
-    qemu_aio_unref(acb);
-}
-
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
-    .name = "rbd",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "Specification of the rbd image",
-        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-        { /* end of list */ }
-    },
-};
-
-static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
-                         Error **errp)
-{
-    BDRVRBDState *s = bs->opaque;
-    char pool[RBD_MAX_POOL_NAME_SIZE];
-    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
-    char conf[RBD_MAX_CONF_SIZE];
-    char clientname_buf[RBD_MAX_CONF_SIZE];
-    char *clientname;
-    const char *secretid;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *filename;
-    int r;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        qemu_opts_del(opts);
-        return -EINVAL;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-    secretid = qemu_opt_get(opts, "password-secret");
-
-    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
-                           snap_buf, sizeof(snap_buf),
-                           s->name, sizeof(s->name),
-                           conf, sizeof(conf), errp) < 0) {
-        r = -EINVAL;
-        goto failed_opts;
-    }
-
-    clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
-    r = rados_create(&s->cluster, clientname);
-    if (r < 0) {
-        error_setg(errp, "error initializing");
-        goto failed_opts;
-    }
-
-    s->snap = NULL;
-    if (snap_buf[0] != '\0') {
-        s->snap = g_strdup(snap_buf);
-    }
-
-    if (strstr(conf, "conf=") == NULL) {
-        /* try default location, but ignore failure */
-        rados_conf_read_file(s->cluster, NULL);
-    } else if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
-    }
-
-    if (conf[0] != '\0') {
-        r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
-        if (r < 0) {
-            goto failed_shutdown;
-        }
-    }
-
-    if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
-        r = -EIO;
-        goto failed_shutdown;
-    }
-
-    /*
-     * Fallback to more conservative semantics if setting cache
-     * options fails. Ignore errors from setting rbd_cache because the
-     * only possible error is that the option does not exist, and
-     * librbd defaults to no caching. If write through caching cannot
-     * be set up, fall back to no caching.
-     */
-    if (flags & BDRV_O_NOCACHE) {
-        rados_conf_set(s->cluster, "rbd_cache", "false");
-    } else {
-        rados_conf_set(s->cluster, "rbd_cache", "true");
-    }
-
-    r = rados_connect(s->cluster);
-    if (r < 0) {
-        error_setg(errp, "error connecting");
-        goto failed_shutdown;
-    }
-
-    r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
-    if (r < 0) {
-        error_setg(errp, "error opening pool %s", pool);
-        goto failed_shutdown;
-    }
-
-    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
-    if (r < 0) {
-        error_setg(errp, "error reading header from %s", s->name);
-        goto failed_open;
-    }
-
-    bs->read_only = (s->snap != NULL);
-
-    qemu_opts_del(opts);
-    return 0;
-
-failed_open:
-    rados_ioctx_destroy(s->io_ctx);
-failed_shutdown:
-    rados_shutdown(s->cluster);
-    g_free(s->snap);
-failed_opts:
-    qemu_opts_del(opts);
-    return r;
-}
-
-static void qemu_rbd_close(BlockDriverState *bs)
-{
-    BDRVRBDState *s = bs->opaque;
-
-    rbd_close(s->image);
-    rados_ioctx_destroy(s->io_ctx);
-    g_free(s->snap);
-    rados_shutdown(s->cluster);
-}
-
-static const AIOCBInfo rbd_aiocb_info = {
-    .aiocb_size = sizeof(RBDAIOCB),
-};
-
-static void rbd_finish_bh(void *opaque)
-{
-    RADOSCB *rcb = opaque;
-    qemu_bh_delete(rcb->acb->bh);
-    qemu_rbd_complete_aio(rcb);
-}
-
-/*
- * This is the callback function for rbd_aio_read and _write
- *
- * Note: this function is being called from a non qemu thread so
- * we need to be careful about what we do here. Generally we only
- * schedule a BH, and do the rest of the io completion handling
- * from rbd_finish_bh() which runs in a qemu context.
- */
-static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
-{
-    RBDAIOCB *acb = rcb->acb;
-
-    rcb->ret = rbd_aio_get_return_value(c);
-    rbd_aio_release(c);
-
-    acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
-                         rbd_finish_bh, rcb);
-    qemu_bh_schedule(acb->bh);
-}
-
-static int rbd_aio_discard_wrapper(rbd_image_t image,
-                                   uint64_t off,
-                                   uint64_t len,
-                                   rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_DISCARD
-    return rbd_aio_discard(image, off, len, comp);
-#else
-    return -ENOTSUP;
-#endif
-}
-
-static int rbd_aio_flush_wrapper(rbd_image_t image,
-                                 rbd_completion_t comp)
-{
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-    return rbd_aio_flush(image, comp);
-#else
-    return -ENOTSUP;
-#endif
-}
-
-static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 QEMUIOVector *qiov,
-                                 int nb_sectors,
-                                 BlockCompletionFunc *cb,
-                                 void *opaque,
-                                 RBDAIOCmd cmd)
-{
-    RBDAIOCB *acb;
-    RADOSCB *rcb = NULL;
-    rbd_completion_t c;
-    int64_t off, size;
-    char *buf;
-    int r;
-
-    BDRVRBDState *s = bs->opaque;
-
-    acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
-    acb->cmd = cmd;
-    acb->qiov = qiov;
-    if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
-        acb->bounce = NULL;
-    } else {
-        acb->bounce = qemu_try_blockalign(bs, qiov->size);
-        if (acb->bounce == NULL) {
-            goto failed;
-        }
-    }
-    acb->ret = 0;
-    acb->error = 0;
-    acb->s = s;
-    acb->bh = NULL;
-
-    if (cmd == RBD_AIO_WRITE) {
-        qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-    }
-
-    buf = acb->bounce;
-
-    off = sector_num * BDRV_SECTOR_SIZE;
-    size = nb_sectors * BDRV_SECTOR_SIZE;
-
-    rcb = g_new(RADOSCB, 1);
-    rcb->acb = acb;
-    rcb->buf = buf;
-    rcb->s = acb->s;
-    rcb->size = size;
-    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
-    if (r < 0) {
-        goto failed;
-    }
-
-    switch (cmd) {
-    case RBD_AIO_WRITE:
-        r = rbd_aio_write(s->image, off, size, buf, c);
-        break;
-    case RBD_AIO_READ:
-        r = rbd_aio_read(s->image, off, size, buf, c);
-        break;
-    case RBD_AIO_DISCARD:
-        r = rbd_aio_discard_wrapper(s->image, off, size, c);
-        break;
-    case RBD_AIO_FLUSH:
-        r = rbd_aio_flush_wrapper(s->image, c);
-        break;
-    default:
-        r = -EINVAL;
-    }
-
-    if (r < 0) {
-        goto failed_completion;
-    }
-
-    return &acb->common;
-
-failed_completion:
-    rbd_aio_release(c);
-failed:
-    g_free(rcb);
-    qemu_vfree(acb->bounce);
-    qemu_aio_unref(acb);
-    return NULL;
-}
-
-static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      QEMUIOVector *qiov,
-                                      int nb_sectors,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
-{
-    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
-                         RBD_AIO_READ);
-}
-
-static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
-                                       int64_t sector_num,
-                                       QEMUIOVector *qiov,
-                                       int nb_sectors,
-                                       BlockCompletionFunc *cb,
-                                       void *opaque)
-{
-    return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
-                         RBD_AIO_WRITE);
-}
-
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
-                                      BlockCompletionFunc *cb,
-                                      void *opaque)
-{
-    return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
-}
-
-#else
-
-static int qemu_rbd_co_flush(BlockDriverState *bs)
-{
-#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
-    /* rbd_flush added in 0.1.1 */
-    BDRVRBDState *s = bs->opaque;
-    return rbd_flush(s->image);
-#else
-    return 0;
-#endif
-}
-#endif
-
-static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVRBDState *s = bs->opaque;
-    rbd_image_info_t info;
-    int r;
-
-    r = rbd_stat(s->image, &info, sizeof(info));
-    if (r < 0) {
-        return r;
-    }
-
-    bdi->cluster_size = info.obj_size;
-    return 0;
-}
-
-static int64_t qemu_rbd_getlength(BlockDriverState *bs)
-{
-    BDRVRBDState *s = bs->opaque;
-    rbd_image_info_t info;
-    int r;
-
-    r = rbd_stat(s->image, &info, sizeof(info));
-    if (r < 0) {
-        return r;
-    }
-
-    return info.size;
-}
-
-static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r;
-
-    r = rbd_resize(s->image, offset);
-    if (r < 0) {
-        return r;
-    }
-
-    return 0;
-}
-
-static int qemu_rbd_snap_create(BlockDriverState *bs,
-                                QEMUSnapshotInfo *sn_info)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r;
-
-    if (sn_info->name[0] == '\0') {
-        return -EINVAL; /* we need a name for rbd snapshots */
-    }
-
-    /*
-     * rbd snapshots are using the name as the user controlled unique identifier
-     * we can't use the rbd snapid for that purpose, as it can't be set
-     */
-    if (sn_info->id_str[0] != '\0' &&
-        strcmp(sn_info->id_str, sn_info->name) != 0) {
-        return -EINVAL;
-    }
-
-    if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) {
-        return -ERANGE;
-    }
-
-    r = rbd_snap_create(s->image, sn_info->name);
-    if (r < 0) {
-        error_report("failed to create snap: %s", strerror(-r));
-        return r;
-    }
-
-    return 0;
-}
-
-static int qemu_rbd_snap_remove(BlockDriverState *bs,
-                                const char *snapshot_id,
-                                const char *snapshot_name,
-                                Error **errp)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r;
-
-    if (!snapshot_name) {
-        error_setg(errp, "rbd need a valid snapshot name");
-        return -EINVAL;
-    }
-
-    /* If snapshot_id is specified, it must be equal to name, see
-       qemu_rbd_snap_list() */
-    if (snapshot_id && strcmp(snapshot_id, snapshot_name)) {
-        error_setg(errp,
-                   "rbd do not support snapshot id, it should be NULL or "
-                   "equal to snapshot name");
-        return -EINVAL;
-    }
-
-    r = rbd_snap_remove(s->image, snapshot_name);
-    if (r < 0) {
-        error_setg_errno(errp, -r, "Failed to remove the snapshot");
-    }
-    return r;
-}
-
-static int qemu_rbd_snap_rollback(BlockDriverState *bs,
-                                  const char *snapshot_name)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r;
-
-    r = rbd_snap_rollback(s->image, snapshot_name);
-    return r;
-}
-
-static int qemu_rbd_snap_list(BlockDriverState *bs,
-                              QEMUSnapshotInfo **psn_tab)
-{
-    BDRVRBDState *s = bs->opaque;
-    QEMUSnapshotInfo *sn_info, *sn_tab = NULL;
-    int i, snap_count;
-    rbd_snap_info_t *snaps;
-    int max_snaps = RBD_MAX_SNAPS;
-
-    do {
-        snaps = g_new(rbd_snap_info_t, max_snaps);
-        snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
-        if (snap_count <= 0) {
-            g_free(snaps);
-        }
-    } while (snap_count == -ERANGE);
-
-    if (snap_count <= 0) {
-        goto done;
-    }
-
-    sn_tab = g_new0(QEMUSnapshotInfo, snap_count);
-
-    for (i = 0; i < snap_count; i++) {
-        const char *snap_name = snaps[i].name;
-
-        sn_info = sn_tab + i;
-        pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name);
-        pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name);
-
-        sn_info->vm_state_size = snaps[i].size;
-        sn_info->date_sec = 0;
-        sn_info->date_nsec = 0;
-        sn_info->vm_clock_nsec = 0;
-    }
-    rbd_snap_list_end(snaps);
-    g_free(snaps);
-
- done:
-    *psn_tab = sn_tab;
-    return snap_count;
-}
-
-#ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
-                                        int64_t sector_num,
-                                        int nb_sectors,
-                                        BlockCompletionFunc *cb,
-                                        void *opaque)
-{
-    return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
-                         RBD_AIO_DISCARD);
-}
-#endif
-
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
-static void qemu_rbd_invalidate_cache(BlockDriverState *bs,
-                                      Error **errp)
-{
-    BDRVRBDState *s = bs->opaque;
-    int r = rbd_invalidate_cache(s->image);
-    if (r < 0) {
-        error_setg_errno(errp, -r, "Failed to invalidate the cache");
-    }
-}
-#endif
-
-static QemuOptsList qemu_rbd_create_opts = {
-    .name = "rbd-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "RBD object size"
-        },
-        {
-            .name = "password-secret",
-            .type = QEMU_OPT_STRING,
-            .help = "ID of secret providing the password",
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_rbd = {
-    .format_name        = "rbd",
-    .instance_size      = sizeof(BDRVRBDState),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open     = qemu_rbd_open,
-    .bdrv_close         = qemu_rbd_close,
-    .bdrv_create        = qemu_rbd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_get_info      = qemu_rbd_getinfo,
-    .create_opts        = &qemu_rbd_create_opts,
-    .bdrv_getlength     = qemu_rbd_getlength,
-    .bdrv_truncate      = qemu_rbd_truncate,
-    .protocol_name      = "rbd",
-
-    .bdrv_aio_readv         = qemu_rbd_aio_readv,
-    .bdrv_aio_writev        = qemu_rbd_aio_writev,
-
-#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
-    .bdrv_aio_flush         = qemu_rbd_aio_flush,
-#else
-    .bdrv_co_flush_to_disk  = qemu_rbd_co_flush,
-#endif
-
-#ifdef LIBRBD_SUPPORTS_DISCARD
-    .bdrv_aio_discard       = qemu_rbd_aio_discard,
-#endif
-
-    .bdrv_snapshot_create   = qemu_rbd_snap_create,
-    .bdrv_snapshot_delete   = qemu_rbd_snap_remove,
-    .bdrv_snapshot_list     = qemu_rbd_snap_list,
-    .bdrv_snapshot_goto     = qemu_rbd_snap_rollback,
-#ifdef LIBRBD_SUPPORTS_INVALIDATE
-    .bdrv_invalidate_cache  = qemu_rbd_invalidate_cache,
-#endif
-};
-
-static void bdrv_rbd_init(void)
-{
-    bdrv_register(&bdrv_rbd);
-}
-
-block_init(bdrv_rbd_init);
diff --git a/qemu/block/sheepdog.c b/qemu/block/sheepdog.c
deleted file mode 100644
index 33e0a3382..000000000
--- a/qemu/block/sheepdog.c
+++ /dev/null
@@ -1,3042 +0,0 @@
-/*
- * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version
- * 2 as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu/uri.h"
-#include "qemu/error-report.h"
-#include "qemu/sockets.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/bitops.h"
-#include "qemu/cutils.h"
-
-#define SD_PROTO_VER 0x01
-
-#define SD_DEFAULT_ADDR "localhost"
-#define SD_DEFAULT_PORT 7000
-
-#define SD_OP_CREATE_AND_WRITE_OBJ  0x01
-#define SD_OP_READ_OBJ       0x02
-#define SD_OP_WRITE_OBJ      0x03
-/* 0x04 is used internally by Sheepdog */
-
-#define SD_OP_NEW_VDI        0x11
-#define SD_OP_LOCK_VDI       0x12
-#define SD_OP_RELEASE_VDI    0x13
-#define SD_OP_GET_VDI_INFO   0x14
-#define SD_OP_READ_VDIS      0x15
-#define SD_OP_FLUSH_VDI      0x16
-#define SD_OP_DEL_VDI        0x17
-#define SD_OP_GET_CLUSTER_DEFAULT   0x18
-
-#define SD_FLAG_CMD_WRITE    0x01
-#define SD_FLAG_CMD_COW      0x02
-#define SD_FLAG_CMD_CACHE    0x04 /* Writeback mode for cache */
-#define SD_FLAG_CMD_DIRECT   0x08 /* Don't use cache */
-
-#define SD_RES_SUCCESS       0x00 /* Success */
-#define SD_RES_UNKNOWN       0x01 /* Unknown error */
-#define SD_RES_NO_OBJ        0x02 /* No object found */
-#define SD_RES_EIO           0x03 /* I/O error */
-#define SD_RES_VDI_EXIST     0x04 /* Vdi exists already */
-#define SD_RES_INVALID_PARMS 0x05 /* Invalid parameters */
-#define SD_RES_SYSTEM_ERROR  0x06 /* System error */
-#define SD_RES_VDI_LOCKED    0x07 /* Vdi is locked */
-#define SD_RES_NO_VDI        0x08 /* No vdi found */
-#define SD_RES_NO_BASE_VDI   0x09 /* No base vdi found */
-#define SD_RES_VDI_READ      0x0A /* Cannot read requested vdi */
-#define SD_RES_VDI_WRITE     0x0B /* Cannot write requested vdi */
-#define SD_RES_BASE_VDI_READ 0x0C /* Cannot read base vdi */
-#define SD_RES_BASE_VDI_WRITE   0x0D /* Cannot write base vdi */
-#define SD_RES_NO_TAG        0x0E /* Requested tag is not found */
-#define SD_RES_STARTUP       0x0F /* Sheepdog is on starting up */
-#define SD_RES_VDI_NOT_LOCKED   0x10 /* Vdi is not locked */
-#define SD_RES_SHUTDOWN      0x11 /* Sheepdog is shutting down */
-#define SD_RES_NO_MEM        0x12 /* Cannot allocate memory */
-#define SD_RES_FULL_VDI      0x13 /* we already have the maximum vdis */
-#define SD_RES_VER_MISMATCH  0x14 /* Protocol version mismatch */
-#define SD_RES_NO_SPACE      0x15 /* Server has no room for new objects */
-#define SD_RES_WAIT_FOR_FORMAT  0x16 /* Waiting for a format operation */
-#define SD_RES_WAIT_FOR_JOIN    0x17 /* Waiting for other nodes joining */
-#define SD_RES_JOIN_FAILED   0x18 /* Target node had failed to join sheepdog */
-#define SD_RES_HALT          0x19 /* Sheepdog is stopped serving IO request */
-#define SD_RES_READONLY      0x1A /* Object is read-only */
-
-/*
- * Object ID rules
- *
- *  0 - 19 (20 bits): data object space
- * 20 - 31 (12 bits): reserved data object space
- * 32 - 55 (24 bits): vdi object space
- * 56 - 59 ( 4 bits): reserved vdi object space
- * 60 - 63 ( 4 bits): object type identifier space
- */
-
-#define VDI_SPACE_SHIFT   32
-#define VDI_BIT (UINT64_C(1) << 63)
-#define VMSTATE_BIT (UINT64_C(1) << 62)
-#define MAX_DATA_OBJS (UINT64_C(1) << 20)
-#define MAX_CHILDREN 1024
-#define SD_MAX_VDI_LEN 256
-#define SD_MAX_VDI_TAG_LEN 256
-#define SD_NR_VDIS   (1U << 24)
-#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
-#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
-#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22
-/*
- * For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
- * (SD_EC_MAX_STRIP - 1) for parity strips
- *
- * SD_MAX_COPIES is sum of number of data strips and parity strips.
- */
-#define SD_EC_MAX_STRIP 16
-#define SD_MAX_COPIES (SD_EC_MAX_STRIP * 2 - 1)
-
-#define SD_INODE_SIZE (sizeof(SheepdogInode))
-#define CURRENT_VDI_ID 0
-
-#define LOCK_TYPE_NORMAL 0
-#define LOCK_TYPE_SHARED 1      /* for iSCSI multipath */
-
-typedef struct SheepdogReq {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t opcode_specific[8];
-} SheepdogReq;
-
-typedef struct SheepdogRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint32_t opcode_specific[7];
-} SheepdogRsp;
-
-typedef struct SheepdogObjReq {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint64_t oid;
-    uint64_t cow_oid;
-    uint8_t copies;
-    uint8_t copy_policy;
-    uint8_t reserved[6];
-    uint64_t offset;
-} SheepdogObjReq;
-
-typedef struct SheepdogObjRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint8_t copies;
-    uint8_t copy_policy;
-    uint8_t reserved[2];
-    uint32_t pad[6];
-} SheepdogObjRsp;
-
-typedef struct SheepdogVdiReq {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint64_t vdi_size;
-    uint32_t base_vdi_id;
-    uint8_t copies;
-    uint8_t copy_policy;
-    uint8_t store_policy;
-    uint8_t block_size_shift;
-    uint32_t snapid;
-    uint32_t type;
-    uint32_t pad[2];
-} SheepdogVdiReq;
-
-typedef struct SheepdogVdiRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint32_t rsvd;
-    uint32_t vdi_id;
-    uint32_t pad[5];
-} SheepdogVdiRsp;
-
-typedef struct SheepdogClusterRsp {
-    uint8_t proto_ver;
-    uint8_t opcode;
-    uint16_t flags;
-    uint32_t epoch;
-    uint32_t id;
-    uint32_t data_length;
-    uint32_t result;
-    uint8_t nr_copies;
-    uint8_t copy_policy;
-    uint8_t block_size_shift;
-    uint8_t __pad1;
-    uint32_t __pad2[6];
-} SheepdogClusterRsp;
-
-typedef struct SheepdogInode {
-    char name[SD_MAX_VDI_LEN];
-    char tag[SD_MAX_VDI_TAG_LEN];
-    uint64_t ctime;
-    uint64_t snap_ctime;
-    uint64_t vm_clock_nsec;
-    uint64_t vdi_size;
-    uint64_t vm_state_size;
-    uint16_t copy_policy;
-    uint8_t nr_copies;
-    uint8_t block_size_shift;
-    uint32_t snap_id;
-    uint32_t vdi_id;
-    uint32_t parent_vdi_id;
-    uint32_t child_vdi_id[MAX_CHILDREN];
-    uint32_t data_vdi_id[MAX_DATA_OBJS];
-} SheepdogInode;
-
-#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id)
-
-/*
- * 64 bit FNV-1a non-zero initial basis
- */
-#define FNV1A_64_INIT ((uint64_t)0xcbf29ce484222325ULL)
-
-/*
- * 64 bit Fowler/Noll/Vo FNV-1a hash code
- */
-static inline uint64_t fnv_64a_buf(void *buf, size_t len, uint64_t hval)
-{
-    unsigned char *bp = buf;
-    unsigned char *be = bp + len;
-    while (bp < be) {
-        hval ^= (uint64_t) *bp++;
-        hval += (hval << 1) + (hval << 4) + (hval << 5) +
-            (hval << 7) + (hval << 8) + (hval << 40);
-    }
-    return hval;
-}
-
-static inline bool is_data_obj_writable(SheepdogInode *inode, unsigned int idx)
-{
-    return inode->vdi_id == inode->data_vdi_id[idx];
-}
-
-static inline bool is_data_obj(uint64_t oid)
-{
-    return !(VDI_BIT & oid);
-}
-
-static inline uint64_t data_oid_to_idx(uint64_t oid)
-{
-    return oid & (MAX_DATA_OBJS - 1);
-}
-
-static inline uint32_t oid_to_vid(uint64_t oid)
-{
-    return (oid & ~VDI_BIT) >> VDI_SPACE_SHIFT;
-}
-
-static inline uint64_t vid_to_vdi_oid(uint32_t vid)
-{
-    return VDI_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT);
-}
-
-static inline uint64_t vid_to_vmstate_oid(uint32_t vid, uint32_t idx)
-{
-    return VMSTATE_BIT | ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
-}
-
-static inline uint64_t vid_to_data_oid(uint32_t vid, uint32_t idx)
-{
-    return ((uint64_t)vid << VDI_SPACE_SHIFT) | idx;
-}
-
-static inline bool is_snapshot(struct SheepdogInode *inode)
-{
-    return !!inode->snap_ctime;
-}
-
-static inline size_t count_data_objs(const struct SheepdogInode *inode)
-{
-    return DIV_ROUND_UP(inode->vdi_size,
-                        (1UL << inode->block_size_shift));
-}
-
-#undef DPRINTF
-#ifdef DEBUG_SDOG
-#define DPRINTF(fmt, args...)                                       \
-    do {                                                            \
-        fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
-    } while (0)
-#else
-#define DPRINTF(fmt, args...)
-#endif
-
-typedef struct SheepdogAIOCB SheepdogAIOCB;
-
-typedef struct AIOReq {
-    SheepdogAIOCB *aiocb;
-    unsigned int iov_offset;
-
-    uint64_t oid;
-    uint64_t base_oid;
-    uint64_t offset;
-    unsigned int data_len;
-    uint8_t flags;
-    uint32_t id;
-    bool create;
-
-    QLIST_ENTRY(AIOReq) aio_siblings;
-} AIOReq;
-
-enum AIOCBState {
-    AIOCB_WRITE_UDATA,
-    AIOCB_READ_UDATA,
-    AIOCB_FLUSH_CACHE,
-    AIOCB_DISCARD_OBJ,
-};
-
-#define AIOCBOverlapping(x, y)                                 \
-    (!(x->max_affect_data_idx < y->min_affect_data_idx          \
-       || y->max_affect_data_idx < x->min_affect_data_idx))
-
-struct SheepdogAIOCB {
-    BlockAIOCB common;
-
-    QEMUIOVector *qiov;
-
-    int64_t sector_num;
-    int nb_sectors;
-
-    int ret;
-    enum AIOCBState aiocb_type;
-
-    Coroutine *coroutine;
-    void (*aio_done_func)(SheepdogAIOCB *);
-
-    bool cancelable;
-    int nr_pending;
-
-    uint32_t min_affect_data_idx;
-    uint32_t max_affect_data_idx;
-
-    /*
-     * The difference between affect_data_idx and dirty_data_idx:
-     * affect_data_idx represents range of index of all request types.
-     * dirty_data_idx represents range of index updated by COW requests.
-     * dirty_data_idx is used for updating an inode object.
-     */
-    uint32_t min_dirty_data_idx;
-    uint32_t max_dirty_data_idx;
-
-    QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
-};
-
-typedef struct BDRVSheepdogState {
-    BlockDriverState *bs;
-    AioContext *aio_context;
-
-    SheepdogInode inode;
-
-    char name[SD_MAX_VDI_LEN];
-    bool is_snapshot;
-    uint32_t cache_flags;
-    bool discard_supported;
-
-    char *host_spec;
-    bool is_unix;
-    int fd;
-
-    CoMutex lock;
-    Coroutine *co_send;
-    Coroutine *co_recv;
-
-    uint32_t aioreq_seq_num;
-
-    /* Every aio request must be linked to either of these queues. */
-    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
-    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
-
-    CoQueue overlapping_queue;
-    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
-} BDRVSheepdogState;
-
-typedef struct BDRVSheepdogReopenState {
-    int fd;
-    int cache_flags;
-} BDRVSheepdogReopenState;
-
-static const char * sd_strerror(int err)
-{
-    int i;
-
-    static const struct {
-        int err;
-        const char *desc;
-    } errors[] = {
-        {SD_RES_SUCCESS, "Success"},
-        {SD_RES_UNKNOWN, "Unknown error"},
-        {SD_RES_NO_OBJ, "No object found"},
-        {SD_RES_EIO, "I/O error"},
-        {SD_RES_VDI_EXIST, "VDI exists already"},
-        {SD_RES_INVALID_PARMS, "Invalid parameters"},
-        {SD_RES_SYSTEM_ERROR, "System error"},
-        {SD_RES_VDI_LOCKED, "VDI is already locked"},
-        {SD_RES_NO_VDI, "No vdi found"},
-        {SD_RES_NO_BASE_VDI, "No base VDI found"},
-        {SD_RES_VDI_READ, "Failed read the requested VDI"},
-        {SD_RES_VDI_WRITE, "Failed to write the requested VDI"},
-        {SD_RES_BASE_VDI_READ, "Failed to read the base VDI"},
-        {SD_RES_BASE_VDI_WRITE, "Failed to write the base VDI"},
-        {SD_RES_NO_TAG, "Failed to find the requested tag"},
-        {SD_RES_STARTUP, "The system is still booting"},
-        {SD_RES_VDI_NOT_LOCKED, "VDI isn't locked"},
-        {SD_RES_SHUTDOWN, "The system is shutting down"},
-        {SD_RES_NO_MEM, "Out of memory on the server"},
-        {SD_RES_FULL_VDI, "We already have the maximum vdis"},
-        {SD_RES_VER_MISMATCH, "Protocol version mismatch"},
-        {SD_RES_NO_SPACE, "Server has no space for new objects"},
-        {SD_RES_WAIT_FOR_FORMAT, "Sheepdog is waiting for a format operation"},
-        {SD_RES_WAIT_FOR_JOIN, "Sheepdog is waiting for other nodes joining"},
-        {SD_RES_JOIN_FAILED, "Target node had failed to join sheepdog"},
-        {SD_RES_HALT, "Sheepdog is stopped serving IO request"},
-        {SD_RES_READONLY, "Object is read-only"},
-    };
-
-    for (i = 0; i < ARRAY_SIZE(errors); ++i) {
-        if (errors[i].err == err) {
-            return errors[i].desc;
-        }
-    }
-
-    return "Invalid error code";
-}
-
-/*
- * Sheepdog I/O handling:
- *
- * 1. In sd_co_rw_vector, we send the I/O requests to the server and
- *    link the requests to the inflight_list in the
- *    BDRVSheepdogState.  The function exits without waiting for
- *    receiving the response.
- *
- * 2. We receive the response in aio_read_response, the fd handler to
- *    the sheepdog connection.  If metadata update is needed, we send
- *    the write request to the vdi object in sd_write_done, the write
- *    completion function.  We switch back to sd_co_readv/writev after
- *    all the requests belonging to the AIOCB are finished.
- */
-
-static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
-                                    uint64_t oid, unsigned int data_len,
-                                    uint64_t offset, uint8_t flags, bool create,
-                                    uint64_t base_oid, unsigned int iov_offset)
-{
-    AIOReq *aio_req;
-
-    aio_req = g_malloc(sizeof(*aio_req));
-    aio_req->aiocb = acb;
-    aio_req->iov_offset = iov_offset;
-    aio_req->oid = oid;
-    aio_req->base_oid = base_oid;
-    aio_req->offset = offset;
-    aio_req->data_len = data_len;
-    aio_req->flags = flags;
-    aio_req->id = s->aioreq_seq_num++;
-    aio_req->create = create;
-
-    acb->nr_pending++;
-    return aio_req;
-}
-
-static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
-{
-    SheepdogAIOCB *acb = aio_req->aiocb;
-
-    acb->cancelable = false;
-    QLIST_REMOVE(aio_req, aio_siblings);
-    g_free(aio_req);
-
-    acb->nr_pending--;
-}
-
-static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
-{
-    qemu_coroutine_enter(acb->coroutine, NULL);
-    qemu_aio_unref(acb);
-}
-
-/*
- * Check whether the specified acb can be canceled
- *
- * We can cancel aio when any request belonging to the acb is:
- *  - Not processed by the sheepdog server.
- *  - Not linked to the inflight queue.
- */
-static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
-{
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    AIOReq *aioreq;
-
-    if (!acb->cancelable) {
-        return false;
-    }
-
-    QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
-        if (aioreq->aiocb == acb) {
-            return false;
-        }
-    }
-
-    return true;
-}
-
-static void sd_aio_cancel(BlockAIOCB *blockacb)
-{
-    SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    AIOReq *aioreq, *next;
-
-    if (sd_acb_cancelable(acb)) {
-        /* Remove outstanding requests from failed queue.  */
-        QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
-                           next) {
-            if (aioreq->aiocb == acb) {
-                free_aio_req(s, aioreq);
-            }
-        }
-
-        assert(acb->nr_pending == 0);
-        if (acb->common.cb) {
-            acb->common.cb(acb->common.opaque, -ECANCELED);
-        }
-        sd_finish_aiocb(acb);
-    }
-}
-
-static const AIOCBInfo sd_aiocb_info = {
-    .aiocb_size     = sizeof(SheepdogAIOCB),
-    .cancel_async   = sd_aio_cancel,
-};
-
-static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
-                                   int64_t sector_num, int nb_sectors)
-{
-    SheepdogAIOCB *acb;
-    uint32_t object_size;
-    BDRVSheepdogState *s = bs->opaque;
-
-    object_size = (UINT32_C(1) << s->inode.block_size_shift);
-
-    acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
-
-    acb->qiov = qiov;
-
-    acb->sector_num = sector_num;
-    acb->nb_sectors = nb_sectors;
-
-    acb->aio_done_func = NULL;
-    acb->cancelable = true;
-    acb->coroutine = qemu_coroutine_self();
-    acb->ret = 0;
-    acb->nr_pending = 0;
-
-    acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
-    acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
-                              acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
-
-    acb->min_dirty_data_idx = UINT32_MAX;
-    acb->max_dirty_data_idx = 0;
-
-    return acb;
-}
-
-/* Return -EIO in case of error, file descriptor on success */
-static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
-{
-    int fd;
-
-    if (s->is_unix) {
-        fd = unix_connect(s->host_spec, errp);
-    } else {
-        fd = inet_connect(s->host_spec, errp);
-
-        if (fd >= 0) {
-            int ret = socket_set_nodelay(fd);
-            if (ret < 0) {
-                error_report("%s", strerror(errno));
-            }
-        }
-    }
-
-    if (fd >= 0) {
-        qemu_set_nonblock(fd);
-    } else {
-        fd = -EIO;
-    }
-
-    return fd;
-}
-
-/* Return 0 on success and -errno in case of error */
-static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
-                                    unsigned int *wlen)
-{
-    int ret;
-
-    ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
-    if (ret != sizeof(*hdr)) {
-        error_report("failed to send a req, %s", strerror(errno));
-        return -errno;
-    }
-
-    ret = qemu_co_send(sockfd, data, *wlen);
-    if (ret != *wlen) {
-        error_report("failed to send a req, %s", strerror(errno));
-        return -errno;
-    }
-
-    return ret;
-}
-
-static void restart_co_req(void *opaque)
-{
-    Coroutine *co = opaque;
-
-    qemu_coroutine_enter(co, NULL);
-}
-
-typedef struct SheepdogReqCo {
-    int sockfd;
-    AioContext *aio_context;
-    SheepdogReq *hdr;
-    void *data;
-    unsigned int *wlen;
-    unsigned int *rlen;
-    int ret;
-    bool finished;
-} SheepdogReqCo;
-
-static coroutine_fn void do_co_req(void *opaque)
-{
-    int ret;
-    Coroutine *co;
-    SheepdogReqCo *srco = opaque;
-    int sockfd = srco->sockfd;
-    SheepdogReq *hdr = srco->hdr;
-    void *data = srco->data;
-    unsigned int *wlen = srco->wlen;
-    unsigned int *rlen = srco->rlen;
-
-    co = qemu_coroutine_self();
-    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, restart_co_req, co);
-
-    ret = send_co_req(sockfd, hdr, data, wlen);
-    if (ret < 0) {
-        goto out;
-    }
-
-    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       restart_co_req, NULL, co);
-
-    ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
-    if (ret != sizeof(*hdr)) {
-        error_report("failed to get a rsp, %s", strerror(errno));
-        ret = -errno;
-        goto out;
-    }
-
-    if (*rlen > hdr->data_length) {
-        *rlen = hdr->data_length;
-    }
-
-    if (*rlen) {
-        ret = qemu_co_recv(sockfd, data, *rlen);
-        if (ret != *rlen) {
-            error_report("failed to get the data, %s", strerror(errno));
-            ret = -errno;
-            goto out;
-        }
-    }
-    ret = 0;
-out:
-    /* there is at most one request for this sockfd, so it is safe to
-     * set each handler to NULL. */
-    aio_set_fd_handler(srco->aio_context, sockfd, false,
-                       NULL, NULL, NULL);
-
-    srco->ret = ret;
-    srco->finished = true;
-}
-
-/*
- * Synchronously exchange one request with the sheep on @sockfd.
- *
- * When called from coroutine context the exchange runs inline; otherwise a
- * fresh coroutine is started and @aio_context is polled until it finishes.
- *
- * Returns 0 on success, -errno in case of error.
- */
-static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
-                  void *data, unsigned int *wlen, unsigned int *rlen)
-{
-    SheepdogReqCo req_state = {
-        .sockfd = sockfd,
-        .aio_context = aio_context,
-        .hdr = hdr,
-        .data = data,
-        .wlen = wlen,
-        .rlen = rlen,
-        .ret = 0,
-        .finished = false,
-    };
-
-    if (qemu_in_coroutine()) {
-        /* Already inside a coroutine: no need to spawn another one. */
-        do_co_req(&req_state);
-        return req_state.ret;
-    }
-
-    qemu_coroutine_enter(qemu_coroutine_create(do_co_req), &req_state);
-    while (!req_state.finished) {
-        aio_poll(aio_context, true);
-    }
-
-    return req_state.ret;
-}
-
-static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                                         struct iovec *iov, int niov,
-                                         enum AIOCBState aiocb_type);
-static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
-static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
-static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
-static void co_write_request(void *opaque);
-
-/*
- * Drop the current connection to the sheep, re-establish it and resend
- * every request that was in flight.
- *
- * @opaque is the BDRVSheepdogState.  The function keeps retrying the
- * connection once per second until get_sheep_fd() succeeds.
- */
-static coroutine_fn void reconnect_to_sdog(void *opaque)
-{
-    BDRVSheepdogState *s = opaque;
-    AIOReq *aio_req, *next;
-
-    aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
-                       NULL, NULL);
-    /* closesocket() is what every other socket teardown in this file uses. */
-    closesocket(s->fd);
-    s->fd = -1;
-
-    /* Wait for outstanding write requests to be completed. */
-    while (s->co_send != NULL) {
-        co_write_request(opaque);
-    }
-
-    /* Try to reconnect the sheepdog server every one second. */
-    while (s->fd < 0) {
-        Error *local_err = NULL;
-        s->fd = get_sheep_fd(s, &local_err);
-        if (s->fd < 0) {
-            DPRINTF("Wait for connection to be established\n");
-            error_report_err(local_err);
-            co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
-                            1000000000ULL);
-        }
-    }
-
-    /*
-     * Now we have to resend all the request in the inflight queue.  However,
-     * resend_aioreq() can yield and newly created requests can be added to the
-     * inflight queue before the coroutine is resumed.  To avoid mixing them, we
-     * have to move all the inflight requests to the failed queue before
-     * resend_aioreq() is called.
-     */
-    QLIST_FOREACH_SAFE(aio_req, &s->inflight_aio_head, aio_siblings, next) {
-        QLIST_REMOVE(aio_req, aio_siblings);
-        QLIST_INSERT_HEAD(&s->failed_aio_head, aio_req, aio_siblings);
-    }
-
-    /* Resend all the failed aio requests. */
-    while (!QLIST_EMPTY(&s->failed_aio_head)) {
-        aio_req = QLIST_FIRST(&s->failed_aio_head);
-        QLIST_REMOVE(aio_req, aio_siblings);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        resend_aioreq(s, aio_req);
-    }
-}
-
-/*
- * Receive responses of the I/O requests.
- *
- * This function is registered as a fd handler, and called from the
- * main loop when s->fd is ready for reading responses.
- *
- * It reads one response header, looks up the matching request in the
- * inflight list by id, performs the per-type handling (including receiving
- * the payload for reads) and then either completes the request, resends it
- * (SD_RES_READONLY) or marks the AIOCB as failed with -EIO.  Any socket or
- * lookup failure ends in reconnect_to_sdog().
- */
-static void coroutine_fn aio_read_response(void *opaque)
-{
-    SheepdogObjRsp rsp;
-    BDRVSheepdogState *s = opaque;
-    int fd = s->fd;
-    int ret;
-    AIOReq *aio_req = NULL;
-    SheepdogAIOCB *acb;
-    uint64_t idx;
-
-    /* read a header */
-    ret = qemu_co_recv(fd, &rsp, sizeof(rsp));
-    if (ret != sizeof(rsp)) {
-        error_report("failed to get the header, %s", strerror(errno));
-        goto err;
-    }
-
-    /* find the right aio_req from the inflight aio list */
-    QLIST_FOREACH(aio_req, &s->inflight_aio_head, aio_siblings) {
-        if (aio_req->id == rsp.id) {
-            break;
-        }
-    }
-    /* aio_req is NULL here when the loop ran off the end of the list */
-    if (!aio_req) {
-        error_report("cannot find aio_req %x", rsp.id);
-        goto err;
-    }
-
-    acb = aio_req->aiocb;
-
-    /* per-request-type handling of the response */
-    switch (acb->aiocb_type) {
-    case AIOCB_WRITE_UDATA:
-        /* this coroutine context is no longer suitable for co_recv
-         * because we may send data to update vdi objects */
-        s->co_recv = NULL;
-        if (!is_data_obj(aio_req->oid)) {
-            break;
-        }
-        idx = data_oid_to_idx(aio_req->oid);
-
-        if (aio_req->create) {
-            /*
-             * If the object is newly created one, we need to update
-             * the vdi object (metadata object).  min_dirty_data_idx
-             * and max_dirty_data_idx are changed to include updated
-             * index between them.
-             */
-            if (rsp.result == SD_RES_SUCCESS) {
-                s->inode.data_vdi_id[idx] = s->inode.vdi_id;
-                acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
-                acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
-            }
-        }
-        break;
-    case AIOCB_READ_UDATA:
-        /* the payload follows the header; receive it into the caller's iov */
-        ret = qemu_co_recvv(fd, acb->qiov->iov, acb->qiov->niov,
-                            aio_req->iov_offset, rsp.data_length);
-        if (ret != rsp.data_length) {
-            error_report("failed to get the data, %s", strerror(errno));
-            goto err;
-        }
-        break;
-    case AIOCB_FLUSH_CACHE:
-        /* an unsupported flush is downgraded to direct mode and succeeds */
-        if (rsp.result == SD_RES_INVALID_PARMS) {
-            DPRINTF("disable cache since the server doesn't support it\n");
-            s->cache_flags = SD_FLAG_CMD_DIRECT;
-            rsp.result = SD_RES_SUCCESS;
-        }
-        break;
-    case AIOCB_DISCARD_OBJ:
-        switch (rsp.result) {
-        case SD_RES_INVALID_PARMS:
-            /* remember that discard is unsupported, but do not fail */
-            error_report("sheep(%s) doesn't support discard command",
-                         s->host_spec);
-            rsp.result = SD_RES_SUCCESS;
-            s->discard_supported = false;
-            break;
-        default:
-            break;
-        }
-    }
-
-    /* act on the (possibly rewritten) result code */
-    switch (rsp.result) {
-    case SD_RES_SUCCESS:
-        break;
-    case SD_RES_READONLY:
-        /* reload the inode if the request targeted our current vdi, then
-         * retarget the request at the (possibly new) vdi id and resend */
-        if (s->inode.vdi_id == oid_to_vid(aio_req->oid)) {
-            ret = reload_inode(s, 0, "");
-            if (ret < 0) {
-                goto err;
-            }
-        }
-        if (is_data_obj(aio_req->oid)) {
-            aio_req->oid = vid_to_data_oid(s->inode.vdi_id,
-                                           data_oid_to_idx(aio_req->oid));
-        } else {
-            aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
-        }
-        resend_aioreq(s, aio_req);
-        goto out;
-    default:
-        acb->ret = -EIO;
-        error_report("%s", sd_strerror(rsp.result));
-        break;
-    }
-
-    free_aio_req(s, aio_req);
-    if (!acb->nr_pending) {
-        /*
-         * We've finished all requests which belong to the AIOCB, so
-         * we can switch back to sd_co_readv/writev now.
-         */
-        acb->aio_done_func(acb);
-    }
-out:
-    s->co_recv = NULL;
-    return;
-err:
-    s->co_recv = NULL;
-    reconnect_to_sdog(opaque);
-}
-
-/*
- * fd read handler: enter the response coroutine stored in s->co_recv,
- * creating a new one running aio_read_response() when none is recorded.
- */
-static void co_read_response(void *opaque)
-{
-    BDRVSheepdogState *s = opaque;
-    Coroutine *receiver = s->co_recv;
-
-    if (receiver == NULL) {
-        receiver = qemu_coroutine_create(aio_read_response);
-        s->co_recv = receiver;
-    }
-
-    qemu_coroutine_enter(receiver, opaque);
-}
-
-/* fd write handler: re-enter the coroutine recorded in s->co_send. */
-static void co_write_request(void *opaque)
-{
-    BDRVSheepdogState *s = opaque;
-    Coroutine *sender = s->co_send;
-
-    qemu_coroutine_enter(sender, NULL);
-}
-
-/*
- * Open a socket for reading/writing objects and register
- * co_read_response() as its read handler in s->aio_context.
- *
- * The descriptor must not be shared with other operations, because the
- * block driver may be waiting on it for a response from the server.
- *
- * Returns the descriptor, or the negative value from connect_to_sdog().
- */
-static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
-{
-    int sock = connect_to_sdog(s, errp);
-
-    if (sock >= 0) {
-        aio_set_fd_handler(s->aio_context, sock, false,
-                           co_read_response, NULL, s);
-    }
-
-    return sock;
-}
-
-/*
- * Parse a sheepdog URI.
- *
- * Accepted forms:
- *   sheepdog[+tcp]://[host:port]/vdiname[#snapid|#tag]
- *   sheepdog+unix:///vdiname?socket=path[#snapid|#tag]
- *
- * On success s->is_unix and s->host_spec (newly allocated) are set, the vdi
- * name is copied into @vdi (at most SD_MAX_VDI_LEN bytes), and either
- * *@snapid is set to a non-zero id or the fragment is copied into @tag.
- * Without a fragment *@snapid is CURRENT_VDI_ID.
- *
- * Returns 0 on success, -EINVAL if the URI cannot be parsed or is not a
- * valid sheepdog URI.
- */
-static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
-                        char *vdi, uint32_t *snapid, char *tag)
-{
-    URI *uri;
-    QueryParams *qp = NULL;
-    int ret = 0;
-
-    uri = uri_parse(filename);
-    if (!uri) {
-        return -EINVAL;
-    }
-
-    /*
-     * transport
-     *
-     * g_strcmp0() is used because the parsed URI may carry no scheme at
-     * all; a NULL scheme then simply fails every comparison.
-     */
-    if (!g_strcmp0(uri->scheme, "sheepdog")) {
-        s->is_unix = false;
-    } else if (!g_strcmp0(uri->scheme, "sheepdog+tcp")) {
-        s->is_unix = false;
-    } else if (!g_strcmp0(uri->scheme, "sheepdog+unix")) {
-        s->is_unix = true;
-    } else {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (uri->path == NULL || !strcmp(uri->path, "/")) {
-        ret = -EINVAL;
-        goto out;
-    }
-    /* skip the leading '/' of the path */
-    pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);
-
-    /* exactly one query parameter for unix sockets, none for tcp */
-    qp = query_params_parse(uri->query);
-    if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    if (s->is_unix) {
-        /* sheepdog+unix:///vdiname?socket=path */
-        if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
-            ret = -EINVAL;
-            goto out;
-        }
-        s->host_spec = g_strdup(qp->p[0].value);
-    } else {
-        /* sheepdog[+tcp]://[host:port]/vdiname */
-        s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
-                                       uri->port ?: SD_DEFAULT_PORT);
-    }
-
-    /* snapshot tag: a fragment that is not a non-zero number is a tag */
-    if (uri->fragment) {
-        *snapid = strtoul(uri->fragment, NULL, 10);
-        if (*snapid == 0) {
-            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
-        }
-    } else {
-        *snapid = CURRENT_VDI_ID; /* search current vdi */
-    }
-
-out:
-    if (qp) {
-        query_params_free(qp);
-    }
-    uri_free(uri);
-    return ret;
-}
-
-/*
- * Parse a filename written in the old (non-URI) syntax.
- *
- * Supported forms:
- *   1. [vdiname]
- *   2. [vdiname]:[snapid]
- *   3. [vdiname]:[tag]
- *   4. [hostname]:[port]:[vdiname]
- *   5. [hostname]:[port]:[vdiname]:[snapid]
- *   6. [hostname]:[port]:[vdiname]:[tag]
- *
- * A `snapid' or `tag' selects a snapshot image to boot from; `hostname'
- * and `port' allow running VMs outside the Sheepdog cluster
- * (experimental).
- *
- * The name is rewritten into a "sheepdog://host:port/vdi#frag" URI and
- * handed to sd_parse_uri(), whose return value is passed through.
- */
-static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
-                         char *vdi, uint32_t *snapid, char *tag)
-{
-    char *copy, *cursor, *sep, *uri;
-    const char *host_spec = "";
-    const char *vdi_spec;
-    int colons = 0;
-    int ret;
-
-    /* drop an optional "sheepdog:" prefix and work on a private copy */
-    strstart(filename, "sheepdog:", &filename);
-    copy = g_strdup(filename);
-
-    /* count the number of separators */
-    for (cursor = copy; *cursor != '\0'; cursor++) {
-        if (*cursor == ':') {
-            colons++;
-        }
-    }
-    cursor = copy;
-
-    /* with two or more separators the first two tokens are host:port */
-    if (colons >= 2) {
-        host_spec = copy;
-        sep = strchr(strchr(copy, ':') + 1, ':');
-        *sep = '\0';
-        cursor = sep + 1;
-    }
-
-    vdi_spec = cursor;
-
-    /* a remaining ':' introduces the snapshot id/tag, i.e. the fragment */
-    sep = strchr(vdi_spec, ':');
-    if (sep != NULL) {
-        *sep = '#';
-    }
-
-    uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
-
-    ret = sd_parse_uri(s, uri, vdi, snapid, tag);
-
-    g_free(copy);
-    g_free(uri);
-
-    return ret;
-}
-
-/*
- * Look up a vdi by name (@filename) plus @snapid/@tag and store its id in
- * *@vid.  With @lock set, the request is SD_OP_LOCK_VDI instead of
- * SD_OP_GET_VDI_INFO.
- *
- * Returns 0 on success.  On failure @errp is set and the result is the
- * error from connect_to_sdog()/do_req(), or -ENOENT (no such vdi),
- * -EBUSY (vdi locked) or -EIO (any other server error).
- */
-static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
-                         uint32_t snapid, const char *tag, uint32_t *vid,
-                         bool lock, Error **errp)
-{
-    SheepdogVdiReq hdr;
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
-    unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN;
-    unsigned int rlen = 0;
-    int sock, ret;
-
-    sock = connect_to_sdog(s, errp);
-    if (sock < 0) {
-        return sock;
-    }
-
-    /*
-     * strncpy() pads the destination with zeroes, so these two calls leave
-     * no uninitialized byte in the buffer that is about to be sent.
-     */
-    strncpy(buf, filename, SD_MAX_VDI_LEN);
-    strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
-
-    memset(&hdr, 0, sizeof(hdr));
-    if (!lock) {
-        hdr.opcode = SD_OP_GET_VDI_INFO;
-    } else {
-        hdr.opcode = SD_OP_LOCK_VDI;
-        hdr.type = LOCK_TYPE_NORMAL;
-    }
-    hdr.proto_ver = SD_PROTO_VER;
-    hdr.data_length = wlen;
-    hdr.snapid = snapid;
-    hdr.flags = SD_FLAG_CMD_WRITE;
-
-    ret = do_req(sock, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
-    if (ret) {
-        error_setg_errno(errp, -ret, "cannot get vdi info");
-        goto out;
-    }
-
-    if (rsp->result == SD_RES_SUCCESS) {
-        *vid = rsp->vdi_id;
-        ret = 0;
-        goto out;
-    }
-
-    /* translate the server's result code into an errno value */
-    error_setg(errp, "cannot get vdi info, %s, %s %" PRIu32 " %s",
-               sd_strerror(rsp->result), filename, snapid, tag);
-    if (rsp->result == SD_RES_NO_VDI) {
-        ret = -ENOENT;
-    } else if (rsp->result == SD_RES_VDI_LOCKED) {
-        ret = -EBUSY;
-    } else {
-        ret = -EIO;
-    }
-out:
-    closesocket(sock);
-    return ret;
-}
-
-/*
- * Build the object request header for @aio_req according to @aiocb_type
- * and send it (plus payload taken from @iov/@niov for writes and discards)
- * over s->fd.
- *
- * The send is done while holding s->lock, with s->co_send pointing at the
- * current coroutine and co_write_request installed as the write handler.
- * Send failures are only reported with error_report(); nothing is returned.
- */
-static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
-                                         struct iovec *iov, int niov,
-                                         enum AIOCBState aiocb_type)
-{
-    int nr_copies = s->inode.nr_copies;
-    SheepdogObjReq hdr;
-    unsigned int wlen = 0;
-    int ret;
-    uint64_t oid = aio_req->oid;
-    unsigned int datalen = aio_req->data_len;
-    uint64_t offset = aio_req->offset;
-    uint8_t flags = aio_req->flags;
-    uint64_t old_oid = aio_req->base_oid;
-    bool create = aio_req->create;
-
-    /* a zero copy count is only logged; the request is still sent */
-    if (!nr_copies) {
-        error_report("bug");
-    }
-
-    memset(&hdr, 0, sizeof(hdr));
-
-    switch (aiocb_type) {
-    case AIOCB_FLUSH_CACHE:
-        hdr.opcode = SD_OP_FLUSH_VDI;
-        break;
-    case AIOCB_READ_UDATA:
-        hdr.opcode = SD_OP_READ_OBJ;
-        hdr.flags = flags;
-        break;
-    case AIOCB_WRITE_UDATA:
-        if (create) {
-            hdr.opcode = SD_OP_CREATE_AND_WRITE_OBJ;
-        } else {
-            hdr.opcode = SD_OP_WRITE_OBJ;
-        }
-        wlen = datalen;
-        hdr.flags = SD_FLAG_CMD_WRITE | flags;
-        break;
-    case AIOCB_DISCARD_OBJ:
-        /*
-         * A discard is sent as a write to the vdi object: the data_vdi_id
-         * slot of the discarded data object is cleared locally and the
-         * request is redirected at that uint32_t slot of the inode.
-         */
-        hdr.opcode = SD_OP_WRITE_OBJ;
-        hdr.flags = SD_FLAG_CMD_WRITE | flags;
-        s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
-        offset = offsetof(SheepdogInode,
-                          data_vdi_id[data_oid_to_idx(oid)]);
-        oid = vid_to_vdi_oid(s->inode.vdi_id);
-        wlen = datalen = sizeof(uint32_t);
-        break;
-    }
-
-    if (s->cache_flags) {
-        hdr.flags |= s->cache_flags;
-    }
-
-    hdr.oid = oid;
-    hdr.cow_oid = old_oid;
-    hdr.copies = s->inode.nr_copies;
-
-    hdr.data_length = datalen;
-    hdr.offset = offset;
-
-    hdr.id = aio_req->id;
-
-    /* serialize senders; co_write_request() resumes s->co_send */
-    qemu_co_mutex_lock(&s->lock);
-    s->co_send = qemu_coroutine_self();
-    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, co_write_request, s);
-    /* cork the socket while header and payload are being written */
-    socket_set_cork(s->fd, 1);
-
-    /* send a header */
-    ret = qemu_co_send(s->fd, &hdr, sizeof(hdr));
-    if (ret != sizeof(hdr)) {
-        error_report("failed to send a req, %s", strerror(errno));
-        goto out;
-    }
-
-    if (wlen) {
-        ret = qemu_co_sendv(s->fd, iov, niov, aio_req->iov_offset, wlen);
-        if (ret != wlen) {
-            error_report("failed to send a data, %s", strerror(errno));
-        }
-    }
-out:
-    /* uncork and go back to read-only handling of the socket */
-    socket_set_cork(s->fd, 0);
-    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, NULL, s);
-    s->co_send = NULL;
-    qemu_co_mutex_unlock(&s->lock);
-}
-
-/*
- * Synchronously read or write @datalen bytes of object @oid at @offset,
- * using @buf as destination or source.
- *
- * @write selects the direction; for writes @create picks
- * SD_OP_CREATE_AND_WRITE_OBJ over SD_OP_WRITE_OBJ.  @cache_flags is OR-ed
- * into the request flags.
- *
- * Returns 0 on success, the error from do_req() if the exchange failed, or
- * -EIO if the server answered with anything other than SD_RES_SUCCESS.
- */
-static int read_write_object(int fd, AioContext *aio_context, char *buf,
-                             uint64_t oid, uint8_t copies,
-                             unsigned int datalen, uint64_t offset,
-                             bool write, bool create, uint32_t cache_flags)
-{
-    SheepdogObjReq hdr;
-    SheepdogObjRsp *rsp = (SheepdogObjRsp *)&hdr;
-    /* payload travels in exactly one direction */
-    unsigned int wlen = write ? datalen : 0;
-    unsigned int rlen = write ? 0 : datalen;
-    int ret;
-
-    memset(&hdr, 0, sizeof(hdr));
-
-    if (!write) {
-        hdr.opcode = SD_OP_READ_OBJ;
-    } else {
-        hdr.flags = SD_FLAG_CMD_WRITE;
-        hdr.opcode = create ? SD_OP_CREATE_AND_WRITE_OBJ : SD_OP_WRITE_OBJ;
-    }
-    hdr.flags |= cache_flags;
-
-    hdr.oid = oid;
-    hdr.copies = copies;
-    hdr.offset = offset;
-    hdr.data_length = datalen;
-
-    ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
-    if (ret) {
-        error_report("failed to send a request to the sheep");
-        return ret;
-    }
-
-    if (rsp->result != SD_RES_SUCCESS) {
-        error_report("%s", sd_strerror(rsp->result));
-        return -EIO;
-    }
-
-    return 0;
-}
-
-/* Read @datalen bytes of object @oid at @offset into @buf. */
-static int read_object(int fd, AioContext *aio_context, char *buf,
-                       uint64_t oid, uint8_t copies,
-                       unsigned int datalen, uint64_t offset,
-                       uint32_t cache_flags)
-{
-    const bool is_write = false;
-    const bool do_create = false;
-
-    return read_write_object(fd, aio_context, buf, oid, copies, datalen,
-                             offset, is_write, do_create, cache_flags);
-}
-
-/*
- * Write @datalen bytes from @buf to object @oid at @offset; with @create
- * set the object is created by the same request.
- */
-static int write_object(int fd, AioContext *aio_context, char *buf,
-                        uint64_t oid, uint8_t copies,
-                        unsigned int datalen, uint64_t offset, bool create,
-                        uint32_t cache_flags)
-{
-    const bool is_write = true;
-
-    return read_write_object(fd, aio_context, buf, oid, copies, datalen,
-                             offset, is_write, create, cache_flags);
-}
-
-/*
- * Refresh s->inode with the latest state stored on the server.
- *
- * The vdi is looked up again by s->name/@snapid/@tag, its inode header is
- * read, and s->inode's header is replaced when the vdi id has changed.
- *
- * Returns 0 on success, -EIO if the connection cannot be made, or the
- * error from find_vdi_name()/read_object().
- */
-static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
-{
-    Error *local_err = NULL;
-    SheepdogInode *fresh;
-    uint32_t vid = 0;
-    int sock;
-    int ret = 0;
-
-    sock = connect_to_sdog(s, &local_err);
-    if (sock < 0) {
-        error_report_err(local_err);
-        return -EIO;
-    }
-
-    fresh = g_malloc(SD_INODE_HEADER_SIZE);
-
-    ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
-    if (ret) {
-        error_report_err(local_err);
-    } else {
-        ret = read_object(sock, s->aio_context, (char *)fresh,
-                          vid_to_vdi_oid(vid), s->inode.nr_copies,
-                          SD_INODE_HEADER_SIZE, 0, s->cache_flags);
-        /* only overwrite our copy when the vdi really changed */
-        if (ret >= 0 && fresh->vdi_id != s->inode.vdi_id) {
-            memcpy(&s->inode, fresh, SD_INODE_HEADER_SIZE);
-        }
-    }
-
-    g_free(fresh);
-    closesocket(sock);
-
-    return ret;
-}
-
-/*
- * Send @aio_req again.
- *
- * For a user-data write to a data object that is not writable in the
- * current inode, the request is turned into a create request (with CoW
- * from the existing object, if there is one).  Requests on the vdi object
- * are resent as a write of the whole in-memory inode.
- */
-static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
-{
-    SheepdogAIOCB *acb = aio_req->aiocb;
-    bool on_data_obj = is_data_obj(aio_req->oid);
-
-    aio_req->create = false;
-
-    /* check whether this request becomes a CoW one */
-    if (acb->aiocb_type == AIOCB_WRITE_UDATA && on_data_obj) {
-        int idx = data_oid_to_idx(aio_req->oid);
-
-        if (!is_data_obj_writable(&s->inode, idx)) {
-            uint32_t owner_vid = s->inode.data_vdi_id[idx];
-
-            if (owner_vid) {
-                aio_req->base_oid = vid_to_data_oid(owner_vid, idx);
-                aio_req->flags |= SD_FLAG_CMD_COW;
-            }
-            aio_req->create = true;
-        }
-    }
-
-    if (!on_data_obj) {
-        struct iovec inode_iov = {
-            .iov_base = &s->inode,
-            .iov_len = sizeof(s->inode),
-        };
-
-        add_aio_request(s, aio_req, &inode_iov, 1, AIOCB_WRITE_UDATA);
-        return;
-    }
-
-    add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
-                    acb->aiocb_type);
-}
-
-/* Remove every handler for s->fd from the driver's current AioContext. */
-static void sd_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    AioContext *old_context = s->aio_context;
-
-    aio_set_fd_handler(old_context, s->fd, false, NULL, NULL, NULL);
-}
-
-/*
- * Record @new_context as the driver's AioContext and register
- * co_read_response() there as the read handler of s->fd.
- */
-static void sd_attach_aio_context(BlockDriverState *bs,
-                                  AioContext *new_context)
-{
-    BDRVSheepdogState *s = bs->opaque;
-
-    s->aio_context = new_context;
-    aio_set_fd_handler(s->aio_context, s->fd, false,
-                       co_read_response, NULL, s);
-}
-
-/*
- * Runtime options accepted by sd_open(): a single string option,
- * "filename", which carries the whole image specification.
- *
- * TODO Convert to fine grained options
- */
-static QemuOptsList runtime_opts = {
-    .name = "sheepdog",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "filename",
-            .type = QEMU_OPT_STRING,
-            .help = "URL to the sheepdog image",
-        },
-        { /* end of list */ }
-    },
-};
-
-/*
- * Open a sheepdog image.
- *
- * The "filename" option is parsed (URI or old syntax), the I/O socket is
- * opened, the vdi is looked up and locked, and its inode is read into
- * s->inode.  bs->total_sectors is derived from the inode's vdi_size.
- *
- * Returns 0 on success or a negative errno value with @errp set.  On
- * failure the I/O socket, if it was opened, is unregistered and closed.
- */
-static int sd_open(BlockDriverState *bs, QDict *options, int flags,
-                   Error **errp)
-{
-    int ret, fd;
-    uint32_t vid = 0;
-    BDRVSheepdogState *s = bs->opaque;
-    char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
-    uint32_t snapid;
-    char *buf = NULL;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    const char *filename;
-
-    s->bs = bs;
-    s->aio_context = bdrv_get_aio_context(bs);
-    /*
-     * Mark the socket as "not open" before the first possible jump to
-     * 'out', so the cleanup path never touches a descriptor that this
-     * function did not open.
-     */
-    s->fd = -1;
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    filename = qemu_opt_get(opts, "filename");
-
-    QLIST_INIT(&s->inflight_aio_head);
-    QLIST_INIT(&s->failed_aio_head);
-    QLIST_INIT(&s->inflight_aiocb_head);
-
-    memset(vdi, 0, sizeof(vdi));
-    memset(tag, 0, sizeof(tag));
-
-    if (strstr(filename, "://")) {
-        ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
-    } else {
-        ret = parse_vdiname(s, filename, vdi, &snapid, tag);
-    }
-    if (ret < 0) {
-        error_setg(errp, "Can't parse filename");
-        goto out;
-    }
-    s->fd = get_sheep_fd(s, errp);
-    if (s->fd < 0) {
-        ret = s->fd;
-        goto out;
-    }
-
-    ret = find_vdi_name(s, vdi, snapid, tag, &vid, true, errp);
-    if (ret) {
-        goto out;
-    }
-
-    /*
-     * QEMU block layer emulates writethrough cache as 'writeback + flush', so
-     * we always set SD_FLAG_CMD_CACHE (writeback cache) as default.
-     */
-    s->cache_flags = SD_FLAG_CMD_CACHE;
-    if (flags & BDRV_O_NOCACHE) {
-        s->cache_flags = SD_FLAG_CMD_DIRECT;
-    }
-    s->discard_supported = true;
-
-    if (snapid || tag[0] != '\0') {
-        DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
-        s->is_snapshot = true;
-    }
-
-    /* the inode is fetched over a separate, short-lived connection */
-    fd = connect_to_sdog(s, errp);
-    if (fd < 0) {
-        ret = fd;
-        goto out;
-    }
-
-    buf = g_malloc(SD_INODE_SIZE);
-    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
-                      0, SD_INODE_SIZE, 0, s->cache_flags);
-
-    closesocket(fd);
-
-    if (ret) {
-        error_setg(errp, "Can't read snapshot inode");
-        goto out;
-    }
-
-    memcpy(&s->inode, buf, sizeof(s->inode));
-
-    bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
-    pstrcpy(s->name, sizeof(s->name), vdi);
-    qemu_co_mutex_init(&s->lock);
-    qemu_co_queue_init(&s->overlapping_queue);
-    qemu_opts_del(opts);
-    g_free(buf);
-    return 0;
-out:
-    /* only tear down the I/O socket if it was actually opened */
-    if (s->fd >= 0) {
-        aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                           false, NULL, NULL, NULL);
-        closesocket(s->fd);
-        s->fd = -1;
-    }
-    qemu_opts_del(opts);
-    g_free(buf);
-    return ret;
-}
-
-/*
- * Prepare a reopen: allocate the reopen state, derive the new cache flags
- * from state->flags and open a fresh I/O socket.
- *
- * Returns 0 on success.  On failure the negative value from
- * get_sheep_fd() is returned, the reopen state is freed and
- * state->opaque is reset to NULL, so nothing is leaked whether or not the
- * caller goes on to invoke sd_reopen_abort() (which tolerates NULL).
- */
-static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
-                             Error **errp)
-{
-    BDRVSheepdogState *s = state->bs->opaque;
-    BDRVSheepdogReopenState *re_s;
-    int ret = 0;
-
-    re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);
-
-    re_s->cache_flags = SD_FLAG_CMD_CACHE;
-    if (state->flags & BDRV_O_NOCACHE) {
-        re_s->cache_flags = SD_FLAG_CMD_DIRECT;
-    }
-
-    re_s->fd = get_sheep_fd(s, errp);
-    if (re_s->fd < 0) {
-        ret = re_s->fd;
-        g_free(re_s);
-        state->opaque = NULL;
-        return ret;
-    }
-
-    return ret;
-}
-
-/*
- * Commit a prepared reopen: retire the old I/O socket, then adopt the
- * socket and cache flags from the reopen state and free that state.
- */
-static void sd_reopen_commit(BDRVReopenState *state)
-{
-    BDRVSheepdogReopenState *re_s = state->opaque;
-    BDRVSheepdogState *s = state->bs->opaque;
-
-    /* descriptor 0 is valid and -1 means "no socket", so test the sign */
-    if (s->fd >= 0) {
-        aio_set_fd_handler(s->aio_context, s->fd, false,
-                           NULL, NULL, NULL);
-        closesocket(s->fd);
-    }
-
-    s->fd = re_s->fd;
-    s->cache_flags = re_s->cache_flags;
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-
-    return;
-}
-
-/*
- * Abort a prepared reopen: close the socket opened by
- * sd_reopen_prepare(), if any, and free the reopen state.  Does nothing
- * when no reopen state is attached.
- */
-static void sd_reopen_abort(BDRVReopenState *state)
-{
-    BDRVSheepdogReopenState *re_s = state->opaque;
-    BDRVSheepdogState *s = state->bs->opaque;
-
-    if (re_s == NULL) {
-        return;
-    }
-
-    /*
-     * sd_reopen_prepare() always stores the result of get_sheep_fd() here,
-     * which is negative on failure; descriptor 0 is a valid socket.
-     */
-    if (re_s->fd >= 0) {
-        aio_set_fd_handler(s->aio_context, re_s->fd, false,
-                           NULL, NULL, NULL);
-        closesocket(re_s->fd);
-    }
-
-    g_free(state->opaque);
-    state->opaque = NULL;
-
-    return;
-}
-
-/*
- * Ask the server to create a new vdi named s->name, based on the
- * parameters held in s->inode (size, copy policy, copies, block size
- * shift, base vdi id).  @snapshot is passed through as the request's
- * snapid.
- *
- * On success the new vdi id is stored in *@vdi_id when @vdi_id is not
- * NULL and 0 is returned.  On failure @errp is set and the error from
- * connect_to_sdog()/do_req(), or -EIO for a server-side error, is returned.
- */
-static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
-                        Error **errp)
-{
-    SheepdogVdiReq hdr;
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    char name_buf[SD_MAX_VDI_LEN];
-    unsigned int wlen = SD_MAX_VDI_LEN;
-    unsigned int rlen = 0;
-    int sock, ret;
-
-    sock = connect_to_sdog(s, errp);
-    if (sock < 0) {
-        return sock;
-    }
-
-    /* FIXME: would it be better to fail (e.g., return -EIO) when the name
-     * does not fit in the buffer?  For now it is truncated, which at least
-     * avoids a buffer overrun.
-     */
-    memset(name_buf, 0, sizeof(name_buf));
-    pstrcpy(name_buf, sizeof(name_buf), s->name);
-
-    memset(&hdr, 0, sizeof(hdr));
-    hdr.opcode = SD_OP_NEW_VDI;
-    hdr.flags = SD_FLAG_CMD_WRITE;
-    hdr.data_length = wlen;
-    hdr.snapid = snapshot;
-    hdr.base_vdi_id = s->inode.vdi_id;
-    hdr.vdi_size = s->inode.vdi_size;
-    hdr.copy_policy = s->inode.copy_policy;
-    hdr.copies = s->inode.nr_copies;
-    hdr.block_size_shift = s->inode.block_size_shift;
-
-    ret = do_req(sock, s->aio_context, (SheepdogReq *)&hdr, name_buf,
-                 &wlen, &rlen);
-
-    closesocket(sock);
-
-    if (ret) {
-        error_setg_errno(errp, -ret, "create failed");
-        return ret;
-    }
-
-    if (rsp->result != SD_RES_SUCCESS) {
-        error_setg(errp, "%s, %s", sd_strerror(rsp->result), s->inode.name);
-        return -EIO;
-    }
-
-    if (vdi_id != NULL) {
-        *vdi_id = rsp->vdi_id;
-    }
-
-    return 0;
-}
-
-/*
- * Fully preallocate the image @filename by reading every object-sized
- * chunk and writing it back.
- *
- * Returns 0 on success or a negative errno value with @errp set.
- */
-static int sd_prealloc(const char *filename, Error **errp)
-{
-    BlockBackend *blk = NULL;
-    BDRVSheepdogState *base = NULL;
-    unsigned long buf_size;
-    uint32_t idx, max_idx;
-    uint32_t object_size;
-    int64_t vdi_size;
-    void *buf = NULL;
-    int ret;
-
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
-    if (blk == NULL) {
-        ret = -EIO;
-        goto out_with_err_set;
-    }
-
-    /* the last chunk may extend past the end of the image */
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    vdi_size = blk_getlength(blk);
-    if (vdi_size < 0) {
-        ret = vdi_size;
-        goto out;
-    }
-
-    base = blk_bs(blk)->opaque;
-    object_size = (UINT32_C(1) << base->inode.block_size_shift);
-    buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
-    buf = g_malloc0(buf_size);
-
-    max_idx = DIV_ROUND_UP(vdi_size, buf_size);
-
-    for (idx = 0; idx < max_idx; idx++) {
-        /*
-         * Compute the offset in 64 bits: 'idx * buf_size' would be
-         * evaluated in unsigned long, which wraps at 4 GiB on hosts where
-         * that type is 32 bits wide.
-         */
-        int64_t offset = (uint64_t)idx * buf_size;
-
-        /*
-         * The created image can be a cloned image, so we need to read
-         * a data from the source image.
-         */
-        ret = blk_pread(blk, offset, buf, buf_size);
-        if (ret < 0) {
-            goto out;
-        }
-        ret = blk_pwrite(blk, offset, buf, buf_size);
-        if (ret < 0) {
-            goto out;
-        }
-    }
-
-    ret = 0;
-out:
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Can't pre-allocate");
-    }
-out_with_err_set:
-    if (blk) {
-        blk_unref(blk);
-    }
-    g_free(buf);
-
-    return ret;
-}
-
-/*
- * Sheepdog supports two kinds of redundancy, full replication and erasure
- * coding.
- *
- * # create a fully replicated vdi with x copies
- * -o redundancy=x (1 <= x <= SD_MAX_COPIES)
- *
- * # create an erasure coded vdi with x data strips and y parity strips
- * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)
- *
- * On success s->inode.copy_policy and s->inode.nr_copies are set and 0 is
- * returned; any malformed or out-of-range value yields -EINVAL.  Each
- * number must be a complete decimal integer: values with trailing
- * characters (e.g. "2x") are rejected.
- */
-static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
-{
-    struct SheepdogInode *inode = &s->inode;
-    const char *n1, *n2;
-    char *end;
-    long copy, parity;
-    char p[10];
-
-    pstrcpy(p, sizeof(p), opt);
-    n1 = strtok(p, ":");
-    n2 = strtok(NULL, ":");
-
-    if (!n1) {
-        return -EINVAL;
-    }
-
-    copy = strtol(n1, &end, 10);
-    /* require at least one digit and no trailing garbage */
-    if (end == n1 || *end != '\0') {
-        return -EINVAL;
-    }
-    if (copy > SD_MAX_COPIES || copy < 1) {
-        return -EINVAL;
-    }
-    if (!n2) {
-        /* plain replication */
-        inode->copy_policy = 0;
-        inode->nr_copies = copy;
-        return 0;
-    }
-
-    if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
-        return -EINVAL;
-    }
-
-    parity = strtol(n2, &end, 10);
-    if (end == n2 || *end != '\0') {
-        return -EINVAL;
-    }
-    if (parity >= SD_EC_MAX_STRIP || parity < 1) {
-        return -EINVAL;
-    }
-
-    /*
-     * 4 bits for parity and 4 bits for data.
-     * We have to compress upper data bits because it can't represent 16
-     */
-    inode->copy_policy = ((copy / 2) << 4) + parity;
-    inode->nr_copies = copy + parity;
-
-    return 0;
-}
-
-/*
- * Consume the BLOCK_OPT_OBJECT_SIZE option from @opt and, when it is set,
- * store its base-2 logarithm in s->inode.block_size_shift.
- *
- * The size must be a power of two whose exponent lies in [20, 31].
- * Returns 0 on success (including when the option is absent or zero),
- * -EINVAL otherwise.
- */
-static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
-{
-    uint64_t object_size = qemu_opt_get_size_del(opt, BLOCK_OPT_OBJECT_SIZE,
-                                                 0);
-    int obj_order;
-
-    if (object_size == 0) {
-        /* nothing requested: leave block_size_shift untouched */
-        return 0;
-    }
-
-    if (object_size & (object_size - 1)) {    /* not a power of 2? */
-        return -EINVAL;
-    }
-
-    obj_order = ctz32(object_size);
-    if (!(obj_order >= 20 && obj_order <= 31)) {
-        return -EINVAL;
-    }
-
-    s->inode.block_size_shift = (uint8_t)obj_order;
-    return 0;
-}
-
-/*
- * Create a new sheepdog image.
- *
- * @filename is parsed (URI or old syntax) into a temporary driver state;
- * size, backing file, preallocation mode, redundancy and object size are
- * taken from @opts.  A backing file must be a sheepdog snapshot.  When no
- * object size is given, the cluster default is queried, falling back to
- * SD_DEFAULT_BLOCK_SIZE_SHIFT.
- *
- * Returns 0 on success or a negative errno value with @errp set.
- */
-static int sd_create(const char *filename, QemuOpts *opts,
-                     Error **errp)
-{
-    int ret = 0;
-    uint32_t vid = 0;
-    char *backing_file = NULL;
-    char *buf = NULL;
-    BDRVSheepdogState *s;
-    char tag[SD_MAX_VDI_TAG_LEN];
-    uint32_t snapid;
-    uint64_t max_vdi_size;
-    bool prealloc = false;
-
-    s = g_new0(BDRVSheepdogState, 1);
-
-    memset(tag, 0, sizeof(tag));
-    if (strstr(filename, "://")) {
-        ret = sd_parse_uri(s, filename, s->name, &snapid, tag);
-    } else {
-        ret = parse_vdiname(s, filename, s->name, &snapid, tag);
-    }
-    if (ret < 0) {
-        error_setg(errp, "Can't parse filename");
-        goto out;
-    }
-
-    s->inode.vdi_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                                 BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    if (!buf || !strcmp(buf, "off")) {
-        prealloc = false;
-    } else if (!strcmp(buf, "full")) {
-        prealloc = true;
-    } else {
-        error_setg(errp, "Invalid preallocation mode: '%s'", buf);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    g_free(buf);
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY);
-    if (buf) {
-        ret = parse_redundancy(s, buf);
-        if (ret < 0) {
-            error_setg(errp, "Invalid redundancy mode: '%s'", buf);
-            goto out;
-        }
-    }
-    ret = parse_block_size_shift(s, opts);
-    if (ret < 0) {
-        error_setg(errp, "Invalid object_size."
-                         " object_size needs to be power of 2"
-                         " and be limited from 2^20 to 2^31");
-        goto out;
-    }
-
-    if (backing_file) {
-        BlockBackend *blk;
-        BDRVSheepdogState *base;
-        BlockDriver *drv;
-
-        /* Currently, only Sheepdog backing image is supported. */
-        drv = bdrv_find_protocol(backing_file, true, NULL);
-        if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
-            error_setg(errp, "backing_file must be a sheepdog image");
-            ret = -EINVAL;
-            goto out;
-        }
-
-        blk = blk_new_open(backing_file, NULL, NULL,
-                           BDRV_O_PROTOCOL, errp);
-        if (blk == NULL) {
-            ret = -EIO;
-            goto out;
-        }
-
-        base = blk_bs(blk)->opaque;
-
-        if (!is_snapshot(&base->inode)) {
-            error_setg(errp, "cannot clone from a non snapshot vdi");
-            blk_unref(blk);
-            ret = -EINVAL;
-            goto out;
-        }
-        /* the new vdi is cloned from the backing snapshot */
-        s->inode.vdi_id = base->inode.vdi_id;
-        blk_unref(blk);
-    }
-
-    s->aio_context = qemu_get_aio_context();
-
-    /* if block_size_shift is not specified, get cluster default value */
-    if (s->inode.block_size_shift == 0) {
-        SheepdogVdiReq hdr;
-        SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
-        int fd;
-        unsigned int wlen = 0, rlen = 0;
-
-        /*
-         * Hand the connection error to the caller through errp: every
-         * failure return of this function must leave errp set.
-         */
-        fd = connect_to_sdog(s, errp);
-        if (fd < 0) {
-            ret = -EIO;
-            goto out;
-        }
-
-        memset(&hdr, 0, sizeof(hdr));
-        hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
-        hdr.proto_ver = SD_PROTO_VER;
-
-        ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
-                     NULL, &wlen, &rlen);
-        closesocket(fd);
-        if (ret) {
-            error_setg_errno(errp, -ret, "failed to get cluster default");
-            goto out;
-        }
-        if (rsp->result == SD_RES_SUCCESS) {
-            s->inode.block_size_shift = rsp->block_size_shift;
-        } else {
-            s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT;
-        }
-    }
-
-    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-
-    if (s->inode.vdi_size > max_vdi_size) {
-        error_setg(errp, "An image is too large."
-                         " The maximum image size is %"PRIu64 "GB",
-                         max_vdi_size / 1024 / 1024 / 1024);
-        ret = -EINVAL;
-        goto out;
-    }
-
-    ret = do_sd_create(s, &vid, 0, errp);
-    if (ret) {
-        goto out;
-    }
-
-    if (prealloc) {
-        ret = sd_prealloc(filename, errp);
-    }
-out:
-    g_free(backing_file);
-    g_free(buf);
-    /* host_spec was allocated by sd_parse_uri(); NULL if parsing failed */
-    g_free(s->host_spec);
-    g_free(s);
-    return ret;
-}
-
-static void sd_close(BlockDriverState *bs)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogVdiReq hdr;
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    unsigned int wlen, rlen = 0;
-    int fd, ret;
-
-    DPRINTF("%s\n", s->name);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return;
-    }
-
-    memset(&hdr, 0, sizeof(hdr));
-
-    hdr.opcode = SD_OP_RELEASE_VDI;
-    hdr.type = LOCK_TYPE_NORMAL;
-    hdr.base_vdi_id = s->inode.vdi_id;
-    wlen = strlen(s->name) + 1;
-    hdr.data_length = wlen;
-    hdr.flags = SD_FLAG_CMD_WRITE;
-
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
-                 s->name, &wlen, &rlen);
-
-    closesocket(fd);
-
-    if (!ret && rsp->result != SD_RES_SUCCESS &&
-        rsp->result != SD_RES_VDI_NOT_LOCKED) {
-        error_report("%s, %s", sd_strerror(rsp->result), s->name);
-    }
-
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
-                       false, NULL, NULL, NULL);
-    closesocket(s->fd);
-    g_free(s->host_spec);
-}
-
-static int64_t sd_getlength(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-
-    return s->inode.vdi_size;
-}
-
-static int sd_truncate(BlockDriverState *bs, int64_t offset)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    int ret, fd;
-    unsigned int datalen;
-    uint64_t max_vdi_size;
-
-    max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-    if (offset < s->inode.vdi_size) {
-        error_report("shrinking is not supported");
-        return -EINVAL;
-    } else if (offset > max_vdi_size) {
-        error_report("too big image size");
-        return -EINVAL;
-    }
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return fd;
-    }
-
-    /* we don't need to update entire object */
-    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
-    s->inode.vdi_size = offset;
-    ret = write_object(fd, s->aio_context, (char *)&s->inode,
-                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
-                       datalen, 0, false, s->cache_flags);
-    close(fd);
-
-    if (ret < 0) {
-        error_report("failed to update an inode.");
-    }
-
-    return ret;
-}
-
-/*
- * This function is called after writing data objects.  If we need to
- * update metadata, this sends a write request to the vdi object.
- * Otherwise, this switches back to sd_co_readv/writev.
- */
-static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
-{
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    struct iovec iov;
-    AIOReq *aio_req;
-    uint32_t offset, data_len, mn, mx;
-
-    mn = acb->min_dirty_data_idx;
-    mx = acb->max_dirty_data_idx;
-    if (mn <= mx) {
-        /* we need to update the vdi object. */
-        offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
-            mn * sizeof(s->inode.data_vdi_id[0]);
-        data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
-
-        acb->min_dirty_data_idx = UINT32_MAX;
-        acb->max_dirty_data_idx = 0;
-
-        iov.iov_base = &s->inode;
-        iov.iov_len = sizeof(s->inode);
-        aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                                data_len, offset, 0, false, 0, offset);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-        add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
-
-        acb->aio_done_func = sd_finish_aiocb;
-        acb->aiocb_type = AIOCB_WRITE_UDATA;
-        return;
-    }
-
-    sd_finish_aiocb(acb);
-}
-
-/* Delete current working VDI on the snapshot chain */
-static bool sd_delete(BDRVSheepdogState *s)
-{
-    Error *local_err = NULL;
-    unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
-    SheepdogVdiReq hdr = {
-        .opcode = SD_OP_DEL_VDI,
-        .base_vdi_id = s->inode.vdi_id,
-        .data_length = wlen,
-        .flags = SD_FLAG_CMD_WRITE,
-    };
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-    int fd, ret;
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return false;
-    }
-
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
-                 s->name, &wlen, &rlen);
-    closesocket(fd);
-    if (ret) {
-        return false;
-    }
-    switch (rsp->result) {
-    case SD_RES_NO_VDI:
-        error_report("%s was already deleted", s->name);
-        /* fall through */
-    case SD_RES_SUCCESS:
-        break;
-    default:
-        error_report("%s, %s", sd_strerror(rsp->result), s->name);
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * Create a writable VDI from a snapshot
- */
-static int sd_create_branch(BDRVSheepdogState *s)
-{
-    Error *local_err = NULL;
-    int ret, fd;
-    uint32_t vid;
-    char *buf;
-    bool deleted;
-
-    DPRINTF("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
-
-    buf = g_malloc(SD_INODE_SIZE);
-
-    /*
-     * Even If deletion fails, we will just create extra snapshot based on
-     * the working VDI which was supposed to be deleted. So no need to
-     * false bail out.
-     */
-    deleted = sd_delete(s);
-    ret = do_sd_create(s, &vid, !deleted, &local_err);
-    if (ret) {
-        error_report_err(local_err);
-        goto out;
-    }
-
-    DPRINTF("%" PRIx32 " is created.\n", vid);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto out;
-    }
-
-    ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
-                      s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
-
-    closesocket(fd);
-
-    if (ret < 0) {
-        goto out;
-    }
-
-    memcpy(&s->inode, buf, sizeof(s->inode));
-
-    s->is_snapshot = false;
-    ret = 0;
-    DPRINTF("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
-
-out:
-    g_free(buf);
-
-    return ret;
-}
-
-/*
- * Send I/O requests to the server.
- *
- * This function sends requests to the server, links the requests to
- * the inflight_list in BDRVSheepdogState, and exits without
- * waiting the response.  The responses are received in the
- * `aio_read_response' function which is called from the main loop as
- * a fd handler.
- *
- * Returns 1 when we need to wait a response, 0 when there is no sent
- * request and -errno in error cases.
- */
-static int coroutine_fn sd_co_rw_vector(void *p)
-{
-    SheepdogAIOCB *acb = p;
-    int ret = 0;
-    unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
-    unsigned long idx;
-    uint32_t object_size;
-    uint64_t oid;
-    uint64_t offset;
-    BDRVSheepdogState *s = acb->common.bs->opaque;
-    SheepdogInode *inode = &s->inode;
-    AIOReq *aio_req;
-
-    if (acb->aiocb_type == AIOCB_WRITE_UDATA && s->is_snapshot) {
-        /*
-         * In the case we open the snapshot VDI, Sheepdog creates the
-         * writable VDI when we do a write operation first.
-         */
-        ret = sd_create_branch(s);
-        if (ret) {
-            acb->ret = -EIO;
-            goto out;
-        }
-    }
-
-    object_size = (UINT32_C(1) << inode->block_size_shift);
-    idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
-    offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size;
-
-    /*
-     * Make sure we don't free the aiocb before we are done with all requests.
-     * This additional reference is dropped at the end of this function.
-     */
-    acb->nr_pending++;
-
-    while (done != total) {
-        uint8_t flags = 0;
-        uint64_t old_oid = 0;
-        bool create = false;
-
-        oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
-
-        len = MIN(total - done, object_size - offset);
-
-        switch (acb->aiocb_type) {
-        case AIOCB_READ_UDATA:
-            if (!inode->data_vdi_id[idx]) {
-                qemu_iovec_memset(acb->qiov, done, 0, len);
-                goto done;
-            }
-            break;
-        case AIOCB_WRITE_UDATA:
-            if (!inode->data_vdi_id[idx]) {
-                create = true;
-            } else if (!is_data_obj_writable(inode, idx)) {
-                /* Copy-On-Write */
-                create = true;
-                old_oid = oid;
-                flags = SD_FLAG_CMD_COW;
-            }
-            break;
-        case AIOCB_DISCARD_OBJ:
-            /*
-             * We discard the object only when the whole object is
-             * 1) allocated 2) trimmed. Otherwise, simply skip it.
-             */
-            if (len != object_size || inode->data_vdi_id[idx] == 0) {
-                goto done;
-            }
-            break;
-        default:
-            break;
-        }
-
-        if (create) {
-            DPRINTF("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
-                    inode->vdi_id, oid,
-                    vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
-            oid = vid_to_data_oid(inode->vdi_id, idx);
-            DPRINTF("new oid %" PRIx64 "\n", oid);
-        }
-
-        aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
-                                old_oid,
-                                acb->aiocb_type == AIOCB_DISCARD_OBJ ?
-                                0 : done);
-        QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-
-        add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
-                        acb->aiocb_type);
-    done:
-        offset = 0;
-        idx++;
-        done += len;
-    }
-out:
-    if (!--acb->nr_pending) {
-        return acb->ret;
-    }
-    return 1;
-}
-
-static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
-{
-    SheepdogAIOCB *cb;
-
-    QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
-        if (AIOCBOverlapping(aiocb, cb)) {
-            return true;
-        }
-    }
-
-    QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
-    return false;
-}
-
-static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
-{
-    SheepdogAIOCB *acb;
-    int ret;
-    int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
-    BDRVSheepdogState *s = bs->opaque;
-
-    if (offset > s->inode.vdi_size) {
-        ret = sd_truncate(bs, offset);
-        if (ret < 0) {
-            return ret;
-        }
-    }
-
-    acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
-    acb->aio_done_func = sd_write_done;
-    acb->aiocb_type = AIOCB_WRITE_UDATA;
-
-retry:
-    if (check_overlapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overlapping_queue);
-        goto retry;
-    }
-
-    ret = sd_co_rw_vector(acb);
-    if (ret <= 0) {
-        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overlapping_queue);
-        qemu_aio_unref(acb);
-        return ret;
-    }
-
-    qemu_coroutine_yield();
-
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-
-    return acb->ret;
-}
-
-static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
-                       int nb_sectors, QEMUIOVector *qiov)
-{
-    SheepdogAIOCB *acb;
-    int ret;
-    BDRVSheepdogState *s = bs->opaque;
-
-    acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
-    acb->aiocb_type = AIOCB_READ_UDATA;
-    acb->aio_done_func = sd_finish_aiocb;
-
-retry:
-    if (check_overlapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overlapping_queue);
-        goto retry;
-    }
-
-    ret = sd_co_rw_vector(acb);
-    if (ret <= 0) {
-        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overlapping_queue);
-        qemu_aio_unref(acb);
-        return ret;
-    }
-
-    qemu_coroutine_yield();
-
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-    return acb->ret;
-}
-
-static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogAIOCB *acb;
-    AIOReq *aio_req;
-
-    if (s->cache_flags != SD_FLAG_CMD_CACHE) {
-        return 0;
-    }
-
-    acb = sd_aio_setup(bs, NULL, 0, 0);
-    acb->aiocb_type = AIOCB_FLUSH_CACHE;
-    acb->aio_done_func = sd_finish_aiocb;
-
-    aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
-                            0, 0, 0, false, 0, 0);
-    QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-    add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
-
-    qemu_coroutine_yield();
-    return acb->ret;
-}
-
-static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    int ret, fd;
-    uint32_t new_vid;
-    SheepdogInode *inode;
-    unsigned int datalen;
-
-    DPRINTF("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
-            "is_snapshot %d\n", sn_info->name, sn_info->id_str,
-            s->name, sn_info->vm_state_size, s->is_snapshot);
-
-    if (s->is_snapshot) {
-        error_report("You can't create a snapshot of a snapshot VDI, "
-                     "%s (%" PRIu32 ").", s->name, s->inode.vdi_id);
-
-        return -EINVAL;
-    }
-
-    DPRINTF("%s %s\n", sn_info->name, sn_info->id_str);
-
-    s->inode.vm_state_size = sn_info->vm_state_size;
-    s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
-    /* It appears that inode.tag does not require a NUL terminator,
-     * which means this use of strncpy is ok.
-     */
-    strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
-    /* we don't need to update entire object */
-    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
-    inode = g_malloc(datalen);
-
-    /* refresh inode. */
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto cleanup;
-    }
-
-    ret = write_object(fd, s->aio_context, (char *)&s->inode,
-                       vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
-                       datalen, 0, false, s->cache_flags);
-    if (ret < 0) {
-        error_report("failed to write snapshot's inode.");
-        goto cleanup;
-    }
-
-    ret = do_sd_create(s, &new_vid, 1, &local_err);
-    if (ret < 0) {
-        error_reportf_err(local_err,
-                          "failed to create inode for snapshot: ");
-        goto cleanup;
-    }
-
-    ret = read_object(fd, s->aio_context, (char *)inode,
-                      vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
-                      s->cache_flags);
-
-    if (ret < 0) {
-        error_report("failed to read new inode info. %s", strerror(errno));
-        goto cleanup;
-    }
-
-    memcpy(&s->inode, inode, datalen);
-    DPRINTF("s->inode: name %s snap_id %x oid %x\n",
-            s->inode.name, s->inode.snap_id, s->inode.vdi_id);
-
-cleanup:
-    g_free(inode);
-    closesocket(fd);
-    return ret;
-}
-
-/*
- * We implement rollback(loadvm) operation to the specified snapshot by
- * 1) switch to the snapshot
- * 2) rely on sd_create_branch to delete working VDI and
- * 3) create a new working VDI based on the specified snapshot
- */
-static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    BDRVSheepdogState *old_s;
-    char tag[SD_MAX_VDI_TAG_LEN];
-    uint32_t snapid = 0;
-    int ret = 0;
-
-    old_s = g_new(BDRVSheepdogState, 1);
-
-    memcpy(old_s, s, sizeof(BDRVSheepdogState));
-
-    snapid = strtoul(snapshot_id, NULL, 10);
-    if (snapid) {
-        tag[0] = 0;
-    } else {
-        pstrcpy(tag, sizeof(tag), snapshot_id);
-    }
-
-    ret = reload_inode(s, snapid, tag);
-    if (ret) {
-        goto out;
-    }
-
-    ret = sd_create_branch(s);
-    if (ret) {
-        goto out;
-    }
-
-    g_free(old_s);
-
-    return 0;
-out:
-    /* recover bdrv_sd_state */
-    memcpy(s, old_s, sizeof(BDRVSheepdogState));
-    g_free(old_s);
-
-    error_report("failed to open. recover old bdrv_sd_state.");
-
-    return ret;
-}
-
-#define NR_BATCHED_DISCARD 128
-
-static bool remove_objects(BDRVSheepdogState *s)
-{
-    int fd, i = 0, nr_objs = 0;
-    Error *local_err = NULL;
-    int ret = 0;
-    bool result = true;
-    SheepdogInode *inode = &s->inode;
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return false;
-    }
-
-    nr_objs = count_data_objs(inode);
-    while (i < nr_objs) {
-        int start_idx, nr_filled_idx;
-
-        while (i < nr_objs && !inode->data_vdi_id[i]) {
-            i++;
-        }
-        start_idx = i;
-
-        nr_filled_idx = 0;
-        while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
-            if (inode->data_vdi_id[i]) {
-                inode->data_vdi_id[i] = 0;
-                nr_filled_idx++;
-            }
-
-            i++;
-        }
-
-        ret = write_object(fd, s->aio_context,
-                           (char *)&inode->data_vdi_id[start_idx],
-                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
-                           (i - start_idx) * sizeof(uint32_t),
-                           offsetof(struct SheepdogInode,
-                                    data_vdi_id[start_idx]),
-                           false, s->cache_flags);
-        if (ret < 0) {
-            error_report("failed to discard snapshot inode.");
-            result = false;
-            goto out;
-        }
-    }
-
-out:
-    closesocket(fd);
-    return result;
-}
-
-static int sd_snapshot_delete(BlockDriverState *bs,
-                              const char *snapshot_id,
-                              const char *name,
-                              Error **errp)
-{
-    unsigned long snap_id = 0;
-    char snap_tag[SD_MAX_VDI_TAG_LEN];
-    Error *local_err = NULL;
-    int fd, ret;
-    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
-    BDRVSheepdogState *s = bs->opaque;
-    unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
-    uint32_t vid;
-    SheepdogVdiReq hdr = {
-        .opcode = SD_OP_DEL_VDI,
-        .data_length = wlen,
-        .flags = SD_FLAG_CMD_WRITE,
-    };
-    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
-
-    if (!remove_objects(s)) {
-        return -1;
-    }
-
-    memset(buf, 0, sizeof(buf));
-    memset(snap_tag, 0, sizeof(snap_tag));
-    pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
-    ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
-    if (ret || snap_id > UINT32_MAX) {
-        error_setg(errp, "Invalid snapshot ID: %s",
-                         snapshot_id ? snapshot_id : "<null>");
-        return -EINVAL;
-    }
-
-    if (snap_id) {
-        hdr.snapid = (uint32_t) snap_id;
-    } else {
-        pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
-        pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
-    }
-
-    ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
-                        &local_err);
-    if (ret) {
-        return ret;
-    }
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return -1;
-    }
-
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
-                 buf, &wlen, &rlen);
-    closesocket(fd);
-    if (ret) {
-        return ret;
-    }
-
-    switch (rsp->result) {
-    case SD_RES_NO_VDI:
-        error_report("%s was already deleted", s->name);
-    case SD_RES_SUCCESS:
-        break;
-    default:
-        error_report("%s, %s", sd_strerror(rsp->result), s->name);
-        return -1;
-    }
-
-    return ret;
-}
-
-static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
-{
-    Error *local_err = NULL;
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogReq req;
-    int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long);
-    QEMUSnapshotInfo *sn_tab = NULL;
-    unsigned wlen, rlen;
-    int found = 0;
-    static SheepdogInode inode;
-    unsigned long *vdi_inuse;
-    unsigned int start_nr;
-    uint64_t hval;
-    uint32_t vid;
-
-    vdi_inuse = g_malloc(max);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto out;
-    }
-
-    rlen = max;
-    wlen = 0;
-
-    memset(&req, 0, sizeof(req));
-
-    req.opcode = SD_OP_READ_VDIS;
-    req.data_length = max;
-
-    ret = do_req(fd, s->aio_context, (SheepdogReq *)&req,
-                 vdi_inuse, &wlen, &rlen);
-
-    closesocket(fd);
-    if (ret) {
-        goto out;
-    }
-
-    sn_tab = g_new0(QEMUSnapshotInfo, nr);
-
-    /* calculate a vdi id with hash function */
-    hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
-    start_nr = hval & (SD_NR_VDIS - 1);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        ret = fd;
-        goto out;
-    }
-
-    for (vid = start_nr; found < nr; vid = (vid + 1) % SD_NR_VDIS) {
-        if (!test_bit(vid, vdi_inuse)) {
-            break;
-        }
-
-        /* we don't need to read entire object */
-        ret = read_object(fd, s->aio_context, (char *)&inode,
-                          vid_to_vdi_oid(vid),
-                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
-                          s->cache_flags);
-
-        if (ret) {
-            continue;
-        }
-
-        if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) {
-            sn_tab[found].date_sec = inode.snap_ctime >> 32;
-            sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
-            sn_tab[found].vm_state_size = inode.vm_state_size;
-            sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
-
-            snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str),
-                     "%" PRIu32, inode.snap_id);
-            pstrcpy(sn_tab[found].name,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
-                    inode.tag);
-            found++;
-        }
-    }
-
-    closesocket(fd);
-out:
-    *psn_tab = sn_tab;
-
-    g_free(vdi_inuse);
-
-    if (ret < 0) {
-        return ret;
-    }
-
-    return found;
-}
-
-static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
-                                int64_t pos, int size, int load)
-{
-    Error *local_err = NULL;
-    bool create;
-    int fd, ret = 0, remaining = size;
-    unsigned int data_len;
-    uint64_t vmstate_oid;
-    uint64_t offset;
-    uint32_t vdi_index;
-    uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
-    uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift);
-
-    fd = connect_to_sdog(s, &local_err);
-    if (fd < 0) {
-        error_report_err(local_err);
-        return fd;
-    }
-
-    while (remaining) {
-        vdi_index = pos / object_size;
-        offset = pos % object_size;
-
-        data_len = MIN(remaining, object_size - offset);
-
-        vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index);
-
-        create = (offset == 0);
-        if (load) {
-            ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
-                              s->inode.nr_copies, data_len, offset,
-                              s->cache_flags);
-        } else {
-            ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
-                               s->inode.nr_copies, data_len, offset, create,
-                               s->cache_flags);
-        }
-
-        if (ret < 0) {
-            error_report("failed to save vmstate %s", strerror(errno));
-            goto cleanup;
-        }
-
-        pos += data_len;
-        data += data_len;
-        remaining -= data_len;
-    }
-    ret = size;
-cleanup:
-    closesocket(fd);
-    return ret;
-}
-
-static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
-                           int64_t pos)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    void *buf;
-    int ret;
-
-    buf = qemu_blockalign(bs, qiov->size);
-    qemu_iovec_to_buf(qiov, 0, buf, qiov->size);
-    ret = do_load_save_vmstate(s, (uint8_t *) buf, pos, qiov->size, 0);
-    qemu_vfree(buf);
-
-    return ret;
-}
-
-static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
-                           int64_t pos, int size)
-{
-    BDRVSheepdogState *s = bs->opaque;
-
-    return do_load_save_vmstate(s, data, pos, size, 1);
-}
-
-
-static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors)
-{
-    SheepdogAIOCB *acb;
-    BDRVSheepdogState *s = bs->opaque;
-    int ret;
-    QEMUIOVector discard_iov;
-    struct iovec iov;
-    uint32_t zero = 0;
-
-    if (!s->discard_supported) {
-            return 0;
-    }
-
-    memset(&discard_iov, 0, sizeof(discard_iov));
-    memset(&iov, 0, sizeof(iov));
-    iov.iov_base = &zero;
-    iov.iov_len = sizeof(zero);
-    discard_iov.iov = &iov;
-    discard_iov.niov = 1;
-    acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
-    acb->aiocb_type = AIOCB_DISCARD_OBJ;
-    acb->aio_done_func = sd_finish_aiocb;
-
-retry:
-    if (check_overlapping_aiocb(s, acb)) {
-        qemu_co_queue_wait(&s->overlapping_queue);
-        goto retry;
-    }
-
-    ret = sd_co_rw_vector(acb);
-    if (ret <= 0) {
-        QLIST_REMOVE(acb, aiocb_siblings);
-        qemu_co_queue_restart_all(&s->overlapping_queue);
-        qemu_aio_unref(acb);
-        return ret;
-    }
-
-    qemu_coroutine_yield();
-
-    QLIST_REMOVE(acb, aiocb_siblings);
-    qemu_co_queue_restart_all(&s->overlapping_queue);
-
-    return acb->ret;
-}
-
-static coroutine_fn int64_t
-sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                       int *pnum, BlockDriverState **file)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogInode *inode = &s->inode;
-    uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
-    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
-    unsigned long start = offset / object_size,
-                  end = DIV_ROUND_UP((sector_num + nb_sectors) *
-                                     BDRV_SECTOR_SIZE, object_size);
-    unsigned long idx;
-    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
-
-    for (idx = start; idx < end; idx++) {
-        if (inode->data_vdi_id[idx] == 0) {
-            break;
-        }
-    }
-    if (idx == start) {
-        /* Get the longest length of unallocated sectors */
-        ret = 0;
-        for (idx = start + 1; idx < end; idx++) {
-            if (inode->data_vdi_id[idx] != 0) {
-                break;
-            }
-        }
-    }
-
-    *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE;
-    if (*pnum > nb_sectors) {
-        *pnum = nb_sectors;
-    }
-    if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
-        *file = bs;
-    }
-    return ret;
-}
-
-static int64_t sd_get_allocated_file_size(BlockDriverState *bs)
-{
-    BDRVSheepdogState *s = bs->opaque;
-    SheepdogInode *inode = &s->inode;
-    uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
-    unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size);
-    uint64_t size = 0;
-
-    for (i = 0; i < last; i++) {
-        if (inode->data_vdi_id[i] == 0) {
-            continue;
-        }
-        size += object_size;
-    }
-    return size;
-}
-
-static QemuOptsList sd_create_opts = {
-    .name = "sheepdog-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(sd_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_PREALLOC,
-            .type = QEMU_OPT_STRING,
-            .help = "Preallocation mode (allowed values: off, full)"
-        },
-        {
-            .name = BLOCK_OPT_REDUNDANCY,
-            .type = QEMU_OPT_STRING,
-            .help = "Redundancy of the image"
-        },
-        {
-            .name = BLOCK_OPT_OBJECT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Object size of the image"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_sheepdog = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
-    .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
-
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_discard = sd_co_discard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
-
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
-
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
-
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
-
-    .create_opts    = &sd_create_opts,
-};
-
-static BlockDriver bdrv_sheepdog_tcp = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog+tcp",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
-    .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
-
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_discard = sd_co_discard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
-
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
-
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
-
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
-
-    .create_opts    = &sd_create_opts,
-};
-
-static BlockDriver bdrv_sheepdog_unix = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog+unix",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_needs_filename = true,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
-    .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
-
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_discard = sd_co_discard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
-
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
-
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
-
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
-
-    .create_opts    = &sd_create_opts,
-};
-
-static void bdrv_sheepdog_init(void)
-{
-    bdrv_register(&bdrv_sheepdog);
-    bdrv_register(&bdrv_sheepdog_tcp);
-    bdrv_register(&bdrv_sheepdog_unix);
-}
-block_init(bdrv_sheepdog_init);
diff --git a/qemu/block/snapshot.c b/qemu/block/snapshot.c
deleted file mode 100644
index e9d721df6..000000000
--- a/qemu/block/snapshot.c
+++ /dev/null
@@ -1,493 +0,0 @@
-/*
- * Block layer snapshot related functions
- *
- * Copyright (c) 2003-2008 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "block/snapshot.h"
-#include "block/block_int.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
-
-QemuOptsList internal_snapshot_opts = {
-    .name = "snapshot",
-    .head = QTAILQ_HEAD_INITIALIZER(internal_snapshot_opts.head),
-    .desc = {
-        {
-            .name = SNAPSHOT_OPT_ID,
-            .type = QEMU_OPT_STRING,
-            .help = "snapshot id"
-        },{
-            .name = SNAPSHOT_OPT_NAME,
-            .type = QEMU_OPT_STRING,
-            .help = "snapshot name"
-        },{
-            /* end of list */
-        }
-    },
-};
-
-int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
-                       const char *name)
-{
-    QEMUSnapshotInfo *sn_tab, *sn;
-    int nb_sns, i, ret;
-
-    ret = -ENOENT;
-    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
-    if (nb_sns < 0) {
-        return ret;
-    }
-    for (i = 0; i < nb_sns; i++) {
-        sn = &sn_tab[i];
-        if (!strcmp(sn->id_str, name) || !strcmp(sn->name, name)) {
-            *sn_info = *sn;
-            ret = 0;
-            break;
-        }
-    }
-    g_free(sn_tab);
-    return ret;
-}
-
-/**
- * Look up an internal snapshot by @id and @name.
- * @bs: block device to search
- * @id: unique snapshot ID, or NULL
- * @name: snapshot name, or NULL
- * @sn_info: location to store information on the snapshot found
- * @errp: location to store error, will be set only for exception
- *
- * This function will traverse snapshot list in @bs to search the matching
- * one, @id and @name are the matching condition:
- * If both @id and @name are specified, find the first one with id @id and
- * name @name.
- * If only @id is specified, find the first one with id @id.
- * If only @name is specified, find the first one with name @name.
- * if none is specified, abort().
- *
- * Returns: true when a snapshot is found and @sn_info will be filled, false
- * when error or not found. If all operation succeed but no matching one is
- * found, @errp will NOT be set.
- */
-bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
-                                       const char *id,
-                                       const char *name,
-                                       QEMUSnapshotInfo *sn_info,
-                                       Error **errp)
-{
-    QEMUSnapshotInfo *sn_tab, *sn;
-    int nb_sns, i;
-    bool ret = false;
-
-    assert(id || name);
-
-    nb_sns = bdrv_snapshot_list(bs, &sn_tab);
-    if (nb_sns < 0) {
-        error_setg_errno(errp, -nb_sns, "Failed to get a snapshot list");
-        return false;
-    } else if (nb_sns == 0) {
-        return false;
-    }
-
-    if (id && name) {
-        for (i = 0; i < nb_sns; i++) {
-            sn = &sn_tab[i];
-            if (!strcmp(sn->id_str, id) && !strcmp(sn->name, name)) {
-                *sn_info = *sn;
-                ret = true;
-                break;
-            }
-        }
-    } else if (id) {
-        for (i = 0; i < nb_sns; i++) {
-            sn = &sn_tab[i];
-            if (!strcmp(sn->id_str, id)) {
-                *sn_info = *sn;
-                ret = true;
-                break;
-            }
-        }
-    } else if (name) {
-        for (i = 0; i < nb_sns; i++) {
-            sn = &sn_tab[i];
-            if (!strcmp(sn->name, name)) {
-                *sn_info = *sn;
-                ret = true;
-                break;
-            }
-        }
-    }
-
-    g_free(sn_tab);
-    return ret;
-}
-
-int bdrv_can_snapshot(BlockDriverState *bs)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
-        return 0;
-    }
-
-    if (!drv->bdrv_snapshot_create) {
-        if (bs->file != NULL) {
-            return bdrv_can_snapshot(bs->file->bs);
-        }
-        return 0;
-    }
-
-    return 1;
-}
-
-int bdrv_snapshot_create(BlockDriverState *bs,
-                         QEMUSnapshotInfo *sn_info)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_create) {
-        return drv->bdrv_snapshot_create(bs, sn_info);
-    }
-    if (bs->file) {
-        return bdrv_snapshot_create(bs->file->bs, sn_info);
-    }
-    return -ENOTSUP;
-}
-
-int bdrv_snapshot_goto(BlockDriverState *bs,
-                       const char *snapshot_id)
-{
-    BlockDriver *drv = bs->drv;
-    int ret, open_ret;
-
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_goto) {
-        return drv->bdrv_snapshot_goto(bs, snapshot_id);
-    }
-
-    if (bs->file) {
-        drv->bdrv_close(bs);
-        ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
-        open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
-        if (open_ret < 0) {
-            bdrv_unref(bs->file->bs);
-            bs->drv = NULL;
-            return open_ret;
-        }
-        return ret;
-    }
-
-    return -ENOTSUP;
-}
-
-/**
- * Delete an internal snapshot by @snapshot_id and @name.
- * @bs: block device used in the operation
- * @snapshot_id: unique snapshot ID, or NULL
- * @name: snapshot name, or NULL
- * @errp: location to store error
- *
- * If both @snapshot_id and @name are specified, delete the first one with
- * id @snapshot_id and name @name.
- * If only @snapshot_id is specified, delete the first one with id
- * @snapshot_id.
- * If only @name is specified, delete the first one with name @name.
- * if none is specified, return -EINVAL.
- *
- * Returns: 0 on success, -errno on failure. If @bs is not inserted, return
- * -ENOMEDIUM. If @snapshot_id and @name are both NULL, return -EINVAL. If @bs
- * does not support internal snapshot deletion, return -ENOTSUP. If @bs does
- * not support parameter @snapshot_id or @name, or one of them is not correctly
- * specified, return -EINVAL. If @bs can't find one matching @id and @name,
- * return -ENOENT. If @errp != NULL, it will always be filled with error
- * message on failure.
- */
-int bdrv_snapshot_delete(BlockDriverState *bs,
-                         const char *snapshot_id,
-                         const char *name,
-                         Error **errp)
-{
-    BlockDriver *drv = bs->drv;
-    int ret;
-
-    if (!drv) {
-        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
-        return -ENOMEDIUM;
-    }
-    if (!snapshot_id && !name) {
-        error_setg(errp, "snapshot_id and name are both NULL");
-        return -EINVAL;
-    }
-
-    /* drain all pending i/o before deleting snapshot */
-    bdrv_drained_begin(bs);
-
-    if (drv->bdrv_snapshot_delete) {
-        ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
-    } else if (bs->file) {
-        ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
-    } else {
-        error_setg(errp, "Block format '%s' used by device '%s' "
-                   "does not support internal snapshot deletion",
-                   drv->format_name, bdrv_get_device_name(bs));
-        ret = -ENOTSUP;
-    }
-
-    bdrv_drained_end(bs);
-    return ret;
-}
-
-int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
-                                       const char *id_or_name,
-                                       Error **errp)
-{
-    int ret;
-    Error *local_err = NULL;
-
-    ret = bdrv_snapshot_delete(bs, id_or_name, NULL, &local_err);
-    if (ret == -ENOENT || ret == -EINVAL) {
-        error_free(local_err);
-        local_err = NULL;
-        ret = bdrv_snapshot_delete(bs, NULL, id_or_name, &local_err);
-    }
-
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-    }
-    return ret;
-}
-
-int bdrv_snapshot_list(BlockDriverState *bs,
-                       QEMUSnapshotInfo **psn_info)
-{
-    BlockDriver *drv = bs->drv;
-    if (!drv) {
-        return -ENOMEDIUM;
-    }
-    if (drv->bdrv_snapshot_list) {
-        return drv->bdrv_snapshot_list(bs, psn_info);
-    }
-    if (bs->file) {
-        return bdrv_snapshot_list(bs->file->bs, psn_info);
-    }
-    return -ENOTSUP;
-}
-
-/**
- * Temporarily load an internal snapshot by @snapshot_id and @name.
- * @bs: block device used in the operation
- * @snapshot_id: unique snapshot ID, or NULL
- * @name: snapshot name, or NULL
- * @errp: location to store error
- *
- * If both @snapshot_id and @name are specified, load the first one with
- * id @snapshot_id and name @name.
- * If only @snapshot_id is specified, load the first one with id
- * @snapshot_id.
- * If only @name is specified, load the first one with name @name.
- * if none is specified, return -EINVAL.
- *
- * Returns: 0 on success, -errno on fail. If @bs is not inserted, return
- * -ENOMEDIUM. If @bs is not readonly, return -EINVAL. If @bs did not support
- * internal snapshot, return -ENOTSUP. If qemu can't find a matching @id and
- * @name, return -ENOENT. If @errp != NULL, it will always be filled on
- * failure.
- */
-int bdrv_snapshot_load_tmp(BlockDriverState *bs,
-                           const char *snapshot_id,
-                           const char *name,
-                           Error **errp)
-{
-    BlockDriver *drv = bs->drv;
-
-    if (!drv) {
-        error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
-        return -ENOMEDIUM;
-    }
-    if (!snapshot_id && !name) {
-        error_setg(errp, "snapshot_id and name are both NULL");
-        return -EINVAL;
-    }
-    if (!bs->read_only) {
-        error_setg(errp, "Device is not readonly");
-        return -EINVAL;
-    }
-    if (drv->bdrv_snapshot_load_tmp) {
-        return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
-    }
-    error_setg(errp, "Block format '%s' used by device '%s' "
-               "does not support temporarily loading internal snapshots",
-               drv->format_name, bdrv_get_device_name(bs));
-    return -ENOTSUP;
-}
-
-int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
-                                         const char *id_or_name,
-                                         Error **errp)
-{
-    int ret;
-    Error *local_err = NULL;
-
-    ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
-    if (ret == -ENOENT || ret == -EINVAL) {
-        error_free(local_err);
-        local_err = NULL;
-        ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
-    }
-
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
-
-    return ret;
-}
-
-
-/* Group operations. All block drivers are involved.
- * These functions will properly handle dataplane (take aio_context_acquire
- * when appropriate for appropriate block drivers) */
-
-bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
-{
-    bool ok = true;
-    BlockDriverState *bs = NULL;
-
-    while (ok && (bs = bdrv_next(bs))) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) {
-            ok = bdrv_can_snapshot(bs);
-        }
-        aio_context_release(ctx);
-    }
-
-    *first_bad_bs = bs;
-    return ok;
-}
-
-int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
-                             Error **err)
-{
-    int ret = 0;
-    BlockDriverState *bs = NULL;
-    QEMUSnapshotInfo sn1, *snapshot = &sn1;
-
-    while (ret == 0 && (bs = bdrv_next(bs))) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        if (bdrv_can_snapshot(bs) &&
-                bdrv_snapshot_find(bs, snapshot, name) >= 0) {
-            ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
-        }
-        aio_context_release(ctx);
-    }
-
-    *first_bad_bs = bs;
-    return ret;
-}
-
-
-int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
-{
-    int err = 0;
-    BlockDriverState *bs = NULL;
-
-    while (err == 0 && (bs = bdrv_next(bs))) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        if (bdrv_can_snapshot(bs)) {
-            err = bdrv_snapshot_goto(bs, name);
-        }
-        aio_context_release(ctx);
-    }
-
-    *first_bad_bs = bs;
-    return err;
-}
-
-int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
-{
-    QEMUSnapshotInfo sn;
-    int err = 0;
-    BlockDriverState *bs = NULL;
-
-    while (err == 0 && (bs = bdrv_next(bs))) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        if (bdrv_can_snapshot(bs)) {
-            err = bdrv_snapshot_find(bs, &sn, name);
-        }
-        aio_context_release(ctx);
-    }
-
-    *first_bad_bs = bs;
-    return err;
-}
-
-int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
-                             BlockDriverState *vm_state_bs,
-                             uint64_t vm_state_size,
-                             BlockDriverState **first_bad_bs)
-{
-    int err = 0;
-    BlockDriverState *bs = NULL;
-
-    while (err == 0 && (bs = bdrv_next(bs))) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        if (bs == vm_state_bs) {
-            sn->vm_state_size = vm_state_size;
-            err = bdrv_snapshot_create(bs, sn);
-        } else if (bdrv_can_snapshot(bs)) {
-            sn->vm_state_size = 0;
-            err = bdrv_snapshot_create(bs, sn);
-        }
-        aio_context_release(ctx);
-    }
-
-    *first_bad_bs = bs;
-    return err;
-}
-
-BlockDriverState *bdrv_all_find_vmstate_bs(void)
-{
-    bool not_found = true;
-    BlockDriverState *bs = NULL;
-
-    while (not_found && (bs = bdrv_next(bs))) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-
-        aio_context_acquire(ctx);
-        not_found = !bdrv_can_snapshot(bs);
-        aio_context_release(ctx);
-    }
-    return bs;
-}
diff --git a/qemu/block/ssh.c b/qemu/block/ssh.c
deleted file mode 100644
index 06928ed93..000000000
--- a/qemu/block/ssh.c
+++ /dev/null
@@ -1,1111 +0,0 @@
-/*
- * Secure Shell (ssh) backend for QEMU.
- *
- * Copyright (C) 2013 Red Hat Inc., Richard W.M. Jones <rjones@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-
-#include <libssh2.h>
-#include <libssh2_sftp.h>
-
-#include "block/block_int.h"
-#include "qapi/error.h"
-#include "qemu/error-report.h"
-#include "qemu/sockets.h"
-#include "qemu/uri.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qstring.h"
-
-/* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
- * this block driver code.
- *
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself.  Note
- * that this requires that libssh2 was specially compiled with the
- * `./configure --enable-debug' option, so most likely you will have
- * to compile it yourself.  The meaning of <bitmask> is described
- * here: http://www.libssh2.org/libssh2_trace.html
- */
-#define DEBUG_SSH     0
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
-
-#define DPRINTF(fmt, ...)                           \
-    do {                                            \
-        if (DEBUG_SSH) {                            \
-            fprintf(stderr, "ssh: %-15s " fmt "\n", \
-                    __func__, ##__VA_ARGS__);       \
-        }                                           \
-    } while (0)
-
-typedef struct BDRVSSHState {
-    /* Coroutine. */
-    CoMutex lock;
-
-    /* SSH connection. */
-    int sock;                         /* socket */
-    LIBSSH2_SESSION *session;         /* ssh session */
-    LIBSSH2_SFTP *sftp;               /* sftp session */
-    LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
-
-    /* See ssh_seek() function below. */
-    int64_t offset;
-    bool offset_op_read;
-
-    /* File attributes at open.  We try to keep the .filesize field
-     * updated if it changes (eg by writing at the end of the file).
-     */
-    LIBSSH2_SFTP_ATTRIBUTES attrs;
-
-    /* Used to warn if 'flush' is not supported. */
-    char *hostport;
-    bool unsafe_flush_warning;
-} BDRVSSHState;
-
-static void ssh_state_init(BDRVSSHState *s)
-{
-    memset(s, 0, sizeof *s);
-    s->sock = -1;
-    s->offset = -1;
-    qemu_co_mutex_init(&s->lock);
-}
-
-static void ssh_state_free(BDRVSSHState *s)
-{
-    g_free(s->hostport);
-    if (s->sftp_handle) {
-        libssh2_sftp_close(s->sftp_handle);
-    }
-    if (s->sftp) {
-        libssh2_sftp_shutdown(s->sftp);
-    }
-    if (s->session) {
-        libssh2_session_disconnect(s->session,
-                                   "from qemu ssh client: "
-                                   "user closed the connection");
-        libssh2_session_free(s->session);
-    }
-    if (s->sock >= 0) {
-        close(s->sock);
-    }
-}
-
-static void GCC_FMT_ATTR(3, 4)
-session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
-{
-    va_list args;
-    char *msg;
-
-    va_start(args, fs);
-    msg = g_strdup_vprintf(fs, args);
-    va_end(args);
-
-    if (s->session) {
-        char *ssh_err;
-        int ssh_err_code;
-
-        /* This is not an errno.  See <libssh2.h>. */
-        ssh_err_code = libssh2_session_last_error(s->session,
-                                                  &ssh_err, NULL, 0);
-        error_setg(errp, "%s: %s (libssh2 error code: %d)",
-                   msg, ssh_err, ssh_err_code);
-    } else {
-        error_setg(errp, "%s", msg);
-    }
-    g_free(msg);
-}
-
-static void GCC_FMT_ATTR(3, 4)
-sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
-{
-    va_list args;
-    char *msg;
-
-    va_start(args, fs);
-    msg = g_strdup_vprintf(fs, args);
-    va_end(args);
-
-    if (s->sftp) {
-        char *ssh_err;
-        int ssh_err_code;
-        unsigned long sftp_err_code;
-
-        /* This is not an errno.  See <libssh2.h>. */
-        ssh_err_code = libssh2_session_last_error(s->session,
-                                                  &ssh_err, NULL, 0);
-        /* See <libssh2_sftp.h>. */
-        sftp_err_code = libssh2_sftp_last_error((s)->sftp);
-
-        error_setg(errp,
-                   "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
-                   msg, ssh_err, ssh_err_code, sftp_err_code);
-    } else {
-        error_setg(errp, "%s", msg);
-    }
-    g_free(msg);
-}
-
-static void GCC_FMT_ATTR(2, 3)
-sftp_error_report(BDRVSSHState *s, const char *fs, ...)
-{
-    va_list args;
-
-    va_start(args, fs);
-    error_vprintf(fs, args);
-
-    if ((s)->sftp) {
-        char *ssh_err;
-        int ssh_err_code;
-        unsigned long sftp_err_code;
-
-        /* This is not an errno.  See <libssh2.h>. */
-        ssh_err_code = libssh2_session_last_error(s->session,
-                                                  &ssh_err, NULL, 0);
-        /* See <libssh2_sftp.h>. */
-        sftp_err_code = libssh2_sftp_last_error((s)->sftp);
-
-        error_printf(": %s (libssh2 error code: %d, sftp error code: %lu)",
-                     ssh_err, ssh_err_code, sftp_err_code);
-    }
-
-    va_end(args);
-    error_printf("\n");
-}
-
-static int parse_uri(const char *filename, QDict *options, Error **errp)
-{
-    URI *uri = NULL;
-    QueryParams *qp;
-    int i;
-
-    uri = uri_parse(filename);
-    if (!uri) {
-        return -EINVAL;
-    }
-
-    if (strcmp(uri->scheme, "ssh") != 0) {
-        error_setg(errp, "URI scheme must be 'ssh'");
-        goto err;
-    }
-
-    if (!uri->server || strcmp(uri->server, "") == 0) {
-        error_setg(errp, "missing hostname in URI");
-        goto err;
-    }
-
-    if (!uri->path || strcmp(uri->path, "") == 0) {
-        error_setg(errp, "missing remote path in URI");
-        goto err;
-    }
-
-    qp = query_params_parse(uri->query);
-    if (!qp) {
-        error_setg(errp, "could not parse query parameters");
-        goto err;
-    }
-
-    if(uri->user && strcmp(uri->user, "") != 0) {
-        qdict_put(options, "user", qstring_from_str(uri->user));
-    }
-
-    qdict_put(options, "host", qstring_from_str(uri->server));
-
-    if (uri->port) {
-        qdict_put(options, "port", qint_from_int(uri->port));
-    }
-
-    qdict_put(options, "path", qstring_from_str(uri->path));
-
-    /* Pick out any query parameters that we understand, and ignore
-     * the rest.
-     */
-    for (i = 0; i < qp->n; ++i) {
-        if (strcmp(qp->p[i].name, "host_key_check") == 0) {
-            qdict_put(options, "host_key_check",
-                      qstring_from_str(qp->p[i].value));
-        }
-    }
-
-    query_params_free(qp);
-    uri_free(uri);
-    return 0;
-
- err:
-    if (uri) {
-      uri_free(uri);
-    }
-    return -EINVAL;
-}
-
-static void ssh_parse_filename(const char *filename, QDict *options,
-                               Error **errp)
-{
-    if (qdict_haskey(options, "user") ||
-        qdict_haskey(options, "host") ||
-        qdict_haskey(options, "port") ||
-        qdict_haskey(options, "path") ||
-        qdict_haskey(options, "host_key_check")) {
-        error_setg(errp, "user, host, port, path, host_key_check cannot be used at the same time as a file option");
-        return;
-    }
-
-    parse_uri(filename, options, errp);
-}
-
-static int check_host_key_knownhosts(BDRVSSHState *s,
-                                     const char *host, int port, Error **errp)
-{
-    const char *home;
-    char *knh_file = NULL;
-    LIBSSH2_KNOWNHOSTS *knh = NULL;
-    struct libssh2_knownhost *found;
-    int ret, r;
-    const char *hostkey;
-    size_t len;
-    int type;
-
-    hostkey = libssh2_session_hostkey(s->session, &len, &type);
-    if (!hostkey) {
-        ret = -EINVAL;
-        session_error_setg(errp, s, "failed to read remote host key");
-        goto out;
-    }
-
-    knh = libssh2_knownhost_init(s->session);
-    if (!knh) {
-        ret = -EINVAL;
-        session_error_setg(errp, s,
-                           "failed to initialize known hosts support");
-        goto out;
-    }
-
-    home = getenv("HOME");
-    if (home) {
-        knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
-    } else {
-        knh_file = g_strdup_printf("/root/.ssh/known_hosts");
-    }
-
-    /* Read all known hosts from OpenSSH-style known_hosts file. */
-    libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
-
-    r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
-                                 LIBSSH2_KNOWNHOST_TYPE_PLAIN|
-                                 LIBSSH2_KNOWNHOST_KEYENC_RAW,
-                                 &found);
-    switch (r) {
-    case LIBSSH2_KNOWNHOST_CHECK_MATCH:
-        /* OK */
-        DPRINTF("host key OK: %s", found->key);
-        break;
-    case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
-        ret = -EINVAL;
-        session_error_setg(errp, s,
-                      "host key does not match the one in known_hosts"
-                      " (found key %s)", found->key);
-        goto out;
-    case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
-        ret = -EINVAL;
-        session_error_setg(errp, s, "no host key was found in known_hosts");
-        goto out;
-    case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
-        ret = -EINVAL;
-        session_error_setg(errp, s,
-                      "failure matching the host key with known_hosts");
-        goto out;
-    default:
-        ret = -EINVAL;
-        session_error_setg(errp, s, "unknown error matching the host key"
-                      " with known_hosts (%d)", r);
-        goto out;
-    }
-
-    /* known_hosts checking successful. */
-    ret = 0;
-
- out:
-    if (knh != NULL) {
-        libssh2_knownhost_free(knh);
-    }
-    g_free(knh_file);
-    return ret;
-}
-
-static unsigned hex2decimal(char ch)
-{
-    if (ch >= '0' && ch <= '9') {
-        return (ch - '0');
-    } else if (ch >= 'a' && ch <= 'f') {
-        return 10 + (ch - 'a');
-    } else if (ch >= 'A' && ch <= 'F') {
-        return 10 + (ch - 'A');
-    }
-
-    return -1;
-}
-
-/* Compare the binary fingerprint (hash of host key) with the
- * host_key_check parameter.
- */
-static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
-                               const char *host_key_check)
-{
-    unsigned c;
-
-    while (len > 0) {
-        while (*host_key_check == ':')
-            host_key_check++;
-        if (!qemu_isxdigit(host_key_check[0]) ||
-            !qemu_isxdigit(host_key_check[1]))
-            return 1;
-        c = hex2decimal(host_key_check[0]) * 16 +
-            hex2decimal(host_key_check[1]);
-        if (c - *fingerprint != 0)
-            return c - *fingerprint;
-        fingerprint++;
-        len--;
-        host_key_check += 2;
-    }
-    return *host_key_check - '\0';
-}
-
-static int
-check_host_key_hash(BDRVSSHState *s, const char *hash,
-                    int hash_type, size_t fingerprint_len, Error **errp)
-{
-    const char *fingerprint;
-
-    fingerprint = libssh2_hostkey_hash(s->session, hash_type);
-    if (!fingerprint) {
-        session_error_setg(errp, s, "failed to read remote host key");
-        return -EINVAL;
-    }
-
-    if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
-                           hash) != 0) {
-        error_setg(errp, "remote host key does not match host_key_check '%s'",
-                   hash);
-        return -EPERM;
-    }
-
-    return 0;
-}
-
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
-                          const char *host_key_check, Error **errp)
-{
-    /* host_key_check=no */
-    if (strcmp(host_key_check, "no") == 0) {
-        return 0;
-    }
-
-    /* host_key_check=md5:xx:yy:zz:... */
-    if (strncmp(host_key_check, "md5:", 4) == 0) {
-        return check_host_key_hash(s, &host_key_check[4],
-                                   LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
-    }
-
-    /* host_key_check=sha1:xx:yy:zz:... */
-    if (strncmp(host_key_check, "sha1:", 5) == 0) {
-        return check_host_key_hash(s, &host_key_check[5],
-                                   LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
-    }
-
-    /* host_key_check=yes */
-    if (strcmp(host_key_check, "yes") == 0) {
-        return check_host_key_knownhosts(s, host, port, errp);
-    }
-
-    error_setg(errp, "unknown host_key_check setting (%s)", host_key_check);
-    return -EINVAL;
-}
-
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
-{
-    int r, ret;
-    const char *userauthlist;
-    LIBSSH2_AGENT *agent = NULL;
-    struct libssh2_agent_publickey *identity;
-    struct libssh2_agent_publickey *prev_identity = NULL;
-
-    userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
-    if (strstr(userauthlist, "publickey") == NULL) {
-        ret = -EPERM;
-        error_setg(errp,
-                "remote server does not support \"publickey\" authentication");
-        goto out;
-    }
-
-    /* Connect to ssh-agent and try each identity in turn. */
-    agent = libssh2_agent_init(s->session);
-    if (!agent) {
-        ret = -EINVAL;
-        session_error_setg(errp, s, "failed to initialize ssh-agent support");
-        goto out;
-    }
-    if (libssh2_agent_connect(agent)) {
-        ret = -ECONNREFUSED;
-        session_error_setg(errp, s, "failed to connect to ssh-agent");
-        goto out;
-    }
-    if (libssh2_agent_list_identities(agent)) {
-        ret = -EINVAL;
-        session_error_setg(errp, s,
-                           "failed requesting identities from ssh-agent");
-        goto out;
-    }
-
-    for(;;) {
-        r = libssh2_agent_get_identity(agent, &identity, prev_identity);
-        if (r == 1) {           /* end of list */
-            break;
-        }
-        if (r < 0) {
-            ret = -EINVAL;
-            session_error_setg(errp, s,
-                               "failed to obtain identity from ssh-agent");
-            goto out;
-        }
-        r = libssh2_agent_userauth(agent, user, identity);
-        if (r == 0) {
-            /* Authenticated! */
-            ret = 0;
-            goto out;
-        }
-        /* Failed to authenticate with this identity, try the next one. */
-        prev_identity = identity;
-    }
-
-    ret = -EPERM;
-    error_setg(errp, "failed to authenticate using publickey authentication "
-               "and the identities held by your ssh-agent");
-
- out:
-    if (agent != NULL) {
-        /* Note: libssh2 implementation implicitly calls
-         * libssh2_agent_disconnect if necessary.
-         */
-        libssh2_agent_free(agent);
-    }
-
-    return ret;
-}
-
-/*
- * Connect to the SSH server described by @options, authenticate, start
- * SFTP and open the remote file.
- *
- * @s:          driver state; sock, session, sftp, sftp_handle, attrs and
- *              hostport are filled in here
- * @options:    dictionary providing "host" and "path" (mandatory) and
- *              "port", "user", "host_key_check" (optional); these keys are
- *              deleted from the dictionary on success
- * @ssh_flags:  LIBSSH2_FXF_* flags passed to libssh2_sftp_open()
- * @creat_mode: permission bits passed to libssh2_sftp_open()
- * @errp:       receives a description of the failure
- *
- * Returns 0 on success or a negative errno value on failure.  On failure
- * the SFTP file handle, SFTP channel and session are released and reset to
- * NULL; s->sock is NOT closed here and remains the caller's responsibility.
- */
-static int connect_to_ssh(BDRVSSHState *s, QDict *options,
-                          int ssh_flags, int creat_mode, Error **errp)
-{
-    int r, ret;
-    const char *host, *user, *path, *host_key_check;
-    int port;
-
-    if (!qdict_haskey(options, "host")) {
-        ret = -EINVAL;
-        error_setg(errp, "No hostname was specified");
-        goto err;
-    }
-    host = qdict_get_str(options, "host");
-
-    if (qdict_haskey(options, "port")) {
-        port = qdict_get_int(options, "port");
-    } else {
-        port = 22;
-    }
-
-    if (!qdict_haskey(options, "path")) {
-        ret = -EINVAL;
-        error_setg(errp, "No path was specified");
-        goto err;
-    }
-    path = qdict_get_str(options, "path");
-
-    if (qdict_haskey(options, "user")) {
-        user = qdict_get_str(options, "user");
-    } else {
-        user = g_get_user_name();
-        if (!user) {
-            error_setg_errno(errp, errno, "Can't get user name");
-            ret = -errno;
-            goto err;
-        }
-    }
-
-    if (qdict_haskey(options, "host_key_check")) {
-        host_key_check = qdict_get_str(options, "host_key_check");
-    } else {
-        host_key_check = "yes";
-    }
-
-    /* Construct the host:port name for inet_connect. */
-    g_free(s->hostport);
-    s->hostport = g_strdup_printf("%s:%d", host, port);
-
-    /* Open the socket and connect. */
-    s->sock = inet_connect(s->hostport, errp);
-    if (s->sock < 0) {
-        ret = -EIO;
-        goto err;
-    }
-
-    /* Create SSH session. */
-    s->session = libssh2_session_init();
-    if (!s->session) {
-        ret = -EINVAL;
-        session_error_setg(errp, s, "failed to initialize libssh2 session");
-        goto err;
-    }
-
-#if TRACE_LIBSSH2 != 0
-    libssh2_trace(s->session, TRACE_LIBSSH2);
-#endif
-
-    r = libssh2_session_handshake(s->session, s->sock);
-    if (r != 0) {
-        ret = -EINVAL;
-        session_error_setg(errp, s, "failed to establish SSH session");
-        goto err;
-    }
-
-    /* Check the remote host's key against known_hosts. */
-    ret = check_host_key(s, host, port, host_key_check, errp);
-    if (ret < 0) {
-        goto err;
-    }
-
-    /* Authenticate. */
-    ret = authenticate(s, user, errp);
-    if (ret < 0) {
-        goto err;
-    }
-
-    /* Start SFTP. */
-    s->sftp = libssh2_sftp_init(s->session);
-    if (!s->sftp) {
-        session_error_setg(errp, s, "failed to initialize sftp handle");
-        ret = -EINVAL;
-        goto err;
-    }
-
-    /* Open the remote file. */
-    DPRINTF("opening file %s flags=0x%x creat_mode=0%o",
-            path, ssh_flags, creat_mode);
-    s->sftp_handle = libssh2_sftp_open(s->sftp, path, ssh_flags, creat_mode);
-    if (!s->sftp_handle) {
-        session_error_setg(errp, s, "failed to open remote file '%s'", path);
-        ret = -EINVAL;
-        goto err;
-    }
-
-    r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
-    if (r < 0) {
-        /* Record the SFTP error first, then unwind through the common
-         * error path so that the file handle, SFTP channel and session
-         * opened above are released like on every other failure.
-         */
-        sftp_error_setg(errp, s, "failed to read file attributes");
-        ret = -EINVAL;
-        goto err;
-    }
-
-    /* Delete the options we've used; any not deleted will cause the
-     * block layer to give an error about unused options.
-     */
-    qdict_del(options, "host");
-    qdict_del(options, "port");
-    qdict_del(options, "user");
-    qdict_del(options, "path");
-    qdict_del(options, "host_key_check");
-
-    return 0;
-
- err:
-    /* Tear down in reverse order of construction.  Each pointer is reset
-     * to NULL so that a later cleanup of the same state does not free it
-     * a second time.
-     */
-    if (s->sftp_handle) {
-        libssh2_sftp_close(s->sftp_handle);
-    }
-    s->sftp_handle = NULL;
-    if (s->sftp) {
-        libssh2_sftp_shutdown(s->sftp);
-    }
-    s->sftp = NULL;
-    if (s->session) {
-        libssh2_session_disconnect(s->session,
-                                   "from qemu ssh client: "
-                                   "error opening connection");
-        libssh2_session_free(s->session);
-    }
-    s->session = NULL;
-
-    return ret;
-}
-
-/*
- * Open callback of the ssh block driver.
- *
- * Initialises the driver state, connects via connect_to_ssh() with read
- * access (plus write access when BDRV_O_RDWR is requested) and switches the
- * session to non-blocking mode.  Returns 0 on success; on failure the
- * socket is closed (if it was opened) and the negative error code from
- * connect_to_ssh() is returned.
- */
-static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
-                         Error **errp)
-{
-    BDRVSSHState *s = bs->opaque;
-    int open_flags = LIBSSH2_FXF_READ;
-    int rc;
-
-    ssh_state_init(s);
-
-    if (bdrv_flags & BDRV_O_RDWR) {
-        open_flags |= LIBSSH2_FXF_WRITE;
-    }
-
-    /* Bring up the SSH connection and open the remote file. */
-    rc = connect_to_ssh(s, options, open_flags, 0, errp);
-    if (rc >= 0) {
-        /* From here on all libssh2 calls are non-blocking. */
-        libssh2_session_set_blocking(s->session, 0);
-        return 0;
-    }
-
-    /* Connection failed: drop the socket if one was created. */
-    if (s->sock >= 0) {
-        close(s->sock);
-    }
-    s->sock = -1;
-
-    return rc;
-}
-
-/* Options accepted when creating an image over ssh: only the size. */
-static QemuOptsList ssh_create_opts = {
-    .name = "ssh-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head),
-    .desc = {
-        {
-            .help = "Virtual disk size",
-            .type = QEMU_OPT_SIZE,
-            .name = BLOCK_OPT_SIZE
-        },
-        { /* end of list */ }
-    }
-};
-
-/*
- * Create (or truncate) the remote file named by the ssh URI @filename.
- *
- * The requested size is taken from BLOCK_OPT_SIZE in @opts and rounded up
- * to a multiple of BDRV_SECTOR_SIZE.  A non-zero size is established by
- * writing a single zero byte at the last offset.  Returns 0 on success or
- * a negative error code, with @errp set by the failing step.
- */
-static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    BDRVSSHState s;
-    QDict *uri_options = NULL;
-    int64_t total_size = 0;
-    char zero_byte[1] = { '\0' };
-    ssize_t nwritten;
-    int rc;
-
-    ssh_state_init(&s);
-
-    /* Get desired file size. */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    DPRINTF("total_size=%" PRIi64, total_size);
-
-    /* Turn the URI into the option dictionary connect_to_ssh() expects. */
-    uri_options = qdict_new();
-    rc = parse_uri(filename, uri_options, errp);
-    if (rc < 0) {
-        goto out;
-    }
-
-    rc = connect_to_ssh(&s, uri_options,
-                        LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
-                        LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
-                        0644, errp);
-    if (rc < 0) {
-        goto out;
-    }
-
-    if (total_size > 0) {
-        /* Extend the file by writing one byte at the final position. */
-        libssh2_sftp_seek64(s.sftp_handle, total_size-1);
-        nwritten = libssh2_sftp_write(s.sftp_handle, zero_byte, 1);
-        if (nwritten < 0) {
-            sftp_error_setg(errp, &s, "truncate failed");
-            rc = -EINVAL;
-            goto out;
-        }
-        s.attrs.filesize = total_size;
-    }
-
-    rc = 0;
-
- out:
-    ssh_state_free(&s);
-    if (uri_options != NULL) {
-        QDECREF(uri_options);
-    }
-    return rc;
-}
-
-/* Close callback: release everything held in the driver state. */
-static void ssh_close(BlockDriverState *bs)
-{
-    ssh_state_free(bs->opaque);
-}
-
-/*
- * Report whether a freshly created image is known to read back as zeroes.
- *
- * Returns 1 only when the attributes cached in s->attrs carry permission
- * bits and those bits say the remote object is a regular file; returns 0
- * in every other case.
- */
-static int ssh_has_zero_init(BlockDriverState *bs)
-{
-    BDRVSSHState *s = bs->opaque;
-
-    /* Assume false, unless we can positively prove it's true. */
-    if (!(s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS)) {
-        return 0;
-    }
-
-    /* The file type is a multi-bit field: isolate it with S_IFMT and
-     * compare.  A plain bit test against S_IFREG would also accept other
-     * type codes that share that bit (e.g. symlink and socket).
-     */
-    if ((s->attrs.permissions & LIBSSH2_SFTP_S_IFMT) == LIBSSH2_SFTP_S_IFREG) {
-        return 1;
-    }
-
-    return 0;
-}
-
-/* fd handler callback: re-enter the coroutine passed as @opaque. */
-static void restart_coroutine(void *opaque)
-{
-    Coroutine *waiter = opaque;
-
-    DPRINTF("co=%p", waiter);
-    qemu_coroutine_enter(waiter, NULL);
-}
-
-/*
- * Register fd handlers on the session socket for exactly the directions
- * libssh2 reports it is blocked on, so that the current coroutine is
- * re-entered when the socket becomes ready.
- */
-static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
-{
-    Coroutine *self = qemu_coroutine_self();
-    int blocked = libssh2_session_block_directions(s->session);
-    IOHandler *on_read =
-        (blocked & LIBSSH2_SESSION_BLOCK_INBOUND) ? restart_coroutine : NULL;
-    IOHandler *on_write =
-        (blocked & LIBSSH2_SESSION_BLOCK_OUTBOUND) ? restart_coroutine : NULL;
-
-    DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
-            on_read, on_write);
-
-    aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
-                       false, on_read, on_write, self);
-}
-
-/* Remove both fd handlers from the session socket. */
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
-                                          BlockDriverState *bs)
-{
-    AioContext *ctx;
-
-    DPRINTF("s->sock=%d", s->sock);
-
-    ctx = bdrv_get_aio_context(bs);
-    aio_set_fd_handler(ctx, s->sock, false, NULL, NULL, NULL);
-}
-
-/* Called after a non-blocking libssh2 call reported EAGAIN.  Installs the
- * socket handlers, yields the coroutine until one of them re-enters it,
- * and then removes the handlers again.
- */
-static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
-{
-    set_fd_handler(s, bs);      /* arm wake-up on socket activity */
-    qemu_coroutine_yield();     /* sleep until restart_coroutine() runs */
-    clear_fd_handler(s, bs);    /* disarm before resuming the caller */
-}
-
-/* `libssh2_sftp_seek64' repositions the remote file handle.  It only
- * updates a field in the sftp_handle structure, so it generates no
- * network traffic and cannot fail.
- *
- * It is nevertheless very expensive for performance: the handle drops
- * all in-flight reads and buffered readahead data.  This wrapper
- * therefore only calls it when the cached position or direction does not
- * match the request, or when the caller forces it.
- */
-#define SSH_SEEK_WRITE 0
-#define SSH_SEEK_READ  1
-#define SSH_SEEK_FORCE 2
-
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
-{
-    bool want_read = (flags & SSH_SEEK_READ) != 0;
-    bool forced = (flags & SSH_SEEK_FORCE) != 0;
-
-    /* Already positioned for this kind of access: nothing to do. */
-    if (!forced && want_read == s->offset_op_read && offset == s->offset) {
-        return;
-    }
-
-    DPRINTF("seeking to offset=%" PRIi64, offset);
-    libssh2_sftp_seek64(s->sftp_handle, offset);
-    s->offset = offset;
-    s->offset_op_read = want_read;
-}
-
-/*
- * Read @size bytes starting at @offset of the remote file into @qiov.
- *
- * Yields while libssh2 reports EAGAIN/TIMEOUT.  A read that hits EOF
- * zero-fills the rest of @qiov and succeeds.  Returns 0 on success and
- * -EIO on a read error (the cached offset is invalidated in that case).
- */
-static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
-                                 int64_t offset, size_t size,
-                                 QEMUIOVector *qiov)
-{
-    struct iovec *vec;
-    char *dst, *vec_end;
-    size_t got = 0;
-    ssize_t r;
-
-    DPRINTF("offset=%" PRIi64 " size=%zu", offset, size);
-
-    ssh_seek(s, offset, SSH_SEEK_READ);
-
-    /* 'vec' is the iovec element being filled, 'dst' the next byte to
-     * write inside it and 'vec_end' one past its last byte.
-     */
-    vec = &qiov->iov[0];
-    dst = vec->iov_base;
-    vec_end = dst + vec->iov_len;
-
-    /* libssh2 has a hard-coded limit of 2000 bytes per request (it also
-     * does readahead behind our backs), so keep reading until 'size'
-     * bytes have arrived.
-     */
-    while (got < size) {
-        DPRINTF("sftp_read buf=%p size=%zu", dst, vec_end - dst);
-        r = libssh2_sftp_read(s->sftp_handle, dst, vec_end - dst);
-        DPRINTF("sftp_read returned %zd", r);
-
-        if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-            /* Nothing consumed yet: wait for the socket and retry. */
-            co_yield(s, bs);
-            continue;
-        }
-        if (r < 0) {
-            sftp_error_report(s, "read failed");
-            s->offset = -1;
-            return -EIO;
-        }
-        if (r == 0) {
-            /* EOF: Short read so pad the buffer with zeroes and return it. */
-            qemu_iovec_memset(qiov, got, 0, size - got);
-            return 0;
-        }
-
-        got += r;
-        dst += r;
-        s->offset += r;
-
-        /* Current element full and more data wanted: move to the next. */
-        if (dst >= vec_end && got < size) {
-            vec++;
-            dst = vec->iov_base;
-            vec_end = dst + vec->iov_len;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Block driver read callback: translate the sector range into a byte
- * range and perform the read with the driver lock held.
- */
-static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
-                                     int64_t sector_num,
-                                     int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVSSHState *s = bs->opaque;
-    int64_t byte_offset = sector_num * BDRV_SECTOR_SIZE;
-    size_t byte_count = nb_sectors * BDRV_SECTOR_SIZE;
-    int rc;
-
-    qemu_co_mutex_lock(&s->lock);
-    rc = ssh_read(s, bs, byte_offset, byte_count, qiov);
-    qemu_co_mutex_unlock(&s->lock);
-
-    return rc;
-}
-
-/*
- * Write @size bytes from @qiov to the remote file starting at @offset.
- *
- * Yields (via co_yield) while libssh2 reports EAGAIN/TIMEOUT, hence the
- * coroutine_fn marker.  The cached file size in s->attrs.filesize grows
- * when the write extends past it.  Returns 0 on success and -EIO on a
- * write error (the cached offset is invalidated in that case).
- */
-static coroutine_fn int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
-                                  int64_t offset, size_t size,
-                                  QEMUIOVector *qiov)
-{
-    ssize_t r;
-    size_t written;
-    char *buf, *end_of_vec;
-    struct iovec *i;
-
-    DPRINTF("offset=%" PRIi64 " size=%zu", offset, size);
-
-    ssh_seek(s, offset, SSH_SEEK_WRITE);
-
-    /* This keeps track of the current iovec element ('i'), where we
-     * will read from next ('buf'), and the end of the current iovec
-     * ('end_of_vec').
-     */
-    i = &qiov->iov[0];
-    buf = i->iov_base;
-    end_of_vec = i->iov_base + i->iov_len;
-
-    for (written = 0; written < size; ) {
-    again:
-        DPRINTF("sftp_write buf=%p size=%zu", buf, end_of_vec - buf);
-        r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
-        DPRINTF("sftp_write returned %zd", r);
-
-        if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
-            co_yield(s, bs);
-            goto again;
-        }
-        if (r < 0) {
-            sftp_error_report(s, "write failed");
-            s->offset = -1;
-            return -EIO;
-        }
-        /* The libssh2 API is very unclear about this.  A comment in
-         * the code says "nothing was acked, and no EAGAIN was
-         * received!" which apparently means that no data got sent
-         * out, and the underlying channel didn't return any EAGAIN
-         * indication.  I think this is a bug in either libssh2 or
-         * OpenSSH (server-side).  In any case, forcing a seek (to
-         * discard libssh2 internal buffers), and then trying again
-         * works for me.
-         */
-        if (r == 0) {
-            ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
-            co_yield(s, bs);
-            goto again;
-        }
-
-        written += r;
-        buf += r;
-        s->offset += r;
-        /* Current element drained and more to send: move to the next. */
-        if (buf >= end_of_vec && written < size) {
-            i++;
-            buf = i->iov_base;
-            end_of_vec = i->iov_base + i->iov_len;
-        }
-
-        /* Keep the cached length in step with what has been written. */
-        if (offset + written > s->attrs.filesize) {
-            s->attrs.filesize = offset + written;
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Block driver write callback: translate the sector range into a byte
- * range and perform the write with the driver lock held.
- */
-static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
-                                      int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVSSHState *s = bs->opaque;
-    int64_t byte_offset = sector_num * BDRV_SECTOR_SIZE;
-    size_t byte_count = nb_sectors * BDRV_SECTOR_SIZE;
-    int rc;
-
-    qemu_co_mutex_lock(&s->lock);
-    rc = ssh_write(s, bs, byte_offset, byte_count, qiov);
-    qemu_co_mutex_unlock(&s->lock);
-
-    return rc;
-}
-
-/*
- * Warn, once per driver state, that flushes cannot reach the remote disk.
- * @what optionally names the software needed for fsync support.
- */
-static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
-{
-    /* Already reported for this connection. */
-    if (s->unsafe_flush_warning) {
-        return;
-    }
-
-    error_report("warning: ssh server %s does not support fsync",
-                 s->hostport);
-    if (what) {
-        error_report("to support fsync, you need %s", what);
-    }
-    s->unsafe_flush_warning = true;
-}
-
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
-
-/*
- * Ask the server to fsync the remote file, yielding while libssh2 reports
- * EAGAIN/TIMEOUT.  A server that answers "operation unsupported" only
- * triggers the one-time warning and is treated as success.  Returns 0 on
- * success and -EIO on any other failure.
- */
-static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
-{
-    int r;
-
-    DPRINTF("fsync");
-
-    for (;;) {
-        r = libssh2_sftp_fsync(s->sftp_handle);
-        if (r != LIBSSH2_ERROR_EAGAIN && r != LIBSSH2_ERROR_TIMEOUT) {
-            break;
-        }
-        co_yield(s, bs);
-    }
-
-    if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
-        libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
-        unsafe_flush_warning(s, "OpenSSH >= 6.3");
-        return 0;
-    }
-
-    if (r >= 0) {
-        return 0;
-    }
-
-    sftp_error_report(s, "fsync failed");
-    return -EIO;
-}
-
-/* Flush callback (fsync available): run ssh_flush() under the lock. */
-static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
-{
-    BDRVSSHState *s = bs->opaque;
-    int rc;
-
-    qemu_co_mutex_lock(&s->lock);
-    rc = ssh_flush(s, bs);
-    qemu_co_mutex_unlock(&s->lock);
-
-    return rc;
-}
-
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
-
-/* Flush callback (no fsync in libssh2): warn once and report success. */
-static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
-{
-    unsafe_flush_warning(bs->opaque, "libssh2 >= 1.4.4");
-    return 0;
-}
-
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
-
-/*
- * Return the file length from the cached attributes.
- * Note we cannot make a libssh2 call here.
- */
-static int64_t ssh_getlength(BlockDriverState *bs)
-{
-    BDRVSSHState *s = bs->opaque;
-    int64_t cached_len = (int64_t) s->attrs.filesize;
-
-    DPRINTF("length=%" PRIi64, cached_len);
-
-    return cached_len;
-}
-
-/* Driver table for the "ssh" protocol. */
-static BlockDriver bdrv_ssh = {
-    /* identification */
-    .format_name                  = "ssh",
-    .protocol_name                = "ssh",
-    .instance_size                = sizeof(BDRVSSHState),
-
-    /* image creation */
-    .create_opts                  = &ssh_create_opts,
-    .bdrv_create                  = ssh_create,
-
-    /* open / close */
-    .bdrv_parse_filename          = ssh_parse_filename,
-    .bdrv_file_open               = ssh_file_open,
-    .bdrv_close                   = ssh_close,
-
-    /* I/O */
-    .bdrv_co_readv                = ssh_co_readv,
-    .bdrv_co_writev               = ssh_co_writev,
-    .bdrv_co_flush_to_disk        = ssh_co_flush,
-
-    /* queries */
-    .bdrv_getlength               = ssh_getlength,
-    .bdrv_has_zero_init           = ssh_has_zero_init,
-};
-
-/*
- * Module initialiser: set up libssh2 and register the driver.  A libssh2
- * initialisation failure is fatal and terminates the process.
- */
-static void bdrv_ssh_init(void)
-{
-    int rc = libssh2_init(0);
-
-    if (rc == 0) {
-        bdrv_register(&bdrv_ssh);
-        return;
-    }
-
-    fprintf(stderr, "libssh2 initialization failed, %d\n", rc);
-    exit(EXIT_FAILURE);
-}
-
-block_init(bdrv_ssh_init);
diff --git a/qemu/block/stream.c b/qemu/block/stream.c
deleted file mode 100644
index 332b9a183..000000000
--- a/qemu/block/stream.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Image streaming
- *
- * Copyright IBM, Corp. 2011
- *
- * Authors:
- *  Stefan Hajnoczi   <stefanha@linux.vnet.ibm.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "trace.h"
-#include "block/block_int.h"
-#include "block/blockjob.h"
-#include "qapi/error.h"
-#include "qapi/qmp/qerror.h"
-#include "qemu/ratelimit.h"
-#include "sysemu/block-backend.h"
-
-enum {
-    /*
-     * Size of the bounce buffer used to populate the image file.  It is
-     * meant to span several clusters so that contiguous regions can be
-     * populated efficiently in a single call.
-     */
-    STREAM_BUFFER_SIZE = 512 << 10, /* in bytes (512 KiB) */
-};
-
-#define SLICE_TIME 100000000ULL /* ns */
-
-typedef struct StreamBlockJob {   /* State of one image-streaming job */
-    BlockJob common;              /* generic job; container_of() maps it back */
-    RateLimit limit;              /* configured by stream_set_speed() */
-    BlockDriverState *base;       /* stop point of the copy; may be NULL */
-    BlockdevOnError on_error;     /* policy handed to block_job_error_action() */
-    char *backing_file_str;       /* g_strdup()ed in stream_start(), freed in stream_complete() */
-} StreamBlockJob;
-
-/*
- * Copy @nb_sectors sectors starting at @sector_num into @bs by issuing a
- * copy-on-read request that uses @buf as the data buffer.  Returns the
- * result of bdrv_co_copy_on_readv().
- */
-static int coroutine_fn stream_populate(BlockDriverState *bs,
-                                        int64_t sector_num, int nb_sectors,
-                                        void *buf)
-{
-    QEMUIOVector request;
-    struct iovec chunk = {
-        .iov_len  = nb_sectors * BDRV_SECTOR_SIZE,
-        .iov_base = buf,
-    };
-
-    /* Wrap the caller's buffer; no allocation takes place here. */
-    qemu_iovec_init_external(&request, &chunk, 1);
-
-    /* Copy-on-read the unallocated clusters */
-    return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &request);
-}
-
-typedef struct {          /* Result passed from stream_run() to stream_complete() */
-    int ret;              /* 0 or the first negative error seen by the job */
-    bool reached_end;     /* true when the copy loop got to the last sector */
-} StreamCompleteData;
-
-/*
- * Deferred completion handler of the streaming job.
- *
- * When the job was neither cancelled nor failed and reached the end of
- * the image, the backing file reference of the job's BDS is rewritten to
- * point at the base (or removed when there is no base).  In all cases the
- * job is completed with the final result and @opaque is freed.
- */
-static void stream_complete(BlockJob *job, void *opaque)
-{
-    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
-    StreamCompleteData *data = opaque;
-    BlockDriverState *base = s->base;
-    bool streamed_all = !block_job_is_cancelled(&s->common) &&
-                        data->reached_end && data->ret == 0;
-
-    if (streamed_all) {
-        const char *base_id = base ? s->backing_file_str : NULL;
-        const char *base_fmt =
-            (base && base->drv) ? base->drv->format_name : NULL;
-
-        data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
-        bdrv_set_backing_hd(job->bs, base);
-    }
-
-    g_free(s->backing_file_str);
-    block_job_completed(&s->common, data->ret);
-    g_free(data);
-}
-
-static void coroutine_fn stream_run(void *opaque)
-{   /*
-     * Coroutine body of the streaming job; @opaque is the StreamBlockJob.
-     *
-     * Walks the image in runs of at most STREAM_BUFFER_SIZE bytes, copies
-     * every run that is allocated between the backing file and 'base' into
-     * the top image, and finally hands the result to stream_complete()
-     * through block_job_defer_to_main_loop().
-     */
-    StreamBlockJob *s = opaque;
-    StreamCompleteData *data;
-    BlockDriverState *bs = s->common.bs;
-    BlockDriverState *base = s->base;
-    int64_t sector_num = 0;
-    int64_t end = -1;
-    int error = 0; /* first error that was hit, kept even if ignored */
-    int ret = 0;
-    int n = 0; /* length in sectors of the run handled in this iteration */
-    void *buf;
-
-    if (!bs->backing) { /* no backing file: nothing to copy */
-        goto out;
-    }
-
-    s->common.len = bdrv_getlength(bs);
-    if (s->common.len < 0) {
-        ret = s->common.len;
-        goto out;
-    }
-
-    end = s->common.len >> BDRV_SECTOR_BITS; /* image length in sectors */
-    buf = qemu_blockalign(bs, STREAM_BUFFER_SIZE);
-
-    /* Turn on copy-on-read for the whole block device so that guest read
-     * requests help us make progress.  Only do this when copying the entire
-     * backing chain since the copy-on-read operation does not take base into
-     * account.
-     */
-    if (!base) {
-        bdrv_enable_copy_on_read(bs);
-    }
-
-    for (sector_num = 0; sector_num < end; sector_num += n) {
-        uint64_t delay_ns = 0;
-        bool copy;
-
-wait:
-        /* Note that even when no rate limit is applied we need to yield
-         * with no pending I/O here so that bdrv_drain_all() returns.
-         * This label is also the re-entry point after the rate limiter
-         * asked for a delay; the allocation status is then queried again.
-         */
-        block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
-        if (block_job_is_cancelled(&s->common)) {
-            break;
-        }
-
-        copy = false;
-
-        ret = bdrv_is_allocated(bs, sector_num,
-                                STREAM_BUFFER_SIZE / BDRV_SECTOR_SIZE, &n);
-        if (ret == 1) {
-            /* Allocated in the top, no need to copy.  */
-        } else if (ret >= 0) {
-            /* Copy if allocated in the intermediate images.  Limit to the
-             * known-unallocated area [sector_num, sector_num+n).  */
-            ret = bdrv_is_allocated_above(backing_bs(bs), base,
-                                          sector_num, n, &n);
-
-            /* Finish early if end of backing file has been reached */
-            if (ret == 0 && n == 0) {
-                n = end - sector_num;
-            }
-
-            copy = (ret == 1);
-        }
-        trace_stream_one_iteration(s, sector_num, n, ret);
-        if (copy) {
-            if (s->common.speed) { /* a rate limit is configured */
-                delay_ns = ratelimit_calculate_delay(&s->limit, n);
-                if (delay_ns > 0) {
-                    goto wait;
-                }
-            }
-            ret = stream_populate(bs, sector_num, n, buf);
-        }
-        if (ret < 0) {
-            BlockErrorAction action =
-                block_job_error_action(&s->common, s->common.bs, s->on_error,
-                                       true, -ret);
-            if (action == BLOCK_ERROR_ACTION_STOP) {
-                n = 0; /* advance by zero so the same sector is retried */
-                continue;
-            }
-            if (error == 0) {
-                error = ret;
-            }
-            if (action == BLOCK_ERROR_ACTION_REPORT) {
-                break;
-            }
-        }
-        ret = 0;
-
-        /* Publish progress */
-        s->common.offset += n * BDRV_SECTOR_SIZE;
-    }
-
-    if (!base) {
-        bdrv_disable_copy_on_read(bs);
-    }
-
-    /* Do not remove the backing file if an error was there but ignored.  */
-    ret = error;
-
-    qemu_vfree(buf);
-
-out:
-    /* Modify backing chain and close BDSes in main loop */
-    data = g_malloc(sizeof(*data));
-    data->ret = ret;
-    data->reached_end = sector_num == end; /* false on the early exits above */
-    block_job_defer_to_main_loop(&s->common, stream_complete, data);
-}
-
-/*
- * set_speed callback: program the job's rate limiter.  @speed is divided
- * by BDRV_SECTOR_SIZE before being handed to the limiter; a negative
- * value is rejected through @errp.
- */
-static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    StreamBlockJob *s = container_of(job, StreamBlockJob, common);
-
-    if (speed >= 0) {
-        ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
-        return;
-    }
-
-    error_setg(errp, QERR_INVALID_PARAMETER, "speed");
-}
-
-/* Job driver description used by stream_start(). */
-static const BlockJobDriver stream_job_driver = {
-    .job_type      = BLOCK_JOB_TYPE_STREAM,
-    .instance_size = sizeof(StreamBlockJob),
-    .set_speed     = stream_set_speed,
-};
-
-/*
- * Create and start a streaming job on @bs.
- *
- * The "stop" and "enospc" error policies are only accepted when @bs has
- * a BlockBackend with I/O status enabled; otherwise @errp is set and no
- * job is created.  On success the job coroutine is entered before this
- * function returns.
- */
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *backing_file_str, int64_t speed,
-                  BlockdevOnError on_error,
-                  BlockCompletionFunc *cb,
-                  void *opaque, Error **errp)
-{
-    StreamBlockJob *job;
-    bool policy_needs_iostatus = on_error == BLOCKDEV_ON_ERROR_STOP ||
-                                 on_error == BLOCKDEV_ON_ERROR_ENOSPC;
-
-    if (policy_needs_iostatus &&
-        (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
-        error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
-        return;
-    }
-
-    job = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
-    if (!job) {
-        return;
-    }
-
-    /* Record the job parameters before the coroutine starts running. */
-    job->base = base;
-    job->backing_file_str = g_strdup(backing_file_str);
-    job->on_error = on_error;
-
-    job->common.co = qemu_coroutine_create(stream_run);
-    trace_stream_start(bs, base, job, job->common.co, opaque);
-    qemu_coroutine_enter(job->common.co, job);
-}
diff --git a/qemu/block/throttle-groups.c b/qemu/block/throttle-groups.c
deleted file mode 100644
index 4920e0949..000000000
--- a/qemu/block/throttle-groups.c
+++ /dev/null
@@ -1,483 +0,0 @@
-/*
- * QEMU block throttling group infrastructure
- *
- * Copyright (C) Nodalink, EURL. 2014
- * Copyright (C) Igalia, S.L. 2015
- *
- * Authors:
- *   Benoît Canet <benoit.canet@nodalink.com>
- *   Alberto Garcia <berto@igalia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 or
- * (at your option) version 3 of the License.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include "qemu/osdep.h"
-#include "block/throttle-groups.h"
-#include "qemu/queue.h"
-#include "qemu/thread.h"
-#include "sysemu/qtest.h"
-
-/* The ThrottleGroup structure (with its ThrottleState) is shared
- * among different BlockDriverState and it's independent from
- * AioContext, so in order to use it from different threads it needs
- * its own locking.
- *
- * This locking is however handled internally in this file, so it's
- * transparent to outside users.
- *
- * The whole ThrottleGroup structure is private and invisible to
- * outside users, that only use it through its ThrottleState.
- *
- * In addition to the ThrottleGroup structure, BlockDriverState has
- * fields that need to be accessed by other members of the group and
- * therefore also need to be protected by this lock. Once a BDS is
- * registered in a group those fields can be accessed by other threads
- * any time.
- *
- * Again, all this is handled internally and is mostly transparent to
- * the outside. The 'throttle_timers' field however has an additional
- * constraint because it may be temporarily invalid (see for example
- * bdrv_set_aio_context()). Therefore in this file a thread will
- * access some other BDS's timers only after verifying that that BDS
- * has throttled requests in the queue.
- */
-typedef struct ThrottleGroup {
-    char *name; /* This is constant during the lifetime of the group */
-
-    QemuMutex lock; /* This lock protects the following four fields */
-    ThrottleState ts; /* handed out to users by throttle_group_incref() */
-    QLIST_HEAD(, BlockDriverState) head; /* members, walked round-robin */
-    BlockDriverState *tokens[2]; /* current round-robin token, indexed by is_write */
-    bool any_timer_armed[2]; /* whether a timer is pending, indexed by is_write */
-
-    /* These two are protected by the global throttle_groups_lock */
-    unsigned refcount; /* group is destroyed when this drops to zero */
-    QTAILQ_ENTRY(ThrottleGroup) list; /* link in throttle_groups */
-} ThrottleGroup;
-
-static QemuMutex throttle_groups_lock; /* guards throttle_groups and each refcount */
-static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = /* all existing groups */
-    QTAILQ_HEAD_INITIALIZER(throttle_groups);
-
-/* Take a reference on the ThrottleGroup called @name, creating the
- * group first when no group with that name exists yet.
- *
- * @name: the name of the ThrottleGroup
- * @ret:  the ThrottleState member of the ThrottleGroup
- */
-ThrottleState *throttle_group_incref(const char *name)
-{
-    ThrottleGroup *tg;
-
-    qemu_mutex_lock(&throttle_groups_lock);
-
-    /* Search the global list; tg ends up NULL when nothing matches. */
-    QTAILQ_FOREACH(tg, &throttle_groups, list) {
-        if (strcmp(name, tg->name) == 0) {
-            break;
-        }
-    }
-
-    if (tg == NULL) {
-        /* No such group yet: build one and append it to the list. */
-        tg = g_new0(ThrottleGroup, 1);
-        tg->name = g_strdup(name);
-        qemu_mutex_init(&tg->lock);
-        throttle_init(&tg->ts);
-        QLIST_INIT(&tg->head);
-
-        QTAILQ_INSERT_TAIL(&throttle_groups, tg, list);
-    }
-
-    tg->refcount++;
-
-    qemu_mutex_unlock(&throttle_groups_lock);
-
-    return &tg->ts;
-}
-
-/* Drop one reference on a ThrottleGroup and destroy the group once the
- * last reference is gone.
- *
- * @ts:  The ThrottleGroup to unref, given by its ThrottleState member
- */
-void throttle_group_unref(ThrottleState *ts)
-{
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-
-    qemu_mutex_lock(&throttle_groups_lock);
-
-    tg->refcount--;
-    if (tg->refcount == 0) {
-        /* Last user gone: unlink the group and release its resources. */
-        QTAILQ_REMOVE(&throttle_groups, tg, list);
-        qemu_mutex_destroy(&tg->lock);
-        g_free(tg->name);
-        g_free(tg);
-    }
-
-    qemu_mutex_unlock(&throttle_groups_lock);
-}
-
-/* Return the name of the ThrottleGroup a BlockDriverState belongs to.
- * Both the string and the pointer stay constant for as long as the
- * group exists.
- *
- * @bs:   a BlockDriverState that is member of a throttling group
- * @ret:  the name of the group.
- */
-const char *throttle_group_get_name(BlockDriverState *bs)
-{
-    return container_of(bs->throttle_state, ThrottleGroup, ts)->name;
-}
-
-/* Step to the following BlockDriverState of the group, wrapping around
- * to the first member after the last one (circular round-robin order).
- *
- * This assumes that tg->lock is held.
- *
- * @bs:  the current BlockDriverState
- * @ret: the next BlockDriverState in the sequence
- */
-static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    BlockDriverState *successor = QLIST_NEXT(bs, round_robin);
-
-    return successor ? successor : QLIST_FIRST(&tg->head);
-}
-
-/* Find the next group member, in round-robin order after the current
- * token, that has pending I/O requests of the given type.
- *
- * This assumes that tg->lock is held.
- *
- * @bs:        the current BlockDriverState
- * @is_write:  the type of operation (read/write)
- * @ret:       the next BlockDriverState with pending requests, or bs
- *             if there is none.
- */
-static BlockDriverState *next_throttle_token(BlockDriverState *bs,
-                                             bool is_write)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    BlockDriverState *first = tg->tokens[is_write];
-    BlockDriverState *candidate;
-
-    /* Walk the ring once, starting right after the current token, and
-     * stop at the first member that has something queued.
-     */
-    for (candidate = throttle_group_next_bs(first);
-         candidate != first && !candidate->pending_reqs[is_write];
-         candidate = throttle_group_next_bs(candidate)) {
-        /* keep looking */
-    }
-
-    if (candidate != first || candidate->pending_reqs[is_write]) {
-        return candidate;
-    }
-
-    /* Nobody has queued I/O: pick the caller, since it is the one most
-     * likely to queue the request currently being processed.
-     */
-    return bs;
-}
-
-/* Decide whether the next I/O request of a BlockDriverState has to wait.
- * When no timer is armed in the group yet and throttling is needed, a
- * timer is armed and bs becomes the group's token for this direction.
- *
- * This assumes that tg->lock is held.
- *
- * @bs:         the current BlockDriverState
- * @is_write:   the type of operation (read/write)
- * @ret:        whether the I/O request needs to be throttled or not
- */
-static bool throttle_group_schedule_timer(BlockDriverState *bs,
-                                          bool is_write)
-{
-    ThrottleState *ts = bs->throttle_state;
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-
-    /* Some member already has a timer pending: just wait for it. */
-    if (tg->any_timer_armed[is_write]) {
-        return true;
-    }
-
-    if (!throttle_schedule_timer(ts, &bs->throttle_timers, is_write)) {
-        return false;
-    }
-
-    /* A timer was armed just now, so bs owns the token. */
-    tg->tokens[is_write] = bs;
-    tg->any_timer_armed[is_write] = true;
-
-    return true;
-}
-
-/* Pick the next pending I/O request of the group and get it going,
- * either immediately or through a timer.
- *
- * This assumes that tg->lock is held.
- *
- * @bs:        the current BlockDriverState
- * @is_write:  the type of operation (read/write)
- */
-static void schedule_next_request(BlockDriverState *bs, bool is_write)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    BlockDriverState *token = next_throttle_token(bs, is_write);
-
-    /* Nothing is queued anywhere in the group. */
-    if (!token->pending_reqs[is_write]) {
-        return;
-    }
-
-    /* The request must be throttled: a timer is (now) armed for it. */
-    if (throttle_group_schedule_timer(token, is_write)) {
-        return;
-    }
-
-    /* No wait needed, so run a request right away.  Requests queued on
-     * the current bs are preferred; otherwise fire the token's timer
-     * almost immediately.
-     */
-    if (qemu_in_coroutine() &&
-        qemu_co_queue_next(&bs->throttled_reqs[is_write])) {
-        token = bs;
-    } else {
-        ThrottleTimers *tt = &token->throttle_timers;
-        int64_t now = qemu_clock_get_ns(tt->clock_type);
-
-        timer_mod(tt->timers[is_write], now + 1);
-        tg->any_timer_armed[is_write] = true;
-    }
-
-    tg->tokens[is_write] = token;
-}
-
-/* Check if an I/O request needs to be throttled, wait and set a timer
- * if necessary, and schedule the next request using a round robin
- * algorithm.
- *
- * @bs:        the current BlockDriverState
- * @bytes:     the number of bytes for this I/O
- * @is_write:  the type of operation (read/write)
- */
-void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
-                                                        unsigned int bytes,
-                                                        bool is_write)
-{
-    bool must_wait;
-    BlockDriverState *token;
-
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    qemu_mutex_lock(&tg->lock);
-
-    /* First we check if this I/O has to be throttled. */
-    token = next_throttle_token(bs, is_write);
-    must_wait = throttle_group_schedule_timer(token, is_write);
-
-    /* Wait if there's a timer set or queued requests of this type */
-    if (must_wait || bs->pending_reqs[is_write]) {
-        bs->pending_reqs[is_write]++;
-        qemu_mutex_unlock(&tg->lock);
-        qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
-        qemu_mutex_lock(&tg->lock);
-        bs->pending_reqs[is_write]--;
-    }
-
-    /* The I/O will be executed, so do the accounting */
-    throttle_account(bs->throttle_state, is_write, bytes);
-
-    /* Schedule the next request */
-    schedule_next_request(bs, is_write);
-
-    qemu_mutex_unlock(&tg->lock);
-}
-
-/* Update the throttle configuration for a particular group. Similar
- * to throttle_config(), but guarantees atomicity within the
- * throttling group.
- *
- * @bs:  a BlockDriverState that is member of the group
- * @cfg: the configuration to set
- */
-void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
-{
-    ThrottleTimers *tt = &bs->throttle_timers;
-    ThrottleState *ts = bs->throttle_state;
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    qemu_mutex_lock(&tg->lock);
-    /* throttle_config() cancels the timers */
-    if (timer_pending(tt->timers[0])) {
-        tg->any_timer_armed[0] = false;
-    }
-    if (timer_pending(tt->timers[1])) {
-        tg->any_timer_armed[1] = false;
-    }
-    throttle_config(ts, tt, cfg);
-    qemu_mutex_unlock(&tg->lock);
-}
-
-/* Get the throttle configuration from a particular group. Similar to
- * throttle_get_config(), but guarantees atomicity within the
- * throttling group.
- *
- * @bs:  a BlockDriverState that is member of the group
- * @cfg: the configuration will be written here
- */
-void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
-{
-    ThrottleState *ts = bs->throttle_state;
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    qemu_mutex_lock(&tg->lock);
-    throttle_get_config(ts, cfg);
-    qemu_mutex_unlock(&tg->lock);
-}
-
-/* ThrottleTimers callback. This wakes up a request that was waiting
- * because it had been throttled.
- *
- * @bs:        the BlockDriverState whose request had been throttled
- * @is_write:  the type of operation (read/write)
- */
-static void timer_cb(BlockDriverState *bs, bool is_write)
-{
-    ThrottleState *ts = bs->throttle_state;
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    bool empty_queue;
-
-    /* The timer has just been fired, so we can update the flag */
-    qemu_mutex_lock(&tg->lock);
-    tg->any_timer_armed[is_write] = false;
-    qemu_mutex_unlock(&tg->lock);
-
-    /* Run the request that was waiting for this timer */
-    empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]);
-
-    /* If the request queue was empty then we have to take care of
-     * scheduling the next one */
-    if (empty_queue) {
-        qemu_mutex_lock(&tg->lock);
-        schedule_next_request(bs, is_write);
-        qemu_mutex_unlock(&tg->lock);
-    }
-}
-
-static void read_timer_cb(void *opaque)
-{
-    timer_cb(opaque, false);
-}
-
-static void write_timer_cb(void *opaque)
-{
-    timer_cb(opaque, true);
-}
-
-/* Register a BlockDriverState in the throttling group, also
- * initializing its timers and updating its throttle_state pointer to
- * point to it. If a throttling group with that name does not exist
- * yet, it will be created.
- *
- * @bs:        the BlockDriverState to insert
- * @groupname: the name of the group
- */
-void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
-{
-    int i;
-    ThrottleState *ts = throttle_group_incref(groupname);
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-    int clock_type = QEMU_CLOCK_REALTIME;
-
-    if (qtest_enabled()) {
-        /* For testing block IO throttling only */
-        clock_type = QEMU_CLOCK_VIRTUAL;
-    }
-
-    bs->throttle_state = ts;
-
-    qemu_mutex_lock(&tg->lock);
-    /* If the ThrottleGroup is new set this BlockDriverState as the token */
-    for (i = 0; i < 2; i++) {
-        if (!tg->tokens[i]) {
-            tg->tokens[i] = bs;
-        }
-    }
-
-    QLIST_INSERT_HEAD(&tg->head, bs, round_robin);
-
-    throttle_timers_init(&bs->throttle_timers,
-                         bdrv_get_aio_context(bs),
-                         clock_type,
-                         read_timer_cb,
-                         write_timer_cb,
-                         bs);
-
-    qemu_mutex_unlock(&tg->lock);
-}
-
-/* Unregister a BlockDriverState from its group, removing it from the
- * list, destroying the timers and setting the throttle_state pointer
- * to NULL.
- *
- * The BlockDriverState must not have pending throttled requests, so
- * the caller has to drain them first.
- *
- * The group will be destroyed if it's empty after this operation.
- *
- * @bs: the BlockDriverState to remove
- */
-void throttle_group_unregister_bs(BlockDriverState *bs)
-{
-    ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
-    int i;
-
-    assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0);
-    assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
-    assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
-
-    qemu_mutex_lock(&tg->lock);
-    for (i = 0; i < 2; i++) {
-        if (tg->tokens[i] == bs) {
-            BlockDriverState *token = throttle_group_next_bs(bs);
-            /* Take care of the case where this is the last bs in the group */
-            if (token == bs) {
-                token = NULL;
-            }
-            tg->tokens[i] = token;
-        }
-    }
-
-    /* remove the current bs from the list */
-    QLIST_REMOVE(bs, round_robin);
-    throttle_timers_destroy(&bs->throttle_timers);
-    qemu_mutex_unlock(&tg->lock);
-
-    throttle_group_unref(&tg->ts);
-    bs->throttle_state = NULL;
-}
-
-static void throttle_groups_init(void)
-{
-    qemu_mutex_init(&throttle_groups_lock);
-}
-
-block_init(throttle_groups_init);
diff --git a/qemu/block/vdi.c b/qemu/block/vdi.c
deleted file mode 100644
index 75d4819ed..000000000
--- a/qemu/block/vdi.c
+++ /dev/null
@@ -1,923 +0,0 @@
-/*
- * Block driver for the Virtual Disk Image (VDI) format
- *
- * Copyright (c) 2009, 2012 Stefan Weil
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or
- * (at your option) version 3 or any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
- * Reference:
- * http://forums.virtualbox.org/viewtopic.php?t=8046
- *
- * This driver supports create / read / write operations on VDI images.
- *
- * Todo (see also TODO in code):
- *
- * Some features like snapshots are still missing.
- *
- * Deallocation of zero-filled blocks and shrinking images are missing, too
- * (might be added to common block layer).
- *
- * Allocation of blocks could be optimized (less writes to block map and
- * header).
- *
- * Read and write of adjacent blocks could be done in one operation
- * (current code uses one operation per block (1 MiB).
- *
- * The code is not thread safe (missing locks for changes in header and
- * block table, no problem with current QEMU).
- *
- * Hints:
- *
- * Blocks (VDI documentation) correspond to clusters (QEMU).
- * QEMU's backing files could be implemented using VDI snapshot files (TODO).
- * VDI snapshot files may also contain the complete machine state.
- * Maybe this machine state can be converted to QEMU PC machine snapshot data.
- *
- * The driver keeps a block cache (little endian entries) in memory.
- * For the standard block size (1 MiB), a 1 TiB disk will use 4 MiB RAM,
- * so this seems to be reasonable.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/module.h"
-#include "migration/migration.h"
-#include "qemu/coroutine.h"
-#include "qemu/cutils.h"
-
-#if defined(CONFIG_UUID)
-#include <uuid/uuid.h>
-#else
-/* TODO: move uuid emulation to some central place in QEMU. */
-#include "sysemu/sysemu.h"     /* UUID_FMT */
-typedef unsigned char uuid_t[16];
-#endif
-
-/* Code configuration options. */
-
-/* Enable debug messages. */
-//~ #define CONFIG_VDI_DEBUG
-
-/* Support write operations on VDI images. */
-#define CONFIG_VDI_WRITE
-
-/* Support non-standard block (cluster) size. This is untested.
- * Maybe it will be needed for very large images.
- */
-//~ #define CONFIG_VDI_BLOCK_SIZE
-
-/* Support static (fixed, pre-allocated) images. */
-#define CONFIG_VDI_STATIC_IMAGE
-
-/* Command line option for static images. */
-#define BLOCK_OPT_STATIC "static"
-
-#define KiB     1024
-#define MiB     (KiB * KiB)
-
-#define SECTOR_SIZE 512
-#define DEFAULT_CLUSTER_SIZE (1 * MiB)
-
-#if defined(CONFIG_VDI_DEBUG)
-#define logout(fmt, ...) \
-                fprintf(stderr, "vdi\t%-24s" fmt, __func__, ##__VA_ARGS__)
-#else
-#define logout(fmt, ...) ((void)0)
-#endif
-
-/* Image signature. */
-#define VDI_SIGNATURE 0xbeda107f
-
-/* Image version. */
-#define VDI_VERSION_1_1 0x00010001
-
-/* Image type. */
-#define VDI_TYPE_DYNAMIC 1
-#define VDI_TYPE_STATIC  2
-
-/* Innotek / SUN images use these strings in header.text:
- * "<<< innotek VirtualBox Disk Image >>>\n"
- * "<<< Sun xVM VirtualBox Disk Image >>>\n"
- * "<<< Sun VirtualBox Disk Image >>>\n"
- * The value does not matter, so QEMU created images use a different text.
- */
-#define VDI_TEXT "<<< QEMU VM Virtual Disk Image >>>\n"
-
-/* A never-allocated block; semantically arbitrary content. */
-#define VDI_UNALLOCATED 0xffffffffU
-
-/* A discarded (no longer allocated) block; semantically zero-filled. */
-#define VDI_DISCARDED   0xfffffffeU
-
-#define VDI_IS_ALLOCATED(X) ((X) < VDI_DISCARDED)
-
-/* The bmap will take up VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) bytes; since
- * the bmap is read and written in a single operation, its size needs to be
- * limited to INT_MAX; furthermore, when opening an image, the bmap size is
- * rounded up to be aligned on BDRV_SECTOR_SIZE.
- * Therefore this should satisfy the following:
- * VDI_BLOCKS_IN_IMAGE_MAX * sizeof(uint32_t) + BDRV_SECTOR_SIZE == INT_MAX + 1
- * (INT_MAX + 1 is the first value not representable as an int)
- * This guarantees that any value below or equal to the constant will, when
- * multiplied by sizeof(uint32_t) and rounded up to a BDRV_SECTOR_SIZE boundary,
- * still be below or equal to INT_MAX. */
-#define VDI_BLOCKS_IN_IMAGE_MAX \
-    ((unsigned)((INT_MAX + 1u - BDRV_SECTOR_SIZE) / sizeof(uint32_t)))
-#define VDI_DISK_SIZE_MAX        ((uint64_t)VDI_BLOCKS_IN_IMAGE_MAX * \
-                                  (uint64_t)DEFAULT_CLUSTER_SIZE)
-
-#if !defined(CONFIG_UUID)
-static inline void uuid_generate(uuid_t out)
-{
-    memset(out, 0, sizeof(uuid_t));
-}
-
-static inline int uuid_is_null(const uuid_t uu)
-{
-    uuid_t null_uuid = { 0 };
-    return memcmp(uu, null_uuid, sizeof(uuid_t)) == 0;
-}
-
-# if defined(CONFIG_VDI_DEBUG)
-static inline void uuid_unparse(const uuid_t uu, char *out)
-{
-    snprintf(out, 37, UUID_FMT,
-            uu[0], uu[1], uu[2], uu[3], uu[4], uu[5], uu[6], uu[7],
-            uu[8], uu[9], uu[10], uu[11], uu[12], uu[13], uu[14], uu[15]);
-}
-# endif
-#endif
-
-typedef struct {
-    char text[0x40];
-    uint32_t signature;
-    uint32_t version;
-    uint32_t header_size;
-    uint32_t image_type;
-    uint32_t image_flags;
-    char description[256];
-    uint32_t offset_bmap;
-    uint32_t offset_data;
-    uint32_t cylinders;         /* disk geometry, unused here */
-    uint32_t heads;             /* disk geometry, unused here */
-    uint32_t sectors;           /* disk geometry, unused here */
-    uint32_t sector_size;
-    uint32_t unused1;
-    uint64_t disk_size;
-    uint32_t block_size;
-    uint32_t block_extra;       /* unused here */
-    uint32_t blocks_in_image;
-    uint32_t blocks_allocated;
-    uuid_t uuid_image;
-    uuid_t uuid_last_snap;
-    uuid_t uuid_link;
-    uuid_t uuid_parent;
-    uint64_t unused2[7];
-} QEMU_PACKED VdiHeader;
-
-typedef struct {
-    /* The block map entries are little endian (even in memory). */
-    uint32_t *bmap;
-    /* Size of block (bytes). */
-    uint32_t block_size;
-    /* Size of block (sectors). */
-    uint32_t block_sectors;
-    /* First sector of block map. */
-    uint32_t bmap_sector;
-    /* VDI header (converted to host endianness). */
-    VdiHeader header;
-
-    CoMutex write_lock;
-
-    Error *migration_blocker;
-} BDRVVdiState;
-
-/* Change UUID from little endian (IPRT = VirtualBox format) to big endian
- * format (network byte order, standard, see RFC 4122) and vice versa.
- */
-static void uuid_convert(uuid_t uuid)
-{
-    bswap32s((uint32_t *)&uuid[0]);
-    bswap16s((uint16_t *)&uuid[4]);
-    bswap16s((uint16_t *)&uuid[6]);
-}
-
-static void vdi_header_to_cpu(VdiHeader *header)
-{
-    le32_to_cpus(&header->signature);
-    le32_to_cpus(&header->version);
-    le32_to_cpus(&header->header_size);
-    le32_to_cpus(&header->image_type);
-    le32_to_cpus(&header->image_flags);
-    le32_to_cpus(&header->offset_bmap);
-    le32_to_cpus(&header->offset_data);
-    le32_to_cpus(&header->cylinders);
-    le32_to_cpus(&header->heads);
-    le32_to_cpus(&header->sectors);
-    le32_to_cpus(&header->sector_size);
-    le64_to_cpus(&header->disk_size);
-    le32_to_cpus(&header->block_size);
-    le32_to_cpus(&header->block_extra);
-    le32_to_cpus(&header->blocks_in_image);
-    le32_to_cpus(&header->blocks_allocated);
-    uuid_convert(header->uuid_image);
-    uuid_convert(header->uuid_last_snap);
-    uuid_convert(header->uuid_link);
-    uuid_convert(header->uuid_parent);
-}
-
-static void vdi_header_to_le(VdiHeader *header)
-{
-    cpu_to_le32s(&header->signature);
-    cpu_to_le32s(&header->version);
-    cpu_to_le32s(&header->header_size);
-    cpu_to_le32s(&header->image_type);
-    cpu_to_le32s(&header->image_flags);
-    cpu_to_le32s(&header->offset_bmap);
-    cpu_to_le32s(&header->offset_data);
-    cpu_to_le32s(&header->cylinders);
-    cpu_to_le32s(&header->heads);
-    cpu_to_le32s(&header->sectors);
-    cpu_to_le32s(&header->sector_size);
-    cpu_to_le64s(&header->disk_size);
-    cpu_to_le32s(&header->block_size);
-    cpu_to_le32s(&header->block_extra);
-    cpu_to_le32s(&header->blocks_in_image);
-    cpu_to_le32s(&header->blocks_allocated);
-    uuid_convert(header->uuid_image);
-    uuid_convert(header->uuid_last_snap);
-    uuid_convert(header->uuid_link);
-    uuid_convert(header->uuid_parent);
-}
-
-#if defined(CONFIG_VDI_DEBUG)
-static void vdi_header_print(VdiHeader *header)
-{
-    char uuid[37];
-    logout("text        %s", header->text);
-    logout("signature   0x%08x\n", header->signature);
-    logout("header size 0x%04x\n", header->header_size);
-    logout("image type  0x%04x\n", header->image_type);
-    logout("image flags 0x%04x\n", header->image_flags);
-    logout("description %s\n", header->description);
-    logout("offset bmap 0x%04x\n", header->offset_bmap);
-    logout("offset data 0x%04x\n", header->offset_data);
-    logout("cylinders   0x%04x\n", header->cylinders);
-    logout("heads       0x%04x\n", header->heads);
-    logout("sectors     0x%04x\n", header->sectors);
-    logout("sector size 0x%04x\n", header->sector_size);
-    logout("image size  0x%" PRIx64 " B (%" PRIu64 " MiB)\n",
-           header->disk_size, header->disk_size / MiB);
-    logout("block size  0x%04x\n", header->block_size);
-    logout("block extra 0x%04x\n", header->block_extra);
-    logout("blocks tot. 0x%04x\n", header->blocks_in_image);
-    logout("blocks all. 0x%04x\n", header->blocks_allocated);
-    uuid_unparse(header->uuid_image, uuid);
-    logout("uuid image  %s\n", uuid);
-    uuid_unparse(header->uuid_last_snap, uuid);
-    logout("uuid snap   %s\n", uuid);
-    uuid_unparse(header->uuid_link, uuid);
-    logout("uuid link   %s\n", uuid);
-    uuid_unparse(header->uuid_parent, uuid);
-    logout("uuid parent %s\n", uuid);
-}
-#endif
-
-static int vdi_check(BlockDriverState *bs, BdrvCheckResult *res,
-                     BdrvCheckMode fix)
-{
-    /* TODO: additional checks possible. */
-    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
-    uint32_t blocks_allocated = 0;
-    uint32_t block;
-    uint32_t *bmap;
-    logout("\n");
-
-    if (fix) {
-        return -ENOTSUP;
-    }
-
-    bmap = g_try_new(uint32_t, s->header.blocks_in_image);
-    if (s->header.blocks_in_image && bmap == NULL) {
-        res->check_errors++;
-        return -ENOMEM;
-    }
-
-    memset(bmap, 0xff, s->header.blocks_in_image * sizeof(uint32_t));
-
-    /* Check block map and value of blocks_allocated. */
-    for (block = 0; block < s->header.blocks_in_image; block++) {
-        uint32_t bmap_entry = le32_to_cpu(s->bmap[block]);
-        if (VDI_IS_ALLOCATED(bmap_entry)) {
-            if (bmap_entry < s->header.blocks_in_image) {
-                blocks_allocated++;
-                if (!VDI_IS_ALLOCATED(bmap[bmap_entry])) {
-                    bmap[bmap_entry] = bmap_entry;
-                } else {
-                    fprintf(stderr, "ERROR: block index %" PRIu32
-                            " also used by %" PRIu32 "\n", bmap[bmap_entry], bmap_entry);
-                    res->corruptions++;
-                }
-            } else {
-                fprintf(stderr, "ERROR: block index %" PRIu32
-                        " too large, is %" PRIu32 "\n", block, bmap_entry);
-                res->corruptions++;
-            }
-        }
-    }
-    if (blocks_allocated != s->header.blocks_allocated) {
-        fprintf(stderr, "ERROR: allocated blocks mismatch, is %" PRIu32
-               ", should be %" PRIu32 "\n",
-               blocks_allocated, s->header.blocks_allocated);
-        res->corruptions++;
-    }
-
-    g_free(bmap);
-
-    return 0;
-}
-
-static int vdi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    /* TODO: vdi_get_info would be needed for machine snapshots.
-       vm_state_offset is still missing. */
-    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
-    logout("\n");
-    bdi->cluster_size = s->block_size;
-    bdi->vm_state_offset = 0;
-    bdi->unallocated_blocks_are_zero = true;
-    return 0;
-}
-
-static int vdi_make_empty(BlockDriverState *bs)
-{
-    /* TODO: missing code. */
-    logout("\n");
-    /* The return value for missing code must be 0, see block.c. */
-    return 0;
-}
-
-static int vdi_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    const VdiHeader *header = (const VdiHeader *)buf;
-    int ret = 0;
-
-    logout("\n");
-
-    if (buf_size < sizeof(*header)) {
-        /* Header too small, no VDI. */
-    } else if (le32_to_cpu(header->signature) == VDI_SIGNATURE) {
-        ret = 100;
-    }
-
-    if (ret == 0) {
-        logout("no vdi image\n");
-    } else {
-        logout("%s", header->text);
-    }
-
-    return ret;
-}
-
-static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVVdiState *s = bs->opaque;
-    VdiHeader header;
-    size_t bmap_size;
-    int ret;
-
-    logout("\n");
-
-    ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    vdi_header_to_cpu(&header);
-#if defined(CONFIG_VDI_DEBUG)
-    vdi_header_print(&header);
-#endif
-
-    if (header.disk_size > VDI_DISK_SIZE_MAX) {
-        error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
-                          ", max supported is 0x%" PRIx64 ")",
-                          header.disk_size, VDI_DISK_SIZE_MAX);
-        ret = -ENOTSUP;
-        goto fail;
-    }
-
-    if (header.disk_size % SECTOR_SIZE != 0) {
-        /* 'VBoxManage convertfromraw' can create images with odd disk sizes.
-           We accept them but round the disk size to the next multiple of
-           SECTOR_SIZE. */
-        logout("odd disk size %" PRIu64 " B, round up\n", header.disk_size);
-        header.disk_size = ROUND_UP(header.disk_size, SECTOR_SIZE);
-    }
-
-    if (header.signature != VDI_SIGNATURE) {
-        error_setg(errp, "Image not in VDI format (bad signature %08" PRIx32
-                   ")", header.signature);
-        ret = -EINVAL;
-        goto fail;
-    } else if (header.version != VDI_VERSION_1_1) {
-        error_setg(errp, "unsupported VDI image (version %" PRIu32 ".%" PRIu32
-                   ")", header.version >> 16, header.version & 0xffff);
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (header.offset_bmap % SECTOR_SIZE != 0) {
-        /* We only support block maps which start on a sector boundary. */
-        error_setg(errp, "unsupported VDI image (unaligned block map offset "
-                   "0x%" PRIx32 ")", header.offset_bmap);
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (header.offset_data % SECTOR_SIZE != 0) {
-        /* We only support data blocks which start on a sector boundary. */
-        error_setg(errp, "unsupported VDI image (unaligned data offset 0x%"
-                   PRIx32 ")", header.offset_data);
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (header.sector_size != SECTOR_SIZE) {
-        error_setg(errp, "unsupported VDI image (sector size %" PRIu32
-                   " is not %u)", header.sector_size, SECTOR_SIZE);
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
-        error_setg(errp, "unsupported VDI image (block size %" PRIu32
-                   " is not %u)", header.block_size, DEFAULT_CLUSTER_SIZE);
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (header.disk_size >
-               (uint64_t)header.blocks_in_image * header.block_size) {
-        error_setg(errp, "unsupported VDI image (disk size %" PRIu64 ", "
-                   "image bitmap has room for %" PRIu64 ")",
-                   header.disk_size,
-                   (uint64_t)header.blocks_in_image * header.block_size);
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (!uuid_is_null(header.uuid_link)) {
-        error_setg(errp, "unsupported VDI image (non-NULL link UUID)");
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (!uuid_is_null(header.uuid_parent)) {
-        error_setg(errp, "unsupported VDI image (non-NULL parent UUID)");
-        ret = -ENOTSUP;
-        goto fail;
-    } else if (header.blocks_in_image > VDI_BLOCKS_IN_IMAGE_MAX) {
-        error_setg(errp, "unsupported VDI image "
-                         "(too many blocks %u, max is %u)",
-                          header.blocks_in_image, VDI_BLOCKS_IN_IMAGE_MAX);
-        ret = -ENOTSUP;
-        goto fail;
-    }
-
-    bs->total_sectors = header.disk_size / SECTOR_SIZE;
-
-    s->block_size = header.block_size;
-    s->block_sectors = header.block_size / SECTOR_SIZE;
-    s->bmap_sector = header.offset_bmap / SECTOR_SIZE;
-    s->header = header;
-
-    bmap_size = header.blocks_in_image * sizeof(uint32_t);
-    bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
-    s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
-    if (s->bmap == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
-                    bmap_size);
-    if (ret < 0) {
-        goto fail_free_bmap;
-    }
-
-    /* Disable migration when vdi images are used */
-    error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
-
-    qemu_co_mutex_init(&s->write_lock);
-
-    return 0;
-
- fail_free_bmap:
-    qemu_vfree(s->bmap);
-
- fail:
-    return ret;
-}
-
-static int vdi_reopen_prepare(BDRVReopenState *state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
-    BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
-    size_t bmap_index = sector_num / s->block_sectors;
-    size_t sector_in_block = sector_num % s->block_sectors;
-    int n_sectors = s->block_sectors - sector_in_block;
-    uint32_t bmap_entry = le32_to_cpu(s->bmap[bmap_index]);
-    uint64_t offset;
-    int result;
-
-    logout("%p, %" PRId64 ", %d, %p\n", bs, sector_num, nb_sectors, pnum);
-    if (n_sectors > nb_sectors) {
-        n_sectors = nb_sectors;
-    }
-    *pnum = n_sectors;
-    result = VDI_IS_ALLOCATED(bmap_entry);
-    if (!result) {
-        return 0;
-    }
-
-    offset = s->header.offset_data +
-                              (uint64_t)bmap_entry * s->block_size +
-                              sector_in_block * SECTOR_SIZE;
-    *file = bs->file->bs;
-    return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
-}
-
-static int vdi_co_read(BlockDriverState *bs,
-        int64_t sector_num, uint8_t *buf, int nb_sectors)
-{
-    BDRVVdiState *s = bs->opaque;
-    uint32_t bmap_entry;
-    uint32_t block_index;
-    uint32_t sector_in_block;
-    uint32_t n_sectors;
-    int ret = 0;
-
-    logout("\n");
-
-    while (ret >= 0 && nb_sectors > 0) {
-        block_index = sector_num / s->block_sectors;
-        sector_in_block = sector_num % s->block_sectors;
-        n_sectors = s->block_sectors - sector_in_block;
-        if (n_sectors > nb_sectors) {
-            n_sectors = nb_sectors;
-        }
-
-        logout("will read %u sectors starting at sector %" PRIu64 "\n",
-               n_sectors, sector_num);
-
-        /* prepare next AIO request */
-        bmap_entry = le32_to_cpu(s->bmap[block_index]);
-        if (!VDI_IS_ALLOCATED(bmap_entry)) {
-            /* Block not allocated, return zeros, no need to wait. */
-            memset(buf, 0, n_sectors * SECTOR_SIZE);
-            ret = 0;
-        } else {
-            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
-                              (uint64_t)bmap_entry * s->block_sectors +
-                              sector_in_block;
-            ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
-        }
-        logout("%u sectors read\n", n_sectors);
-
-        nb_sectors -= n_sectors;
-        sector_num += n_sectors;
-        buf += n_sectors * SECTOR_SIZE;
-    }
-
-    return ret;
-}
-
-static int vdi_co_write(BlockDriverState *bs,
-        int64_t sector_num, const uint8_t *buf, int nb_sectors)
-{
-    BDRVVdiState *s = bs->opaque;
-    uint32_t bmap_entry;
-    uint32_t block_index;
-    uint32_t sector_in_block;
-    uint32_t n_sectors;
-    uint32_t bmap_first = VDI_UNALLOCATED;
-    uint32_t bmap_last = VDI_UNALLOCATED;
-    uint8_t *block = NULL;
-    int ret = 0;
-
-    logout("\n");
-
-    while (ret >= 0 && nb_sectors > 0) {
-        block_index = sector_num / s->block_sectors;
-        sector_in_block = sector_num % s->block_sectors;
-        n_sectors = s->block_sectors - sector_in_block;
-        if (n_sectors > nb_sectors) {
-            n_sectors = nb_sectors;
-        }
-
-        logout("will write %u sectors starting at sector %" PRIu64 "\n",
-               n_sectors, sector_num);
-
-        /* prepare next AIO request */
-        bmap_entry = le32_to_cpu(s->bmap[block_index]);
-        if (!VDI_IS_ALLOCATED(bmap_entry)) {
-            /* Allocate new block and write to it. */
-            uint64_t offset;
-            bmap_entry = s->header.blocks_allocated;
-            s->bmap[block_index] = cpu_to_le32(bmap_entry);
-            s->header.blocks_allocated++;
-            offset = s->header.offset_data / SECTOR_SIZE +
-                     (uint64_t)bmap_entry * s->block_sectors;
-            if (block == NULL) {
-                block = g_malloc(s->block_size);
-                bmap_first = block_index;
-            }
-            bmap_last = block_index;
-            /* Copy data to be written to new block and zero unused parts. */
-            memset(block, 0, sector_in_block * SECTOR_SIZE);
-            memcpy(block + sector_in_block * SECTOR_SIZE,
-                   buf, n_sectors * SECTOR_SIZE);
-            memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
-                   (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
-
-            /* Note that this coroutine does not yield anywhere from reading the
-             * bmap entry until here, so in regards to all the coroutines trying
-             * to write to this cluster, the one doing the allocation will
-             * always be the first to try to acquire the lock.
-             * Therefore, it is also the first that will actually be able to
-             * acquire the lock and thus the padded cluster is written before
-             * the other coroutines can write to the affected area. */
-            qemu_co_mutex_lock(&s->write_lock);
-            ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
-            qemu_co_mutex_unlock(&s->write_lock);
-        } else {
-            uint64_t offset = s->header.offset_data / SECTOR_SIZE +
-                              (uint64_t)bmap_entry * s->block_sectors +
-                              sector_in_block;
-            qemu_co_mutex_lock(&s->write_lock);
-            /* This lock is only used to make sure the following write operation
-             * is executed after the write issued by the coroutine allocating
-             * this cluster, therefore we do not need to keep it locked.
-             * As stated above, the allocating coroutine will always try to lock
-             * the mutex before all the other concurrent accesses to that
-             * cluster, therefore at this point we can be absolutely certain
-             * that that write operation has returned (there may be other writes
-             * in flight, but they do not concern this very operation). */
-            qemu_co_mutex_unlock(&s->write_lock);
-            ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
-        }
-
-        nb_sectors -= n_sectors;
-        sector_num += n_sectors;
-        buf += n_sectors * SECTOR_SIZE;
-
-        logout("%u sectors written\n", n_sectors);
-    }
-
-    logout("finished data write\n");
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (block) {
-        /* One or more new blocks were allocated. */
-        VdiHeader *header = (VdiHeader *) block;
-        uint8_t *base;
-        uint64_t offset;
-
-        logout("now writing modified header\n");
-        assert(VDI_IS_ALLOCATED(bmap_first));
-        *header = s->header;
-        vdi_header_to_le(header);
-        ret = bdrv_write(bs->file->bs, 0, block, 1);
-        g_free(block);
-        block = NULL;
-
-        if (ret < 0) {
-            return ret;
-        }
-
-        logout("now writing modified block map entry %u...%u\n",
-               bmap_first, bmap_last);
-        /* Write modified sectors from block map. */
-        bmap_first /= (SECTOR_SIZE / sizeof(uint32_t));
-        bmap_last /= (SECTOR_SIZE / sizeof(uint32_t));
-        n_sectors = bmap_last - bmap_first + 1;
-        offset = s->bmap_sector + bmap_first;
-        base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
-        logout("will write %u block map sectors starting from entry %u\n",
-               n_sectors, bmap_first);
-        ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
-    }
-
-    return ret;
-}
-
-/*
- * Create a new VDI image file.
- *
- * @filename: path of the image to create
- * @opts:     creation options; BLOCK_OPT_SIZE is always consumed, the
- *            cluster size and static-allocation options only when the
- *            corresponding CONFIG_VDI_* macro is defined
- * @errp:     receives a description of the failure, if any
- *
- * The header is written at offset 0, followed by the block map.  For a
- * static image the file is additionally extended to hold every block.
- *
- * Returns 0 on success, a negative errno value on failure.
- */
-static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int ret = 0;
-    uint64_t bytes = 0;
-    uint32_t blocks;
-    size_t block_size = DEFAULT_CLUSTER_SIZE;
-    uint32_t image_type = VDI_TYPE_DYNAMIC;
-    VdiHeader header;
-    size_t i;
-    size_t bmap_size;
-    int64_t offset = 0;
-    Error *local_err = NULL;
-    BlockBackend *blk = NULL;
-    uint32_t *bmap = NULL;
-
-    logout("\n");
-
-    /* Read out options. */
-    bytes = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                     BDRV_SECTOR_SIZE);
-#if defined(CONFIG_VDI_BLOCK_SIZE)
-    /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
-    block_size = qemu_opt_get_size_del(opts,
-                                       BLOCK_OPT_CLUSTER_SIZE,
-                                       DEFAULT_CLUSTER_SIZE);
-    if (block_size == 0) {
-        /* A zero cluster size would divide by zero in DIV_ROUND_UP() below */
-        ret = -EINVAL;
-        error_setg(errp, "VDI cluster size must not be zero");
-        goto exit;
-    }
-#endif
-#if defined(CONFIG_VDI_STATIC_IMAGE)
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_STATIC, false)) {
-        image_type = VDI_TYPE_STATIC;
-    }
-#endif
-
-    if (bytes > VDI_DISK_SIZE_MAX) {
-        ret = -ENOTSUP;
-        error_setg(errp, "Unsupported VDI image size (size is 0x%" PRIx64
-                          ", max supported is 0x%" PRIx64 ")",
-                          bytes, VDI_DISK_SIZE_MAX);
-        goto exit;
-    }
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto exit;
-    }
-
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    /* We need enough blocks to store the given disk size,
-       so always round up. */
-    blocks = DIV_ROUND_UP(bytes, block_size);
-
-    /* One 32 bit map entry per block, padded to a whole sector. */
-    bmap_size = blocks * sizeof(uint32_t);
-    bmap_size = ROUND_UP(bmap_size, SECTOR_SIZE);
-
-    memset(&header, 0, sizeof(header));
-    pstrcpy(header.text, sizeof(header.text), VDI_TEXT);
-    header.signature = VDI_SIGNATURE;
-    header.version = VDI_VERSION_1_1;
-    header.header_size = 0x180;
-    header.image_type = image_type;
-    header.offset_bmap = 0x200;
-    header.offset_data = 0x200 + bmap_size;
-    header.sector_size = SECTOR_SIZE;
-    header.disk_size = bytes;
-    header.block_size = block_size;
-    header.blocks_in_image = blocks;
-    if (image_type == VDI_TYPE_STATIC) {
-        header.blocks_allocated = blocks;
-    }
-    uuid_generate(header.uuid_image);
-    uuid_generate(header.uuid_last_snap);
-    /* There is no need to set header.uuid_link or header.uuid_parent here. */
-#if defined(CONFIG_VDI_DEBUG)
-    vdi_header_print(&header);
-#endif
-    vdi_header_to_le(&header);
-    ret = blk_pwrite(blk, offset, &header, sizeof(header));
-    if (ret < 0) {
-        error_setg(errp, "Error writing header to %s", filename);
-        goto exit;
-    }
-    offset += sizeof(header);
-
-    if (bmap_size > 0) {
-        bmap = g_try_malloc0(bmap_size);
-        if (bmap == NULL) {
-            ret = -ENOMEM;
-            error_setg(errp, "Could not allocate bmap");
-            goto exit;
-        }
-        /* Static images map every block to itself, dynamic ones start
-         * with all blocks unallocated. */
-        for (i = 0; i < blocks; i++) {
-            if (image_type == VDI_TYPE_STATIC) {
-                bmap[i] = i;
-            } else {
-                bmap[i] = VDI_UNALLOCATED;
-            }
-        }
-        ret = blk_pwrite(blk, offset, bmap, bmap_size);
-        if (ret < 0) {
-            error_setg(errp, "Error writing bmap to %s", filename);
-            goto exit;
-        }
-        offset += bmap_size;
-    }
-
-    if (image_type == VDI_TYPE_STATIC) {
-        /* Do the multiplication in 64 bits so that it cannot wrap where
-         * size_t is only 32 bits wide. */
-        ret = blk_truncate(blk, offset + (uint64_t)blocks * block_size);
-        if (ret < 0) {
-            error_setg(errp, "Failed to statically allocate %s", filename);
-            goto exit;
-        }
-    }
-
-    /* The write helpers above report success with a non-negative value
-     * (a byte count for blk_pwrite() in this QEMU version); callers of
-     * this function expect exactly 0 on success. */
-    ret = 0;
-
-exit:
-    blk_unref(blk);
-    g_free(bmap);
-    return ret;
-}
-
-/*
- * Tear down the per-image driver state: unregister and release the
- * migration blocker, then free the in-memory block map.
- */
-static void vdi_close(BlockDriverState *bs)
-{
-    BDRVVdiState *state = bs->opaque;
-
-    /* The blocker must be unregistered before its Error object is freed. */
-    migrate_del_blocker(state->migration_blocker);
-    error_free(state->migration_blocker);
-
-    qemu_vfree(state->bmap);
-}
-
-/*
- * Options accepted when creating a VDI image.  The image size is always
- * available; the cluster size and static-allocation options are compiled
- * in only when the matching CONFIG_VDI_* macro is defined.
- */
-static QemuOptsList vdi_create_opts = {
-    .name = "vdi-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(vdi_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-#if defined(CONFIG_VDI_BLOCK_SIZE)
-        {
-            .name = BLOCK_OPT_CLUSTER_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "VDI cluster (block) size",
-            .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
-        },
-#endif
-#if defined(CONFIG_VDI_STATIC_IMAGE)
-        {
-            .name = BLOCK_OPT_STATIC,
-            .type = QEMU_OPT_BOOL,
-            .help = "VDI static (pre-allocated) image",
-            .def_value_str = "off"
-        },
-#endif
-        /* TODO: An additional option to set UUID values might be useful. */
-        { /* end of list */ }
-    }
-};
-
-/*
- * Driver description for the "vdi" format: wires the vdi_* callbacks
- * into the generic block layer.
- */
-static BlockDriver bdrv_vdi = {
-    .format_name = "vdi",
-    .instance_size = sizeof(BDRVVdiState),
-    .bdrv_probe = vdi_probe,
-    .bdrv_open = vdi_open,
-    .bdrv_close = vdi_close,
-    .bdrv_reopen_prepare = vdi_reopen_prepare,
-    .bdrv_create = vdi_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_co_get_block_status = vdi_co_get_block_status,
-    .bdrv_make_empty = vdi_make_empty,
-
-    .bdrv_read = vdi_co_read,
-    /* The write callback is only installed when CONFIG_VDI_WRITE is set. */
-#if defined(CONFIG_VDI_WRITE)
-    .bdrv_write = vdi_co_write,
-#endif
-
-    .bdrv_get_info = vdi_get_info,
-
-    .create_opts = &vdi_create_opts,
-    .bdrv_check = vdi_check,
-};
-
-/* Module init hook: register the VDI driver with the block layer. */
-static void bdrv_vdi_init(void)
-{
-    logout("\n");
-    bdrv_register(&bdrv_vdi);
-}
-
-/* Hook bdrv_vdi_init() into the block-module initialisation. */
-block_init(bdrv_vdi_init);
diff --git a/qemu/block/vhdx-endian.c b/qemu/block/vhdx-endian.c
deleted file mode 100644
index da33cd38e..000000000
--- a/qemu/block/vhdx-endian.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Block driver for Hyper-V VHDX Images
- *
- * Copyright (c) 2013 Red Hat, Inc.,
- *
- * Authors:
- *  Jeff Cody <jcody@redhat.com>
- *
- *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
- *  by Microsoft:
- *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "block/vhdx.h"
-
-#include <uuid/uuid.h>
-
-
-/*
- * All the VHDX formats on disk are little endian - the following
- * are helper import/export functions to correctly convert
- * endianness from disk read to native cpu format, and back again.
- */
-
-
-/* VHDX File Header */
-
-
-/*
- * Convert a VHDX file header from its on-disk little-endian layout to
- * host byte order, in place.
- *
- * @h: header to convert; must not be NULL
- */
-void vhdx_header_le_import(VHDXHeader *h)
-{
-    assert(h != NULL);
-
-    /* GUID members */
-    leguid_to_cpus(&h->file_write_guid);
-    leguid_to_cpus(&h->data_write_guid);
-    leguid_to_cpus(&h->log_guid);
-
-    /* 64 bit members */
-    le64_to_cpus(&h->sequence_number);
-    le64_to_cpus(&h->log_offset);
-
-    /* 32 bit members */
-    le32_to_cpus(&h->signature);
-    le32_to_cpus(&h->checksum);
-    le32_to_cpus(&h->log_length);
-
-    /* 16 bit members */
-    le16_to_cpus(&h->log_version);
-    le16_to_cpus(&h->version);
-}
-
-/*
- * Produce a little-endian (on-disk) copy of a host-order VHDX header.
- *
- * @orig_h: header in host byte order; only read
- * @new_h:  receives the converted fields
- *
- * Neither pointer may be NULL.  Only the fields listed below are written
- * to @new_h.
- */
-void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h)
-{
-    assert(orig_h != NULL);
-    assert(new_h != NULL);
-
-    /* GUIDs: copy each one, then byte-swap the copy in place */
-    new_h->file_write_guid = orig_h->file_write_guid;
-    cpu_to_leguids(&new_h->file_write_guid);
-
-    new_h->data_write_guid = orig_h->data_write_guid;
-    cpu_to_leguids(&new_h->data_write_guid);
-
-    new_h->log_guid = orig_h->log_guid;
-    cpu_to_leguids(&new_h->log_guid);
-
-    /* scalar members */
-    new_h->sequence_number = cpu_to_le64(orig_h->sequence_number);
-    new_h->log_offset      = cpu_to_le64(orig_h->log_offset);
-    new_h->signature       = cpu_to_le32(orig_h->signature);
-    new_h->checksum        = cpu_to_le32(orig_h->checksum);
-    new_h->log_length      = cpu_to_le32(orig_h->log_length);
-    new_h->log_version     = cpu_to_le16(orig_h->log_version);
-    new_h->version         = cpu_to_le16(orig_h->version);
-}
-
-
-/* VHDX Log Headers */
-
-
-/*
- * Convert a log descriptor from little endian to host byte order, in
- * place.
- *
- * Only the signature, file offset and sequence number are converted.
- * NOTE(review): the export counterpart also converts trailing_bytes and
- * leading_bytes, which are left untouched here - confirm that this
- * asymmetry is intended.
- *
- * @d: descriptor to convert; must not be NULL
- */
-void vhdx_log_desc_le_import(VHDXLogDescriptor *d)
-{
-    assert(d != NULL);
-
-    le64_to_cpus(&d->sequence_number);
-    le64_to_cpus(&d->file_offset);
-    le32_to_cpus(&d->signature);
-}
-
-/*
- * Convert a log descriptor from host byte order to little endian, in
- * place.
- *
- * @d: descriptor to convert; must not be NULL
- */
-void vhdx_log_desc_le_export(VHDXLogDescriptor *d)
-{
-    assert(d != NULL);
-
-    /* 64 bit members */
-    cpu_to_le64s(&d->sequence_number);
-    cpu_to_le64s(&d->file_offset);
-    cpu_to_le64s(&d->leading_bytes);
-
-    /* 32 bit members */
-    cpu_to_le32s(&d->trailing_bytes);
-    cpu_to_le32s(&d->signature);
-}
-
-/*
- * Convert the signature and the two sequence-number halves of a log data
- * sector from little endian to host byte order, in place.
- *
- * @d: data sector to convert; must not be NULL
- */
-void vhdx_log_data_le_import(VHDXLogDataSector *d)
-{
-    assert(d != NULL);
-
-    le32_to_cpus(&d->sequence_low);
-    le32_to_cpus(&d->sequence_high);
-    le32_to_cpus(&d->data_signature);
-}
-
-/*
- * Convert the signature and the two sequence-number halves of a log data
- * sector from host byte order to little endian, in place.
- *
- * @d: data sector to convert; must not be NULL
- */
-void vhdx_log_data_le_export(VHDXLogDataSector *d)
-{
-    assert(d != NULL);
-
-    cpu_to_le32s(&d->sequence_low);
-    cpu_to_le32s(&d->sequence_high);
-    cpu_to_le32s(&d->data_signature);
-}
-
-/*
- * Convert a log entry header from little endian to host byte order, in
- * place.
- *
- * @hdr: header to convert; must not be NULL
- */
-void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr)
-{
-    assert(hdr != NULL);
-
-    /* GUID member */
-    leguid_to_cpus(&hdr->log_guid);
-
-    /* 64 bit members */
-    le64_to_cpus(&hdr->sequence_number);
-    le64_to_cpus(&hdr->flushed_file_offset);
-    le64_to_cpus(&hdr->last_file_offset);
-
-    /* 32 bit members */
-    le32_to_cpus(&hdr->signature);
-    le32_to_cpus(&hdr->checksum);
-    le32_to_cpus(&hdr->entry_length);
-    le32_to_cpus(&hdr->tail);
-    le32_to_cpus(&hdr->descriptor_count);
-}
-
-/*
- * Convert a log entry header from host byte order to little endian, in
- * place.
- *
- * @hdr: header to convert; must not be NULL
- */
-void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr)
-{
-    assert(hdr != NULL);
-
-    /* GUID member */
-    cpu_to_leguids(&hdr->log_guid);
-
-    /* 64 bit members */
-    cpu_to_le64s(&hdr->sequence_number);
-    cpu_to_le64s(&hdr->flushed_file_offset);
-    cpu_to_le64s(&hdr->last_file_offset);
-
-    /* 32 bit members */
-    cpu_to_le32s(&hdr->signature);
-    cpu_to_le32s(&hdr->checksum);
-    cpu_to_le32s(&hdr->entry_length);
-    cpu_to_le32s(&hdr->tail);
-    cpu_to_le32s(&hdr->descriptor_count);
-}
-
-
-/* Region table entries */
-/*
- * Convert a region table header from little endian to host byte order,
- * in place.
- *
- * @hdr: header to convert; must not be NULL
- */
-void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr)
-{
-    assert(hdr != NULL);
-
-    le32_to_cpus(&hdr->entry_count);
-    le32_to_cpus(&hdr->checksum);
-    le32_to_cpus(&hdr->signature);
-}
-
-/*
- * Convert a region table header from host byte order to little endian,
- * in place.
- *
- * @hdr: header to convert; must not be NULL
- */
-void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr)
-{
-    assert(hdr != NULL);
-
-    cpu_to_le32s(&hdr->entry_count);
-    cpu_to_le32s(&hdr->checksum);
-    cpu_to_le32s(&hdr->signature);
-}
-
-/*
- * Convert a region table entry from little endian to host byte order,
- * in place.
- *
- * @e: entry to convert; must not be NULL
- */
-void vhdx_region_entry_le_import(VHDXRegionTableEntry *e)
-{
-    assert(e != NULL);
-
-    /* scalar members first, GUID last */
-    le32_to_cpus(&e->data_bits);
-    le32_to_cpus(&e->length);
-    le64_to_cpus(&e->file_offset);
-    leguid_to_cpus(&e->guid);
-}
-
-/*
- * Convert a region table entry from host byte order to little endian,
- * in place.
- *
- * @e: entry to convert; must not be NULL
- */
-void vhdx_region_entry_le_export(VHDXRegionTableEntry *e)
-{
-    assert(e != NULL);
-
-    /* scalar members first, GUID last */
-    cpu_to_le32s(&e->data_bits);
-    cpu_to_le32s(&e->length);
-    cpu_to_le64s(&e->file_offset);
-    cpu_to_leguids(&e->guid);
-}
-
-
-/* Metadata headers & table */
-/*
- * Convert a metadata table header from little endian to host byte order,
- * in place.
- *
- * @hdr: header to convert; must not be NULL
- */
-void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr)
-{
-    assert(hdr != NULL);
-
-    le16_to_cpus(&hdr->entry_count);
-    le64_to_cpus(&hdr->signature);
-}
-
-/*
- * Convert a metadata table header from host byte order to little endian,
- * in place.
- *
- * @hdr: header to convert; must not be NULL
- */
-void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr)
-{
-    assert(hdr != NULL);
-
-    cpu_to_le16s(&hdr->entry_count);
-    cpu_to_le64s(&hdr->signature);
-}
-
-/*
- * Convert a metadata table entry from little endian to host byte order,
- * in place.
- *
- * @e: entry to convert; must not be NULL
- */
-void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e)
-{
-    assert(e != NULL);
-
-    /* 32 bit members first, GUID last */
-    le32_to_cpus(&e->data_bits);
-    le32_to_cpus(&e->length);
-    le32_to_cpus(&e->offset);
-    leguid_to_cpus(&e->item_id);
-}
-/*
- * Convert a metadata table entry from host byte order to little endian,
- * in place.
- *
- * @e: entry to convert; must not be NULL
- */
-void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e)
-{
-    assert(e != NULL);
-
-    /* 32 bit members first, GUID last */
-    cpu_to_le32s(&e->data_bits);
-    cpu_to_le32s(&e->length);
-    cpu_to_le32s(&e->offset);
-    cpu_to_leguids(&e->item_id);
-}
diff --git a/qemu/block/vhdx-log.c b/qemu/block/vhdx-log.c
deleted file mode 100644
index 7ea7187fc..000000000
--- a/qemu/block/vhdx-log.c
+++ /dev/null
@@ -1,1043 +0,0 @@
-/*
- * Block driver for Hyper-V VHDX Images
- *
- * Copyright (c) 2013 Red Hat, Inc.,
- *
- * Authors:
- *  Jeff Cody <jcody@redhat.com>
- *
- *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
- *  by Microsoft:
- *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
- *
- * This file covers the functionality of the metadata log writing, parsing, and
- * replay.
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "qemu/error-report.h"
-#include "qemu/module.h"
-#include "block/vhdx.h"
-
-
-/* A run of consecutive log entries, as located by the log search and
- * consumed by the log flush. */
-typedef struct VHDXLogSequence {
-    bool valid;             /* set once a valid entry has been found */
-    uint32_t count;         /* number of log entries in the sequence */
-    VHDXLogEntries log;     /* log indices bounding the sequence */
-    VHDXLogEntryHeader hdr; /* header of a validated entry of the sequence */
-} VHDXLogSequence;
-
-/* In-memory image of the descriptor sectors of one log entry: the entry
- * header immediately followed by its descriptors. */
-typedef struct VHDXLogDescEntries {
-    VHDXLogEntryHeader hdr;
-    VHDXLogDescriptor desc[];   /* flexible array; sized by the allocator */
-} VHDXLogDescEntries;
-
-/* All-zero GUID constant. */
-static const MSGUID zero_guid = { 0 };
-
-/* The log located on the disk is circular buffer containing
- * sectors of 4096 bytes each.
- *
- * It is assumed for the read/write functions below that the
- * circular buffer scheme uses a 'one sector open' to indicate
- * the buffer is full.  Given the validation methods used for each
- * sector, this method should be compatible with other methods that
- * do not waste a sector.
- */
-
-
-/* Read the entry header located at the current read index of the log
- * into 'hdr' and convert it to host byte order.  The read index itself
- * is left untouched.
- *
- * Returns -EFAULT if the read index is not on a log sector boundary,
- * -EINVAL if there is nothing to read (read index equals write index),
- * otherwise the result of bdrv_pread(). */
-static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
-                             VHDXLogEntryHeader *hdr)
-{
-    uint32_t pos;
-    uint64_t file_pos;
-    int rc;
-
-    assert(hdr != NULL);
-
-    /* peek is only supported on sector boundaries */
-    if (log->read % VHDX_LOG_SECTOR_SIZE) {
-        return -EFAULT;
-    }
-
-    /* Log sectors are 4096 bytes and the log length is a multiple of
-     * 1MB, so the buffer always holds a whole number of sectors; a
-     * header that would not fit before the end wraps to the start. */
-    pos = log->read;
-    if ((pos + sizeof(VHDXLogEntryHeader)) > log->length) {
-        pos = 0;
-    }
-
-    if (pos == log->write) {
-        return -EINVAL;
-    }
-
-    file_pos = log->offset + pos;
-
-    rc = bdrv_pread(bs->file->bs, file_pos, hdr, sizeof(VHDXLogEntryHeader));
-    if (rc >= 0) {
-        vhdx_log_entry_hdr_le_import(hdr);
-    }
-
-    return rc;
-}
-
-/* Advance a log index by one log sector, wrapping to 0 once the result
- * reaches or passes 'length'. */
-static int vhdx_log_inc_idx(uint32_t idx, uint64_t length)
-{
-    uint32_t next = idx + VHDX_LOG_SECTOR_SIZE;
-
-    /* Log sectors are 4096 bytes and the log length is a multiple of
-     * 1MB, so the buffer always holds a whole number of sectors. */
-    if (next >= length) {
-        return 0;
-    }
-    return next;
-}
-
-
-/* Mark the log as empty: rewind both log indices and update the headers
- * with an all-zero log GUID. */
-static void vhdx_log_reset(BlockDriverState *bs, BDRVVHDXState *s)
-{
-    MSGUID empty_guid = { 0 };
-
-    s->log.write = 0;
-    s->log.read = 0;
-
-    /* a log guid of 0 indicates an empty log to any parser of v0
-     * VHDX logs */
-    vhdx_update_headers(bs, s, false, &empty_guid);
-}
-
-/* Reads num_sectors from the log (all log sectors are 4096 bytes),
- * into buffer 'buffer'.  Upon return, *sectors_read will contain
- * the number of sectors successfully read.
- *
- * It is assumed that 'buffer' is already allocated, and of sufficient
- * size (i.e. >= 4096*num_sectors).  Sector n of the read is stored at
- * byte offset n * 4096 of 'buffer'.
- *
- * Reading stops early, without error, when the read index catches up
- * with the write index (log empty).
- *
- * If 'peek' is true, then the tail (read) pointer for the circular buffer is
- * not modified.
- *
- * 0 is returned on success, -errno otherwise.  */
-static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
-                                 uint32_t *sectors_read, void *buffer,
-                                 uint32_t num_sectors, bool peek)
-{
-    int ret = 0;
-    uint64_t offset;
-    uint32_t read;
-    uint8_t *dest = buffer;
-
-    read = log->read;
-
-    *sectors_read = 0;
-    while (num_sectors) {
-        if (read == log->write) {
-            /* empty */
-            break;
-        }
-        offset = log->offset + read;
-
-        ret = bdrv_pread(bs->file->bs, offset, dest, VHDX_LOG_SECTOR_SIZE);
-        if (ret < 0) {
-            goto exit;
-        }
-        /* Step to the next slot of the caller's buffer; without this
-         * every sector would overwrite the first one. */
-        dest += VHDX_LOG_SECTOR_SIZE;
-        read = vhdx_log_inc_idx(read, log->length);
-
-        *sectors_read = *sectors_read + 1;
-        num_sectors--;
-    }
-
-    /* bdrv_pread() may report success with a positive value; this
-     * function is documented to return exactly 0 on success. */
-    ret = 0;
-
-exit:
-    if (!peek) {
-        log->read = read;
-    }
-    return ret;
-}
-
-/* Appends up to num_sectors sectors (4096 bytes each) from 'buffer' to
- * the log, stopping early without error once the log is full.
- *
- * *sectors_written is incremented once per sector written; it is not
- * reset here, so the caller must initialise it.
- *
- * 'buffer' must be at least 4096*num_sectors bytes large.
- *
- * A negative errno is returned on failure; otherwise the result of the
- * last call made (vhdx_user_visible_write() or bdrv_pwrite()). */
-static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
-                                  uint32_t *sectors_written, void *buffer,
-                                  uint32_t num_sectors)
-{
-    BDRVVHDXState *s = bs->opaque;
-    uint8_t *cursor = buffer;
-    uint32_t next;
-    int rc;
-
-    rc = vhdx_user_visible_write(bs, s);
-    if (rc < 0) {
-        return rc;
-    }
-
-    for (next = log->write; num_sectors > 0; num_sectors--) {
-        /* the sector goes to the current write index ... */
-        uint64_t file_pos = log->offset + next;
-
-        /* ... unless advancing the index would hit the read index */
-        next = vhdx_log_inc_idx(next, log->length);
-        if (next == log->read) {
-            /* full */
-            break;
-        }
-
-        rc = bdrv_pwrite(bs->file->bs, file_pos, cursor,
-                         VHDX_LOG_SECTOR_SIZE);
-        if (rc < 0) {
-            return rc;
-        }
-        cursor += VHDX_LOG_SECTOR_SIZE;
-
-        /* commit the new write index only after a successful write */
-        log->write = next;
-        *sectors_written = *sectors_written + 1;
-    }
-
-    return rc;
-}
-
-
-/* Validates a log entry header (already converted to host byte order).
- *
- * @log: the log the entry belongs to; supplies the total log length
- * @hdr: the entry header to check
- * @s:   driver state; supplies the log GUID of the active file header
- *
- * Returns true if every check passes, false otherwise. */
-static bool vhdx_log_hdr_is_valid(VHDXLogEntries *log, VHDXLogEntryHeader *hdr,
-                                  BDRVVHDXState *s)
-{
-    if (hdr->signature != VHDX_LOG_SIGNATURE) {
-        return false;
-    }
-
-    /* if the individual entry length is larger than the whole log
-     * buffer, that is obviously invalid */
-    if (log->length < hdr->entry_length) {
-        return false;
-    }
-
-    /* length of entire entry must be in units of 4KB (log sector size) */
-    if (hdr->entry_length % (VHDX_LOG_SECTOR_SIZE)) {
-        return false;
-    }
-
-    /* per spec, sequence # must be > 0 */
-    if (hdr->sequence_number == 0) {
-        return false;
-    }
-
-    /* log entries are only valid if they match the file-wide log guid
-     * found in the active header */
-    if (!guid_eq(hdr->log_guid, s->headers[s->curr_header]->log_guid)) {
-        return false;
-    }
-
-    /* The descriptors must fit inside the entry.  Multiply in 64 bits:
-     * where size_t is 32 bits wide the product could otherwise wrap and
-     * let an oversized descriptor count through. */
-    if ((uint64_t)hdr->descriptor_count * sizeof(VHDXLogDescriptor) >
-        hdr->entry_length) {
-        return false;
-    }
-
-    return true;
-}
-
-/*
- * Validate a single log descriptor against the header of the log entry
- * it belongs to.
- *
- * The checks performed here are:
- *      1. The descriptor sequence number matches the entry header
- *      2. The file offset field is a multiple of 4KB
- *      3. The signature is either 'zero' or 'desc'; for a 'zero'
- *         descriptor the zero length must also be a multiple of 4KB
- *
- * The data sector that accompanies a 'desc' descriptor is not examined
- * by this function.
- *
- * @desc: the descriptor, in host byte order
- * @hdr:  the log entry header
- *
- * Returns true if valid
- */
-static bool vhdx_log_desc_is_valid(VHDXLogDescriptor *desc,
-                                   VHDXLogEntryHeader *hdr)
-{
-    if (desc->sequence_number != hdr->sequence_number) {
-        return false;
-    }
-
-    if (desc->file_offset % VHDX_LOG_SECTOR_SIZE) {
-        return false;
-    }
-
-    if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
-        /* zero descriptors cover whole sectors only */
-        return desc->zero_length % VHDX_LOG_SECTOR_SIZE == 0;
-    }
-
-    /* anything other than a data descriptor is an unknown signature */
-    return desc->signature == VHDX_LOG_DESC_SIGNATURE;
-}
-
-
-/* Layout of a log entry, where [] is one 4KB sector:
- *
- * [ hdr, desc ][   desc   ][ ... ][ data ][ ... ]
- *
- * The entry starts with a 64 byte header followed by 32-byte
- * descriptors.  The header occupies the space of two descriptors, so
- * the first sector holds up to 126 descriptors and every further
- * descriptor sector up to 128.  Data sectors follow the descriptor
- * sectors.
- *
- * Returns the number of sectors needed to hold the header plus
- * desc_cnt descriptors.
- *
- * This will never return 0, even if desc_cnt is 0.
- */
-static int vhdx_compute_desc_sectors(uint32_t desc_cnt)
-{
-    /* count the header as two descriptor-sized slots */
-    uint32_t slots = desc_cnt + 2;
-
-    /* 128 slots per sector, rounded up */
-    uint32_t sectors = slots / 128 + (slots % 128 != 0);
-
-    return sectors;
-}
-
-
-/* Reads the header of the log entry at the current read index together
- * with all of its descriptors, advancing the read index past the
- * descriptor sectors.
- *
- * The buffer is allocated here and handed to the caller through
- * '*buffer', which must be NULL on entry; the caller releases it with
- * qemu_vfree().  On failure nothing is returned through '*buffer'.
- *
- * Every descriptor is validated.  If 'convert_endian' is true, the
- * header and descriptors stored in the buffer are in host byte order;
- * otherwise the buffer keeps the bytes as read from disk.
- *
- * Returns a negative errno on failure (-EINVAL for an invalid header or
- * descriptor or a short read, -ENOMEM if the allocation fails). */
-static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
-                              VHDXLogEntries *log, VHDXLogDescEntries **buffer,
-                              bool convert_endian)
-{
-    int ret;
-    int i;
-    uint32_t wanted;
-    uint32_t got;
-    VHDXLogEntryHeader hdr;
-    VHDXLogDescEntries *entries;
-
-    assert(*buffer == NULL);
-
-    ret = vhdx_log_peek_hdr(bs, log, &hdr);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (!vhdx_log_hdr_is_valid(log, &hdr, s)) {
-        return -EINVAL;
-    }
-
-    wanted = vhdx_compute_desc_sectors(hdr.descriptor_count);
-    entries = qemu_try_blockalign(bs->file->bs,
-                                  wanted * VHDX_LOG_SECTOR_SIZE);
-    if (entries == NULL) {
-        return -ENOMEM;
-    }
-
-    ret = vhdx_log_read_sectors(bs, log, &got, entries, wanted, false);
-    if (ret < 0) {
-        goto fail;
-    }
-    if (got != wanted) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* validate a host-order copy of each descriptor, storing the copy
-     * back only if the caller asked for converted data */
-    for (i = 0; i < hdr.descriptor_count; i++) {
-        VHDXLogDescriptor host_desc = entries->desc[i];
-
-        vhdx_log_desc_le_import(&host_desc);
-        if (convert_endian) {
-            entries->desc[i] = host_desc;
-        }
-        if (!vhdx_log_desc_is_valid(&host_desc, &hdr)) {
-            ret = -EINVAL;
-            goto fail;
-        }
-    }
-    if (convert_endian) {
-        entries->hdr = hdr;
-    }
-
-    *buffer = entries;
-    return ret;
-
-fail:
-    qemu_vfree(entries);
-    return ret;
-}
-
-
-/* Flushes the descriptor described by desc to the VHDX image file.
- * If the descriptor is a data descriptor, then 'data' must be non-NULL,
- * and >= 4096 bytes (VHDX_LOG_SECTOR_SIZE), containing the data to be
- * written.
- *
- * Verification is performed to make sure the sequence numbers of a data
- * descriptor match the sequence number in the desc.
- *
- * For a zero descriptor, it may describe multiple sectors to fill with zeroes.
- * In this case, it should be noted that zeroes are written to disk, and the
- * image file is not extended as a sparse file.
- *
- * Returns -EFAULT if a data descriptor comes without data, -EINVAL on a
- * sequence number mismatch or unknown signature, otherwise the result of
- * the last bdrv_pwrite_sync() call. */
-static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
-                               VHDXLogDataSector *data)
-{
-    int ret = 0;
-    uint64_t seq, file_offset;
-    uint32_t offset = 0;
-    void *buffer = NULL;
-    uint64_t count = 1;
-    /* same width as 'count': zero_length comes from disk and may describe
-     * more sectors than an int can index */
-    uint64_t i;
-
-    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
-
-    if (desc->signature == VHDX_LOG_DESC_SIGNATURE) {
-        /* data sector */
-        if (data == NULL) {
-            ret = -EFAULT;
-            goto exit;
-        }
-
-        /* The sequence number of the data sector must match that
-         * in the descriptor */
-        seq = data->sequence_high;
-        seq <<= 32;
-        seq |= data->sequence_low & 0xffffffff;
-
-        if (seq != desc->sequence_number) {
-            ret = -EINVAL;
-            goto exit;
-        }
-
-        /* Each data sector is in total 4096 bytes, however the first
-         * 8 bytes, and last 4 bytes, are located in the descriptor */
-        memcpy(buffer, &desc->leading_bytes, 8);
-        offset += 8;
-
-        memcpy(buffer+offset, data->data, 4084);
-        offset += 4084;
-
-        memcpy(buffer+offset, &desc->trailing_bytes, 4);
-
-    } else if (desc->signature == VHDX_LOG_ZERO_SIGNATURE) {
-        /* write 'count' sectors of zeroes */
-        memset(buffer, 0, VHDX_LOG_SECTOR_SIZE);
-        count = desc->zero_length / VHDX_LOG_SECTOR_SIZE;
-    } else {
-        error_report("Invalid VHDX log descriptor entry signature 0x%" PRIx32,
-                      desc->signature);
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    file_offset = desc->file_offset;
-
-    /* count is only > 1 if we are writing zeroes */
-    for (i = 0; i < count; i++) {
-        ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
-                               VHDX_LOG_SECTOR_SIZE);
-        if (ret < 0) {
-            goto exit;
-        }
-        file_offset += VHDX_LOG_SECTOR_SIZE;
-    }
-
-exit:
-    qemu_vfree(buffer);
-    return ret;
-}
-
-/* Flush the entire log (as described by 'logs') to the VHDX image
- * file, and then set the log to 'empty' status once complete.
- *
- * The log entries should be validated prior to flushing.
- *
- * @bs:   the VHDX block device
- * @s:    driver state of @bs
- * @logs: the sequence to replay; logs->count entries are processed and
- *        the read index in logs->log is advanced while doing so
- *
- * Returns 0 on success, a negative errno on failure.  On failure the
- * log is not reset. */
-static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
-                          VHDXLogSequence *logs)
-{
-    int ret = 0;
-    uint32_t i;
-    uint32_t cnt, sectors_read;
-    uint64_t new_file_size;
-    int64_t file_length;
-    void *data = NULL;
-    VHDXLogDescEntries *desc_entries = NULL;
-    VHDXLogEntryHeader hdr_tmp = { 0 };
-
-    cnt = logs->count;
-
-    data = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
-
-    ret = vhdx_user_visible_write(bs, s);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    /* each iteration represents one log sequence, which may span multiple
-     * sectors */
-    while (cnt--) {
-        ret = vhdx_log_peek_hdr(bs, &logs->log, &hdr_tmp);
-        if (ret < 0) {
-            goto exit;
-        }
-
-        /* A negative length is an error code; comparing it as unsigned
-         * would make the truncation check below pass silently. */
-        file_length = bdrv_getlength(bs->file->bs);
-        if (file_length < 0) {
-            ret = file_length;
-            goto exit;
-        }
-        /* if the log shows a FlushedFileOffset larger than our current file
-         * size, then that means the file has been truncated / corrupted, and
-         * we must refuse to open it / use it */
-        if (hdr_tmp.flushed_file_offset > (uint64_t)file_length) {
-            ret = -EINVAL;
-            goto exit;
-        }
-
-        ret = vhdx_log_read_desc(bs, s, &logs->log, &desc_entries, true);
-        if (ret < 0) {
-            goto exit;
-        }
-
-        for (i = 0; i < desc_entries->hdr.descriptor_count; i++) {
-            if (desc_entries->desc[i].signature == VHDX_LOG_DESC_SIGNATURE) {
-                /* data sector, so read a sector to flush */
-                ret = vhdx_log_read_sectors(bs, &logs->log, &sectors_read,
-                                            data, 1, false);
-                if (ret < 0) {
-                    goto exit;
-                }
-                if (sectors_read != 1) {
-                    ret = -EINVAL;
-                    goto exit;
-                }
-                vhdx_log_data_le_import(data);
-            }
-
-            ret = vhdx_log_flush_desc(bs, &desc_entries->desc[i], data);
-            if (ret < 0) {
-                goto exit;
-            }
-        }
-
-        file_length = bdrv_getlength(bs->file->bs);
-        if (file_length < 0) {
-            ret = file_length;
-            goto exit;
-        }
-        if ((uint64_t)file_length < desc_entries->hdr.last_file_offset) {
-            new_file_size = desc_entries->hdr.last_file_offset;
-            /* NOTE(review): the file is only grown when last_file_offset
-             * is not 1MB aligned; an aligned offset beyond the current end
-             * leaves the file short - confirm this is intended. */
-            if (new_file_size % (1024*1024)) {
-                /* round up to nearest 1MB boundary */
-                new_file_size = ((new_file_size >> 20) + 1) << 20;
-                ret = bdrv_truncate(bs->file->bs, new_file_size);
-                if (ret < 0) {
-                    goto exit;
-                }
-            }
-        }
-        qemu_vfree(desc_entries);
-        desc_entries = NULL;
-    }
-
-    /* Do not declare the log empty unless the replayed data is known to
-     * have reached the disk. */
-    ret = bdrv_flush(bs);
-    if (ret < 0) {
-        goto exit;
-    }
-    /* once the log is fully flushed, indicate that we have an empty log
-     * now.  This also sets the log guid to 0, to indicate an empty log */
-    vhdx_log_reset(bs, s);
-
-exit:
-    qemu_vfree(data);
-    qemu_vfree(desc_entries);
-    return ret;
-}
-
-/* Examine the log entry at the current read index of 'log'.
- *
- * @seq:   if non-zero, the entry is only accepted when its sequence
- *         number equals seq + 1
- * @valid: set to true only if the header is valid, the sequence number
- *         is acceptable, all sectors of the entry could be read and the
- *         computed checksum matches the one in the header
- * @entry: receives a copy of the entry header when *valid is true
- *
- * Side effect on log->read: if the header cannot be used, the index is
- * advanced by one sector; otherwise it is advanced by the sector reads
- * performed while checking the entry.
- *
- * Returns a negative errno if a read or the descriptor parsing fails;
- * callers must look at *valid, not only at the return value. */
-static int vhdx_validate_log_entry(BlockDriverState *bs, BDRVVHDXState *s,
-                                   VHDXLogEntries *log, uint64_t seq,
-                                   bool *valid, VHDXLogEntryHeader *entry)
-{
-    int ret = 0;
-    VHDXLogEntryHeader hdr;
-    void *buffer = NULL;
-    uint32_t i, desc_sectors, total_sectors, crc;
-    uint32_t sectors_read = 0;
-    VHDXLogDescEntries *desc_buffer = NULL;
-
-    *valid = false;
-
-    ret = vhdx_log_peek_hdr(bs, log, &hdr);
-    if (ret < 0) {
-        goto inc_and_exit;
-    }
-
-    if (vhdx_log_hdr_is_valid(log, &hdr, s) == false) {
-        goto inc_and_exit;
-    }
-
-    /* a seq of 0 means "accept any sequence number" */
-    if (seq > 0) {
-        if (hdr.sequence_number != seq + 1) {
-            goto inc_and_exit;
-        }
-    }
-
-    desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
-
-    /* Read all log sectors, and calculate log checksum */
-
-    total_sectors = hdr.entry_length / VHDX_LOG_SECTOR_SIZE;
-
-
-    /* read_desc() will increment the read idx */
-    /* endian conversion is disabled so the checksum below is computed
-     * over the bytes as read from disk */
-    ret = vhdx_log_read_desc(bs, s, log, &desc_buffer, false);
-    if (ret < 0) {
-        goto free_and_exit;
-    }
-
-    /* checksum over the descriptor sectors first ... */
-    crc = vhdx_checksum_calc(0xffffffff, (void *)desc_buffer,
-                            desc_sectors * VHDX_LOG_SECTOR_SIZE, 4);
-    crc ^= 0xffffffff;
-
-    /* ... then continue it over every remaining (data) sector */
-    buffer = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
-    if (total_sectors > desc_sectors) {
-        for (i = 0; i < total_sectors - desc_sectors; i++) {
-            sectors_read = 0;
-            ret = vhdx_log_read_sectors(bs, log, &sectors_read, buffer,
-                                        1, false);
-            /* a short read leaves *valid false without forcing ret < 0 */
-            if (ret < 0 || sectors_read != 1) {
-                goto free_and_exit;
-            }
-            crc = vhdx_checksum_calc(crc, buffer, VHDX_LOG_SECTOR_SIZE, -1);
-            crc ^= 0xffffffff;
-        }
-    }
-    crc ^= 0xffffffff;
-    if (crc != hdr.checksum) {
-        goto free_and_exit;
-    }
-
-    *valid = true;
-    *entry = hdr;
-    goto free_and_exit;
-
-inc_and_exit:
-    /* unusable header: step over this sector */
-    log->read = vhdx_log_inc_idx(log->read, log->length);
-
-free_and_exit:
-    qemu_vfree(buffer);
-    qemu_vfree(desc_buffer);
-    return ret;
-}
-
-/* Search through the log circular buffer, and find the valid, active
- * log sequence, if any exists.
- *
- * A private copy of s->log is scanned from read index 0 with the write
- * index set to the log length (i.e. the log is treated as full).  Each
- * run of consecutive valid entries forms a sequence; the sequence whose
- * first entry has the highest sequence number is kept as the candidate.
- * The scan stops once the read index has wrapped below the position it
- * had at the start of an iteration.
- *
- * On success *logs receives the candidate (logs->valid is false if none
- * was found) and, if one was found, s->log.sequence is set to the
- * sequence number to use for the next log write.
- *
- * Returns a negative value if validating an entry failed; in that case
- * *logs is not written.
- * */
-static int vhdx_log_search(BlockDriverState *bs, BDRVVHDXState *s,
-                           VHDXLogSequence *logs)
-{
-    int ret = 0;
-    uint32_t tail;
-    bool seq_valid = false;
-    VHDXLogSequence candidate = { 0 };
-    VHDXLogEntryHeader hdr = { 0 };
-    VHDXLogEntries curr_log;
-
-    memcpy(&curr_log, &s->log, sizeof(VHDXLogEntries));
-    curr_log.write = curr_log.length;   /* assume log is full */
-    curr_log.read = 0;
-
-
-    /* now we will go through the whole log sector by sector, until
-     * we find a valid, active log sequence, or reach the end of the
-     * log buffer */
-    for (;;) {
-        uint64_t curr_seq = 0;
-        VHDXLogSequence current = { 0 };
-
-        /* remember where this potential sequence starts */
-        tail = curr_log.read;
-
-        /* curr_seq is 0 here, so the first entry of a sequence is
-         * accepted regardless of its sequence number */
-        ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
-                                      &seq_valid, &hdr);
-        if (ret < 0) {
-            goto exit;
-        }
-
-        if (seq_valid) {
-            current.valid     = true;
-            current.log       = curr_log;
-            current.log.read  = tail;
-            current.log.write = curr_log.read;
-            current.count     = 1;
-            current.hdr       = hdr;
-
-
-            /* extend the sequence for as long as the following entries
-             * validate.
-             * NOTE(review): curr_seq is only assigned at the bottom of
-             * this loop, so the second entry of a sequence is validated
-             * with curr_seq == 0 (no sequence-number check) - confirm
-             * whether that is intended */
-            for (;;) {
-                ret = vhdx_validate_log_entry(bs, s, &curr_log, curr_seq,
-                                              &seq_valid, &hdr);
-                if (ret < 0) {
-                    goto exit;
-                }
-                if (seq_valid == false) {
-                    break;
-                }
-                current.log.write = curr_log.read;
-                current.count++;
-
-                curr_seq = hdr.sequence_number;
-            }
-        }
-
-        /* keep the sequence that starts with the highest sequence number */
-        if (current.valid) {
-            if (candidate.valid == false ||
-                current.hdr.sequence_number > candidate.hdr.sequence_number) {
-                candidate = current;
-            }
-        }
-
-        /* the read index wrapped around: the whole log has been scanned */
-        if (curr_log.read < tail) {
-            break;
-        }
-    }
-
-    *logs = candidate;
-
-    if (candidate.valid) {
-        /* this is the next sequence number, for writes */
-        s->log.sequence = candidate.hdr.sequence_number + 1;
-    }
-
-
-exit:
-    return ret;
-}
-
-/* Locate and, if necessary, replay the VHDX log.
- *
- * Per the VHDX spec, a log that is present must be replayed before the
- * file is opened, even read-only.  This implementation refuses to open a
- * dirty image read-only instead of replaying the log in RAM.
- *
- * @flushed: set to true only when a log sequence was found and flushed
- * @errp:    receives an error description for the read-only refusal
- *
- * Returns -EINVAL for an invalid log offset/length/version, -EPERM when a
- * replay is required but the image is read-only, or whatever negative
- * value vhdx_log_search() / vhdx_log_flush() reported.  A missing log
- * (zero log guid or zero log length) is not an error.
- */
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
-                   Error **errp)
-{
-    VHDXHeader *active = s->headers[s->curr_header];
-    VHDXLogSequence found = { 0 };
-    int rc;
-
-    *flushed = false;
-
-    /* s->log.hdr is freed in vhdx_close() */
-    if (!s->log.hdr) {
-        s->log.hdr = qemu_blockalign(bs, sizeof(VHDXLogEntryHeader));
-    }
-
-    s->log.offset = active->log_offset;
-    s->log.length = active->log_length;
-
-    /* the log must start at or after, and be aligned to, the minimum size */
-    if (s->log.offset < VHDX_LOG_MIN_SIZE ||
-        (s->log.offset % VHDX_LOG_MIN_SIZE) != 0) {
-        return -EINVAL;
-    }
-
-    /* per spec, only log version of 0 is supported */
-    if (active->log_version != 0) {
-        return -EINVAL;
-    }
-
-    /* a zero log guid or a zero log length means there is no replay log */
-    if (guid_eq(active->log_guid, zero_guid) || active->log_length == 0) {
-        return 0;
-    }
-
-    if ((active->log_length % VHDX_LOG_MIN_SIZE) != 0) {
-        return -EINVAL;
-    }
-
-    /* The log is present; find out whether it holds an active sequence of
-     * valid entries, and where */
-    rc = vhdx_log_search(bs, s, &found);
-    if (rc < 0 || !found.valid) {
-        return rc;
-    }
-
-    if (bs->read_only) {
-        error_setg(errp,
-                   "VHDX image file '%s' opened read-only, but "
-                   "contains a log that needs to be replayed",
-                   bs->filename);
-        error_append_hint(errp,  "To replay the log, run:\n"
-                          "qemu-img check -r all '%s'\n",
-                          bs->filename);
-        return -EPERM;
-    }
-
-    /* now flush the log */
-    rc = vhdx_log_flush(bs, s, &found);
-    if (rc >= 0) {
-        *flushed = true;
-    }
-    return rc;
-}
-
-
-
-/* Split one raw 4096-byte sector at @data into a log descriptor and a log
- * data sector: the first 8 bytes go to desc->leading_bytes, the next 4084
- * bytes to sector->data and the last 4 bytes to desc->trailing_bytes.
- * The data sector is stamped with @seq and the data signature, and both
- * structures are then passed through their *_le_export() helpers.
- */
-static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
-                                      VHDXLogDataSector *sector, void *data,
-                                      uint64_t seq)
-{
-    /* 8 + 4084 + 4 = 4096, 1 log sector */
-    const uint8_t *raw = data;
-
-    memcpy(&desc->leading_bytes, raw, 8);
-    cpu_to_le64s(&desc->leading_bytes);
-
-    memcpy(sector->data, raw + 8, 4084);
-
-    memcpy(&desc->trailing_bytes, raw + 8 + 4084, 4);
-    cpu_to_le32s(&desc->trailing_bytes);
-
-    /* the 64-bit sequence number is stored as two 32-bit halves */
-    sector->sequence_low   = (uint32_t) (seq & 0xffffffff);
-    sector->sequence_high  = (uint32_t) (seq >> 32);
-    sector->data_signature = VHDX_LOG_DATA_SIGNATURE;
-
-    vhdx_log_desc_le_export(desc);
-    vhdx_log_data_le_export(sector);
-}
-
-
-/* Build one log entry describing a write of @length bytes from @data to
- * file offset @offset, and append it to the log.
- *
- * The payload is logged in whole 4KB log sectors.  A write that does not
- * start or end on a log-sector boundary therefore has its first and/or
- * last sector merged with the bytes currently stored in the image file,
- * so that replaying the entry leaves the untouched bytes intact.
- *
- * Only a single outstanding entry is supported: the header's log guid
- * must be zero on entry (a new one is generated and written to the
- * headers), otherwise -ENOTSUP is returned.
- *
- * Returns a negative errno on failure:
- *   -EINVAL   @length exceeds the log length, or the log had no room for
- *             the whole entry
- *   -ENOTSUP  a log entry is already pending
- *   other     propagated from the header update, bdrv_getlength(),
- *             bdrv_pread() or vhdx_log_write_sectors()
- */
-static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
-                          void *data, uint32_t length, uint64_t offset)
-{
-    int ret = 0;
-    void *buffer = NULL;
-    void *merged_sector = NULL;
-    void *data_tmp, *sector_write;
-    unsigned int i;
-    int sector_offset;
-    int64_t file_length;
-    uint32_t desc_sectors, sectors, total_length;
-    uint32_t sectors_written = 0;
-    uint32_t aligned_length;
-    uint32_t leading_length = 0;
-    uint32_t trailing_length = 0;
-    uint32_t partial_sectors = 0;
-    uint32_t bytes_written = 0;
-    uint64_t file_offset;
-    VHDXHeader *header;
-    VHDXLogEntryHeader new_hdr;
-    VHDXLogDescriptor *new_desc = NULL;
-    VHDXLogDataSector *data_sector = NULL;
-    MSGUID new_guid = { 0 };
-
-    header = s->headers[s->curr_header];
-
-    /* need to have offset read data, and be on 4096 byte boundary */
-
-    if (length > header->log_length) {
-        /* no log present.  we could create a log here instead of failing */
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    if (guid_eq(header->log_guid, zero_guid)) {
-        vhdx_guid_generate(&new_guid);
-        /* do not write a log entry whose guid never reached the headers */
-        ret = vhdx_update_headers(bs, s, false, &new_guid);
-        if (ret < 0) {
-            goto exit;
-        }
-    } else {
-        /* currently, we require that the log be flushed after
-         * every write. */
-        ret = -ENOTSUP;
-        goto exit;
-    }
-
-    /* 0 is an invalid sequence number, but may also represent the first
-     * log write (or a wrapped seq) */
-    if (s->log.sequence == 0) {
-        s->log.sequence = 1;
-    }
-
-    sector_offset = offset % VHDX_LOG_SECTOR_SIZE;
-    file_offset = (offset / VHDX_LOG_SECTOR_SIZE) * VHDX_LOG_SECTOR_SIZE;
-
-    aligned_length = length;
-
-    /* add in the unaligned head and tail bytes */
-    if (sector_offset) {
-        leading_length = (VHDX_LOG_SECTOR_SIZE - sector_offset);
-        leading_length = leading_length > length ? length : leading_length;
-        aligned_length -= leading_length;
-        partial_sectors++;
-    }
-
-    sectors = aligned_length / VHDX_LOG_SECTOR_SIZE;
-    trailing_length = aligned_length - (sectors * VHDX_LOG_SECTOR_SIZE);
-    if (trailing_length) {
-        partial_sectors++;
-    }
-
-    sectors += partial_sectors;
-
-    /* sectors is now how many sectors the data itself takes, not
-     * including the header and descriptor metadata */
-
-    /* a negative length is an error code and must not end up in the
-     * unsigned file offsets of the entry header */
-    file_length = bdrv_getlength(bs->file->bs);
-    if (file_length < 0) {
-        ret = file_length;
-        goto exit;
-    }
-
-    new_hdr = (VHDXLogEntryHeader) {
-                .signature           = VHDX_LOG_SIGNATURE,
-                .tail                = s->log.tail,
-                .sequence_number     = s->log.sequence,
-                .descriptor_count    = sectors,
-                .reserved            = 0,
-                .flushed_file_offset = file_length,
-                .last_file_offset    = file_length,
-              };
-
-    new_hdr.log_guid = header->log_guid;
-
-    desc_sectors = vhdx_compute_desc_sectors(new_hdr.descriptor_count);
-
-    total_length = (desc_sectors + sectors) * VHDX_LOG_SECTOR_SIZE;
-    new_hdr.entry_length = total_length;
-
-    vhdx_log_entry_hdr_le_export(&new_hdr);
-
-    buffer = qemu_blockalign(bs, total_length);
-    memcpy(buffer, &new_hdr, sizeof(new_hdr));
-
-    /* descriptors follow the entry header; data sectors start after the
-     * descriptor sectors */
-    new_desc = buffer + sizeof(new_hdr);
-    data_sector = buffer + (desc_sectors * VHDX_LOG_SECTOR_SIZE);
-    data_tmp = data;
-
-    /* All log sectors are 4KB, so for any partial sectors we must
-     * merge the data with preexisting data from the final file
-     * destination */
-    merged_sector = qemu_blockalign(bs, VHDX_LOG_SECTOR_SIZE);
-
-    for (i = 0; i < sectors; i++) {
-        new_desc->signature       = VHDX_LOG_DESC_SIGNATURE;
-        new_desc->sequence_number = s->log.sequence;
-        new_desc->file_offset     = file_offset;
-
-        if (i == 0 && leading_length) {
-            /* partial sector at the front of the buffer */
-            ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
-                             VHDX_LOG_SECTOR_SIZE);
-            if (ret < 0) {
-                goto exit;
-            }
-            memcpy(merged_sector + sector_offset, data_tmp, leading_length);
-            bytes_written = leading_length;
-            sector_write = merged_sector;
-        } else if (i == sectors - 1 && trailing_length) {
-            /* partial sector at the end of the buffer: the new data fills
-             * the first trailing_length bytes, so the preserved remainder
-             * must come from the same position within the file sector */
-            ret = bdrv_pread(bs->file->bs,
-                            file_offset + trailing_length,
-                            merged_sector + trailing_length,
-                            VHDX_LOG_SECTOR_SIZE - trailing_length);
-            if (ret < 0) {
-                goto exit;
-            }
-            memcpy(merged_sector, data_tmp, trailing_length);
-            bytes_written = trailing_length;
-            sector_write = merged_sector;
-        } else {
-            bytes_written = VHDX_LOG_SECTOR_SIZE;
-            sector_write = data_tmp;
-        }
-
-        /* populate the raw sector data into the proper structures,
-         * as well as update the descriptor, and convert to proper
-         * endianness */
-        vhdx_log_raw_to_le_sector(new_desc, data_sector, sector_write,
-                                  s->log.sequence);
-
-        data_tmp += bytes_written;
-        data_sector++;
-        new_desc++;
-        file_offset += VHDX_LOG_SECTOR_SIZE;
-    }
-
-    /* checksum covers entire entry, from the log header through the
-     * last data sector */
-    vhdx_update_checksum(buffer, total_length,
-                         offsetof(VHDXLogEntryHeader, checksum));
-
-    /* now write to the log */
-    ret = vhdx_log_write_sectors(bs, &s->log, &sectors_written, buffer,
-                                 desc_sectors + sectors);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    if (sectors_written != desc_sectors + sectors) {
-        /* instead of failing, we could flush the log here */
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    s->log.sequence++;
-    /* write new tail */
-    s->log.tail = s->log.write;
-
-exit:
-    qemu_vfree(buffer);
-    qemu_vfree(merged_sector);
-    return ret;
-}
-
-/* Perform a log write, and then immediately flush the entire log.
- *
- * The image is flushed to disk before the log entry is created and again
- * after it has been written, so the entry never describes data that is
- * less durable than the entry itself.  On success s->log is updated from
- * the flushed sequence.
- *
- * Returns a negative errno if either flush, the log write, or the log
- * replay fails.
- */
-int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
-                             void *data, uint32_t length, uint64_t offset)
-{
-    int ret = 0;
-    VHDXLogSequence logs = { .valid = true,
-                             .count = 1,
-                             .hdr = { 0 } };
-
-
-    /* Make sure data written (new and/or changed blocks) is stable
-     * on disk, before creating log entry */
-    ret = bdrv_flush(bs);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = vhdx_log_write(bs, s, data, length, offset);
-    if (ret < 0) {
-        goto exit;
-    }
-    logs.log = s->log;
-
-    /* Make sure log is stable on disk; replaying a log entry that may
-     * not have reached the disk would defeat the purpose of the log */
-    ret = bdrv_flush(bs);
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = vhdx_log_flush(bs, s, &logs);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    s->log = logs.log;
-
-exit:
-    return ret;
-}
-
diff --git a/qemu/block/vhdx.c b/qemu/block/vhdx.c
deleted file mode 100644
index 2b7b33240..000000000
--- a/qemu/block/vhdx.c
+++ /dev/null
@@ -1,1981 +0,0 @@
-/*
- * Block driver for Hyper-V VHDX Images
- *
- * Copyright (c) 2013 Red Hat, Inc.,
- *
- * Authors:
- *  Jeff Cody <jcody@redhat.com>
- *
- *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
- *  by Microsoft:
- *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/module.h"
-#include "qemu/crc32c.h"
-#include "block/vhdx.h"
-#include "migration/migration.h"
-
-#include <uuid/uuid.h>
-#include <glib.h>
-
-/* Options for VHDX creation: names of the format-specific create options */
-
-#define VHDX_BLOCK_OPT_LOG_SIZE   "log_size"
-#define VHDX_BLOCK_OPT_BLOCK_SIZE "block_size"
-#define VHDX_BLOCK_OPT_ZERO "block_state_zero"
-
-/* Kinds of VHDX image this driver distinguishes */
-typedef enum VHDXImageType {
-    VHDX_TYPE_DYNAMIC = 0,
-    VHDX_TYPE_FIXED,
-    VHDX_TYPE_DIFFERENCING,   /* Currently unsupported */
-} VHDXImageType;
-
-/* Several metadata and region table data entries are identified by
- * guids in a MS-specific GUID format.  The textual form of each GUID
- * (data1-data2-data3-data4[0..1]-data4[2..7]) is given next to its
- * definition to ease comparison with the specification. */
-
-
-/* ------- Known Region Table GUIDs ---------------------- */
-/* 2DC27766-F623-4200-9D64-115E9BFD4A08 */
-static const MSGUID bat_guid =      { .data1 = 0x2dc27766,
-                                      .data2 = 0xf623,
-                                      .data3 = 0x4200,
-                                      .data4 = { 0x9d, 0x64, 0x11, 0x5e,
-                                                 0x9b, 0xfd, 0x4a, 0x08} };
-
-/* 8B7CA206-4790-4B9A-B8FE-575F050F886E */
-static const MSGUID metadata_guid = { .data1 = 0x8b7ca206,
-                                      .data2 = 0x4790,
-                                      .data3 = 0x4b9a,
-                                      .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
-                                                 0x05, 0x0f, 0x88, 0x6e} };
-
-
-
-/* ------- Known Metadata Entry GUIDs ---------------------- */
-/* CAA16737-FA36-4D43-B3B6-33F0AA44E76B */
-static const MSGUID file_param_guid =   { .data1 = 0xcaa16737,
-                                          .data2 = 0xfa36,
-                                          .data3 = 0x4d43,
-                                          .data4 = { 0xb3, 0xb6, 0x33, 0xf0,
-                                                     0xaa, 0x44, 0xe7, 0x6b} };
-
-/* 2FA54224-CD1B-4876-B211-5DBED83BF4B8 */
-static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224,
-                                          .data2 = 0xcd1b,
-                                          .data3 = 0x4876,
-                                          .data4 = { 0xb2, 0x11, 0x5d, 0xbe,
-                                                     0xd8, 0x3b, 0xf4, 0xb8} };
-
-/* BECA12AB-B2E6-4523-93EF-C309E000C746 */
-static const MSGUID page83_guid =       { .data1 = 0xbeca12ab,
-                                          .data2 = 0xb2e6,
-                                          .data3 = 0x4523,
-                                          .data4 = { 0x93, 0xef, 0xc3, 0x09,
-                                                     0xe0, 0x00, 0xc7, 0x46} };
-
-
-/* CDA348C7-445D-4471-9CC9-E9885251C556 */
-static const MSGUID phys_sector_guid =  { .data1 = 0xcda348c7,
-                                          .data2 = 0x445d,
-                                          .data3 = 0x4471,
-                                          .data4 = { 0x9c, 0xc9, 0xe9, 0x88,
-                                                     0x52, 0x51, 0xc5, 0x56} };
-
-/* A8D35F2D-B30B-454D-ABF7-D3D84834AB0C */
-static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d,
-                                            .data2 = 0xb30b,
-                                            .data3 = 0x454d,
-                                            .data4 = { 0xab, 0xf7, 0xd3,
-                                                       0xd8, 0x48, 0x34,
-                                                       0xab, 0x0c} };
-
-/* 8141BF1D-A96F-4709-BA47-F233A8FAAB5F */
-static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
-                                            .data2 = 0xa96f,
-                                            .data3 = 0x4709,
-                                            .data4 = { 0xba, 0x47, 0xf2,
-                                                       0x33, 0xa8, 0xfa,
-                                                       0xab, 0x5f} };
-
-/* Each parent type must have a valid GUID; this is for parent images
- * of type 'VHDX'.  If we were to allow e.g. a QCOW2 parent, we would
- * need to make up our own QCOW2 GUID type.
- * B04AEFB7-D19E-4A81-B789-25B8E9445913; marked unused because nothing
- * in this file references it yet. */
-static const MSGUID parent_vhdx_guid __attribute__((unused))
-                                     = { .data1 = 0xb04aefb7,
-                                         .data2 = 0xd19e,
-                                         .data3 = 0x4a81,
-                                         .data4 = { 0xb7, 0x89, 0x25, 0xb8,
-                                                    0xe9, 0x44, 0x59, 0x13} };
-
-
-/* One bit per known metadata item */
-#define META_FILE_PARAMETER_PRESENT      (1 << 0)
-#define META_VIRTUAL_DISK_SIZE_PRESENT   (1 << 1)
-#define META_PAGE_83_PRESENT             (1 << 2)
-#define META_LOGICAL_SECTOR_SIZE_PRESENT (1 << 3)
-#define META_PHYS_SECTOR_SIZE_PRESENT    (1 << 4)
-#define META_PARENT_LOCATOR_PRESENT      (1 << 5)
-
-/* Every item except the parent locator */
-#define META_ALL_PRESENT                    \
-    (META_FILE_PARAMETER_PRESENT          | \
-     META_VIRTUAL_DISK_SIZE_PRESENT       | \
-     META_PAGE_83_PRESENT                 | \
-     META_LOGICAL_SECTOR_SIZE_PRESENT     | \
-     META_PHYS_SECTOR_SIZE_PRESENT)
-
-
-/* Location of a run of sectors within a payload block: which BAT entry
- * covers it, how much of the block remains, and where it sits in the
- * file.  See the per-field comments for units. */
-typedef struct VHDXSectorInfo {
-    uint32_t bat_idx;       /* BAT entry index */
-    uint32_t sectors_avail; /* sectors available in payload block */
-    uint32_t bytes_left;    /* bytes left in the block after data to r/w */
-    uint32_t bytes_avail;   /* bytes available in payload block */
-    uint64_t file_offset;   /* absolute offset in bytes, in file */
-    uint64_t block_offset;  /* block offset, in bytes */
-} VHDXSectorInfo;
-
-/* Computes the checksum of a buffer and stores it inside that buffer.
- *
- * The checksum field itself is zeroed before the CRC is computed, and is
- * then overwritten with the result.
- *
- * buf: buffer pointer
- * size: size of buffer (must be > crc_offset+4)
- * crc_offset: byte offset in buf of the buffer crc
- *
- * Note: The buffer should have all multi-byte data in little-endian format,
- *       and the resulting checksum is in little endian format.
- *
- * Returns the checksum as it was stored in the buffer.
- */
-uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset)
-{
-    uint8_t *field;
-    uint32_t sum;
-
-    assert(buf != NULL);
-    assert(size > (crc_offset + sizeof(sum)));
-
-    field = buf + crc_offset;
-
-    memset(field, 0, sizeof(sum));
-    sum = crc32c(0xffffffff, buf, size);
-    cpu_to_le32s(&sum);
-    memcpy(field, &sum, sizeof(sum));
-
-    return sum;
-}
-
-/* Continues a CRC-32C computation over buf, starting from the value crc.
- *
- * When crc_offset is greater than zero, the 4 bytes at that offset are
- * treated as zero for the duration of the calculation and restored
- * afterwards; otherwise the buffer is summed as-is.
- */
-uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
-                            int crc_offset)
-{
-    const bool mask_field = crc_offset > 0;
-    uint32_t saved = 0;
-    uint32_t result;
-
-    assert(buf != NULL);
-
-    if (mask_field) {
-        /* stash the stored checksum and blank it out */
-        memcpy(&saved, buf + crc_offset, sizeof(saved));
-        memset(buf + crc_offset, 0, sizeof(saved));
-    }
-
-    result = crc32c(crc, buf, size);
-
-    if (mask_field) {
-        /* put the stored checksum back */
-        memcpy(buf + crc_offset, &saved, sizeof(saved));
-    }
-
-    return result;
-}
-
-/* Validates the checksum of the buffer, with an in-place CRC.
- *
- * Zero is substituted during crc calculation for the original crc field,
- * and the crc field is restored afterwards.  But the buffer will be modified
- * during the calculation, so this may not be suitable for multi-threaded
- * use.
- *
- * crc_offset: byte offset in buf of the buffer crc
- * buf: buffer pointer
- * size: size of buffer (must be > crc_offset+4)
- *
- * returns true if checksum is valid, false otherwise
- */
-bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
-{
-    uint32_t stored;
-
-    assert(buf != NULL);
-    assert(size > (crc_offset + 4));
-
-    /* the stored checksum is little-endian in the buffer */
-    memcpy(&stored, buf + crc_offset, sizeof(stored));
-    stored = le32_to_cpu(stored);
-
-    return vhdx_checksum_calc(0xffffffff, buf, size, crc_offset) == stored;
-}
-
-
-/*
- * Fills *guid with a freshly generated UUID, which is compliant with the
- * MS GUIDs used in the VHDX spec (and elsewhere).
- */
-void vhdx_guid_generate(MSGUID *guid)
-{
-    uuid_t fresh;
-
-    assert(guid != NULL);
-
-    uuid_generate(fresh);
-    /* the raw UUID bytes are copied over the GUID structure as-is */
-    memcpy(guid, fresh, sizeof(*guid));
-}
-
-/* Check whether [start, start + length) overlaps any region registered
- * so far.  Returns 0 if it does not, -EINVAL if it does. */
-static int vhdx_region_check(BDRVVHDXState *s, uint64_t start, uint64_t length)
-{
-    const uint64_t end = start + length;
-    VHDXRegionEntry *known;
-
-    QLIST_FOREACH(known, &s->regions, entries) {
-        /* two half-open ranges overlap when each starts before the
-         * other one ends */
-        if (start < known->end && end > known->start) {
-            return -EINVAL;
-        }
-    }
-
-    return 0;
-}
-
-/* Record [start, start + length) in s->regions so that later calls to
- * vhdx_region_check() can detect overlaps with it */
-static void vhdx_region_register(BDRVVHDXState *s,
-                                 uint64_t start, uint64_t length)
-{
-    VHDXRegionEntry *region = g_new0(VHDXRegionEntry, 1);
-
-    region->start = start;
-    region->end   = start + length;
-
-    QLIST_INSERT_HEAD(&s->regions, region, entries);
-}
-
-/* Empty s->regions, freeing every entry */
-static void vhdx_region_unregister_all(BDRVVHDXState *s)
-{
-    while (!QLIST_EMPTY(&s->regions)) {
-        VHDXRegionEntry *head = QLIST_FIRST(&s->regions);
-
-        QLIST_REMOVE(head, entries);
-        g_free(head);
-    }
-}
-
-/* Cache the number of trailing zero bits of each size field in the
- * corresponding *_bits field.
- * NOTE(review): presumably every size is a power of two, making these
- * values usable as shift counts - confirm against the callers. */
-static void vhdx_set_shift_bits(BDRVVHDXState *s)
-{
-    /* fields handled with the 32-bit helper */
-    s->block_size_bits          = ctz32(s->block_size);
-    s->sectors_per_block_bits   = ctz32(s->sectors_per_block);
-    s->logical_sector_size_bits = ctz32(s->logical_sector_size);
-
-    /* chunk_ratio is the only field handled with the 64-bit helper */
-    s->chunk_ratio_bits         = ctz64(s->chunk_ratio);
-}
-
-/*
- * Per the MS VHDX Specification, for every VHDX file:
- *      - The header section is fixed size - 1 MB
- *      - The header section is always the first "object"
- *      - The first 64KB of the header is the File Identifier
- *      - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
- *      - The following 512 bytes constitute a UTF-16 string identifying the
- *        software that created the file, and is optional and diagnostic only.
- *
- *  Therefore, we probe by looking for the vhdxfile signature "vhdxfile".
- *
- *  Returns 100 when buf starts with the signature, 0 otherwise (including
- *  when fewer than 8 bytes are available).  filename is not used.
- */
-static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    static const char signature[] = "vhdxfile";
-    const int signature_len = 8;
-
-    if (buf_size < signature_len) {
-        return 0;
-    }
-
-    return memcmp(buf, signature, signature_len) == 0 ? 100 : 0;
-}
-
-/*
- * Writes the header to the specified offset.
- *
- * This will optionally read in buffer data from disk (otherwise zero-fill),
- * and then update the header checksum.  Header is converted to proper
- * endianness before being written to the specified file offset
- *
- * bs_file: block device the header is written to
- * hdr:     header to write; must not be NULL
- * offset:  file offset of the header
- * read:    if true, the existing VHDX_HEADER_SIZE bytes at offset are read
- *          first so that the bytes following the VHDXHeader structure
- *          take part in the checksum unchanged; if false they are taken
- *          to be zero
- *
- * Returns the result of bdrv_pwrite_sync(), or the negative result of
- * bdrv_pread() if the initial read failed.
- */
-static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
-                             uint64_t offset, bool read)
-{
-    uint8_t *buffer = NULL;
-    int ret;
-    VHDXHeader *header_le;
-
-    assert(bs_file != NULL);
-    assert(hdr != NULL);
-
-    /* the header checksum is not over just the packed size of VHDXHeader,
-     * but rather over the entire 'reserved' range for the header, which is
-     * 4KB (VHDX_HEADER_SIZE). */
-
-    buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
-    if (read) {
-        /* if true, we can't assume the extra reserved bytes are 0 */
-        ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
-        if (ret < 0) {
-            goto exit;
-        }
-    } else {
-        memset(buffer, 0, VHDX_HEADER_SIZE);
-    }
-
-    /* overwrite the actual VHDXHeader portion */
-    header_le = (VHDXHeader *)buffer;
-    memcpy(header_le, hdr, sizeof(VHDXHeader));
-    /* presumably converts hdr into its little-endian form in header_le,
-     * leaving hdr itself untouched - TODO confirm */
-    vhdx_header_le_export(hdr, header_le);
-    /* checksum spans the whole 4KB buffer ... */
-    vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
-                         offsetof(VHDXHeader, checksum));
-    /* ... but only the VHDXHeader structure itself is written back */
-    ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
-
-exit:
-    qemu_vfree(buffer);
-    return ret;
-}
-
-/* Update one VHDX header
- *
- * This follows the VHDX spec procedures for header updates: the header
- * that is not currently active is given a sequence number one higher than
- * the active header's, is written to disk, and then becomes the active
- * header.
- *
- * generate_data_write_guid: if true, a new data write guid is generated
- * log_guid: if non-NULL, copied into the header's log guid
- *
- * Returns the (negative) result of vhdx_write_header() on failure, in
- * which case the active header index is left unchanged.
- */
-static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
-                              bool generate_data_write_guid, MSGUID *log_guid)
-{
-    /* operate on the non-current header */
-    const bool first_is_active = (s->curr_header == 0);
-    const int target_idx = first_is_active ? 1 : 0;
-    const uint64_t target_offset = first_is_active ? VHDX_HEADER2_OFFSET
-                                                   : VHDX_HEADER1_OFFSET;
-    VHDXHeader *current = s->headers[s->curr_header];
-    VHDXHeader *target = s->headers[target_idx];
-    int rc;
-
-    target->sequence_number = current->sequence_number + 1;
-
-    /* a new file guid must be generated before any file write, including
-     * headers */
-    target->file_write_guid = s->session_guid;
-
-    /* a new data guid only needs to be generated before any guest-visible
-     * writes (i.e. something observable via virtual disk read) */
-    if (generate_data_write_guid) {
-        vhdx_guid_generate(&target->data_write_guid);
-    }
-
-    /* update the log guid if present */
-    if (log_guid) {
-        target->log_guid = *log_guid;
-    }
-
-    rc = vhdx_write_header(bs->file->bs, target, target_offset, true);
-    if (rc >= 0) {
-        /* the freshly written header is now the active one */
-        s->curr_header = target_idx;
-    }
-
-    return rc;
-}
-
-/*
- * The VHDX spec calls for header updates to be performed twice, so that both
- * the current and non-current header have valid info.
- *
- * Stops after the first failing update and returns its result; otherwise
- * returns the result of the second update.
- */
-int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s,
-                        bool generate_data_write_guid, MSGUID *log_guid)
-{
-    int rc = 0;
-    int pass;
-
-    for (pass = 0; pass < 2; pass++) {
-        rc = vhdx_update_header(bs, s, generate_data_write_guid, log_guid);
-        if (rc < 0) {
-            break;
-        }
-    }
-
-    return rc;
-}
-
-/* Reads both header copies from the VHDX file header section and selects
- * the active one.
- *
- * A header copy is considered valid when its checksum (computed over the
- * whole VHDX_HEADER_SIZE region) matches, its signature is
- * VHDX_HEADER_SIGNATURE and its version is 1.  If both copies are valid,
- * the one with the higher sequence number becomes current; equal sequence
- * numbers are only accepted when the two headers are byte-identical.
- *
- * On success s->headers[] holds both headers (freed in vhdx_close()),
- * s->curr_header is set and the log region of the current header is
- * registered for overlap checks.
- *
- * On failure errp is set, both headers are freed and s->headers[] is
- * reset to NULL.
- */
-static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
-                              Error **errp)
-{
-    int ret;
-    VHDXHeader *header1;
-    VHDXHeader *header2;
-    bool h1_valid = false;
-    bool h2_valid = false;
-    uint64_t h1_seq = 0;
-    uint64_t h2_seq = 0;
-    uint8_t *buffer;
-
-    /* header1 & header2 are freed in vhdx_close() */
-    header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
-    header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
-
-    buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
-
-    s->headers[0] = header1;
-    s->headers[1] = header2;
-
-    /* We have to read the whole VHDX_HEADER_SIZE instead of
-     * sizeof(VHDXHeader), because the checksum is over the whole
-     * region */
-    ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
-                     VHDX_HEADER_SIZE);
-    if (ret < 0) {
-        goto fail;
-    }
-    /* copy over just the relevant portion that we need */
-    memcpy(header1, buffer, sizeof(VHDXHeader));
-
-    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
-        vhdx_header_le_import(header1);
-        if (header1->signature == VHDX_HEADER_SIGNATURE &&
-            header1->version == 1) {
-            h1_seq = header1->sequence_number;
-            h1_valid = true;
-        }
-    }
-
-    ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
-                     VHDX_HEADER_SIZE);
-    if (ret < 0) {
-        goto fail;
-    }
-    /* copy over just the relevant portion that we need */
-    memcpy(header2, buffer, sizeof(VHDXHeader));
-
-    if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4)) {
-        vhdx_header_le_import(header2);
-        if (header2->signature == VHDX_HEADER_SIGNATURE &&
-            header2->version == 1) {
-            h2_seq = header2->sequence_number;
-            h2_valid = true;
-        }
-    }
-
-    /* Both reads succeeded, so from here on any failure is a format
-     * problem.  ret still holds the non-negative result of the last
-     * bdrv_pread(); replace it so the fail path reports a real errno. */
-    ret = -EINVAL;
-
-    /* If there is only 1 valid header (or no valid headers), we
-     * don't care what the sequence numbers are */
-    if (h1_valid && !h2_valid) {
-        s->curr_header = 0;
-    } else if (!h1_valid && h2_valid) {
-        s->curr_header = 1;
-    } else if (!h1_valid && !h2_valid) {
-        goto fail;
-    } else {
-        /* If both headers are valid, then we choose the active one by the
-         * highest sequence number.  If the sequence numbers are equal, that is
-         * invalid */
-        if (h1_seq > h2_seq) {
-            s->curr_header = 0;
-        } else if (h2_seq > h1_seq) {
-            s->curr_header = 1;
-        } else {
-            /* The Microsoft Disk2VHD tool will create 2 identical
-             * headers, with identical sequence numbers.  If the headers are
-             * identical, don't consider the file corrupt */
-            if (!memcmp(header1, header2, sizeof(VHDXHeader))) {
-                s->curr_header = 0;
-            } else {
-                goto fail;
-            }
-        }
-    }
-
-    vhdx_region_register(s, s->headers[s->curr_header]->log_offset,
-                            s->headers[s->curr_header]->log_length);
-    goto exit;
-
-fail:
-    error_setg_errno(errp, -ret, "No valid VHDX header found");
-    qemu_vfree(header1);
-    qemu_vfree(header2);
-    s->headers[0] = NULL;
-    s->headers[1] = NULL;
-exit:
-    qemu_vfree(buffer);
-}
-
-
-/* Reads and validates the region table, recording the BAT and metadata
- * region entries in s->bat_rt and s->metadata_rt.
- *
- * The table header is copied into s->rt.  Every entry is checked for
- * overlap against the regions registered so far and is then registered
- * itself.
- *
- * Returns 0 on success, or a negative errno:
- *   -EINVAL   bad checksum or signature, more than 2047 entries, an
- *             overlapping region, a duplicate BAT/metadata entry, or a
- *             missing BAT/metadata entry
- *   -ENOTSUP  an unrecognized entry is flagged as required
- *   other     propagated from bdrv_pread()
- */
-static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
-{
-    int ret = 0;
-    uint8_t *buffer;
-    int offset = 0;
-    VHDXRegionTableEntry rt_entry;
-    uint32_t i;
-    bool bat_rt_found = false;
-    bool metadata_rt_found = false;
-
-    /* We have to read the whole 64KB block, because the crc32 is over the
-     * whole block */
-    buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
-
-    ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
-                     VHDX_HEADER_BLOCK_SIZE);
-    if (ret < 0) {
-        goto fail;
-    }
-    memcpy(&s->rt, buffer, sizeof(s->rt));
-    /* the entries follow directly after the table header */
-    offset += sizeof(s->rt);
-
-    /* the checksum field sits at byte offset 4 of the block */
-    if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4)) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    vhdx_region_header_le_import(&s->rt);
-
-    if (s->rt.signature != VHDX_REGION_SIGNATURE) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-
-    /* Per spec, maximum region table entry count is 2047 */
-    if (s->rt.entry_count > 2047) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    for (i = 0; i < s->rt.entry_count; i++) {
-        memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
-        offset += sizeof(rt_entry);
-
-        vhdx_region_entry_le_import(&rt_entry);
-
-        /* check for region overlap between these entries, and any
-         * other memory regions in the file */
-        ret = vhdx_region_check(s, rt_entry.file_offset, rt_entry.length);
-        if (ret < 0) {
-            goto fail;
-        }
-
-        vhdx_region_register(s, rt_entry.file_offset, rt_entry.length);
-
-        /* see if we recognize the entry */
-        if (guid_eq(rt_entry.guid, bat_guid)) {
-            /* must be unique; if we have already found it this is invalid */
-            if (bat_rt_found) {
-                ret = -EINVAL;
-                goto fail;
-            }
-            bat_rt_found = true;
-            s->bat_rt = rt_entry;
-            continue;
-        }
-
-        if (guid_eq(rt_entry.guid, metadata_guid)) {
-            /* must be unique; if we have already found it this is invalid */
-            if (metadata_rt_found) {
-                ret = -EINVAL;
-                goto fail;
-            }
-            metadata_rt_found = true;
-            s->metadata_rt = rt_entry;
-            continue;
-        }
-
-        /* unknown entry: only tolerated when it is not marked required */
-        if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
-            /* cannot read vhdx file - required region table entry that
-             * we do not understand.  per spec, we must fail to open */
-            ret = -ENOTSUP;
-            goto fail;
-        }
-    }
-
-    if (!bat_rt_found || !metadata_rt_found) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* bdrv_pread() may have left a positive value in ret */
-    ret = 0;
-
-    /* despite its name, this label is also the success exit */
-fail:
-    qemu_vfree(buffer);
-    return ret;
-}
-
-
-
-/* Metadata initial parser
- *
- * This loads all the metadata entry fields.  This may cause additional
- * fields to be processed (e.g. parent locator, etc..).
- *
- * There are 5 Metadata items that are always required:
- *      - File Parameters (block size, has a parent)
- *      - Virtual Disk Size (size, in bytes, of the virtual drive)
- *      - Page 83 Data (scsi page 83 guid)
- *      - Logical Sector Size (logical sector size in bytes, either 512 or
- *                             4096.  We only support 512 currently)
- *      - Physical Sector Size (512 or 4096)
- *
- * Also, if the File Parameters indicate this is a differencing file,
- * we must also look for the Parent Locator metadata item.
- */
-static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
-{
-    int ret = 0;
-    uint8_t *buffer;
-    int offset = 0;
-    uint32_t i = 0;
-    VHDXMetadataTableEntry md_entry;
-
-    buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
-
-    ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
-                     VHDX_METADATA_TABLE_MAX_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-    memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
-    offset += sizeof(s->metadata_hdr);
-
-    vhdx_metadata_header_le_import(&s->metadata_hdr);
-
-    if (s->metadata_hdr.signature != VHDX_METADATA_SIGNATURE) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    s->metadata_entries.present = 0;
-
-    if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
-        (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    for (i = 0; i < s->metadata_hdr.entry_count; i++) {
-        memcpy(&md_entry, buffer + offset, sizeof(md_entry));
-        offset += sizeof(md_entry);
-
-        vhdx_metadata_entry_le_import(&md_entry);
-
-        if (guid_eq(md_entry.item_id, file_param_guid)) {
-            if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
-                ret = -EINVAL;
-                goto exit;
-            }
-            s->metadata_entries.file_parameters_entry = md_entry;
-            s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
-            continue;
-        }
-
-        if (guid_eq(md_entry.item_id, virtual_size_guid)) {
-            if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
-                ret = -EINVAL;
-                goto exit;
-            }
-            s->metadata_entries.virtual_disk_size_entry = md_entry;
-            s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
-            continue;
-        }
-
-        if (guid_eq(md_entry.item_id, page83_guid)) {
-            if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
-                ret = -EINVAL;
-                goto exit;
-            }
-            s->metadata_entries.page83_data_entry = md_entry;
-            s->metadata_entries.present |= META_PAGE_83_PRESENT;
-            continue;
-        }
-
-        if (guid_eq(md_entry.item_id, logical_sector_guid)) {
-            if (s->metadata_entries.present &
-                META_LOGICAL_SECTOR_SIZE_PRESENT) {
-                ret = -EINVAL;
-                goto exit;
-            }
-            s->metadata_entries.logical_sector_size_entry = md_entry;
-            s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
-            continue;
-        }
-
-        if (guid_eq(md_entry.item_id, phys_sector_guid)) {
-            if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
-                ret = -EINVAL;
-                goto exit;
-            }
-            s->metadata_entries.phys_sector_size_entry = md_entry;
-            s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
-            continue;
-        }
-
-        if (guid_eq(md_entry.item_id, parent_locator_guid)) {
-            if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
-                ret = -EINVAL;
-                goto exit;
-            }
-            s->metadata_entries.parent_locator_entry = md_entry;
-            s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
-            continue;
-        }
-
-        if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
-            /* cannot read vhdx file - required region table entry that
-             * we do not understand.  per spec, we must fail to open */
-            ret = -ENOTSUP;
-            goto exit;
-        }
-    }
-
-    if (s->metadata_entries.present != META_ALL_PRESENT) {
-        ret = -ENOTSUP;
-        goto exit;
-    }
-
-    ret = bdrv_pread(bs->file->bs,
-                     s->metadata_entries.file_parameters_entry.offset
-                                         + s->metadata_rt.file_offset,
-                     &s->params,
-                     sizeof(s->params));
-
-    if (ret < 0) {
-        goto exit;
-    }
-
-    le32_to_cpus(&s->params.block_size);
-    le32_to_cpus(&s->params.data_bits);
-
-
-    /* We now have the file parameters, so we can tell if this is a
-     * differencing file (i.e.. has_parent), is dynamic or fixed
-     * sized (leave_blocks_allocated), and the block size */
-
-    /* The parent locator required iff the file parameters has_parent set */
-    if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
-        if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
-            /* TODO: parse  parent locator fields */
-            ret = -ENOTSUP; /* temp, until differencing files are supported */
-            goto exit;
-        } else {
-            /* if has_parent is set, but there is not parent locator present,
-             * then that is an invalid combination */
-            ret = -EINVAL;
-            goto exit;
-        }
-    }
-
-    /* determine virtual disk size, logical sector size,
-     * and phys sector size */
-
-    ret = bdrv_pread(bs->file->bs,
-                     s->metadata_entries.virtual_disk_size_entry.offset
-                                           + s->metadata_rt.file_offset,
-                     &s->virtual_disk_size,
-                     sizeof(uint64_t));
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = bdrv_pread(bs->file->bs,
-                     s->metadata_entries.logical_sector_size_entry.offset
-                                             + s->metadata_rt.file_offset,
-                     &s->logical_sector_size,
-                     sizeof(uint32_t));
-    if (ret < 0) {
-        goto exit;
-    }
-    ret = bdrv_pread(bs->file->bs,
-                     s->metadata_entries.phys_sector_size_entry.offset
-                                          + s->metadata_rt.file_offset,
-                     &s->physical_sector_size,
-                     sizeof(uint32_t));
-    if (ret < 0) {
-        goto exit;
-    }
-
-    le64_to_cpus(&s->virtual_disk_size);
-    le32_to_cpus(&s->logical_sector_size);
-    le32_to_cpus(&s->physical_sector_size);
-
-    if (s->params.block_size < VHDX_BLOCK_SIZE_MIN ||
-        s->params.block_size > VHDX_BLOCK_SIZE_MAX) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    /* only 2 supported sector sizes */
-    if (s->logical_sector_size != 512 && s->logical_sector_size != 4096) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    /* Both block_size and sector_size are guaranteed powers of 2, below.
-       Due to range checks above, s->sectors_per_block can never be < 256 */
-    s->sectors_per_block = s->params.block_size / s->logical_sector_size;
-    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
-                     (uint64_t)s->logical_sector_size /
-                     (uint64_t)s->params.block_size;
-
-    /* These values are ones we will want to use for division / multiplication
-     * later on, and they are all guaranteed (per the spec) to be powers of 2,
-     * so we can take advantage of that for shift operations during
-     * reads/writes */
-    if (s->logical_sector_size & (s->logical_sector_size - 1)) {
-        ret = -EINVAL;
-        goto exit;
-    }
-    if (s->sectors_per_block & (s->sectors_per_block - 1)) {
-        ret = -EINVAL;
-        goto exit;
-    }
-    if (s->chunk_ratio & (s->chunk_ratio - 1)) {
-        ret = -EINVAL;
-        goto exit;
-    }
-    s->block_size = s->params.block_size;
-    if (s->block_size & (s->block_size - 1)) {
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    vhdx_set_shift_bits(s);
-
-    ret = 0;
-
-exit:
-    qemu_vfree(buffer);
-    return ret;
-}
-
-/*
- * Calculate the number of BAT entries, including sector
- * bitmap entries.
- */
-static void vhdx_calc_bat_entries(BDRVVHDXState *s)
-{
-    uint32_t data_blocks_cnt, bitmap_blocks_cnt;
-
-    data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size);
-    bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio);
-
-    if (s->parent_entries) {
-        s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
-    } else {
-        s->bat_entries = data_blocks_cnt +
-                         ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
-    }
-
-}
-
-static void vhdx_close(BlockDriverState *bs)
-{
-    BDRVVHDXState *s = bs->opaque;
-    qemu_vfree(s->headers[0]);
-    s->headers[0] = NULL;
-    qemu_vfree(s->headers[1]);
-    s->headers[1] = NULL;
-    qemu_vfree(s->bat);
-    s->bat = NULL;
-    qemu_vfree(s->parent_entries);
-    s->parent_entries = NULL;
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
-    qemu_vfree(s->log.hdr);
-    s->log.hdr = NULL;
-    vhdx_region_unregister_all(s);
-}
-
-static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVVHDXState *s = bs->opaque;
-    int ret = 0;
-    uint32_t i;
-    uint64_t signature;
-    Error *local_err = NULL;
-
-    s->bat = NULL;
-    s->first_visible_write = true;
-
-    qemu_co_mutex_init(&s->lock);
-    QLIST_INIT(&s->regions);
-
-    /* validate the file signature */
-    ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
-    if (ret < 0) {
-        goto fail;
-    }
-    if (memcmp(&signature, "vhdxfile", 8)) {
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* This is used for any header updates, for the file_write_guid.
-     * The spec dictates that a new value should be used for the first
-     * header update */
-    vhdx_guid_generate(&s->session_guid);
-
-    vhdx_parse_header(bs, s, &local_err);
-    if (local_err != NULL) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = vhdx_parse_log(bs, s, &s->log_replayed_on_open, errp);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = vhdx_open_region_tables(bs, s);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    ret = vhdx_parse_metadata(bs, s);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    s->block_size = s->params.block_size;
-
-    /* the VHDX spec dictates that virtual_disk_size is always a multiple of
-     * logical_sector_size */
-    bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
-
-    vhdx_calc_bat_entries(s);
-
-    s->bat_offset = s->bat_rt.file_offset;
-
-    if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
-        /* BAT allocation is not large enough for all entries */
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    /* s->bat is freed in vhdx_close() */
-    s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length);
-    if (s->bat == NULL) {
-        ret = -ENOMEM;
-        goto fail;
-    }
-
-    ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    uint64_t payblocks = s->chunk_ratio;
-    /* endian convert, and verify populated BAT field file offsets against
-     * region table and log entries */
-    for (i = 0; i < s->bat_entries; i++) {
-        le64_to_cpus(&s->bat[i]);
-        if (payblocks--) {
-            /* payload bat entries */
-            if ((s->bat[i] & VHDX_BAT_STATE_BIT_MASK) ==
-                    PAYLOAD_BLOCK_FULLY_PRESENT) {
-                ret = vhdx_region_check(s, s->bat[i] & VHDX_BAT_FILE_OFF_MASK,
-                                        s->block_size);
-                if (ret < 0) {
-                    goto fail;
-                }
-            }
-        } else {
-            payblocks = s->chunk_ratio;
-            /* Once differencing files are supported, verify sector bitmap
-             * blocks here */
-        }
-    }
-
-    if (flags & BDRV_O_RDWR) {
-        ret = vhdx_update_headers(bs, s, false, NULL);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
-    /* TODO: differencing files */
-
-    /* Disable migration when VHDX images are used */
-    error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
-
-    return 0;
-fail:
-    vhdx_close(bs);
-    return ret;
-}
-
-static int vhdx_reopen_prepare(BDRVReopenState *state,
-                               BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-
-/*
- * Perform sector to block offset translations, to get various
- * sector and file offsets into the image.  See VHDXSectorInfo
- */
-static void vhdx_block_translate(BDRVVHDXState *s, int64_t sector_num,
-                                 int nb_sectors, VHDXSectorInfo *sinfo)
-{
-    uint32_t block_offset;
-
-    sinfo->bat_idx = sector_num >> s->sectors_per_block_bits;
-    /* effectively a modulo - this gives us the offset into the block
-     * (in sector sizes) for our sector number */
-    block_offset = sector_num - (sinfo->bat_idx << s->sectors_per_block_bits);
-    /* the chunk ratio gives us the interleaving of the sector
-     * bitmaps, so we need to advance our page block index by the
-     * sector bitmaps entry number */
-    sinfo->bat_idx += sinfo->bat_idx >> s->chunk_ratio_bits;
-
-    /* the number of sectors we can read/write in this cycle */
-    sinfo->sectors_avail = s->sectors_per_block - block_offset;
-
-    sinfo->bytes_left = sinfo->sectors_avail << s->logical_sector_size_bits;
-
-    if (sinfo->sectors_avail > nb_sectors) {
-        sinfo->sectors_avail = nb_sectors;
-    }
-
-    sinfo->bytes_avail = sinfo->sectors_avail << s->logical_sector_size_bits;
-
-    sinfo->file_offset = s->bat[sinfo->bat_idx] & VHDX_BAT_FILE_OFF_MASK;
-
-    sinfo->block_offset = block_offset << s->logical_sector_size_bits;
-
-    /* The file offset must be past the header section, so must be > 0 */
-    if (sinfo->file_offset == 0) {
-        return;
-    }
-
-    /* block offset is the offset in vhdx logical sectors, in
-     * the payload data block. Convert that to a byte offset
-     * in the block, and add in the payload data block offset
-     * in the file, in bytes, to get the final read address */
-
-    sinfo->file_offset += sinfo->block_offset;
-}
-
-
-static int vhdx_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVVHDXState *s = bs->opaque;
-
-    bdi->cluster_size = s->block_size;
-
-    bdi->unallocated_blocks_are_zero =
-        (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) == 0;
-
-    return 0;
-}
-
-
-static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
-{
-    BDRVVHDXState *s = bs->opaque;
-    int ret = 0;
-    VHDXSectorInfo sinfo;
-    uint64_t bytes_done = 0;
-    QEMUIOVector hd_qiov;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    qemu_co_mutex_lock(&s->lock);
-
-    while (nb_sectors > 0) {
-        /* We are a differencing file, so we need to inspect the sector bitmap
-         * to see if we have the data or not */
-        if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
-            /* not supported yet */
-            ret = -ENOTSUP;
-            goto exit;
-        } else {
-            vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
-
-            qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_concat(&hd_qiov, qiov,  bytes_done, sinfo.bytes_avail);
-
-            /* check the payload block state */
-            switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
-            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
-            case PAYLOAD_BLOCK_UNDEFINED:
-            case PAYLOAD_BLOCK_UNMAPPED:
-            case PAYLOAD_BLOCK_UNMAPPED_v095:
-            case PAYLOAD_BLOCK_ZERO:
-                /* return zero */
-                qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
-                break;
-            case PAYLOAD_BLOCK_FULLY_PRESENT:
-                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_readv(bs->file->bs,
-                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
-                                    sinfo.sectors_avail, &hd_qiov);
-                qemu_co_mutex_lock(&s->lock);
-                if (ret < 0) {
-                    goto exit;
-                }
-                break;
-            case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
-                /* we don't yet support difference files, fall through
-                 * to error */
-            default:
-                ret = -EIO;
-                goto exit;
-                break;
-            }
-            nb_sectors -= sinfo.sectors_avail;
-            sector_num += sinfo.sectors_avail;
-            bytes_done += sinfo.bytes_avail;
-        }
-    }
-    ret = 0;
-exit:
-    qemu_co_mutex_unlock(&s->lock);
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-/*
- * Allocate a new payload block at the end of the file.
- *
- * Allocation will happen at 1MB alignment inside the file
- *
- * Returns the file offset start of the new payload block
- */
-static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
-                                    uint64_t *new_offset)
-{
-    *new_offset = bdrv_getlength(bs->file->bs);
-
-    /* per the spec, the address for a block is in units of 1MB */
-    *new_offset = ROUND_UP(*new_offset, 1024 * 1024);
-
-    return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
-}
-
-/*
- * Update the BAT table entry with the new file offset, and the new entry
- * state */
-static void vhdx_update_bat_table_entry(BlockDriverState *bs, BDRVVHDXState *s,
-                                       VHDXSectorInfo *sinfo,
-                                       uint64_t *bat_entry_le,
-                                       uint64_t *bat_offset, int state)
-{
-    /* The BAT entry is a uint64, with 44 bits for the file offset in units of
-     * 1MB, and 3 bits for the block state. */
-    if ((state == PAYLOAD_BLOCK_ZERO)        ||
-        (state == PAYLOAD_BLOCK_UNDEFINED)   ||
-        (state == PAYLOAD_BLOCK_NOT_PRESENT) ||
-        (state == PAYLOAD_BLOCK_UNMAPPED)) {
-        s->bat[sinfo->bat_idx]  = 0;  /* For PAYLOAD_BLOCK_ZERO, the
-                                         FileOffsetMB field is denoted as
-                                         'reserved' in the v1.0 spec.  If it is
-                                         non-zero, MS Hyper-V will fail to read
-                                         the disk image */
-    } else {
-        s->bat[sinfo->bat_idx]  = sinfo->file_offset;
-    }
-
-    s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK;
-
-    *bat_entry_le = cpu_to_le64(s->bat[sinfo->bat_idx]);
-    *bat_offset = s->bat_offset + sinfo->bat_idx * sizeof(VHDXBatEntry);
-
-}
-
-/* Per the spec, on the first write of guest-visible data to the file the
- * data write guid must be updated in the header */
-int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s)
-{
-    int ret = 0;
-    if (s->first_visible_write) {
-        s->first_visible_write = false;
-        ret = vhdx_update_headers(bs, s, true, NULL);
-    }
-    return ret;
-}
-
-static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
-                                      int nb_sectors, QEMUIOVector *qiov)
-{
-    int ret = -ENOTSUP;
-    BDRVVHDXState *s = bs->opaque;
-    VHDXSectorInfo sinfo;
-    uint64_t bytes_done = 0;
-    uint64_t bat_entry = 0;
-    uint64_t bat_entry_offset = 0;
-    QEMUIOVector hd_qiov;
-    struct iovec iov1 = { 0 };
-    struct iovec iov2 = { 0 };
-    int sectors_to_write;
-    int bat_state;
-    uint64_t bat_prior_offset = 0;
-    bool bat_update = false;
-
-    qemu_iovec_init(&hd_qiov, qiov->niov);
-
-    qemu_co_mutex_lock(&s->lock);
-
-    ret = vhdx_user_visible_write(bs, s);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    while (nb_sectors > 0) {
-        bool use_zero_buffers = false;
-        bat_update = false;
-        if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
-            /* not supported yet */
-            ret = -ENOTSUP;
-            goto exit;
-        } else {
-            vhdx_block_translate(s, sector_num, nb_sectors, &sinfo);
-            sectors_to_write = sinfo.sectors_avail;
-
-            qemu_iovec_reset(&hd_qiov);
-            /* check the payload block state */
-            bat_state = s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK;
-            switch (bat_state) {
-            case PAYLOAD_BLOCK_ZERO:
-                /* in this case, we need to preserve zero writes for
-                 * data that is not part of this write, so we must pad
-                 * the rest of the buffer to zeroes */
-
-                /* if we are on a posix system with ftruncate() that extends
-                 * a file, then it is zero-filled for us.  On Win32, the raw
-                 * layer uses SetFilePointer and SetFileEnd, which does not
-                 * zero fill AFAIK */
-
-                /* Queue another write of zero buffers if the underlying file
-                 * does not zero-fill on file extension */
-
-                if (bdrv_has_zero_init(bs->file->bs) == 0) {
-                    use_zero_buffers = true;
-
-                    /* zero fill the front, if any */
-                    if (sinfo.block_offset) {
-                        iov1.iov_len = sinfo.block_offset;
-                        iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
-                        memset(iov1.iov_base, 0, iov1.iov_len);
-                        qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
-                                              iov1.iov_len);
-                        sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
-                    }
-
-                    /* our actual data */
-                    qemu_iovec_concat(&hd_qiov, qiov,  bytes_done,
-                                      sinfo.bytes_avail);
-
-                    /* zero fill the back, if any */
-                    if ((sinfo.bytes_avail - sinfo.block_offset) <
-                         s->block_size) {
-                        iov2.iov_len = s->block_size -
-                                      (sinfo.bytes_avail + sinfo.block_offset);
-                        iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
-                        memset(iov2.iov_base, 0, iov2.iov_len);
-                        qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
-                                              iov2.iov_len);
-                        sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
-                    }
-                }
-                /* fall through */
-            case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
-            case PAYLOAD_BLOCK_UNMAPPED:
-            case PAYLOAD_BLOCK_UNMAPPED_v095:
-            case PAYLOAD_BLOCK_UNDEFINED:
-                bat_prior_offset = sinfo.file_offset;
-                ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
-                if (ret < 0) {
-                    goto exit;
-                }
-                /* once we support differencing files, this may also be
-                 * partially present */
-                /* update block state to the newly specified state */
-                vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
-                                            &bat_entry_offset,
-                                            PAYLOAD_BLOCK_FULLY_PRESENT);
-                bat_update = true;
-                /* since we just allocated a block, file_offset is the
-                 * beginning of the payload block. It needs to be the
-                 * write address, which includes the offset into the block */
-                if (!use_zero_buffers) {
-                    sinfo.file_offset += sinfo.block_offset;
-                }
-                /* fall through */
-            case PAYLOAD_BLOCK_FULLY_PRESENT:
-                /* if the file offset address is in the header zone,
-                 * there is a problem */
-                if (sinfo.file_offset < (1024 * 1024)) {
-                    ret = -EFAULT;
-                    goto error_bat_restore;
-                }
-
-                if (!use_zero_buffers) {
-                    qemu_iovec_concat(&hd_qiov, qiov,  bytes_done,
-                                      sinfo.bytes_avail);
-                }
-                /* block exists, so we can just overwrite it */
-                qemu_co_mutex_unlock(&s->lock);
-                ret = bdrv_co_writev(bs->file->bs,
-                                    sinfo.file_offset >> BDRV_SECTOR_BITS,
-                                    sectors_to_write, &hd_qiov);
-                qemu_co_mutex_lock(&s->lock);
-                if (ret < 0) {
-                    goto error_bat_restore;
-                }
-                break;
-            case PAYLOAD_BLOCK_PARTIALLY_PRESENT:
-                /* we don't yet support difference files, fall through
-                 * to error */
-            default:
-                ret = -EIO;
-                goto exit;
-                break;
-            }
-
-            if (bat_update) {
-                /* this will update the BAT entry into the log journal, and
-                 * then flush the log journal out to disk */
-                ret =  vhdx_log_write_and_flush(bs, s, &bat_entry,
-                                                sizeof(VHDXBatEntry),
-                                                bat_entry_offset);
-                if (ret < 0) {
-                    goto exit;
-                }
-            }
-
-            nb_sectors -= sinfo.sectors_avail;
-            sector_num += sinfo.sectors_avail;
-            bytes_done += sinfo.bytes_avail;
-
-        }
-    }
-
-    goto exit;
-
-error_bat_restore:
-    if (bat_update) {
-        /* keep metadata in sync, and restore the bat entry state
-         * if error. */
-        sinfo.file_offset = bat_prior_offset;
-        vhdx_update_bat_table_entry(bs, s, &sinfo, &bat_entry,
-                                    &bat_entry_offset, bat_state);
-    }
-exit:
-    qemu_vfree(iov1.iov_base);
-    qemu_vfree(iov2.iov_base);
-    qemu_co_mutex_unlock(&s->lock);
-    qemu_iovec_destroy(&hd_qiov);
-    return ret;
-}
-
-
-
-/*
- * Create VHDX Headers
- *
- * There are 2 headers, and the highest sequence number will represent
- * the active header
- */
-static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
-                                   uint32_t log_size)
-{
-    int ret = 0;
-    VHDXHeader *hdr = NULL;
-
-    hdr = g_new0(VHDXHeader, 1);
-
-    hdr->signature       = VHDX_HEADER_SIGNATURE;
-    hdr->sequence_number = g_random_int();
-    hdr->log_version     = 0;
-    hdr->version         = 1;
-    hdr->log_length      = log_size;
-    hdr->log_offset      = VHDX_HEADER_SECTION_END;
-    vhdx_guid_generate(&hdr->file_write_guid);
-    vhdx_guid_generate(&hdr->data_write_guid);
-
-    ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
-    if (ret < 0) {
-        goto exit;
-    }
-    hdr->sequence_number++;
-    ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
-    if (ret < 0) {
-        goto exit;
-    }
-
-exit:
-    g_free(hdr);
-    return ret;
-}
-
-#define VHDX_METADATA_ENTRY_BUFFER_SIZE \
-                                    (sizeof(VHDXFileParameters)               +\
-                                     sizeof(VHDXVirtualDiskSize)              +\
-                                     sizeof(VHDXPage83Data)                   +\
-                                     sizeof(VHDXVirtualDiskLogicalSectorSize) +\
-                                     sizeof(VHDXVirtualDiskPhysicalSectorSize))
-
-/*
- * Create the Metadata entries.
- *
- * For more details on the entries, see section 3.5 (pg 29) in the
- * VHDX 1.00 specification.
- *
- * We support 5 metadata entries (all required by spec):
- *          File Parameters,
- *          Virtual Disk Size,
- *          Page 83 Data,
- *          Logical Sector Size,
- *          Physical Sector Size
- *
- * The first 64KB of the Metadata section is reserved for the metadata
- * header and entries; beyond that, the metadata items themselves reside.
- */
-static int vhdx_create_new_metadata(BlockDriverState *bs,
-                                    uint64_t image_size,
-                                    uint32_t block_size,
-                                    uint32_t sector_size,
-                                    uint64_t metadata_offset,
-                                    VHDXImageType type)
-{
-    int ret = 0;
-    uint32_t offset = 0;
-    void *buffer = NULL;
-    void *entry_buffer;
-    VHDXMetadataTableHeader *md_table;
-    VHDXMetadataTableEntry  *md_table_entry;
-
-    /* Metadata entries */
-    VHDXFileParameters     *mt_file_params;
-    VHDXVirtualDiskSize    *mt_virtual_size;
-    VHDXPage83Data         *mt_page83;
-    VHDXVirtualDiskLogicalSectorSize  *mt_log_sector_size;
-    VHDXVirtualDiskPhysicalSectorSize *mt_phys_sector_size;
-
-    entry_buffer = g_malloc0(VHDX_METADATA_ENTRY_BUFFER_SIZE);
-
-    mt_file_params = entry_buffer;
-    offset += sizeof(VHDXFileParameters);
-    mt_virtual_size = entry_buffer + offset;
-    offset += sizeof(VHDXVirtualDiskSize);
-    mt_page83 = entry_buffer + offset;
-    offset += sizeof(VHDXPage83Data);
-    mt_log_sector_size = entry_buffer + offset;
-    offset += sizeof(VHDXVirtualDiskLogicalSectorSize);
-    mt_phys_sector_size = entry_buffer + offset;
-
-    mt_file_params->block_size = cpu_to_le32(block_size);
-    if (type == VHDX_TYPE_FIXED) {
-        mt_file_params->data_bits |= VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED;
-        cpu_to_le32s(&mt_file_params->data_bits);
-    }
-
-    vhdx_guid_generate(&mt_page83->page_83_data);
-    cpu_to_leguids(&mt_page83->page_83_data);
-    mt_virtual_size->virtual_disk_size        = cpu_to_le64(image_size);
-    mt_log_sector_size->logical_sector_size   = cpu_to_le32(sector_size);
-    mt_phys_sector_size->physical_sector_size = cpu_to_le32(sector_size);
-
-    buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE);
-    md_table = buffer;
-
-    md_table->signature   = VHDX_METADATA_SIGNATURE;
-    md_table->entry_count = 5;
-    vhdx_metadata_header_le_export(md_table);
-
-
-    /* This will reference beyond the reserved table portion */
-    offset = 64 * KiB;
-
-    md_table_entry = buffer + sizeof(VHDXMetadataTableHeader);
-
-    md_table_entry[0].item_id = file_param_guid;
-    md_table_entry[0].offset  = offset;
-    md_table_entry[0].length  = sizeof(VHDXFileParameters);
-    md_table_entry[0].data_bits |= VHDX_META_FLAGS_IS_REQUIRED;
-    offset += md_table_entry[0].length;
-    vhdx_metadata_entry_le_export(&md_table_entry[0]);
-
-    md_table_entry[1].item_id = virtual_size_guid;
-    md_table_entry[1].offset  = offset;
-    md_table_entry[1].length  = sizeof(VHDXVirtualDiskSize);
-    md_table_entry[1].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
-                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
-    offset += md_table_entry[1].length;
-    vhdx_metadata_entry_le_export(&md_table_entry[1]);
-
-    md_table_entry[2].item_id = page83_guid;
-    md_table_entry[2].offset  = offset;
-    md_table_entry[2].length  = sizeof(VHDXPage83Data);
-    md_table_entry[2].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
-                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
-    offset += md_table_entry[2].length;
-    vhdx_metadata_entry_le_export(&md_table_entry[2]);
-
-    md_table_entry[3].item_id = logical_sector_guid;
-    md_table_entry[3].offset  = offset;
-    md_table_entry[3].length  = sizeof(VHDXVirtualDiskLogicalSectorSize);
-    md_table_entry[3].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
-                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
-    offset += md_table_entry[3].length;
-    vhdx_metadata_entry_le_export(&md_table_entry[3]);
-
-    md_table_entry[4].item_id = phys_sector_guid;
-    md_table_entry[4].offset  = offset;
-    md_table_entry[4].length  = sizeof(VHDXVirtualDiskPhysicalSectorSize);
-    md_table_entry[4].data_bits |= VHDX_META_FLAGS_IS_REQUIRED |
-                                   VHDX_META_FLAGS_IS_VIRTUAL_DISK;
-    vhdx_metadata_entry_le_export(&md_table_entry[4]);
-
-    ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
-                      VHDX_METADATA_ENTRY_BUFFER_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-
-
-exit:
-    g_free(buffer);
-    g_free(entry_buffer);
-    return ret;
-}
-
-/* Creates the BAT itself and writes it to the image.  We currently only
- * support 'Dynamic' and 'Fixed' image types; any other type fails with
- * -ENOTSUP.
- *
- *  Dynamic images: default state of the BAT is all zeroes.
- *
- *  Fixed images: default state of the BAT is fully populated, with
- *                file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
- *
- * bs:              image file being created
- * s:               partially populated state; s->bat must be NULL on entry
- *                  and is NULL again on return (the table built here is
- *                  only a scratch copy that is freed before returning)
- * image_size:      virtual disk size in bytes
- * type:            VHDX_TYPE_DYNAMIC or VHDX_TYPE_FIXED
- * use_zero_blocks: mark every payload block as PAYLOAD_BLOCK_ZERO
- * file_offset:     offset in the file at which the BAT is written
- * length:          size in bytes of the BAT
- *
- * Returns a negative errno on failure; on success the value is >= 0.
- */
-static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
-                           uint64_t image_size, VHDXImageType type,
-                           bool use_zero_blocks, uint64_t file_offset,
-                           uint32_t length)
-{
-    int ret = 0;
-    uint64_t data_file_offset;
-    uint64_t total_sectors = 0;
-    uint64_t sector_num = 0;
-    uint64_t unused;
-    int block_state;
-    VHDXSectorInfo sinfo;
-
-    assert(s->bat == NULL);
-
-    /* this gives a data start after BAT/bitmap entries, and well
-     * past any metadata entries: 5 MiB are skipped after the BAT, which
-     * leaves spare room for future expansion behind the metadata */
-    data_file_offset = file_offset + length + 5 * MiB;
-    total_sectors = image_size >> s->logical_sector_size_bits;
-
-    if (type == VHDX_TYPE_DYNAMIC) {
-        /* All zeroes, so we can just extend the file - the end of the BAT
-         * is the furthest thing we have written yet */
-        ret = bdrv_truncate(bs, data_file_offset);
-        if (ret < 0) {
-            goto exit;
-        }
-    } else if (type == VHDX_TYPE_FIXED) {
-        /* fixed images reserve room for every payload block up front */
-        ret = bdrv_truncate(bs, data_file_offset + image_size);
-        if (ret < 0) {
-            goto exit;
-        }
-    } else {
-        ret = -ENOTSUP;
-        goto exit;
-    }
-
-    if (type == VHDX_TYPE_FIXED ||
-                use_zero_blocks ||
-                bdrv_has_zero_init(bs) == 0) {
-        /* for a fixed file, the default BAT entry is not zero */
-        s->bat = g_try_malloc0(length);
-        if (length && s->bat == NULL) {
-            ret = -ENOMEM;
-            goto exit;
-        }
-        block_state = type == VHDX_TYPE_FIXED ? PAYLOAD_BLOCK_FULLY_PRESENT :
-                                                PAYLOAD_BLOCK_NOT_PRESENT;
-        /* use_zero_blocks takes precedence over the per-type default */
-        block_state = use_zero_blocks ? PAYLOAD_BLOCK_ZERO : block_state;
-        /* fill the BAT by emulating sector writes of sectors_per_block size */
-        while (sector_num < total_sectors) {
-            vhdx_block_translate(s, sector_num, s->sectors_per_block, &sinfo);
-            sinfo.file_offset = data_file_offset +
-                                (sector_num << s->logical_sector_size_bits);
-            sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
-            vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
-                                        block_state);
-            /* the table is written out verbatim below, so store the entry
-             * in on-disk (little endian) byte order */
-            cpu_to_le64s(&s->bat[sinfo.bat_idx]);
-            sector_num += s->sectors_per_block;
-        }
-        ret = bdrv_pwrite(bs, file_offset, s->bat, length);
-        if (ret < 0) {
-            goto exit;
-        }
-    }
-
-
-
-exit:
-    g_free(s->bat);
-    /* do not leave a dangling pointer behind in the caller's state */
-    s->bat = NULL;
-    return ret;
-}
-
-/* Creates the region table header, and region table entries.
- * There are 2 supported region table entries: BAT, and Metadata.
- *
- * As the calculations for the BAT region table are also needed
- * to create the BAT itself, we will also cause the BAT to be
- * created.
- *
- * The same 64KB region table block is written twice, at
- * VHDX_REGION_TABLE_OFFSET and at VHDX_REGION_TABLE2_OFFSET.
- *
- * On return *metadata_offset holds the file offset chosen for the
- * metadata region (it is set before any I/O is attempted).
- *
- * Returns a negative errno on failure (-EINVAL for a sector_size of 0 or
- * a block_size smaller than sector_size); on success the value is >= 0.
- */
-static int vhdx_create_new_region_table(BlockDriverState *bs,
-                                        uint64_t image_size,
-                                        uint32_t block_size,
-                                        uint32_t sector_size,
-                                        uint32_t log_size,
-                                        bool use_zero_blocks,
-                                        VHDXImageType type,
-                                        uint64_t *metadata_offset)
-{
-    int ret = 0;
-    uint32_t offset = 0;
-    void *buffer = NULL;
-    uint64_t bat_file_offset;
-    uint32_t bat_length;
-    BDRVVHDXState *s = NULL;
-    VHDXRegionTableHeader *region_table;
-    VHDXRegionTableEntry *rt_bat;
-    VHDXRegionTableEntry *rt_metadata;
-
-    assert(metadata_offset != NULL);
-
-    /* block_size and sector_size are both used as divisors below, and a
-     * sectors_per_block of 0 would keep the BAT fill loop in
-     * vhdx_create_bat() from ever advancing.  Nothing has been allocated
-     * yet, so a plain return is enough. */
-    if (sector_size == 0 || block_size < sector_size) {
-        return -EINVAL;
-    }
-
-    /* Populate enough of the BDRVVHDXState to be able to use the
-     * pre-existing BAT calculation, translation, and update functions */
-    s = g_new0(BDRVVHDXState, 1);
-
-    s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
-                     (uint64_t) sector_size / (uint64_t) block_size;
-
-    s->sectors_per_block = block_size / sector_size;
-    s->virtual_disk_size = image_size;
-    s->block_size = block_size;
-    s->logical_sector_size = sector_size;
-
-    vhdx_set_shift_bits(s);
-
-    vhdx_calc_bat_entries(s);
-
-    /* At this point the VHDX state is populated enough for creation */
-
-    /* a single buffer is used so we can calculate the checksum over the
-     * entire 64KB block */
-    buffer = g_malloc0(VHDX_HEADER_BLOCK_SIZE);
-    region_table = buffer;
-    offset += sizeof(VHDXRegionTableHeader);
-    rt_bat = buffer + offset;
-    offset += sizeof(VHDXRegionTableEntry);
-    rt_metadata  = buffer + offset;
-
-    region_table->signature = VHDX_REGION_SIGNATURE;
-    region_table->entry_count = 2;   /* BAT and Metadata */
-
-    /* the BAT starts on the first 1MB boundary behind the log */
-    rt_bat->guid        = bat_guid;
-    rt_bat->length      = ROUND_UP(s->bat_entries * sizeof(VHDXBatEntry), MiB);
-    rt_bat->file_offset = ROUND_UP(VHDX_HEADER_SECTION_END + log_size, MiB);
-    s->bat_offset = rt_bat->file_offset;
-
-    /* the metadata region follows the BAT directly */
-    rt_metadata->guid        = metadata_guid;
-    rt_metadata->file_offset = ROUND_UP(rt_bat->file_offset + rt_bat->length,
-                                        MiB);
-    rt_metadata->length      = 1 * MiB; /* min size, and more than enough */
-    *metadata_offset = rt_metadata->file_offset;
-
-    /* remember the host-endian values; the entries are converted to
-     * on-disk byte order next */
-    bat_file_offset = rt_bat->file_offset;
-    bat_length = rt_bat->length;
-
-    vhdx_region_header_le_export(region_table);
-    vhdx_region_entry_le_export(rt_bat);
-    vhdx_region_entry_le_export(rt_metadata);
-
-    vhdx_update_checksum(buffer, VHDX_HEADER_BLOCK_SIZE,
-                         offsetof(VHDXRegionTableHeader, checksum));
-
-
-    /* The region table gives us the data we need to create the BAT,
-     * so do that now */
-    ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
-                          bat_file_offset, bat_length);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    /* Now write out the region headers to disk */
-    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
-                      VHDX_HEADER_BLOCK_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-
-    ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
-                      VHDX_HEADER_BLOCK_SIZE);
-    if (ret < 0) {
-        goto exit;
-    }
-
-exit:
-    g_free(s);
-    g_free(buffer);
-    return ret;
-}
-
-/* Creates a new VHDX image at 'filename' from the creation options in
- * 'opts' (size, log size, block size, subformat, zero-block usage).
- *
- * We need to create the following elements:
- *
- *    .-----------------------------------------------------------------.
- *    |   (A)    |   (B)    |    (C)    |     (D)       |     (E)       |
- *    |  File ID |  Header1 |  Header 2 |  Region Tbl 1 |  Region Tbl 2 |
- *    |          |          |           |               |               |
- *    .-----------------------------------------------------------------.
- *    0         64KB      128KB       192KB           256KB           320KB
- *
- *
- *    .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
- *    |     (F)     |     (G)       |    (H)    |                        |
- *    | Journal Log |  BAT / Bitmap |  Metadata |  .... data ......      |
- *    |             |               |           |                        |
- *    .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
- *   1MB
- *
- * Option handling:
- *  - the image size is rounded up to BDRV_SECTOR_SIZE and may not exceed
- *    VHDX_MAX_IMAGE_SIZE
- *  - the log size is rounded up to a multiple of 1MB, raised to 1MB if it
- *    is smaller, and rejected if it does not fit a 32-bit length
- *  - a block size of 0 selects a size based on the image size; any other
- *    value is capped at VHDX_BLOCK_SIZE_MAX and rounded up to 1MB
- *  - the subformat must be "dynamic" (default) or "fixed"
- *
- * Returns 0 on success, a negative errno on failure.
- */
-static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    /* logical and physical sector size given to every new image */
-    const uint32_t sector_size = 512;
-    int ret = 0;
-    uint64_t image_size;
-    uint64_t log_size_opt;
-    uint64_t block_size_opt;
-    uint32_t log_size;
-    uint32_t block_size;
-    uint64_t signature;
-    uint64_t metadata_offset;
-    bool use_zero_blocks;
-
-    gunichar2 *creator = NULL;
-    glong creator_items;
-    BlockBackend *blk;
-    char *type = NULL;
-    VHDXImageType image_type;
-    Error *local_err = NULL;
-
-    image_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    /* keep the raw option values 64 bits wide until they are validated;
-     * storing them in 32 bits right away would silently truncate them */
-    log_size_opt = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_LOG_SIZE, 0);
-    block_size_opt = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_BLOCK_SIZE, 0);
-    type = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
-    use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, true);
-
-    if (image_size > VHDX_MAX_IMAGE_SIZE) {
-        error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    if (type == NULL) {
-        type = g_strdup("dynamic");
-    }
-
-    if (!strcmp(type, "dynamic")) {
-        image_type = VHDX_TYPE_DYNAMIC;
-    } else if (!strcmp(type, "fixed")) {
-        image_type = VHDX_TYPE_FIXED;
-    } else if (!strcmp(type, "differencing")) {
-        error_setg_errno(errp, ENOTSUP,
-                         "Differencing files not yet supported");
-        ret = -ENOTSUP;
-        goto exit;
-    } else {
-        /* tell the user which value was refused instead of failing with
-         * no message of our own */
-        error_setg_errno(errp, EINVAL, "Invalid subformat '%s'", type);
-        ret = -EINVAL;
-        goto exit;
-    }
-
-    /* These are pretty arbitrary, and mainly designed to keep the BAT
-     * size reasonable to load into RAM */
-    if (block_size_opt == 0) {
-        if (image_size > 32 * TiB) {
-            block_size_opt = 64 * MiB;
-        } else if (image_size > (uint64_t) 100 * GiB) {
-            block_size_opt = 32 * MiB;
-        } else if (image_size > 1 * GiB) {
-            block_size_opt = 16 * MiB;
-        } else {
-            block_size_opt = 8 * MiB;
-        }
-    }
-
-
-    /* make the log size close to what was specified, but must be
-     * min 1MB, and multiple of 1MB.  The length is kept in 32 bits, so
-     * refuse anything that would no longer fit after rounding up */
-    if (log_size_opt > UINT32_MAX - (MiB - 1)) {
-        error_setg_errno(errp, EINVAL, "Log size too large");
-        ret = -EINVAL;
-        goto exit;
-    }
-    log_size = ROUND_UP(log_size_opt, MiB);
-    if (log_size < MiB) {
-        log_size = MiB;
-    }
-
-    /* cap before rounding: VHDX_BLOCK_SIZE_MAX is itself a multiple of
-     * 1MB, so the rounded value can neither exceed it nor wrap to 0 */
-    if (block_size_opt > VHDX_BLOCK_SIZE_MAX) {
-        block_size_opt = VHDX_BLOCK_SIZE_MAX;
-    }
-    block_size = ROUND_UP(block_size_opt, MiB);
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto exit;
-    }
-
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    /* Create (A) */
-
-    /* The creator field is optional, but may be useful for
-     * debugging / diagnostics */
-    creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
-                              &creator_items, NULL);
-    signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
-    ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
-    if (ret < 0) {
-        goto delete_and_exit;
-    }
-    if (creator) {
-        ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
-                         creator, creator_items * sizeof(gunichar2));
-        if (ret < 0) {
-            goto delete_and_exit;
-        }
-    }
-
-
-    /* Creates (B),(C) */
-    ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
-    if (ret < 0) {
-        goto delete_and_exit;
-    }
-
-    /* Creates (D),(E),(G) explicitly. (F) created as by-product */
-    ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size,
-                                       sector_size, log_size, use_zero_blocks,
-                                       image_type, &metadata_offset);
-    if (ret < 0) {
-        goto delete_and_exit;
-    }
-
-    /* Creates (H) */
-    ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size,
-                                   sector_size, metadata_offset, image_type);
-    if (ret < 0) {
-        goto delete_and_exit;
-    }
-
-    /* the helpers hand back the block layer's return value untouched, which
-     * may be a positive count; report plain success as 0 */
-    ret = 0;
-
-    /* NOTE: despite the label's name nothing is deleted here; only the
-     * reference to the image is dropped */
-delete_and_exit:
-    blk_unref(blk);
-exit:
-    g_free(type);
-    g_free(creator);
-    return ret;
-}
-
-/* Image check callback.
- *
- * When an image is opened r/w, vhdx_open() already replays a log if one is
- * present.  'qemu-img check -r all' opens the image r/w as well, so by the
- * time we get here any log has been replayed and there is (currently)
- * nothing further to repair; a replay that happened on open is simply
- * counted as one fixed corruption.
- *
- * Always returns 0.
- */
-static int vhdx_check(BlockDriverState *bs, BdrvCheckResult *result,
-                       BdrvCheckMode fix)
-{
-    const BDRVVHDXState *state = bs->opaque;
-
-    result->corruptions_fixed += state->log_replayed_on_open ? 1 : 0;
-
-    return 0;
-}
-
-/* Image creation options for the vhdx driver; installed as .create_opts
- * of bdrv_vhdx and read back by name in vhdx_create().
- *
- * Only the log size (DEFAULT_LOG_SIZE) and the block size (0, meaning
- * auto-calculate) declare a default value here; the list is terminated
- * by the { NULL } entry. */
-static QemuOptsList vhdx_create_opts = {
-    .name = "vhdx-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(vhdx_create_opts.head),
-    .desc = {
-        {
-           .name = BLOCK_OPT_SIZE,
-           .type = QEMU_OPT_SIZE,
-           .help = "Virtual disk size; max of 64TB."
-       },
-       {
-           .name = VHDX_BLOCK_OPT_LOG_SIZE,
-           .type = QEMU_OPT_SIZE,
-           .def_value_str = stringify(DEFAULT_LOG_SIZE),
-           .help = "Log size; min 1MB."
-       },
-       {
-           .name = VHDX_BLOCK_OPT_BLOCK_SIZE,
-           .type = QEMU_OPT_SIZE,
-           .def_value_str = stringify(0),
-           .help = "Block Size; min 1MB, max 256MB. " \
-                   "0 means auto-calculate based on image size."
-       },
-       {
-           .name = BLOCK_OPT_SUBFMT,
-           .type = QEMU_OPT_STRING,
-           .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\
-                   "Default is 'dynamic'."
-       },
-       {
-           .name = VHDX_BLOCK_OPT_ZERO,
-           .type = QEMU_OPT_BOOL,
-           .help = "Force use of payload blocks of type 'ZERO'. "\
-                   "Non-standard, but default.  Do not set to 'off' when "\
-                   "using 'qemu-img convert' with subformat=dynamic."
-       },
-       { NULL }
-    }
-};
-
-/* Driver description for the "vhdx" format: per-image state is a
- * BDRVVHDXState, and the callbacks below are the vhdx_* functions of this
- * file.  bdrv_has_zero_init_1 is not defined in this file.
- * NOTE(review): it presumably reports new images as zero-initialized -
- * confirm against the block layer. */
-static BlockDriver bdrv_vhdx = {
-    .format_name            = "vhdx",
-    .instance_size          = sizeof(BDRVVHDXState),
-    .bdrv_probe             = vhdx_probe,
-    .bdrv_open              = vhdx_open,
-    .bdrv_close             = vhdx_close,
-    .bdrv_reopen_prepare    = vhdx_reopen_prepare,
-    .bdrv_co_readv          = vhdx_co_readv,
-    .bdrv_co_writev         = vhdx_co_writev,
-    .bdrv_create            = vhdx_create,
-    .bdrv_get_info          = vhdx_get_info,
-    .bdrv_check             = vhdx_check,
-    .bdrv_has_zero_init     = bdrv_has_zero_init_1,
-
-    .create_opts            = &vhdx_create_opts,
-};
-
-/* Registers the vhdx driver description with the block layer. */
-static void bdrv_vhdx_init(void)
-{
-    bdrv_register(&bdrv_vhdx);
-}
-
-/* NOTE(review): block_init() presumably arranges for bdrv_vhdx_init() to
- * run during block layer start-up - confirm against its definition */
-block_init(bdrv_vhdx_init);
diff --git a/qemu/block/vhdx.h b/qemu/block/vhdx.h
deleted file mode 100644
index 7003ab7a7..000000000
--- a/qemu/block/vhdx.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/*
- * Block driver for Hyper-V VHDX Images
- *
- * Copyright (c) 2013 Red Hat, Inc.,
- *
- * Authors:
- *  Jeff Cody <jcody@redhat.com>
- *
- *  This is based on the "VHDX Format Specification v1.00", published 8/25/2012
- *  by Microsoft:
- *      https://www.microsoft.com/en-us/download/details.aspx?id=34750
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef BLOCK_VHDX_H
-#define BLOCK_VHDX_H
-
-#define KiB              (1 * 1024)
-#define MiB            (KiB * 1024)
-#define GiB            (MiB * 1024)
-#define TiB ((uint64_t) GiB * 1024)
-
-#define DEFAULT_LOG_SIZE 1048576 /* 1MiB */
-/* Structures and fields present in the VHDX file */
-
-/* The header section has the following blocks,
- * each block is 64KB:
- *
- * _____________________________________________________________________________
- * | File Id. |   Header 1    | Header 2   | Region Table |  Reserved (768KB)  |
- * |----------|---------------|------------|--------------|--------------------|
- * |          |               |            |              |                    |
- * 0.........64KB...........128KB........192KB..........256KB................1MB
- */
-
-#define VHDX_HEADER_BLOCK_SIZE      (64 * 1024)
-
-#define VHDX_FILE_ID_OFFSET         0
-#define VHDX_HEADER1_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 1)
-#define VHDX_HEADER2_OFFSET         (VHDX_HEADER_BLOCK_SIZE * 2)
-#define VHDX_REGION_TABLE_OFFSET    (VHDX_HEADER_BLOCK_SIZE * 3)
-#define VHDX_REGION_TABLE2_OFFSET   (VHDX_HEADER_BLOCK_SIZE * 4)
-
-#define VHDX_HEADER_SECTION_END     (1 * MiB)
-/*
- * A note on the use of MS-GUID fields.  For more details on the GUID,
- * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier.
- *
- * The VHDX specification only states that these are MS GUIDs, and which
- * bytes are data1-data4. It makes no mention of what algorithm should be used
- * to generate the GUID, nor what standard.  However, looking at the specified
- * known GUID fields, it appears the GUIDs are:
- *  Standard/DCE GUID type  (noted by 10b in the MSB of byte 0 of .data4)
- *  Random algorithm        (noted by 0x4XXX for .data3)
- */
-
-/* ---- HEADER SECTION STRUCTURES ---- */
-
-/* These structures are ones that are defined in the VHDX specification
- * document */
-
-#define VHDX_FILE_SIGNATURE 0x656C696678646876ULL  /* "vhdxfile" in ASCII */
-typedef struct VHDXFileIdentifier {
-    uint64_t    signature;              /* "vhdxfile" in ASCII */
-    uint16_t    creator[256];           /* optional; utf-16 string to identify
-                                           the vhdx file creator.  Diagnostic
-                                           only */
-} VHDXFileIdentifier;
-
-
-/* the guid is a 16 byte unique ID - the definition for this used by
- * Microsoft is not just 16 bytes though - it is a structure that is defined,
- * so we need to follow it here so that endianness does not trip us up */
-
-typedef struct QEMU_PACKED MSGUID {
-    uint32_t  data1;
-    uint16_t  data2;
-    uint16_t  data3;
-    uint8_t   data4[8];
-} MSGUID;
-
-#define guid_eq(a, b) \
-    (memcmp(&(a), &(b), sizeof(MSGUID)) == 0)
-
-#define VHDX_HEADER_SIZE (4 * 1024)   /* although the vhdx_header struct in disk
-                                         is only 582 bytes, for purposes of crc
-                                         the header is the first 4KB of the 64KB
-                                         block */
-
-/* The full header is 4KB, although the actual header data is much smaller.
- * But for the checksum calculation, it is over the entire 4KB structure,
- * not just the defined portion of it */
-#define VHDX_HEADER_SIGNATURE 0x64616568
-typedef struct QEMU_PACKED VHDXHeader {
-    uint32_t    signature;              /* "head" in ASCII */
-    uint32_t    checksum;               /* CRC-32C hash of the whole header */
-    uint64_t    sequence_number;        /* Seq number of this header.  Each
-                                           VHDX file has 2 of these headers,
-                                           and only the header with the highest
-                                           sequence number is valid */
-    MSGUID      file_write_guid;        /* 128 bit unique identifier. Must be
-                                           updated to new, unique value before
-                                           the first modification is made to
-                                           file */
-    MSGUID      data_write_guid;        /* 128 bit unique identifier. Must be
-                                           updated to new, unique value before
-                                           the first modification is made to
-                                           visible data.   Visible data is
-                                           defined as:
-                                                    - system & user metadata
-                                                    - raw block data
-                                                    - disk size
-                                                    - any change that will
-                                                      cause the virtual disk
-                                                      sector read to differ
-
-                                           This does not need to change if
-                                           blocks are re-arranged */
-    MSGUID      log_guid;               /* 128 bit unique identifier. If zero,
-                                           there is no valid log. If non-zero,
-                                           log entries with this guid are
-                                           valid. */
-    uint16_t    log_version;            /* version of the log format. Must be
-                                           set to zero */
-    uint16_t    version;                /* version of the vhdx file.  Currently,
-                                           only supported version is "1" */
-    uint32_t    log_length;             /* length of the log.  Must be multiple
-                                           of 1MB */
-    uint64_t    log_offset;             /* byte offset in the file of the log.
-                                           Must also be a multiple of 1MB */
-} VHDXHeader;
-
-/* Header for the region table block */
-#define VHDX_REGION_SIGNATURE  0x69676572  /* "regi" in ASCII */
-typedef struct QEMU_PACKED VHDXRegionTableHeader {
-    uint32_t    signature;              /* "regi" in ASCII */
-    uint32_t    checksum;               /* CRC-32C hash of the 64KB table */
-    uint32_t    entry_count;            /* number of valid entries */
-    uint32_t    reserved;
-} VHDXRegionTableHeader;
-
-/* Individual region table entry.  There may be a maximum of 2047 of these
- *
- *  There are two known region table properties.  Both are required.
- *  BAT (block allocation table):  2DC27766F62342009D64115E9BFD4A08
- *  Metadata:                      8B7CA20647904B9AB8FE575F050F886E
- */
-#define VHDX_REGION_ENTRY_REQUIRED  0x01    /* if set, parser must understand
-                                               this entry in order to open
-                                               file */
-typedef struct QEMU_PACKED VHDXRegionTableEntry {
-    MSGUID      guid;                   /* 128-bit unique identifier */
-    uint64_t    file_offset;            /* offset of the object in the file.
-                                           Must be multiple of 1MB */
-    uint32_t    length;                 /* length, in bytes, of the object */
-    uint32_t    data_bits;
-} VHDXRegionTableEntry;
-
-
-/* ---- LOG ENTRY STRUCTURES ---- */
-#define VHDX_LOG_MIN_SIZE (1024 * 1024)
-#define VHDX_LOG_SECTOR_SIZE 4096
-#define VHDX_LOG_HDR_SIZE 64
-#define VHDX_LOG_SIGNATURE 0x65676f6c
-typedef struct QEMU_PACKED VHDXLogEntryHeader {
-    uint32_t    signature;              /* "loge" in ASCII */
-    uint32_t    checksum;               /* CRC-32C hash of the log entry */
-    uint32_t    entry_length;           /* length in bytes, multiple of 4KB */
-    uint32_t    tail;                   /* byte offset of first log entry of a
-                                           seq, where this entry is the last
-                                           entry */
-    uint64_t    sequence_number;        /* incremented with each log entry.
-                                           May not be zero. */
-    uint32_t    descriptor_count;       /* number of descriptors in this log
-                                           entry, must be >= 0 */
-    uint32_t    reserved;
-    MSGUID      log_guid;               /* value of the log_guid from
-                                           vhdx_header.  If not found in
-                                           vhdx_header, it is invalid */
-    uint64_t    flushed_file_offset;    /* see spec for full details - this
-                                           should be vhdx file size in bytes */
-    uint64_t    last_file_offset;       /* size in bytes that all allocated
-                                           file structures fit into */
-} VHDXLogEntryHeader;
-
-#define VHDX_LOG_DESC_SIZE 32
-#define VHDX_LOG_DESC_SIGNATURE 0x63736564
-#define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a
-typedef struct QEMU_PACKED VHDXLogDescriptor {
-    uint32_t    signature;              /* "zero" or "desc" in ASCII */
-    union  {
-        uint32_t    reserved;           /* zero desc */
-        uint32_t    trailing_bytes;     /* data desc: bytes 4092-4095 of the
-                                           data sector */
-    };
-    union {
-        uint64_t    zero_length;        /* zero desc: length of the section to
-                                           zero */
-        uint64_t    leading_bytes;      /* data desc: bytes 0-7 of the data
-                                           sector */
-    };
-    uint64_t    file_offset;            /* file offset to write zeros - multiple
-                                           of 4kB */
-    uint64_t    sequence_number;        /* must match same field in
-                                           vhdx_log_entry_header */
-} VHDXLogDescriptor;
-
-#define VHDX_LOG_DATA_SIGNATURE 0x61746164
-typedef struct QEMU_PACKED VHDXLogDataSector {
-    uint32_t    data_signature;         /* "data" in ASCII */
-    uint32_t    sequence_high;          /* 4 MSB of 8 byte sequence_number */
-    uint8_t     data[4084];             /* raw data, bytes 8-4091 (inclusive).
-                                           see the data descriptor field for the
-                                           other missing bytes */
-    uint32_t    sequence_low;           /* 4 LSB of 8 byte sequence_number */
-} VHDXLogDataSector;
-
-
-
-/* block states - different state values depending on whether it is a
- * payload block, or a sector block. */
-
-#define PAYLOAD_BLOCK_NOT_PRESENT       0
-#define PAYLOAD_BLOCK_UNDEFINED         1
-#define PAYLOAD_BLOCK_ZERO              2
-#define PAYLOAD_BLOCK_UNMAPPED          3
-#define PAYLOAD_BLOCK_UNMAPPED_v095     5
-#define PAYLOAD_BLOCK_FULLY_PRESENT     6
-#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
-
-#define SB_BLOCK_NOT_PRESENT    0
-#define SB_BLOCK_PRESENT        6
-
-/* per the spec */
-#define VHDX_MAX_SECTORS_PER_BLOCK  (1 << 23)
-
-/* upper 44 bits are the file offset in 1MB units, lower 3 bits are the state,
-   other bits are reserved */
-#define VHDX_BAT_STATE_BIT_MASK 0x07
-#define VHDX_BAT_FILE_OFF_MASK  0xFFFFFFFFFFF00000ULL /* upper 44 bits */
-typedef uint64_t VHDXBatEntry;
-
-/* ---- METADATA REGION STRUCTURES ---- */
-
-#define VHDX_METADATA_ENTRY_SIZE 32
-#define VHDX_METADATA_MAX_ENTRIES 2047  /* not including the header */
-#define VHDX_METADATA_TABLE_MAX_SIZE \
-    (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1))
-#define VHDX_METADATA_SIGNATURE 0x617461646174656DULL  /* "metadata" in ASCII */
-typedef struct QEMU_PACKED VHDXMetadataTableHeader {
-    uint64_t    signature;              /* "metadata" in ASCII */
-    uint16_t    reserved;
-    uint16_t    entry_count;            /* number of table entries. <= 2047 */
-    uint32_t    reserved2[5];
-} VHDXMetadataTableHeader;
-
-#define VHDX_META_FLAGS_IS_USER         0x01    /* max 1024 entries */
-#define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02    /* virtual disk metadata if set,
-                                                   otherwise file metadata */
-#define VHDX_META_FLAGS_IS_REQUIRED     0x04    /* parser must understand this
-                                                   entry to open the file */
-typedef struct QEMU_PACKED VHDXMetadataTableEntry {
-    MSGUID      item_id;                /* 128-bit identifier for metadata */
-    uint32_t    offset;                 /* byte offset of the metadata.  At
-                                           least 64kB.  Relative to start of
-                                           metadata region */
-                                        /* note: if length = 0, so is offset */
-    uint32_t    length;                 /* length of metadata. <= 1MB. */
-    uint32_t    data_bits;              /* least-significant 3 bits are flags,
-                                           the rest are reserved (see above) */
-    uint32_t    reserved2;
-} VHDXMetadataTableEntry;
-
-#define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01   /* Do not change any blocks to
-                                                   be BLOCK_NOT_PRESENT.
-                                                   If set indicates a fixed
-                                                   size VHDX file */
-#define VHDX_PARAMS_HAS_PARENT           0x02    /* has parent / backing file */
-#define VHDX_BLOCK_SIZE_MIN             (1   * MiB)
-#define VHDX_BLOCK_SIZE_MAX             (256 * MiB)
-typedef struct QEMU_PACKED VHDXFileParameters {
-    uint32_t    block_size;             /* size of each payload block, always
-                                           power of 2, <= 256MB and >= 1MB. */
-    uint32_t data_bits;                 /* least-significant 2 bits are flags,
-                                           the rest are reserved (see above) */
-} VHDXFileParameters;
-
-#define VHDX_MAX_IMAGE_SIZE  ((uint64_t) 64 * TiB)
-typedef struct QEMU_PACKED VHDXVirtualDiskSize {
-    uint64_t    virtual_disk_size;      /* Size of the virtual disk, in bytes.
-                                           Must be multiple of the sector size,
-                                           max of 64TB */
-} VHDXVirtualDiskSize;
-
-typedef struct QEMU_PACKED VHDXPage83Data {
-    MSGUID      page_83_data;           /* unique id for scsi devices that
-                                           support page 0x83 */
-} VHDXPage83Data;
-
-typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize {
-    uint32_t    logical_sector_size;    /* virtual disk sector size (in bytes).
-                                           Can only be 512 or 4096 bytes */
-} VHDXVirtualDiskLogicalSectorSize;
-
-typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize {
-    uint32_t    physical_sector_size;   /* physical sector size (in bytes).
-                                           Can only be 512 or 4096 bytes */
-} VHDXVirtualDiskPhysicalSectorSize;
-
-typedef struct QEMU_PACKED VHDXParentLocatorHeader {
-    MSGUID      locator_type;           /* type of the parent virtual disk. */
-    uint16_t    reserved;
-    uint16_t    key_value_count;        /* number of key/value pairs for this
-                                           locator */
-} VHDXParentLocatorHeader;
-
-/* key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */
-typedef struct QEMU_PACKED VHDXParentLocatorEntry {
-    uint32_t    key_offset;             /* offset in metadata for key, > 0 */
-    uint32_t    value_offset;           /* offset in metadata for value, >0 */
-    uint16_t    key_length;             /* length of entry key, > 0 */
-    uint16_t    value_length;           /* length of entry value, > 0 */
-} VHDXParentLocatorEntry;
-
-
-/* ----- END VHDX SPECIFICATION STRUCTURES ---- */
-
-typedef struct VHDXMetadataEntries {
-    VHDXMetadataTableEntry file_parameters_entry;
-    VHDXMetadataTableEntry virtual_disk_size_entry;
-    VHDXMetadataTableEntry page83_data_entry;
-    VHDXMetadataTableEntry logical_sector_size_entry;
-    VHDXMetadataTableEntry phys_sector_size_entry;
-    VHDXMetadataTableEntry parent_locator_entry;
-    uint16_t present;
-} VHDXMetadataEntries;
-
-typedef struct VHDXLogEntries {
-    uint64_t offset;
-    uint64_t length;
-    uint32_t write;
-    uint32_t read;
-    VHDXLogEntryHeader *hdr;
-    void *desc_buffer;
-    uint64_t sequence;
-    uint32_t tail;
-} VHDXLogEntries;
-
-typedef struct VHDXRegionEntry {
-    uint64_t start;
-    uint64_t end;
-    QLIST_ENTRY(VHDXRegionEntry) entries;
-} VHDXRegionEntry;
-
-typedef struct BDRVVHDXState {
-    CoMutex lock;
-
-    int curr_header;
-    VHDXHeader *headers[2];
-
-    VHDXRegionTableHeader rt;
-    VHDXRegionTableEntry bat_rt;         /* region table for the BAT */
-    VHDXRegionTableEntry metadata_rt;    /* region table for the metadata */
-
-    VHDXMetadataTableHeader metadata_hdr;
-    VHDXMetadataEntries metadata_entries;
-
-    VHDXFileParameters params;
-    uint32_t block_size;
-    uint32_t block_size_bits;
-    uint32_t sectors_per_block;
-    uint32_t sectors_per_block_bits;
-
-    uint64_t virtual_disk_size;
-    uint32_t logical_sector_size;
-    uint32_t physical_sector_size;
-
-    uint64_t chunk_ratio;
-    uint32_t chunk_ratio_bits;
-    uint32_t logical_sector_size_bits;
-
-    uint32_t bat_entries;
-    VHDXBatEntry *bat;
-    uint64_t bat_offset;
-
-    bool first_visible_write;
-    MSGUID session_guid;
-
-    VHDXLogEntries log;
-
-    VHDXParentLocatorHeader parent_header;
-    VHDXParentLocatorEntry *parent_entries;
-
-    Error *migration_blocker;
-
-    bool log_replayed_on_open;
-
-    QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions;
-} BDRVVHDXState;
-
-void vhdx_guid_generate(MSGUID *guid);
-
-int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw,
-                        MSGUID *log_guid);
-
-uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset);
-uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
-                            int crc_offset);
-
-bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
-
-int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
-                   Error **errp);
-
-int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
-                             void *data, uint32_t length, uint64_t offset);
-
-static inline void leguid_to_cpus(MSGUID *guid)
-{
-    le32_to_cpus(&guid->data1);
-    le16_to_cpus(&guid->data2);
-    le16_to_cpus(&guid->data3);
-}
-
-static inline void cpu_to_leguids(MSGUID *guid)
-{
-    cpu_to_le32s(&guid->data1);
-    cpu_to_le16s(&guid->data2);
-    cpu_to_le16s(&guid->data3);
-}
-
-void vhdx_header_le_import(VHDXHeader *h);
-void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h);
-void vhdx_log_desc_le_import(VHDXLogDescriptor *d);
-void vhdx_log_desc_le_export(VHDXLogDescriptor *d);
-void vhdx_log_data_le_import(VHDXLogDataSector *d);
-void vhdx_log_data_le_export(VHDXLogDataSector *d);
-void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr);
-void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr);
-void vhdx_region_header_le_import(VHDXRegionTableHeader *hdr);
-void vhdx_region_header_le_export(VHDXRegionTableHeader *hdr);
-void vhdx_region_entry_le_import(VHDXRegionTableEntry *e);
-void vhdx_region_entry_le_export(VHDXRegionTableEntry *e);
-void vhdx_metadata_header_le_import(VHDXMetadataTableHeader *hdr);
-void vhdx_metadata_header_le_export(VHDXMetadataTableHeader *hdr);
-void vhdx_metadata_entry_le_import(VHDXMetadataTableEntry *e);
-void vhdx_metadata_entry_le_export(VHDXMetadataTableEntry *e);
-int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s);
-
-#endif
diff --git a/qemu/block/vmdk.c b/qemu/block/vmdk.c
deleted file mode 100644
index 45f9d3c5b..000000000
--- a/qemu/block/vmdk.c
+++ /dev/null
@@ -1,2349 +0,0 @@
-/*
- * Block driver for the VMDK format
- *
- * Copyright (c) 2004 Fabrice Bellard
- * Copyright (c) 2005 Filip Navara
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qapi/qmp/qerror.h"
-#include "qemu/error-report.h"
-#include "qemu/module.h"
-#include "migration/migration.h"
-#include "qemu/cutils.h"
-#include <zlib.h>
-#include <glib.h>
-
-#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
-#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
-#define VMDK4_COMPRESSION_DEFLATE 1
-#define VMDK4_FLAG_NL_DETECT (1 << 0)
-#define VMDK4_FLAG_RGD (1 << 1)
-/* Zeroed-grain enable bit */
-#define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
-#define VMDK4_FLAG_COMPRESS (1 << 16)
-#define VMDK4_FLAG_MARKER (1 << 17)
-#define VMDK4_GD_AT_END 0xffffffffffffffffULL
-
-#define VMDK_GTE_ZEROED 0x1
-
-/* VMDK internal error codes */
-#define VMDK_OK      0
-#define VMDK_ERROR   (-1)
-/* Cluster not allocated */
-#define VMDK_UNALLOC (-2)
-#define VMDK_ZEROED  (-3)
-
-#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
-
-typedef struct {
-    uint32_t version;
-    uint32_t flags;
-    uint32_t disk_sectors;
-    uint32_t granularity;
-    uint32_t l1dir_offset;
-    uint32_t l1dir_size;
-    uint32_t file_sectors;
-    uint32_t cylinders;
-    uint32_t heads;
-    uint32_t sectors_per_track;
-} QEMU_PACKED VMDK3Header;
-
-typedef struct {
-    uint32_t version;
-    uint32_t flags;
-    uint64_t capacity;
-    uint64_t granularity;
-    uint64_t desc_offset;
-    uint64_t desc_size;
-    /* Number of GrainTableEntries per GrainTable */
-    uint32_t num_gtes_per_gt;
-    uint64_t rgd_offset;
-    uint64_t gd_offset;
-    uint64_t grain_offset;
-    char filler[1];
-    char check_bytes[4];
-    uint16_t compressAlgorithm;
-} QEMU_PACKED VMDK4Header;
-
-#define L2_CACHE_SIZE 16
-
-typedef struct VmdkExtent {
-    BdrvChild *file;
-    bool flat;
-    bool compressed;
-    bool has_marker;
-    bool has_zero_grain;
-    int version;
-    int64_t sectors;
-    int64_t end_sector;
-    int64_t flat_start_offset;
-    int64_t l1_table_offset;
-    int64_t l1_backup_table_offset;
-    uint32_t *l1_table;
-    uint32_t *l1_backup_table;
-    unsigned int l1_size;
-    uint32_t l1_entry_sectors;
-
-    unsigned int l2_size;
-    uint32_t *l2_cache;
-    uint32_t l2_cache_offsets[L2_CACHE_SIZE];
-    uint32_t l2_cache_counts[L2_CACHE_SIZE];
-
-    int64_t cluster_sectors;
-    int64_t next_cluster_sector;
-    char *type;
-} VmdkExtent;
-
-typedef struct BDRVVmdkState {
-    CoMutex lock;
-    uint64_t desc_offset;
-    bool cid_updated;
-    bool cid_checked;
-    uint32_t cid;
-    uint32_t parent_cid;
-    int num_extents;
-    /* Extent array with num_extents entries, ascend ordered by address */
-    VmdkExtent *extents;
-    Error *migration_blocker;
-    char *create_type;
-} BDRVVmdkState;
-
-typedef struct VmdkMetaData {
-    unsigned int l1_index;
-    unsigned int l2_index;
-    unsigned int l2_offset;
-    int valid;
-    uint32_t *l2_cache_entry;
-} VmdkMetaData;
-
-typedef struct VmdkGrainMarker {
-    uint64_t lba;
-    uint32_t size;
-    uint8_t  data[0];
-} QEMU_PACKED VmdkGrainMarker;
-
-enum {
-    MARKER_END_OF_STREAM    = 0,
-    MARKER_GRAIN_TABLE      = 1,
-    MARKER_GRAIN_DIRECTORY  = 2,
-    MARKER_FOOTER           = 3,
-};
-
-static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    uint32_t magic;
-
-    if (buf_size < 4) {
-        return 0;
-    }
-    magic = be32_to_cpu(*(uint32_t *)buf);
-    if (magic == VMDK3_MAGIC ||
-        magic == VMDK4_MAGIC) {
-        return 100;
-    } else {
-        const char *p = (const char *)buf;
-        const char *end = p + buf_size;
-        while (p < end) {
-            if (*p == '#') {
-                /* skip comment line */
-                while (p < end && *p != '\n') {
-                    p++;
-                }
-                p++;
-                continue;
-            }
-            if (*p == ' ') {
-                while (p < end && *p == ' ') {
-                    p++;
-                }
-                /* skip '\r' if windows line endings used. */
-                if (p < end && *p == '\r') {
-                    p++;
-                }
-                /* only accept blank lines before 'version=' line */
-                if (p == end || *p != '\n') {
-                    return 0;
-                }
-                p++;
-                continue;
-            }
-            if (end - p >= strlen("version=X\n")) {
-                if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
-                    strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
-                    return 100;
-                }
-            }
-            if (end - p >= strlen("version=X\r\n")) {
-                if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
-                    strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
-                    return 100;
-                }
-            }
-            return 0;
-        }
-        return 0;
-    }
-}
-
-#define SECTOR_SIZE 512
-#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
-#define BUF_SIZE 4096
-#define HEADER_SIZE 512                 /* first sector of 512 bytes */
-
-static void vmdk_free_extents(BlockDriverState *bs)
-{
-    int i;
-    BDRVVmdkState *s = bs->opaque;
-    VmdkExtent *e;
-
-    for (i = 0; i < s->num_extents; i++) {
-        e = &s->extents[i];
-        g_free(e->l1_table);
-        g_free(e->l2_cache);
-        g_free(e->l1_backup_table);
-        g_free(e->type);
-        if (e->file != bs->file) {
-            bdrv_unref_child(bs, e->file);
-        }
-    }
-    g_free(s->extents);
-}
-
-static void vmdk_free_last_extent(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-
-    if (s->num_extents == 0) {
-        return;
-    }
-    s->num_extents--;
-    s->extents = g_renew(VmdkExtent, s->extents, s->num_extents);
-}
-
-static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
-{
-    char *desc;
-    uint32_t cid = 0xffffffff;
-    const char *p_name, *cid_str;
-    size_t cid_str_size;
-    BDRVVmdkState *s = bs->opaque;
-    int ret;
-
-    desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
-    if (ret < 0) {
-        g_free(desc);
-        return 0;
-    }
-
-    if (parent) {
-        cid_str = "parentCID";
-        cid_str_size = sizeof("parentCID");
-    } else {
-        cid_str = "CID";
-        cid_str_size = sizeof("CID");
-    }
-
-    desc[DESC_SIZE - 1] = '\0';
-    p_name = strstr(desc, cid_str);
-    if (p_name != NULL) {
-        p_name += cid_str_size;
-        sscanf(p_name, "%" SCNx32, &cid);
-    }
-
-    g_free(desc);
-    return cid;
-}
-
-static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
-{
-    char *desc, *tmp_desc;
-    char *p_name, *tmp_str;
-    BDRVVmdkState *s = bs->opaque;
-    int ret = 0;
-
-    desc = g_malloc0(DESC_SIZE);
-    tmp_desc = g_malloc0(DESC_SIZE);
-    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
-    if (ret < 0) {
-        goto out;
-    }
-
-    desc[DESC_SIZE - 1] = '\0';
-    tmp_str = strstr(desc, "parentCID");
-    if (tmp_str == NULL) {
-        ret = -EINVAL;
-        goto out;
-    }
-
-    pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
-    p_name = strstr(desc, "CID");
-    if (p_name != NULL) {
-        p_name += sizeof("CID");
-        snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
-        pstrcat(desc, DESC_SIZE, tmp_desc);
-    }
-
-    ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
-
-out:
-    g_free(desc);
-    g_free(tmp_desc);
-    return ret;
-}
-
-static int vmdk_is_cid_valid(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-    uint32_t cur_pcid;
-
-    if (!s->cid_checked && bs->backing) {
-        BlockDriverState *p_bs = bs->backing->bs;
-
-        cur_pcid = vmdk_read_cid(p_bs, 0);
-        if (s->parent_cid != cur_pcid) {
-            /* CID not valid */
-            return 0;
-        }
-    }
-    s->cid_checked = true;
-    /* CID valid */
-    return 1;
-}
-
-/* We have nothing to do for VMDK reopen, stubs just return success */
-static int vmdk_reopen_prepare(BDRVReopenState *state,
-                               BlockReopenQueue *queue, Error **errp)
-{
-    assert(state != NULL);
-    assert(state->bs != NULL);
-    return 0;
-}
-
-static int vmdk_parent_open(BlockDriverState *bs)
-{
-    char *p_name;
-    char *desc;
-    BDRVVmdkState *s = bs->opaque;
-    int ret;
-
-    desc = g_malloc0(DESC_SIZE + 1);
-    ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
-    if (ret < 0) {
-        goto out;
-    }
-    ret = 0;
-
-    p_name = strstr(desc, "parentFileNameHint");
-    if (p_name != NULL) {
-        char *end_name;
-
-        p_name += sizeof("parentFileNameHint") + 1;
-        end_name = strchr(p_name, '\"');
-        if (end_name == NULL) {
-            ret = -EINVAL;
-            goto out;
-        }
-        if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
-            ret = -EINVAL;
-            goto out;
-        }
-
-        pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
-    }
-
-out:
-    g_free(desc);
-    return ret;
-}
-
-/* Create and append extent to the extent array. Return the added VmdkExtent
- * address. return NULL if allocation failed. */
-static int vmdk_add_extent(BlockDriverState *bs,
-                           BdrvChild *file, bool flat, int64_t sectors,
-                           int64_t l1_offset, int64_t l1_backup_offset,
-                           uint32_t l1_size,
-                           int l2_size, uint64_t cluster_sectors,
-                           VmdkExtent **new_extent,
-                           Error **errp)
-{
-    VmdkExtent *extent;
-    BDRVVmdkState *s = bs->opaque;
-    int64_t nb_sectors;
-
-    if (cluster_sectors > 0x200000) {
-        /* 0x200000 * 512Bytes = 1GB for one cluster is unrealistic */
-        error_setg(errp, "Invalid granularity, image may be corrupt");
-        return -EFBIG;
-    }
-    if (l1_size > 512 * 1024 * 1024) {
-        /* Although with big capacity and small l1_entry_sectors, we can get a
-         * big l1_size, we don't want unbounded value to allocate the table.
-         * Limit it to 512M, which is 16PB for default cluster and L2 table
-         * size */
-        error_setg(errp, "L1 size too big");
-        return -EFBIG;
-    }
-
-    nb_sectors = bdrv_nb_sectors(file->bs);
-    if (nb_sectors < 0) {
-        return nb_sectors;
-    }
-
-    s->extents = g_renew(VmdkExtent, s->extents, s->num_extents + 1);
-    extent = &s->extents[s->num_extents];
-    s->num_extents++;
-
-    memset(extent, 0, sizeof(VmdkExtent));
-    extent->file = file;
-    extent->flat = flat;
-    extent->sectors = sectors;
-    extent->l1_table_offset = l1_offset;
-    extent->l1_backup_table_offset = l1_backup_offset;
-    extent->l1_size = l1_size;
-    extent->l1_entry_sectors = l2_size * cluster_sectors;
-    extent->l2_size = l2_size;
-    extent->cluster_sectors = flat ? sectors : cluster_sectors;
-    extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
-
-    if (s->num_extents > 1) {
-        extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
-    } else {
-        extent->end_sector = extent->sectors;
-    }
-    bs->total_sectors = extent->end_sector;
-    if (new_extent) {
-        *new_extent = extent;
-    }
-    return 0;
-}
-
-static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
-                            Error **errp)
-{
-    int ret;
-    size_t l1_size;
-    int i;
-
-    /* read the L1 table */
-    l1_size = extent->l1_size * sizeof(uint32_t);
-    extent->l1_table = g_try_malloc(l1_size);
-    if (l1_size && extent->l1_table == NULL) {
-        return -ENOMEM;
-    }
-
-    ret = bdrv_pread(extent->file->bs,
-                     extent->l1_table_offset,
-                     extent->l1_table,
-                     l1_size);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "Could not read l1 table from extent '%s'",
-                         extent->file->bs->filename);
-        goto fail_l1;
-    }
-    for (i = 0; i < extent->l1_size; i++) {
-        le32_to_cpus(&extent->l1_table[i]);
-    }
-
-    if (extent->l1_backup_table_offset) {
-        extent->l1_backup_table = g_try_malloc(l1_size);
-        if (l1_size && extent->l1_backup_table == NULL) {
-            ret = -ENOMEM;
-            goto fail_l1;
-        }
-        ret = bdrv_pread(extent->file->bs,
-                         extent->l1_backup_table_offset,
-                         extent->l1_backup_table,
-                         l1_size);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret,
-                             "Could not read l1 backup table from extent '%s'",
-                             extent->file->bs->filename);
-            goto fail_l1b;
-        }
-        for (i = 0; i < extent->l1_size; i++) {
-            le32_to_cpus(&extent->l1_backup_table[i]);
-        }
-    }
-
-    extent->l2_cache =
-        g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
-    return 0;
- fail_l1b:
-    g_free(extent->l1_backup_table);
- fail_l1:
-    g_free(extent->l1_table);
-    return ret;
-}
-
-static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
-                                 BdrvChild *file,
-                                 int flags, Error **errp)
-{
-    int ret;
-    uint32_t magic;
-    VMDK3Header header;
-    VmdkExtent *extent;
-
-    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
-    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "Could not read header from file '%s'",
-                         file->bs->filename);
-        return ret;
-    }
-    ret = vmdk_add_extent(bs, file, false,
-                          le32_to_cpu(header.disk_sectors),
-                          (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
-                          0,
-                          le32_to_cpu(header.l1dir_size),
-                          4096,
-                          le32_to_cpu(header.granularity),
-                          &extent,
-                          errp);
-    if (ret < 0) {
-        return ret;
-    }
-    ret = vmdk_init_tables(bs, extent, errp);
-    if (ret) {
-        /* free extent allocated by vmdk_add_extent */
-        vmdk_free_last_extent(bs);
-    }
-    return ret;
-}
-
-static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
-                               QDict *options, Error **errp);
-
-static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
-                            Error **errp)
-{
-    int64_t size;
-    char *buf;
-    int ret;
-
-    size = bdrv_getlength(file);
-    if (size < 0) {
-        error_setg_errno(errp, -size, "Could not access file");
-        return NULL;
-    }
-
-    if (size < 4) {
-        /* Both descriptor file and sparse image must be much larger than 4
-         * bytes, also callers of vmdk_read_desc want to compare the first 4
-         * bytes with VMDK4_MAGIC, let's error out if less is read. */
-        error_setg(errp, "File is too small, not a valid image");
-        return NULL;
-    }
-
-    size = MIN(size, (1 << 20) - 1);  /* avoid unbounded allocation */
-    buf = g_malloc(size + 1);
-
-    ret = bdrv_pread(file, desc_offset, buf, size);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not read from file");
-        g_free(buf);
-        return NULL;
-    }
-    buf[ret] = 0;
-
-    return buf;
-}
-
-static int vmdk_open_vmdk4(BlockDriverState *bs,
-                           BdrvChild *file,
-                           int flags, QDict *options, Error **errp)
-{
-    int ret;
-    uint32_t magic;
-    uint32_t l1_size, l1_entry_sectors;
-    VMDK4Header header;
-    VmdkExtent *extent;
-    BDRVVmdkState *s = bs->opaque;
-    int64_t l1_backup_offset = 0;
-    bool compressed;
-
-    ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
-    if (ret < 0) {
-        error_setg_errno(errp, -ret,
-                         "Could not read header from file '%s'",
-                         file->bs->filename);
-        return -EINVAL;
-    }
-    if (header.capacity == 0) {
-        uint64_t desc_offset = le64_to_cpu(header.desc_offset);
-        if (desc_offset) {
-            char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
-            if (!buf) {
-                return -EINVAL;
-            }
-            ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
-            g_free(buf);
-            return ret;
-        }
-    }
-
-    if (!s->create_type) {
-        s->create_type = g_strdup("monolithicSparse");
-    }
-
-    if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
-        /*
-         * The footer takes precedence over the header, so read it in. The
-         * footer starts at offset -1024 from the end: One sector for the
-         * footer, and another one for the end-of-stream marker.
-         */
-        struct {
-            struct {
-                uint64_t val;
-                uint32_t size;
-                uint32_t type;
-                uint8_t pad[512 - 16];
-            } QEMU_PACKED footer_marker;
-
-            uint32_t magic;
-            VMDK4Header header;
-            uint8_t pad[512 - 4 - sizeof(VMDK4Header)];
-
-            struct {
-                uint64_t val;
-                uint32_t size;
-                uint32_t type;
-                uint8_t pad[512 - 16];
-            } QEMU_PACKED eos_marker;
-        } QEMU_PACKED footer;
-
-        ret = bdrv_pread(file->bs,
-            bs->file->bs->total_sectors * 512 - 1536,
-            &footer, sizeof(footer));
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Failed to read footer");
-            return ret;
-        }
-
-        /* Some sanity checks for the footer */
-        if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
-            le32_to_cpu(footer.footer_marker.size) != 0  ||
-            le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
-            le64_to_cpu(footer.eos_marker.val) != 0  ||
-            le32_to_cpu(footer.eos_marker.size) != 0  ||
-            le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
-        {
-            error_setg(errp, "Invalid footer");
-            return -EINVAL;
-        }
-
-        header = footer.header;
-    }
-
-    compressed =
-        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
-    if (le32_to_cpu(header.version) > 3) {
-        error_setg(errp, "Unsupported VMDK version %" PRIu32,
-                   le32_to_cpu(header.version));
-        return -ENOTSUP;
-    } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
-               !compressed) {
-        /* VMware KB 2064959 explains that version 3 added support for
-         * persistent changed block tracking (CBT), and backup software can
-         * read it as version=1 if it doesn't care about the changed area
-         * information. So we are safe to enable read only. */
-        error_setg(errp, "VMDK version 3 must be read only");
-        return -EINVAL;
-    }
-
-    if (le32_to_cpu(header.num_gtes_per_gt) > 512) {
-        error_setg(errp, "L2 table size too big");
-        return -EINVAL;
-    }
-
-    l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
-                        * le64_to_cpu(header.granularity);
-    if (l1_entry_sectors == 0) {
-        error_setg(errp, "L1 entry size is invalid");
-        return -EINVAL;
-    }
-    l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
-                / l1_entry_sectors;
-    if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
-        l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
-    }
-    if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
-        error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
-                   (int64_t)(le64_to_cpu(header.grain_offset)
-                             * BDRV_SECTOR_SIZE));
-        return -EINVAL;
-    }
-
-    ret = vmdk_add_extent(bs, file, false,
-                          le64_to_cpu(header.capacity),
-                          le64_to_cpu(header.gd_offset) << 9,
-                          l1_backup_offset,
-                          l1_size,
-                          le32_to_cpu(header.num_gtes_per_gt),
-                          le64_to_cpu(header.granularity),
-                          &extent,
-                          errp);
-    if (ret < 0) {
-        return ret;
-    }
-    extent->compressed =
-        le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
-    if (extent->compressed) {
-        g_free(s->create_type);
-        s->create_type = g_strdup("streamOptimized");
-    }
-    extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
-    extent->version = le32_to_cpu(header.version);
-    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
-    ret = vmdk_init_tables(bs, extent, errp);
-    if (ret) {
-        /* free extent allocated by vmdk_add_extent */
-        vmdk_free_last_extent(bs);
-    }
-    return ret;
-}
-
-/* find an option value out of descriptor file */
-static int vmdk_parse_description(const char *desc, const char *opt_name,
-        char *buf, int buf_size)
-{
-    char *opt_pos, *opt_end;
-    const char *end = desc + strlen(desc);
-
-    opt_pos = strstr(desc, opt_name);
-    if (!opt_pos) {
-        return VMDK_ERROR;
-    }
-    /* Skip "=\"" following opt_name */
-    opt_pos += strlen(opt_name) + 2;
-    if (opt_pos >= end) {
-        return VMDK_ERROR;
-    }
-    opt_end = opt_pos;
-    while (opt_end < end && *opt_end != '"') {
-        opt_end++;
-    }
-    if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
-        return VMDK_ERROR;
-    }
-    pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
-    return VMDK_OK;
-}
-
-/* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
-                            char *buf, QDict *options, Error **errp)
-{
-    uint32_t magic;
-
-    magic = ldl_be_p(buf);
-    switch (magic) {
-        case VMDK3_MAGIC:
-            return vmdk_open_vmfs_sparse(bs, file, flags, errp);
-            break;
-        case VMDK4_MAGIC:
-            return vmdk_open_vmdk4(bs, file, flags, options, errp);
-            break;
-        default:
-            error_setg(errp, "Image not in VMDK format");
-            return -EINVAL;
-            break;
-    }
-}
-
-static const char *next_line(const char *s)
-{
-    while (*s) {
-        if (*s == '\n') {
-            return s + 1;
-        }
-        s++;
-    }
-    return s;
-}
-
-static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
-                              const char *desc_file_path, QDict *options,
-                              Error **errp)
-{
-    int ret;
-    int matches;
-    char access[11];
-    char type[11];
-    char fname[512];
-    const char *p, *np;
-    int64_t sectors = 0;
-    int64_t flat_offset;
-    char *extent_path;
-    BdrvChild *extent_file;
-    BDRVVmdkState *s = bs->opaque;
-    VmdkExtent *extent;
-    char extent_opt_prefix[32];
-    Error *local_err = NULL;
-
-    for (p = desc; *p; p = next_line(p)) {
-        /* parse extent line in one of below formats:
-         *
-         * RW [size in sectors] FLAT "file-name.vmdk" OFFSET
-         * RW [size in sectors] SPARSE "file-name.vmdk"
-         * RW [size in sectors] VMFS "file-name.vmdk"
-         * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
-         */
-        flat_offset = -1;
-        matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
-                         access, &sectors, type, fname, &flat_offset);
-        if (matches < 4 || strcmp(access, "RW")) {
-            continue;
-        } else if (!strcmp(type, "FLAT")) {
-            if (matches != 5 || flat_offset < 0) {
-                goto invalid;
-            }
-        } else if (!strcmp(type, "VMFS")) {
-            if (matches == 4) {
-                flat_offset = 0;
-            } else {
-                goto invalid;
-            }
-        } else if (matches != 4) {
-            goto invalid;
-        }
-
-        if (sectors <= 0 ||
-            (strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
-             strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
-            (strcmp(access, "RW"))) {
-            continue;
-        }
-
-        if (!path_is_absolute(fname) && !path_has_protocol(fname) &&
-            !desc_file_path[0])
-        {
-            error_setg(errp, "Cannot use relative extent paths with VMDK "
-                       "descriptor file '%s'", bs->file->bs->filename);
-            return -EINVAL;
-        }
-
-        extent_path = g_malloc0(PATH_MAX);
-        path_combine(extent_path, PATH_MAX, desc_file_path, fname);
-
-        ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
-        assert(ret < 32);
-
-        extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
-                                      bs, &child_file, false, &local_err);
-        g_free(extent_path);
-        if (local_err) {
-            error_propagate(errp, local_err);
-            return -EINVAL;
-        }
-
-        /* save to extents array */
-        if (!strcmp(type, "FLAT") || !strcmp(type, "VMFS")) {
-            /* FLAT extent */
-
-            ret = vmdk_add_extent(bs, extent_file, true, sectors,
-                            0, 0, 0, 0, 0, &extent, errp);
-            if (ret < 0) {
-                bdrv_unref_child(bs, extent_file);
-                return ret;
-            }
-            extent->flat_start_offset = flat_offset << 9;
-        } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
-            /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
-            char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
-            if (!buf) {
-                ret = -EINVAL;
-            } else {
-                ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf,
-                                       options, errp);
-            }
-            g_free(buf);
-            if (ret) {
-                bdrv_unref_child(bs, extent_file);
-                return ret;
-            }
-            extent = &s->extents[s->num_extents - 1];
-        } else {
-            error_setg(errp, "Unsupported extent type '%s'", type);
-            bdrv_unref_child(bs, extent_file);
-            return -ENOTSUP;
-        }
-        extent->type = g_strdup(type);
-    }
-    return 0;
-
-invalid:
-    np = next_line(p);
-    assert(np != p);
-    if (np[-1] == '\n') {
-        np--;
-    }
-    error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
-    return -EINVAL;
-}
-
-/*
- * Open an image whose top-level file is a text descriptor.
- *
- * Looks up the "createType" entry in @buf, rejects create types that are
- * not in the supported list below, records the type in the driver state and
- * then passes the descriptor to vmdk_parse_extents().
- *
- * Returns -EINVAL if "createType" cannot be parsed, -ENOTSUP for a create
- * type that is not listed, otherwise the result of vmdk_parse_extents().
- * @flags is not used here.
- */
-static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
-                               QDict *options, Error **errp)
-{
-    static const char *const known_types[] = {
-        "monolithicFlat",
-        "vmfs",
-        "vmfsSparse",
-        "twoGbMaxExtentSparse",
-        "twoGbMaxExtentFlat",
-    };
-    BDRVVmdkState *state = bs->opaque;
-    char create_type[128];
-    bool supported = false;
-    size_t k;
-
-    if (vmdk_parse_description(buf, "createType", create_type,
-                               sizeof(create_type))) {
-        error_setg(errp, "invalid VMDK image descriptor");
-        return -EINVAL;
-    }
-
-    for (k = 0; k < sizeof(known_types) / sizeof(known_types[0]); k++) {
-        if (!strcmp(create_type, known_types[k])) {
-            supported = true;
-            break;
-        }
-    }
-    if (!supported) {
-        error_setg(errp, "Unsupported image type '%s'", create_type);
-        return -ENOTSUP;
-    }
-
-    state->create_type = g_strdup(create_type);
-    state->desc_offset = 0;
-    return vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options,
-                              errp);
-}
-
-/*
- * Driver open entry point.
- *
- * Reads the start of the image file and dispatches on its first four bytes
- * (read big-endian): VMDK3/VMDK4 magic means the file itself is a sparse
- * extent, anything else is treated as a text descriptor.  After the extents
- * are set up the parent image is opened, both CIDs are read, the state lock
- * is initialised and a migration blocker is registered.
- *
- * Returns 0 on success.  On failure the create type string and any extents
- * added so far are released and a non-zero error code is returned.
- */
-static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
-                     Error **errp)
-{
-    BDRVVmdkState *s = bs->opaque;
-    uint32_t magic;
-    char *desc;
-    int ret;
-
-    desc = vmdk_read_desc(bs->file->bs, 0, errp);
-    if (desc == NULL) {
-        return -EINVAL;
-    }
-
-    magic = ldl_be_p(desc);
-    if (magic == VMDK3_MAGIC || magic == VMDK4_MAGIC) {
-        ret = vmdk_open_sparse(bs, bs->file, flags, desc, options, errp);
-        /* set regardless of the result, as before */
-        s->desc_offset = 0x200;
-    } else {
-        ret = vmdk_open_desc_file(bs, flags, desc, options, errp);
-    }
-
-    if (!ret) {
-        /* try to open parent images, if exist */
-        ret = vmdk_parent_open(bs);
-    }
-    if (ret) {
-        g_free(desc);
-        g_free(s->create_type);
-        s->create_type = NULL;
-        vmdk_free_extents(bs);
-        return ret;
-    }
-
-    s->cid = vmdk_read_cid(bs, 0);
-    s->parent_cid = vmdk_read_cid(bs, 1);
-    qemu_co_mutex_init(&s->lock);
-
-    /* Disable migration when VMDK images are used */
-    error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
-
-    g_free(desc);
-    return 0;
-}
-
-
-/*
- * Raise the write-zeroes alignment limit of @bs to at least the cluster
- * size (in sectors) of every non-flat extent.  @errp is not used.
- */
-static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
-{
-    BDRVVmdkState *s = bs->opaque;
-    VmdkExtent *ext;
-    int idx;
-
-    for (idx = 0; idx < s->num_extents; idx++) {
-        ext = &s->extents[idx];
-        if (ext->flat) {
-            /* flat extents do not contribute a cluster size */
-            continue;
-        }
-        bs->bl.write_zeroes_alignment = MAX(bs->bl.write_zeroes_alignment,
-                                            ext->cluster_sectors);
-    }
-}
-
-/**
- * get_whole_cluster
- *
- * Fill the newly allocated cluster at @cluster_sector_num in the extent
- * file: copy the backing file's cluster that covers @sector_num when a
- * backing file exists, otherwise write zeroes.
- *
- * If @skip_start_sector < @skip_end_sector, the cluster-relative range
- * [@skip_start_sector, @skip_end_sector) is neither copied nor zeroed; it is
- * left for the caller, which writes the request's own data there.
- *
- * @skip_end_sector must not exceed the extent's cluster size; this is
- * asserted before the value is used for any buffer arithmetic.
- *
- * Returns: VMDK_OK on success, VMDK_ERROR if the parent CID check fails or
- *          any read/write fails.
- */
-static int get_whole_cluster(BlockDriverState *bs,
-                             VmdkExtent *extent,
-                             uint64_t cluster_sector_num,
-                             uint64_t sector_num,
-                             uint64_t skip_start_sector,
-                             uint64_t skip_end_sector)
-{
-    int ret = VMDK_OK;
-    int64_t cluster_bytes;
-    uint8_t *whole_grain;
-
-    /* Validate the skip range before it is used to size the memset below;
-     * a too-large value would make the length computation wrap. */
-    assert(skip_end_sector <= extent->cluster_sectors);
-
-    /* For COW, align request sector_num to cluster start */
-    sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
-    cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
-    whole_grain = qemu_blockalign(bs, cluster_bytes);
-
-    if (!bs->backing) {
-        /* No backing file: everything outside the skip range is zero */
-        memset(whole_grain, 0,  skip_start_sector << BDRV_SECTOR_BITS);
-        memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
-               cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
-    }
-
-    /* we will be here if it's first write on non-exist grain(cluster).
-     * try to read from parent image, if exist */
-    if (bs->backing && !vmdk_is_cid_valid(bs)) {
-        ret = VMDK_ERROR;
-        goto exit;
-    }
-
-    /* Read backing data before skip range */
-    if (skip_start_sector > 0) {
-        if (bs->backing) {
-            ret = bdrv_read(bs->backing->bs, sector_num,
-                            whole_grain, skip_start_sector);
-            if (ret < 0) {
-                ret = VMDK_ERROR;
-                goto exit;
-            }
-        }
-        ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
-                         skip_start_sector);
-        if (ret < 0) {
-            ret = VMDK_ERROR;
-            goto exit;
-        }
-    }
-    /* Read backing data after skip range */
-    if (skip_end_sector < extent->cluster_sectors) {
-        if (bs->backing) {
-            ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
-                            whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                            extent->cluster_sectors - skip_end_sector);
-            if (ret < 0) {
-                ret = VMDK_ERROR;
-                goto exit;
-            }
-        }
-        ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
-                         whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
-                         extent->cluster_sectors - skip_end_sector);
-        if (ret < 0) {
-            ret = VMDK_ERROR;
-            goto exit;
-        }
-    }
-
-exit:
-    qemu_vfree(whole_grain);
-    return ret;
-}
-
-/*
- * Write @offset (converted to little-endian) into the L2 table entry that
- * @m_data describes, then into the same entry of the backup L2 table when
- * the extent has a backup L1 table, and finally into the cached copy of the
- * entry if @m_data points at one.
- *
- * Note that @m_data->l2_offset is replaced by the backup table's offset
- * when a backup table is present.
- *
- * Returns VMDK_OK, or VMDK_ERROR as soon as one of the writes fails.
- */
-static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
-                         uint32_t offset)
-{
-    int pass;
-
-    offset = cpu_to_le32(offset);
-
-    /* pass 0: primary L2 table; pass 1: backup L2 table, if there is one */
-    for (pass = 0; pass < 2; pass++) {
-        if (pass == 1) {
-            if (extent->l1_backup_table_offset == 0) {
-                break;
-            }
-            m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
-        }
-        if (bdrv_pwrite_sync(
-                    extent->file->bs,
-                    ((int64_t)m_data->l2_offset * 512)
-                        + (m_data->l2_index * sizeof(offset)),
-                    &offset, sizeof(offset)) < 0) {
-            return VMDK_ERROR;
-        }
-    }
-
-    /* keep the in-memory L2 cache in step with what is now on disk */
-    if (m_data->l2_cache_entry) {
-        *m_data->l2_cache_entry = offset;
-    }
-
-    return VMDK_OK;
-}
-
-/**
- * get_cluster_offset
- *
- * Look up cluster offset in extent file by sector number, and store in
- * @cluster_offset.
- *
- * For flat extents, the start offset as parsed from the description file is
- * returned.
- *
- * For sparse extents, look up in L1, L2 table. If allocate is true, return an
- * offset for a new cluster and update L2 cache. If there is a backing file,
- * COW is done before returning; otherwise, zeroes are written to the allocated
- * cluster. Both COW and zero writing skips the sector range
- * [@skip_start_sector, @skip_end_sector) passed in by caller, because caller
- * has new data to write there.
- *
- * If @m_data is not NULL its @valid flag is cleared on entry and only set
- * once the L2 entry has been located; the index/offset fields are filled in
- * at the same time.
- *
- * L2 tables are kept in a small cache.  When a table has to be loaded and
- * the read fails, the cache slot that was being refilled is invalidated so
- * that its (possibly partially overwritten) contents can never be returned
- * for the table it used to hold.
- *
- * Returns: VMDK_OK if cluster exists and mapped in the image.
- *          VMDK_UNALLOC if cluster is not mapped and @allocate is false.
- *          VMDK_ZEROED if the entry is the zero-grain marker and @allocate
- *          is false.
- *          VMDK_ERROR if failed.
- */
-static int get_cluster_offset(BlockDriverState *bs,
-                              VmdkExtent *extent,
-                              VmdkMetaData *m_data,
-                              uint64_t offset,
-                              bool allocate,
-                              uint64_t *cluster_offset,
-                              uint64_t skip_start_sector,
-                              uint64_t skip_end_sector)
-{
-    unsigned int l1_index, l2_offset, l2_index;
-    int min_index, i, j;
-    uint32_t min_count, *l2_table;
-    bool zeroed = false;
-    int64_t ret;
-    int64_t cluster_sector;
-
-    if (m_data) {
-        m_data->valid = 0;
-    }
-    if (extent->flat) {
-        *cluster_offset = extent->flat_start_offset;
-        return VMDK_OK;
-    }
-
-    /* make @offset relative to the start of this extent */
-    offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
-    l1_index = (offset >> 9) / extent->l1_entry_sectors;
-    if (l1_index >= extent->l1_size) {
-        return VMDK_ERROR;
-    }
-    l2_offset = extent->l1_table[l1_index];
-    if (!l2_offset) {
-        return VMDK_UNALLOC;
-    }
-    for (i = 0; i < L2_CACHE_SIZE; i++) {
-        if (l2_offset == extent->l2_cache_offsets[i]) {
-            /* increment the hit count */
-            if (++extent->l2_cache_counts[i] == 0xffffffff) {
-                /* halve every counter so they keep their relative order
-                 * without wrapping */
-                for (j = 0; j < L2_CACHE_SIZE; j++) {
-                    extent->l2_cache_counts[j] >>= 1;
-                }
-            }
-            l2_table = extent->l2_cache + (i * extent->l2_size);
-            goto found;
-        }
-    }
-    /* not found: load a new entry in the least used one */
-    min_index = 0;
-    min_count = 0xffffffff;
-    for (i = 0; i < L2_CACHE_SIZE; i++) {
-        if (extent->l2_cache_counts[i] < min_count) {
-            min_count = extent->l2_cache_counts[i];
-            min_index = i;
-        }
-    }
-    l2_table = extent->l2_cache + (min_index * extent->l2_size);
-    if (bdrv_pread(
-                extent->file->bs,
-                (int64_t)l2_offset * 512,
-                l2_table,
-                extent->l2_size * sizeof(uint32_t)
-            ) != extent->l2_size * sizeof(uint32_t)) {
-        /* The slot's old contents may have been overwritten by the failed
-         * read.  Offset 0 never matches a lookup (an L1 entry of 0 returns
-         * VMDK_UNALLOC above), so use it to mark the slot as empty. */
-        extent->l2_cache_offsets[min_index] = 0;
-        extent->l2_cache_counts[min_index] = 0;
-        return VMDK_ERROR;
-    }
-
-    extent->l2_cache_offsets[min_index] = l2_offset;
-    extent->l2_cache_counts[min_index] = 1;
- found:
-    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
-    cluster_sector = le32_to_cpu(l2_table[l2_index]);
-
-    if (m_data) {
-        m_data->valid = 1;
-        m_data->l1_index = l1_index;
-        m_data->l2_index = l2_index;
-        m_data->l2_offset = l2_offset;
-        m_data->l2_cache_entry = &l2_table[l2_index];
-    }
-    if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
-        zeroed = true;
-    }
-
-    if (!cluster_sector || zeroed) {
-        if (!allocate) {
-            return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
-        }
-
-        cluster_sector = extent->next_cluster_sector;
-        extent->next_cluster_sector += extent->cluster_sectors;
-
-        /* First of all we write the grain itself, to avoid a race condition
-         * that may corrupt the image.
-         * This problem may occur because of insufficient space on host disk
-         * or inappropriate VM shutdown.
-         */
-        ret = get_whole_cluster(bs, extent,
-                                cluster_sector,
-                                offset >> BDRV_SECTOR_BITS,
-                                skip_start_sector, skip_end_sector);
-        if (ret) {
-            return ret;
-        }
-    }
-    *cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
-    return VMDK_OK;
-}
-
-/*
- * Return the first extent, starting at @start_hint (or at the first extent
- * when @start_hint is NULL), whose end sector lies beyond @sector_num.
- * Returns NULL when no such extent exists.
- */
-static VmdkExtent *find_extent(BDRVVmdkState *s,
-                                int64_t sector_num, VmdkExtent *start_hint)
-{
-    VmdkExtent *cur;
-
-    for (cur = start_hint ? start_hint : &s->extents[0];
-         cur < &s->extents[s->num_extents];
-         cur++) {
-        if (sector_num < cur->end_sector) {
-            return cur;
-        }
-    }
-    return NULL;
-}
-
-/*
- * Return the position of @sector_num inside its cluster, counted in sectors
- * from the start of that cluster.  The extent's first sector is derived as
- * end_sector - sectors.
- */
-static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
-                                                  int64_t sector_num)
-{
-    uint64_t first_sector = extent->end_sector - extent->sectors;
-    uint64_t sector_in_extent = sector_num - first_sector;
-
-    return sector_in_extent % extent->cluster_sectors;
-}
-
-/*
- * Report the allocation status of the cluster containing @sector_num.
- *
- * The cluster lookup is done under the state lock and its result is mapped
- * to block-status flags: error -> -EIO, unallocated -> 0, zero grain ->
- * BDRV_BLOCK_ZERO, mapped -> BDRV_BLOCK_DATA (plus a valid host offset for
- * uncompressed extents, with *@file set to the extent's file).
- *
- * *@pnum receives the number of sectors from @sector_num to the end of the
- * cluster, capped at @nb_sectors.  If no extent covers @sector_num the
- * function returns 0 without touching *@pnum.
- */
-static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    BDRVVmdkState *s = bs->opaque;
-    VmdkExtent *extent = find_extent(s, sector_num, NULL);
-    int64_t index_in_cluster, n, ret;
-    uint64_t offset;
-
-    if (!extent) {
-        return 0;
-    }
-
-    qemu_co_mutex_lock(&s->lock);
-    ret = get_cluster_offset(bs, extent, NULL,
-                             sector_num * 512, false, &offset,
-                             0, 0);
-    qemu_co_mutex_unlock(&s->lock);
-
-    index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
-
-    if (ret == VMDK_ERROR) {
-        ret = -EIO;
-    } else if (ret == VMDK_UNALLOC) {
-        ret = 0;
-    } else if (ret == VMDK_ZEROED) {
-        ret = BDRV_BLOCK_ZERO;
-    } else if (ret == VMDK_OK) {
-        ret = BDRV_BLOCK_DATA;
-        if (!extent->compressed) {
-            /* host offset is only meaningful for uncompressed data */
-            ret |= BDRV_BLOCK_OFFSET_VALID;
-            ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS))
-                    & BDRV_BLOCK_OFFSET_MASK;
-        }
-        *file = extent->file->bs;
-    }
-
-    n = extent->cluster_sectors - index_in_cluster;
-    if (n > nb_sectors) {
-        n = nb_sectors;
-    }
-    *pnum = n;
-    return ret;
-}
-
-/*
- * Write @nb_sectors sectors from @buf into @extent at byte position
- * @cluster_offset + @offset_in_cluster.
- *
- * For compressed extents the data is deflated with zlib and prefixed with a
- * grain marker carrying @sector_num and the compressed length; extents that
- * are compressed but have no markers are rejected with -EINVAL.  The marker
- * fields are stored little-endian, matching the le32_to_cpu() conversion the
- * read path applies to the marker size.
- *
- * extent->next_cluster_sector is advanced past the written range (for
- * compressed extents it is set to exactly the end of the write).  This
- * happens before the write result is examined.
- *
- * Returns 0 on success, a negative errno from the write, or -EIO on a short
- * write.
- */
-static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
-                            int64_t offset_in_cluster, const uint8_t *buf,
-                            int nb_sectors, int64_t sector_num)
-{
-    int ret;
-    VmdkGrainMarker *data = NULL;
-    uLongf buf_len;
-    const uint8_t *write_buf = buf;
-    int write_len = nb_sectors * 512;
-    int64_t write_offset;
-    int64_t write_end_sector;
-
-    if (extent->compressed) {
-        if (!extent->has_marker) {
-            ret = -EINVAL;
-            goto out;
-        }
-        /* output buffer is sized at twice the cluster size */
-        buf_len = (extent->cluster_sectors << 9) * 2;
-        data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
-        if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
-                buf_len == 0) {
-            ret = -EINVAL;
-            goto out;
-        }
-        /* on-disk marker fields are little-endian regardless of host */
-        data->lba = cpu_to_le64(sector_num);
-        data->size = cpu_to_le32(buf_len);
-        write_buf = (uint8_t *)data;
-        write_len = buf_len + sizeof(VmdkGrainMarker);
-    }
-    write_offset = cluster_offset + offset_in_cluster;
-    ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
-
-    write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
-
-    if (extent->compressed) {
-        extent->next_cluster_sector = write_end_sector;
-    } else {
-        extent->next_cluster_sector = MAX(extent->next_cluster_sector,
-                                          write_end_sector);
-    }
-
-    if (ret != write_len) {
-        ret = ret < 0 ? ret : -EIO;
-        goto out;
-    }
-    ret = 0;
- out:
-    g_free(data);
-    return ret;
-}
-
-/*
- * Read @nb_sectors sectors into @buf from the cluster of @extent that
- * starts at byte @cluster_offset, beginning @offset_in_cluster bytes into
- * the cluster.
- *
- * Uncompressed extents are read directly.  For compressed extents two
- * clusters' worth of raw bytes are read, the optional grain marker supplies
- * the compressed length, the payload is inflated with zlib and the requested
- * range is copied out of the result.
- *
- * The compressed length taken from the marker is checked against the bytes
- * that actually follow the marker header in the read buffer, so a bogus
- * length cannot make zlib read past the end of that buffer.
- *
- * Returns 0 on success, -EIO on a short uncompressed read, the negative
- * value from the raw read for compressed extents, or -EINVAL when the
- * compressed data is malformed or the requested range lies outside the
- * inflated cluster.
- */
-static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
-                            int64_t offset_in_cluster, uint8_t *buf,
-                            int nb_sectors)
-{
-    int ret;
-    int cluster_bytes, buf_bytes;
-    uint8_t *cluster_buf, *compressed_data;
-    uint8_t *uncomp_buf;
-    uint32_t data_len;
-    size_t data_avail;
-    VmdkGrainMarker *marker;
-    uLongf buf_len;
-
-
-    if (!extent->compressed) {
-        ret = bdrv_pread(extent->file->bs,
-                          cluster_offset + offset_in_cluster,
-                          buf, nb_sectors * 512);
-        if (ret == nb_sectors * 512) {
-            return 0;
-        } else {
-            return -EIO;
-        }
-    }
-    cluster_bytes = extent->cluster_sectors * 512;
-    /* Read two clusters in case GrainMarker + compressed data > one cluster */
-    buf_bytes = cluster_bytes * 2;
-    cluster_buf = g_malloc(buf_bytes);
-    uncomp_buf = g_malloc(cluster_bytes);
-    ret = bdrv_pread(extent->file->bs,
-                cluster_offset,
-                cluster_buf, buf_bytes);
-    if (ret < 0) {
-        goto out;
-    }
-    compressed_data = cluster_buf;
-    buf_len = cluster_bytes;
-    data_len = cluster_bytes;
-    if (extent->has_marker) {
-        marker = (VmdkGrainMarker *)cluster_buf;
-        compressed_data = marker->data;
-        data_len = le32_to_cpu(marker->size);
-    }
-    /* bytes of cluster_buf that lie at or after compressed_data */
-    data_avail = buf_bytes - (compressed_data - cluster_buf);
-    if (!data_len || data_len > data_avail) {
-        ret = -EINVAL;
-        goto out;
-    }
-    ret = uncompress(uncomp_buf, &buf_len, compressed_data, data_len);
-    if (ret != Z_OK) {
-        ret = -EINVAL;
-        goto out;
-
-    }
-    /* buf_len now holds the inflated size; the request must fit inside it */
-    if (offset_in_cluster < 0 ||
-            offset_in_cluster + nb_sectors * 512 > buf_len) {
-        ret = -EINVAL;
-        goto out;
-    }
-    memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
-    ret = 0;
-
- out:
-    g_free(uncomp_buf);
-    g_free(cluster_buf);
-    return ret;
-}
-
-/*
- * Read @nb_sectors sectors starting at @sector_num into @buf, one cluster
- * (or part of one) at a time.
- *
- * For each piece the owning extent and the cluster mapping are looked up:
- *  - mapped cluster: data comes from the extent file;
- *  - unmapped cluster with a backing file: data comes from the backing
- *    file, after its CID has been validated;
- *  - zero-grain cluster, or unmapped with no backing file: zeroes.
- *
- * A failed cluster lookup (VMDK_ERROR) is reported as -EIO instead of being
- * treated like an unallocated cluster, so a metadata read error can no
- * longer be turned into zeroes or backing-file data.
- *
- * Returns 0 on success, -EIO when no extent covers a sector or the lookup
- * fails, -EINVAL when the parent CID is invalid, or the error from the
- * underlying read.
- */
-static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int ret;
-    uint64_t n, index_in_cluster;
-    VmdkExtent *extent = NULL;
-    uint64_t cluster_offset;
-
-    while (nb_sectors > 0) {
-        extent = find_extent(s, sector_num, extent);
-        if (!extent) {
-            return -EIO;
-        }
-        ret = get_cluster_offset(bs, extent, NULL,
-                                 sector_num << 9, false, &cluster_offset,
-                                 0, 0);
-        if (ret == VMDK_ERROR) {
-            /* lookup failed: do not guess at the cluster's contents */
-            return -EIO;
-        }
-        index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
-        /* never cross a cluster boundary in one step */
-        n = extent->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-        if (ret != VMDK_OK) {
-            /* if not allocated, try to read from parent image, if exist */
-            if (bs->backing && ret != VMDK_ZEROED) {
-                if (!vmdk_is_cid_valid(bs)) {
-                    return -EINVAL;
-                }
-                ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
-                if (ret < 0) {
-                    return ret;
-                }
-            } else {
-                memset(buf, 0, 512 * n);
-            }
-        } else {
-            ret = vmdk_read_extent(extent,
-                            cluster_offset, index_in_cluster * 512,
-                            buf, n);
-            if (ret) {
-                return ret;
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-    }
-    return 0;
-}
-
-/* Coroutine read entry point: runs vmdk_read() with the state lock held. */
-static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
-                                     uint8_t *buf, int nb_sectors)
-{
-    BDRVVmdkState *state = bs->opaque;
-    int status;
-
-    qemu_co_mutex_lock(&state->lock);
-    status = vmdk_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&state->lock);
-
-    return status;
-}
-
-/**
- * vmdk_write:
- * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
- *                if possible, otherwise return -ENOTSUP.
- * @zero_dry_run: used for zeroed == true only, don't update L2 table, just try
- *                with each cluster. By dry run we can find if the zero write
- *                is possible without modifying image data.
- *
- * The request is processed one cluster (or part of one) at a time.  For a
- * normal write the cluster is looked up (and allocated if necessary), the
- * data is written to the extent, and the L2 entry is updated when the lookup
- * produced valid metadata.  Compressed extents refuse writes to clusters
- * that are already allocated.
- *
- * A zero write is only performed when the extent supports zero grains, the
- * piece covers a whole cluster, and the lookup located the cluster's L2
- * entry (m_data.valid); without a located entry there is nothing that
- * vmdk_L2update() could safely update, so -ENOTSUP is returned instead.
- *
- * The image CID is rewritten once, after the first successfully processed
- * piece since the image was opened.
- *
- * Returns: error code with 0 for success.
- */
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                      const uint8_t *buf, int nb_sectors,
-                      bool zeroed, bool zero_dry_run)
-{
-    BDRVVmdkState *s = bs->opaque;
-    VmdkExtent *extent = NULL;
-    int ret;
-    int64_t index_in_cluster, n;
-    uint64_t cluster_offset;
-    VmdkMetaData m_data;
-
-    if (sector_num > bs->total_sectors) {
-        error_report("Wrong offset: sector_num=0x%" PRIx64
-                     " total_sectors=0x%" PRIx64,
-                     sector_num, bs->total_sectors);
-        return -EIO;
-    }
-
-    while (nb_sectors > 0) {
-        extent = find_extent(s, sector_num, extent);
-        if (!extent) {
-            return -EIO;
-        }
-        index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
-        /* never cross a cluster boundary in one step */
-        n = extent->cluster_sectors - index_in_cluster;
-        if (n > nb_sectors) {
-            n = nb_sectors;
-        }
-        /* Plain writes allocate here and skip COW for the range about to be
-         * written; compressed and zero writes only probe the mapping. */
-        ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
-                                 !(extent->compressed || zeroed),
-                                 &cluster_offset,
-                                 index_in_cluster, index_in_cluster + n);
-        if (extent->compressed) {
-            if (ret == VMDK_OK) {
-                /* Refuse write to allocated cluster for streamOptimized */
-                error_report("Could not write to allocated cluster"
-                              " for streamOptimized");
-                return -EIO;
-            } else {
-                /* allocate */
-                ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
-                                         true, &cluster_offset, 0, 0);
-            }
-        }
-        if (ret == VMDK_ERROR) {
-            return -EINVAL;
-        }
-        if (zeroed) {
-            /* Do zeroed write, buf is ignored */
-            if (extent->has_zero_grain &&
-                    m_data.valid &&
-                    index_in_cluster == 0 &&
-                    n >= extent->cluster_sectors) {
-                n = extent->cluster_sectors;
-                if (!zero_dry_run) {
-                    /* update L2 tables */
-                    if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
-                            != VMDK_OK) {
-                        return -EIO;
-                    }
-                }
-            } else {
-                return -ENOTSUP;
-            }
-        } else {
-            ret = vmdk_write_extent(extent,
-                            cluster_offset, index_in_cluster * 512,
-                            buf, n, sector_num);
-            if (ret) {
-                return ret;
-            }
-            if (m_data.valid) {
-                /* update L2 tables */
-                if (vmdk_L2update(extent, &m_data,
-                                  cluster_offset >> BDRV_SECTOR_BITS)
-                        != VMDK_OK) {
-                    return -EIO;
-                }
-            }
-        }
-        nb_sectors -= n;
-        sector_num += n;
-        buf += n * 512;
-
-        /* update CID on the first write every time the virtual disk is
-         * opened */
-        if (!s->cid_updated) {
-            ret = vmdk_write_cid(bs, g_random_int());
-            if (ret < 0) {
-                return ret;
-            }
-            s->cid_updated = true;
-        }
-    }
-    return 0;
-}
-
-/*
- * Coroutine write entry point: runs an ordinary (non-zero, non-dry-run)
- * vmdk_write() with the state lock held.
- */
-static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
-                                      const uint8_t *buf, int nb_sectors)
-{
-    BDRVVmdkState *state = bs->opaque;
-    int status;
-
-    qemu_co_mutex_lock(&state->lock);
-    status = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
-    qemu_co_mutex_unlock(&state->lock);
-
-    return status;
-}
-
-/*
- * Compressed-write entry point.  Only images consisting of exactly one
- * compressed extent are accepted; anything else yields -ENOTSUP.  The data
- * itself goes through the regular vmdk_write() path.
- */
-static int vmdk_write_compressed(BlockDriverState *bs,
-                                 int64_t sector_num,
-                                 const uint8_t *buf,
-                                 int nb_sectors)
-{
-    BDRVVmdkState *s = bs->opaque;
-
-    if (s->num_extents != 1 || !s->extents[0].compressed) {
-        return -ENOTSUP;
-    }
-    return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
-}
-
-/*
- * Coroutine write-zeroes entry point.
- *
- * A zero write can fail part-way if the range is not cluster aligned, so
- * the request is first run as a dry run; only if that succeeds is it run
- * again for real.  Both passes happen under the state lock.  @flags is not
- * used.
- */
-static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
-                                             int64_t sector_num,
-                                             int nb_sectors,
-                                             BdrvRequestFlags flags)
-{
-    static const bool dry_run_pass[] = { true, false };
-    BDRVVmdkState *s = bs->opaque;
-    int ret = 0;
-    size_t pass;
-
-    qemu_co_mutex_lock(&s->lock);
-    for (pass = 0; pass < 2; pass++) {
-        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true,
-                         dry_run_pass[pass]);
-        if (ret) {
-            break;
-        }
-    }
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-/*
- * Create one extent file named @filename.
- *
- * For a flat extent the file is simply truncated to @filesize.  Otherwise a
- * VMDK4 sparse extent is laid out: magic + header, then space reserved up to
- * the first grain, then the redundant grain directory and the grain
- * directory, each pointing at its own run of grain tables.
- *
- * @compress and @zeroed_grain select the header version and feature flags.
- * @opts is passed to bdrv_create_file().
- *
- * Returns 0 on success or a negative value on failure, with @errp set on
- * the failure paths below.
- */
-static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress, bool zeroed_grain,
-                              QemuOpts *opts, Error **errp)
-{
-    int ret, i;
-    BlockBackend *blk = NULL;
-    VMDK4Header header;
-    Error *local_err = NULL;
-    uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
-    uint32_t *gd_buf = NULL;
-    int gd_buf_size;
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto exit;
-    }
-
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto exit;
-    }
-
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    if (flat) {
-        /* flat extents carry no metadata: sizing the file is all there is */
-        ret = blk_truncate(blk, filesize);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not truncate file");
-        }
-        goto exit;
-    }
-    magic = cpu_to_be32(VMDK4_MAGIC);
-    memset(&header, 0, sizeof(header));
-    /* version 3 for compressed, 2 for zeroed-grain, 1 otherwise */
-    if (compress) {
-        header.version = 3;
-    } else if (zeroed_grain) {
-        header.version = 2;
-    } else {
-        header.version = 1;
-    }
-    header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
-                   | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
-                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
-    header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
-    header.capacity = filesize / BDRV_SECTOR_SIZE;
-    header.granularity = 128;
-    /* the sector-size constant is reused as the entry count per table */
-    header.num_gtes_per_gt = BDRV_SECTOR_SIZE;
-
-    /* grains:     grains needed to cover the capacity
-     * gt_size:    sectors occupied by one grain table
-     * gt_count:   number of grain tables
-     * gd_sectors: sectors occupied by one grain directory */
-    grains = DIV_ROUND_UP(filesize / BDRV_SECTOR_SIZE, header.granularity);
-    gt_size = DIV_ROUND_UP(header.num_gtes_per_gt * sizeof(uint32_t),
-                           BDRV_SECTOR_SIZE);
-    gt_count = DIV_ROUND_UP(grains, header.num_gtes_per_gt);
-    gd_sectors = DIV_ROUND_UP(gt_count * sizeof(uint32_t), BDRV_SECTOR_SIZE);
-
-    /* Layout in sectors: descriptor at 1 (20 sectors), then the redundant
-     * directory and its tables, then the primary directory and its tables,
-     * then grain data rounded up to a granularity boundary. */
-    header.desc_offset = 1;
-    header.desc_size = 20;
-    header.rgd_offset = header.desc_offset + header.desc_size;
-    header.gd_offset = header.rgd_offset + gd_sectors + (gt_size * gt_count);
-    header.grain_offset =
-        ROUND_UP(header.gd_offset + gd_sectors + (gt_size * gt_count),
-                 header.granularity);
-    /* swap endianness for all header fields */
-    /* NOTE: from here on the header holds little-endian values; every later
-     * read of a header field converts it back with le64_to_cpu(). */
-    header.version = cpu_to_le32(header.version);
-    header.flags = cpu_to_le32(header.flags);
-    header.capacity = cpu_to_le64(header.capacity);
-    header.granularity = cpu_to_le64(header.granularity);
-    header.num_gtes_per_gt = cpu_to_le32(header.num_gtes_per_gt);
-    header.desc_offset = cpu_to_le64(header.desc_offset);
-    header.desc_size = cpu_to_le64(header.desc_size);
-    header.rgd_offset = cpu_to_le64(header.rgd_offset);
-    header.gd_offset = cpu_to_le64(header.gd_offset);
-    header.grain_offset = cpu_to_le64(header.grain_offset);
-    header.compressAlgorithm = cpu_to_le16(header.compressAlgorithm);
-
-    /* byte sequence LF, space, CR, LF (goes with VMDK4_FLAG_NL_DETECT,
-     * presumably to reveal newline translation -- confirm against spec) */
-    header.check_bytes[0] = 0xa;
-    header.check_bytes[1] = 0x20;
-    header.check_bytes[2] = 0xd;
-    header.check_bytes[3] = 0xa;
-
-    /* write all the data */
-    ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
-    if (ret < 0) {
-        error_setg(errp, QERR_IO_ERROR);
-        goto exit;
-    }
-    ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
-    if (ret < 0) {
-        error_setg(errp, QERR_IO_ERROR);
-        goto exit;
-    }
-
-    /* extend the file up to the first grain so the metadata area exists */
-    ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not truncate file");
-        goto exit;
-    }
-
-    /* write grain directory */
-    /* (this first one is written at rgd_offset: entry i holds the sector of
-     * grain table i, tables being laid out back to back after the
-     * directory) */
-    gd_buf_size = gd_sectors * BDRV_SECTOR_SIZE;
-    gd_buf = g_malloc0(gd_buf_size);
-    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_sectors;
-         i < gt_count; i++, tmp += gt_size) {
-        gd_buf[i] = cpu_to_le32(tmp);
-    }
-    ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
-                     gd_buf, gd_buf_size);
-    if (ret < 0) {
-        error_setg(errp, QERR_IO_ERROR);
-        goto exit;
-    }
-
-    /* write backup grain directory */
-    /* (same buffer, refilled with table positions relative to gd_offset) */
-    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_sectors;
-         i < gt_count; i++, tmp += gt_size) {
-        gd_buf[i] = cpu_to_le32(tmp);
-    }
-    ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
-                     gd_buf, gd_buf_size);
-    if (ret < 0) {
-        error_setg(errp, QERR_IO_ERROR);
-        goto exit;
-    }
-
-    ret = 0;
-exit:
-    if (blk) {
-        blk_unref(blk);
-    }
-    g_free(gd_buf);
-    return ret;
-}
-
-/*
- * Split @filename into three pieces, each stored in a caller-provided
- * buffer of @buf_len bytes:
- *
- *   @path    - everything up to and including the last separator
- *              (empty if there is no separator);
- *   @prefix  - the file name after the separator, up to its last '.';
- *   @postfix - the last '.' and everything after it (empty if none).
- *
- * The separator is the last '/'; if the name contains no '/', the last
- * '\\' is used, and failing that the last ':'.
- *
- * Returns VMDK_OK on success.  Returns VMDK_ERROR, with @errp set, when
- * @filename is NULL or empty, or when the path or prefix part does not fit
- * in @buf_len bytes.
- */
-static int filename_decompose(const char *filename, char *path, char *prefix,
-                              char *postfix, size_t buf_len, Error **errp)
-{
-    const char *p, *q;
-
-    if (filename == NULL || !strlen(filename)) {
-        error_setg(errp, "No filename provided");
-        return VMDK_ERROR;
-    }
-    p = strrchr(filename, '/');
-    if (p == NULL) {
-        p = strrchr(filename, '\\');
-    }
-    if (p == NULL) {
-        p = strrchr(filename, ':');
-    }
-    if (p != NULL) {
-        /* step past the separator so that it stays part of @path */
-        p++;
-        if (p - filename >= buf_len) {
-            error_setg(errp, "Path is too long");
-            return VMDK_ERROR;
-        }
-        /* size includes room for the terminating NUL */
-        pstrcpy(path, p - filename + 1, filename);
-    } else {
-        p = filename;
-        path[0] = '\0';
-    }
-    q = strrchr(p, '.');
-    if (q == NULL) {
-        pstrcpy(prefix, buf_len, p);
-        postfix[0] = '\0';
-    } else {
-        if (q - p >= buf_len) {
-            error_setg(errp, "Filename is too long");
-            return VMDK_ERROR;
-        }
-        pstrcpy(prefix, q - p + 1, p);
-        pstrcpy(postfix, buf_len, q);
-    }
-    return VMDK_OK;
-}
-
-static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    int idx = 0;
-    BlockBackend *new_blk = NULL;
-    Error *local_err = NULL;
-    char *desc = NULL;
-    int64_t total_size = 0, filesize;
-    char *adapter_type = NULL;
-    char *backing_file = NULL;
-    char *fmt = NULL;
-    int flags = 0;
-    int ret = 0;
-    bool flat, split, compress;
-    GString *ext_desc_lines;
-    char *path = g_malloc0(PATH_MAX);
-    char *prefix = g_malloc0(PATH_MAX);
-    char *postfix = g_malloc0(PATH_MAX);
-    char *desc_line = g_malloc0(BUF_SIZE);
-    char *ext_filename = g_malloc0(PATH_MAX);
-    char *desc_filename = g_malloc0(PATH_MAX);
-    const int64_t split_size = 0x80000000;  /* VMDK has constant split size */
-    const char *desc_extent_line;
-    char *parent_desc_line = g_malloc0(BUF_SIZE);
-    uint32_t parent_cid = 0xffffffff;
-    uint32_t number_heads = 16;
-    bool zeroed_grain = false;
-    uint32_t desc_offset = 0, desc_len;
-    const char desc_template[] =
-        "# Disk DescriptorFile\n"
-        "version=1\n"
-        "CID=%" PRIx32 "\n"
-        "parentCID=%" PRIx32 "\n"
-        "createType=\"%s\"\n"
-        "%s"
-        "\n"
-        "# Extent description\n"
-        "%s"
-        "\n"
-        "# The Disk Data Base\n"
-        "#DDB\n"
-        "\n"
-        "ddb.virtualHWVersion = \"%d\"\n"
-        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
-        "ddb.geometry.heads = \"%" PRIu32 "\"\n"
-        "ddb.geometry.sectors = \"63\"\n"
-        "ddb.adapterType = \"%s\"\n";
-
-    ext_desc_lines = g_string_new(NULL);
-
-    if (filename_decompose(filename, path, prefix, postfix, PATH_MAX, errp)) {
-        ret = -EINVAL;
-        goto exit;
-    }
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
-        flags |= BLOCK_FLAG_COMPAT6;
-    }
-    fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
-    if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) {
-        zeroed_grain = true;
-    }
-
-    if (!adapter_type) {
-        adapter_type = g_strdup("ide");
-    } else if (strcmp(adapter_type, "ide") &&
-               strcmp(adapter_type, "buslogic") &&
-               strcmp(adapter_type, "lsilogic") &&
-               strcmp(adapter_type, "legacyESX")) {
-        error_setg(errp, "Unknown adapter type: '%s'", adapter_type);
-        ret = -EINVAL;
-        goto exit;
-    }
-    if (strcmp(adapter_type, "ide") != 0) {
-        /* that's the number of heads with which vmware operates when
-           creating, exporting, etc. vmdk files with a non-ide adapter type */
-        number_heads = 255;
-    }
-    if (!fmt) {
-        /* Default format to monolithicSparse */
-        fmt = g_strdup("monolithicSparse");
-    } else if (strcmp(fmt, "monolithicFlat") &&
-               strcmp(fmt, "monolithicSparse") &&
-               strcmp(fmt, "twoGbMaxExtentSparse") &&
-               strcmp(fmt, "twoGbMaxExtentFlat") &&
-               strcmp(fmt, "streamOptimized")) {
-        error_setg(errp, "Unknown subformat: '%s'", fmt);
-        ret = -EINVAL;
-        goto exit;
-    }
-    split = !(strcmp(fmt, "twoGbMaxExtentFlat") &&
-              strcmp(fmt, "twoGbMaxExtentSparse"));
-    flat = !(strcmp(fmt, "monolithicFlat") &&
-             strcmp(fmt, "twoGbMaxExtentFlat"));
-    compress = !strcmp(fmt, "streamOptimized");
-    if (flat) {
-        desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n";
-    } else {
-        desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n";
-    }
-    if (flat && backing_file) {
-        error_setg(errp, "Flat image can't have backing file");
-        ret = -ENOTSUP;
-        goto exit;
-    }
-    if (flat && zeroed_grain) {
-        error_setg(errp, "Flat image can't enable zeroed grain");
-        ret = -ENOTSUP;
-        goto exit;
-    }
-    if (backing_file) {
-        BlockBackend *blk;
-        char *full_backing = g_new0(char, PATH_MAX);
-        bdrv_get_full_backing_filename_from_filename(filename, backing_file,
-                                                     full_backing, PATH_MAX,
-                                                     &local_err);
-        if (local_err) {
-            g_free(full_backing);
-            error_propagate(errp, local_err);
-            ret = -ENOENT;
-            goto exit;
-        }
-
-        blk = blk_new_open(full_backing, NULL, NULL,
-                           BDRV_O_NO_BACKING, errp);
-        g_free(full_backing);
-        if (blk == NULL) {
-            ret = -EIO;
-            goto exit;
-        }
-        if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
-            blk_unref(blk);
-            ret = -EINVAL;
-            goto exit;
-        }
-        parent_cid = vmdk_read_cid(blk_bs(blk), 0);
-        blk_unref(blk);
-        snprintf(parent_desc_line, BUF_SIZE,
-                "parentFileNameHint=\"%s\"", backing_file);
-    }
-
-    /* Create extents */
-    filesize = total_size;
-    while (filesize > 0) {
-        int64_t size = filesize;
-
-        if (split && size > split_size) {
-            size = split_size;
-        }
-        if (split) {
-            snprintf(desc_filename, PATH_MAX, "%s-%c%03d%s",
-                    prefix, flat ? 'f' : 's', ++idx, postfix);
-        } else if (flat) {
-            snprintf(desc_filename, PATH_MAX, "%s-flat%s", prefix, postfix);
-        } else {
-            snprintf(desc_filename, PATH_MAX, "%s%s", prefix, postfix);
-        }
-        snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename);
-
-        if (vmdk_create_extent(ext_filename, size,
-                               flat, compress, zeroed_grain, opts, errp)) {
-            ret = -EINVAL;
-            goto exit;
-        }
-        filesize -= size;
-
-        /* Format description line */
-        snprintf(desc_line, BUF_SIZE,
-                    desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename);
-        g_string_append(ext_desc_lines, desc_line);
-    }
-    /* generate descriptor file */
-    desc = g_strdup_printf(desc_template,
-                           g_random_int(),
-                           parent_cid,
-                           fmt,
-                           parent_desc_line,
-                           ext_desc_lines->str,
-                           (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
-                           total_size /
-                               (int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
-                           number_heads,
-                           adapter_type);
-    desc_len = strlen(desc);
-    /* the descriptor offset = 0x200 */
-    if (!split && !flat) {
-        desc_offset = 0x200;
-    } else {
-        ret = bdrv_create_file(filename, opts, &local_err);
-        if (ret < 0) {
-            error_propagate(errp, local_err);
-            goto exit;
-        }
-    }
-
-    new_blk = blk_new_open(filename, NULL, NULL,
-                           BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (new_blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto exit;
-    }
-
-    blk_set_allow_write_beyond_eof(new_blk, true);
-
-    ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "Could not write description");
-        goto exit;
-    }
-    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
-     * for description file */
-    if (desc_offset == 0) {
-        ret = blk_truncate(new_blk, desc_len);
-        if (ret < 0) {
-            error_setg_errno(errp, -ret, "Could not truncate file");
-        }
-    }
-exit:
-    if (new_blk) {
-        blk_unref(new_blk);
-    }
-    g_free(adapter_type);
-    g_free(backing_file);
-    g_free(fmt);
-    g_free(desc);
-    g_free(path);
-    g_free(prefix);
-    g_free(postfix);
-    g_free(desc_line);
-    g_free(ext_filename);
-    g_free(desc_filename);
-    g_free(parent_desc_line);
-    g_string_free(ext_desc_lines, true);
-    return ret;
-}
-
-static void vmdk_close(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-
-    vmdk_free_extents(bs);
-    g_free(s->create_type);
-
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
-}
-
-static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int i, err;
-    int ret = 0;
-
-    for (i = 0; i < s->num_extents; i++) {
-        err = bdrv_co_flush(s->extents[i].file->bs);
-        if (err < 0) {
-            ret = err;
-        }
-    }
-    return ret;
-}
-
-static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
-{
-    int i;
-    int64_t ret = 0;
-    int64_t r;
-    BDRVVmdkState *s = bs->opaque;
-
-    ret = bdrv_get_allocated_file_size(bs->file->bs);
-    if (ret < 0) {
-        return ret;
-    }
-    for (i = 0; i < s->num_extents; i++) {
-        if (s->extents[i].file == bs->file) {
-            continue;
-        }
-        r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
-        if (r < 0) {
-            return r;
-        }
-        ret += r;
-    }
-    return ret;
-}
-
-static int vmdk_has_zero_init(BlockDriverState *bs)
-{
-    int i;
-    BDRVVmdkState *s = bs->opaque;
-
-    /* If has a flat extent and its underlying storage doesn't have zero init,
-     * return 0. */
-    for (i = 0; i < s->num_extents; i++) {
-        if (s->extents[i].flat) {
-            if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
-                return 0;
-            }
-        }
-    }
-    return 1;
-}
-
-static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
-{
-    ImageInfo *info = g_new0(ImageInfo, 1);
-
-    *info = (ImageInfo){
-        .filename         = g_strdup(extent->file->bs->filename),
-        .format           = g_strdup(extent->type),
-        .virtual_size     = extent->sectors * BDRV_SECTOR_SIZE,
-        .compressed       = extent->compressed,
-        .has_compressed   = extent->compressed,
-        .cluster_size     = extent->cluster_sectors * BDRV_SECTOR_SIZE,
-        .has_cluster_size = !extent->flat,
-    };
-
-    return info;
-}
-
-static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
-                      BdrvCheckMode fix)
-{
-    BDRVVmdkState *s = bs->opaque;
-    VmdkExtent *extent = NULL;
-    int64_t sector_num = 0;
-    int64_t total_sectors = bdrv_nb_sectors(bs);
-    int ret;
-    uint64_t cluster_offset;
-
-    if (fix) {
-        return -ENOTSUP;
-    }
-
-    for (;;) {
-        if (sector_num >= total_sectors) {
-            return 0;
-        }
-        extent = find_extent(s, sector_num, extent);
-        if (!extent) {
-            fprintf(stderr,
-                    "ERROR: could not find extent for sector %" PRId64 "\n",
-                    sector_num);
-            break;
-        }
-        ret = get_cluster_offset(bs, extent, NULL,
-                                 sector_num << BDRV_SECTOR_BITS,
-                                 false, &cluster_offset, 0, 0);
-        if (ret == VMDK_ERROR) {
-            fprintf(stderr,
-                    "ERROR: could not get cluster_offset for sector %"
-                    PRId64 "\n", sector_num);
-            break;
-        }
-        if (ret == VMDK_OK &&
-            cluster_offset >= bdrv_getlength(extent->file->bs))
-        {
-            fprintf(stderr,
-                    "ERROR: cluster offset for sector %"
-                    PRId64 " points after EOF\n", sector_num);
-            break;
-        }
-        sector_num += extent->cluster_sectors;
-    }
-
-    result->corruptions++;
-    return 0;
-}
-
-static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
-{
-    int i;
-    BDRVVmdkState *s = bs->opaque;
-    ImageInfoSpecific *spec_info = g_new0(ImageInfoSpecific, 1);
-    ImageInfoList **next;
-
-    *spec_info = (ImageInfoSpecific){
-        .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
-        .u = {
-            .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
-        },
-    };
-
-    *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
-        .create_type = g_strdup(s->create_type),
-        .cid = s->cid,
-        .parent_cid = s->parent_cid,
-    };
-
-    next = &spec_info->u.vmdk.data->extents;
-    for (i = 0; i < s->num_extents; i++) {
-        *next = g_new0(ImageInfoList, 1);
-        (*next)->value = vmdk_get_extent_info(&s->extents[i]);
-        (*next)->next = NULL;
-        next = &(*next)->next;
-    }
-
-    return spec_info;
-}
-
-static bool vmdk_extents_type_eq(const VmdkExtent *a, const VmdkExtent *b)
-{
-    return a->flat == b->flat &&
-           a->compressed == b->compressed &&
-           (a->flat || a->cluster_sectors == b->cluster_sectors);
-}
-
-static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    int i;
-    BDRVVmdkState *s = bs->opaque;
-    assert(s->num_extents);
-
-    /* See if we have multiple extents but they have different cases */
-    for (i = 1; i < s->num_extents; i++) {
-        if (!vmdk_extents_type_eq(&s->extents[0], &s->extents[i])) {
-            return -ENOTSUP;
-        }
-    }
-    bdi->needs_compressed_writes = s->extents[0].compressed;
-    if (!s->extents[0].flat) {
-        bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
-    }
-    return 0;
-}
-
-static void vmdk_detach_aio_context(BlockDriverState *bs)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_extents; i++) {
-        bdrv_detach_aio_context(s->extents[i].file->bs);
-    }
-}
-
-static void vmdk_attach_aio_context(BlockDriverState *bs,
-                                    AioContext *new_context)
-{
-    BDRVVmdkState *s = bs->opaque;
-    int i;
-
-    for (i = 0; i < s->num_extents; i++) {
-        bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
-    }
-}
-
-static QemuOptsList vmdk_create_opts = {
-    .name = "vmdk-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_ADAPTER_TYPE,
-            .type = QEMU_OPT_STRING,
-            .help = "Virtual adapter type, can be one of "
-                    "ide (default), lsilogic, buslogic or legacyESX"
-        },
-        {
-            .name = BLOCK_OPT_BACKING_FILE,
-            .type = QEMU_OPT_STRING,
-            .help = "File name of a base image"
-        },
-        {
-            .name = BLOCK_OPT_COMPAT6,
-            .type = QEMU_OPT_BOOL,
-            .help = "VMDK version 6 image",
-            .def_value_str = "off"
-        },
-        {
-            .name = BLOCK_OPT_SUBFMT,
-            .type = QEMU_OPT_STRING,
-            .help =
-                "VMDK flat extent format, can be one of "
-                "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
-        },
-        {
-            .name = BLOCK_OPT_ZEROED_GRAIN,
-            .type = QEMU_OPT_BOOL,
-            .help = "Enable efficient zero writes "
-                    "using the zeroed-grain GTE feature"
-        },
-        { /* end of list */ }
-    }
-};
-
-static BlockDriver bdrv_vmdk = {
-    .format_name                  = "vmdk",
-    .instance_size                = sizeof(BDRVVmdkState),
-    .bdrv_probe                   = vmdk_probe,
-    .bdrv_open                    = vmdk_open,
-    .bdrv_check                   = vmdk_check,
-    .bdrv_reopen_prepare          = vmdk_reopen_prepare,
-    .bdrv_read                    = vmdk_co_read,
-    .bdrv_write                   = vmdk_co_write,
-    .bdrv_write_compressed        = vmdk_write_compressed,
-    .bdrv_co_write_zeroes         = vmdk_co_write_zeroes,
-    .bdrv_close                   = vmdk_close,
-    .bdrv_create                  = vmdk_create,
-    .bdrv_co_flush_to_disk        = vmdk_co_flush,
-    .bdrv_co_get_block_status     = vmdk_co_get_block_status,
-    .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
-    .bdrv_has_zero_init           = vmdk_has_zero_init,
-    .bdrv_get_specific_info       = vmdk_get_specific_info,
-    .bdrv_refresh_limits          = vmdk_refresh_limits,
-    .bdrv_get_info                = vmdk_get_info,
-    .bdrv_detach_aio_context      = vmdk_detach_aio_context,
-    .bdrv_attach_aio_context      = vmdk_attach_aio_context,
-
-    .supports_backing             = true,
-    .create_opts                  = &vmdk_create_opts,
-};
-
-static void bdrv_vmdk_init(void)
-{
-    bdrv_register(&bdrv_vmdk);
-}
-
-block_init(bdrv_vmdk_init);
diff --git a/qemu/block/vpc.c b/qemu/block/vpc.c
deleted file mode 100644
index 3e2ea698d..000000000
--- a/qemu/block/vpc.c
+++ /dev/null
@@ -1,1074 +0,0 @@
-/*
- * Block driver for Connectix / Microsoft Virtual PC images
- *
- * Copyright (c) 2005 Alex Beregszaszi
- * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qapi/error.h"
-#include "qemu-common.h"
-#include "block/block_int.h"
-#include "sysemu/block-backend.h"
-#include "qemu/module.h"
-#include "migration/migration.h"
-#if defined(CONFIG_UUID)
-#include <uuid/uuid.h>
-#endif
-
-/**************************************************************/
-
-#define HEADER_SIZE 512
-
-//#define CACHE
-
-enum vhd_type {
-    VHD_FIXED           = 2,
-    VHD_DYNAMIC         = 3,
-    VHD_DIFFERENCING    = 4,
-};
-
-/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
-#define VHD_TIMESTAMP_BASE 946684800
-
-#define VHD_CHS_MAX_C   65535LL
-#define VHD_CHS_MAX_H   16
-#define VHD_CHS_MAX_S   255
-
-#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
-#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
-
-#define VPC_OPT_FORCE_SIZE "force_size"
-
-/* always big-endian */
-typedef struct vhd_footer {
-    char        creator[8]; /* "conectix" */
-    uint32_t    features;
-    uint32_t    version;
-
-    /* Offset of next header structure, 0xFFFFFFFF if none */
-    uint64_t    data_offset;
-
-    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
-    uint32_t    timestamp;
-
-    char        creator_app[4]; /*  e.g., "vpc " */
-    uint16_t    major;
-    uint16_t    minor;
-    char        creator_os[4]; /* "Wi2k" */
-
-    uint64_t    orig_size;
-    uint64_t    current_size;
-
-    uint16_t    cyls;
-    uint8_t     heads;
-    uint8_t     secs_per_cyl;
-
-    uint32_t    type;
-
-    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
-       the bytes in the footer without the checksum field") */
-    uint32_t    checksum;
-
-    /* UUID used to identify a parent hard disk (backing file) */
-    uint8_t     uuid[16];
-
-    uint8_t     in_saved_state;
-} QEMU_PACKED VHDFooter;
-
-typedef struct vhd_dyndisk_header {
-    char        magic[8]; /* "cxsparse" */
-
-    /* Offset of next header structure, 0xFFFFFFFF if none */
-    uint64_t    data_offset;
-
-    /* Offset of the Block Allocation Table (BAT) */
-    uint64_t    table_offset;
-
-    uint32_t    version;
-    uint32_t    max_table_entries; /* 32bit/entry */
-
-    /* 2 MB by default, must be a power of two */
-    uint32_t    block_size;
-
-    uint32_t    checksum;
-    uint8_t     parent_uuid[16];
-    uint32_t    parent_timestamp;
-    uint32_t    reserved;
-
-    /* Backing file name (in UTF-16) */
-    uint8_t     parent_name[512];
-
-    struct {
-        uint32_t    platform;
-        uint32_t    data_space;
-        uint32_t    data_length;
-        uint32_t    reserved;
-        uint64_t    data_offset;
-    } parent_locator[8];
-} QEMU_PACKED VHDDynDiskHeader;
-
-typedef struct BDRVVPCState {
-    CoMutex lock;
-    uint8_t footer_buf[HEADER_SIZE];
-    uint64_t free_data_block_offset;
-    int max_table_entries;
-    uint32_t *pagetable;
-    uint64_t bat_offset;
-    uint64_t last_bitmap_offset;
-
-    uint32_t block_size;
-    uint32_t bitmap_size;
-    bool force_use_chs;
-    bool force_use_sz;
-
-#ifdef CACHE
-    uint8_t *pageentry_u8;
-    uint32_t *pageentry_u32;
-    uint16_t *pageentry_u16;
-
-    uint64_t last_bitmap;
-#endif
-
-    Error *migration_blocker;
-} BDRVVPCState;
-
-#define VPC_OPT_SIZE_CALC "force_size_calc"
-static QemuOptsList vpc_runtime_opts = {
-    .name = "vpc-runtime-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
-    .desc = {
-        {
-            .name = VPC_OPT_SIZE_CALC,
-            .type = QEMU_OPT_STRING,
-            .help = "Force disk size calculation to use either CHS geometry, "
-                    "or use the disk current_size specified in the VHD footer. "
-                    "{chs, current_size}"
-        },
-        { /* end of list */ }
-    }
-};
-
-static uint32_t vpc_checksum(uint8_t* buf, size_t size)
-{
-    uint32_t res = 0;
-    int i;
-
-    for (i = 0; i < size; i++)
-        res += buf[i];
-
-    return ~res;
-}
-
-
-static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
-{
-    if (buf_size >= 8 && !strncmp((char *)buf, "conectix", 8))
-	return 100;
-    return 0;
-}
-
-static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
-                              Error **errp)
-{
-    BDRVVPCState *s = bs->opaque;
-    const char *size_calc;
-
-    size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
-
-    if (!size_calc) {
-       /* no override, use autodetect only */
-    } else if (!strcmp(size_calc, "current_size")) {
-        s->force_use_sz = true;
-    } else if (!strcmp(size_calc, "chs")) {
-        s->force_use_chs = true;
-    } else {
-        error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
-    }
-}
-
-static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
-                    Error **errp)
-{
-    BDRVVPCState *s = bs->opaque;
-    int i;
-    VHDFooter *footer;
-    VHDDynDiskHeader *dyndisk_header;
-    QemuOpts *opts = NULL;
-    Error *local_err = NULL;
-    bool use_chs;
-    uint8_t buf[HEADER_SIZE];
-    uint32_t checksum;
-    uint64_t computed_size;
-    uint64_t pagetable_size;
-    int disk_type = VHD_DYNAMIC;
-    int ret;
-
-    opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    vpc_parse_options(bs, opts, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
-    if (ret < 0) {
-        error_setg(errp, "Unable to read VHD header");
-        goto fail;
-    }
-
-    footer = (VHDFooter *) s->footer_buf;
-    if (strncmp(footer->creator, "conectix", 8)) {
-        int64_t offset = bdrv_getlength(bs->file->bs);
-        if (offset < 0) {
-            ret = offset;
-            error_setg(errp, "Invalid file size");
-            goto fail;
-        } else if (offset < HEADER_SIZE) {
-            ret = -EINVAL;
-            error_setg(errp, "File too small for a VHD header");
-            goto fail;
-        }
-
-        /* If a fixed disk, the footer is found only at the end of the file */
-        ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
-                         HEADER_SIZE);
-        if (ret < 0) {
-            goto fail;
-        }
-        if (strncmp(footer->creator, "conectix", 8)) {
-            error_setg(errp, "invalid VPC image");
-            ret = -EINVAL;
-            goto fail;
-        }
-        disk_type = VHD_FIXED;
-    }
-
-    checksum = be32_to_cpu(footer->checksum);
-    footer->checksum = 0;
-    if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
-        fprintf(stderr, "block-vpc: The header checksum of '%s' is "
-            "incorrect.\n", bs->filename);
-
-    /* Write 'checksum' back to footer, or else will leave it with zero. */
-    footer->checksum = cpu_to_be32(checksum);
-
-    /* The visible size of a image in Virtual PC depends on the geometry
-       rather than on the size stored in the footer (the size in the footer
-       is too large usually) */
-    bs->total_sectors = (int64_t)
-        be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
-
-    /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
-     * VHD image sizes differently.  VPC will rely on CHS geometry,
-     * while Hyper-V and disk2vhd use the size specified in the footer.
-     *
-     * We use a couple of approaches to try and determine the correct method:
-     * look at the Creator App field, and look for images that have CHS
-     * geometry that is the maximum value.
-     *
-     * If the CHS geometry is the maximum CHS geometry, then we assume that
-     * the size is the footer->current_size to avoid truncation.  Otherwise,
-     * we follow the table based on footer->creator_app:
-     *
-     *  Known creator apps:
-     *      'vpc '  :  CHS              Virtual PC (uses disk geometry)
-     *      'qemu'  :  CHS              QEMU (uses disk geometry)
-     *      'qem2'  :  current_size     QEMU (uses current_size)
-     *      'win '  :  current_size     Hyper-V
-     *      'd2v '  :  current_size     Disk2vhd
-     *      'tap\0' :  current_size     XenServer
-     *      'CTXS'  :  current_size     XenConverter
-     *
-     *  The user can override the table values via drive options, however
-     *  even with an override we will still use current_size for images
-     *  that have CHS geometry of the maximum size.
-     */
-    use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
-               !!strncmp(footer->creator_app, "qem2", 4) &&
-               !!strncmp(footer->creator_app, "d2v ", 4) &&
-               !!strncmp(footer->creator_app, "CTXS", 4) &&
-               !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
-
-    if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
-        bs->total_sectors = be64_to_cpu(footer->current_size) /
-                                        BDRV_SECTOR_SIZE;
-    }
-
-    /* Allow a maximum disk size of 2040 GiB */
-    if (bs->total_sectors > VHD_MAX_SECTORS) {
-        ret = -EFBIG;
-        goto fail;
-    }
-
-    if (disk_type == VHD_DYNAMIC) {
-        ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
-                         HEADER_SIZE);
-        if (ret < 0) {
-            error_setg(errp, "Error reading dynamic VHD header");
-            goto fail;
-        }
-
-        dyndisk_header = (VHDDynDiskHeader *) buf;
-
-        if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
-            error_setg(errp, "Invalid header magic");
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        s->block_size = be32_to_cpu(dyndisk_header->block_size);
-        if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
-            error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
-            ret = -EINVAL;
-            goto fail;
-        }
-        s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
-
-        s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
-
-        if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
-            error_setg(errp, "Too many blocks");
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        computed_size = (uint64_t) s->max_table_entries * s->block_size;
-        if (computed_size < bs->total_sectors * 512) {
-            error_setg(errp, "Page table too small");
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        if (s->max_table_entries > SIZE_MAX / 4 ||
-            s->max_table_entries > (int) INT_MAX / 4) {
-            error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
-                        s->max_table_entries);
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        pagetable_size = (uint64_t) s->max_table_entries * 4;
-
-        s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
-        if (s->pagetable == NULL) {
-            error_setg(errp, "Unable to allocate memory for page table");
-            ret = -ENOMEM;
-            goto fail;
-        }
-
-        s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
-
-        ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
-                         pagetable_size);
-        if (ret < 0) {
-            error_setg(errp, "Error reading pagetable");
-            goto fail;
-        }
-
-        s->free_data_block_offset =
-            ROUND_UP(s->bat_offset + pagetable_size, 512);
-
-        for (i = 0; i < s->max_table_entries; i++) {
-            be32_to_cpus(&s->pagetable[i]);
-            if (s->pagetable[i] != 0xFFFFFFFF) {
-                int64_t next = (512 * (int64_t) s->pagetable[i]) +
-                    s->bitmap_size + s->block_size;
-
-                if (next > s->free_data_block_offset) {
-                    s->free_data_block_offset = next;
-                }
-            }
-        }
-
-        if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
-            error_setg(errp, "block-vpc: free_data_block_offset points after "
-                             "the end of file. The image has been truncated.");
-            ret = -EINVAL;
-            goto fail;
-        }
-
-        s->last_bitmap_offset = (int64_t) -1;
-
-#ifdef CACHE
-        s->pageentry_u8 = g_malloc(512);
-        s->pageentry_u32 = s->pageentry_u8;
-        s->pageentry_u16 = s->pageentry_u8;
-        s->last_pagetable = -1;
-#endif
-    }
-
-    qemu_co_mutex_init(&s->lock);
-
-    /* Disable migration when VHD images are used */
-    error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
-               "does not support live migration",
-               bdrv_get_device_or_node_name(bs));
-    migrate_add_blocker(s->migration_blocker);
-
-    return 0;
-
-fail:
-    qemu_vfree(s->pagetable);
-#ifdef CACHE
-    g_free(s->pageentry_u8);
-#endif
-    return ret;
-}
-
-static int vpc_reopen_prepare(BDRVReopenState *state,
-                              BlockReopenQueue *queue, Error **errp)
-{
-    return 0;
-}
-
-/*
- * Translate a guest sector number into an absolute byte offset inside the
- * image file.
- *
- * Returns -1 if the block containing the sector has no page table entry
- * (index past max_table_entries, or entry equal to 0xffffffff).
- *
- * If write is non-zero and the block's bitmap is not the one that was
- * written last, the complete bitmap of that block is rewritten with all
- * bits set before the offset is returned. The result of that bitmap write
- * is not checked.
- */
-static inline int64_t get_sector_offset(BlockDriverState *bs,
-    int64_t sector_num, int write)
-{
-    BDRVVPCState *s = bs->opaque;
-    uint64_t byte_pos = sector_num * 512;
-    uint32_t bat_index = byte_pos / s->block_size;
-    uint32_t sector_in_block = (byte_pos % s->block_size) / 512;
-    uint64_t bitmap_start, data_pos;
-
-    if (bat_index >= s->max_table_entries) {
-        return -1; /* beyond the table: not allocated */
-    }
-    if (s->pagetable[bat_index] == 0xffffffff) {
-        return -1; /* not allocated */
-    }
-
-    /* A block is laid out as <bitmap><data>; the table stores its sector. */
-    bitmap_start = 512 * (uint64_t) s->pagetable[bat_index];
-    data_pos = bitmap_start + s->bitmap_size + (512 * sector_in_block);
-
-    /* Sectors flagged as unused in the bitmap must never be written to.
-       Rather than tracking single bits, every bit of the block bitmap is
-       set whenever a different block is written. Virtual PC may lose its
-       sparse read optimization this way, but the image stays correct. */
-    if (write && (s->last_bitmap_offset != bitmap_start)) {
-        uint8_t all_used[s->bitmap_size];
-
-        s->last_bitmap_offset = bitmap_start;
-        memset(all_used, 0xff, s->bitmap_size);
-        bdrv_pwrite_sync(bs->file->bs, bitmap_start, all_used,
-                         s->bitmap_size);
-    }
-
-    return data_pos;
-}
-
-/*
- * Store the cached footer (s->footer_buf, HEADER_SIZE bytes) at the current
- * free_data_block_offset. When the file grows the previous footer gets
- * overwritten by data, so it has to be written again behind it.
- *
- * Returns 0 on success and the negative error of the write otherwise.
- */
-static int rewrite_footer(BlockDriverState* bs)
-{
-    BDRVVPCState *s = bs->opaque;
-    int64_t footer_pos = s->free_data_block_offset;
-    int status;
-
-    status = bdrv_pwrite_sync(bs->file->bs, footer_pos, s->footer_buf,
-                              HEADER_SIZE);
-
-    return status < 0 ? status : 0;
-}
-
-/*
- * Allocates a new block. This involves writing a new footer and updating
- * the Block Allocation Table to use the space at the old end of the image
- * file (overwriting the old footer).
- *
- * On any failure the in-memory state (BAT entry and free_data_block_offset)
- * is restored, so that a later allocation cannot hand out the same file
- * offset for two different blocks.
- *
- * Returns the sectors' offset in the image file on success and < 0 on error
- */
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
-{
-    BDRVVPCState *s = bs->opaque;
-    int64_t bat_offset;
-    uint32_t index, bat_value;
-    int ret;
-    uint8_t *bitmap;
-
-    /* Check if sector_num is valid */
-    if ((sector_num < 0) || (sector_num > bs->total_sectors)) {
-        return -1;
-    }
-
-    /* The block must be covered by the BAT and must still be unallocated */
-    index = (sector_num * 512) / s->block_size;
-    if (index >= s->max_table_entries) {
-        return -1;
-    }
-    if (s->pagetable[index] != 0xFFFFFFFF) {
-        return -1;
-    }
-
-    /* Write entry into in-memory BAT */
-    s->pagetable[index] = s->free_data_block_offset / 512;
-
-    /* Initialize the block's bitmap. bitmap_size comes from the image, so
-     * the buffer lives on the heap instead of in a variable length array. */
-    bitmap = g_malloc(s->bitmap_size);
-    memset(bitmap, 0xff, s->bitmap_size);
-    ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
-        s->bitmap_size);
-    g_free(bitmap);
-    if (ret < 0) {
-        s->pagetable[index] = 0xFFFFFFFF;
-        return ret;
-    }
-
-    /* Write new footer (the old one will be overwritten) */
-    s->free_data_block_offset += s->block_size + s->bitmap_size;
-    ret = rewrite_footer(bs);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Write BAT entry to disk */
-    bat_offset = s->bat_offset + (4 * index);
-    bat_value = cpu_to_be32(s->pagetable[index]);
-    ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    return get_sector_offset(bs, sector_num, 0);
-
-fail:
-    s->free_data_block_offset -= (s->block_size + s->bitmap_size);
-    s->pagetable[index] = 0xFFFFFFFF;
-    return -1;
-}
-
-/*
- * Fill in the driver info: unallocated blocks always read as zero, and for
- * every image type except VHD_FIXED the cluster size is the block size.
- * Always returns 0.
- */
-static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
-{
-    BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
-    VHDFooter *footer = (VHDFooter *) s->footer_buf;
-    bool is_fixed = (be32_to_cpu(footer->type) == VHD_FIXED);
-
-    if (!is_fixed) {
-        bdi->cluster_size = s->block_size;
-    }
-    bdi->unallocated_blocks_are_zero = true;
-
-    return 0;
-}
-
-/*
- * Read nb_sectors sectors starting at sector_num into buf.
- *
- * Fixed images are passed straight to the underlying file. For all other
- * images the request is split at block boundaries; parts that fall into an
- * unallocated block are returned as zeroes.
- *
- * Returns 0 on success, -1 on a failed or short read (or whatever
- * bdrv_read() returns for fixed images).
- */
-static int vpc_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer = (VHDFooter *) s->footer_buf;
-
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
-    }
-
-    while (nb_sectors > 0) {
-        int64_t image_pos = get_sector_offset(bs, sector_num, 0);
-        int64_t per_block = s->block_size >> BDRV_SECTOR_BITS;
-        /* Never cross into the next block within one iteration */
-        int64_t chunk = per_block - (sector_num % per_block);
-
-        if (chunk > nb_sectors) {
-            chunk = nb_sectors;
-        }
-
-        if (image_pos != -1) {
-            int got = bdrv_pread(bs->file->bs, image_pos, buf,
-                                 chunk * BDRV_SECTOR_SIZE);
-            if (got != chunk * BDRV_SECTOR_SIZE) {
-                return -1;
-            }
-        } else {
-            memset(buf, 0, chunk * BDRV_SECTOR_SIZE);
-        }
-
-        buf += chunk * BDRV_SECTOR_SIZE;
-        sector_num += chunk;
-        nb_sectors -= chunk;
-    }
-
-    return 0;
-}
-
-/*
- * Coroutine entry point for reads: runs vpc_read() while holding s->lock
- * and passes its return value through.
- */
-static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
-                                    uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVVPCState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = vpc_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-/*
- * Write nb_sectors sectors from buf, starting at sector_num.
- *
- * Fixed images are passed straight to the underlying file. For all other
- * images the request is split at block boundaries and blocks that are not
- * allocated yet are allocated on the fly.
- *
- * Returns 0 on success, -1 if an allocation or a write fails (or whatever
- * bdrv_write() returns for fixed images).
- */
-static int vpc_write(BlockDriverState *bs, int64_t sector_num,
-    const uint8_t *buf, int nb_sectors)
-{
-    BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
-
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
-    }
-
-    while (nb_sectors > 0) {
-        /* write == 1: also marks the whole block as used in its bitmap */
-        int64_t image_pos = get_sector_offset(bs, sector_num, 1);
-        int64_t per_block = s->block_size >> BDRV_SECTOR_BITS;
-        int64_t chunk = per_block - (sector_num % per_block);
-        int written;
-
-        if (chunk > nb_sectors) {
-            chunk = nb_sectors;
-        }
-
-        if (image_pos == -1) {
-            image_pos = alloc_block(bs, sector_num);
-            if (image_pos < 0) {
-                return -1;
-            }
-        }
-
-        written = bdrv_pwrite(bs->file->bs, image_pos, buf,
-                              chunk * BDRV_SECTOR_SIZE);
-        if (written != chunk * BDRV_SECTOR_SIZE) {
-            return -1;
-        }
-
-        buf += chunk * BDRV_SECTOR_SIZE;
-        sector_num += chunk;
-        nb_sectors -= chunk;
-    }
-
-    return 0;
-}
-
-/*
- * Coroutine entry point for writes: runs vpc_write() while holding s->lock
- * and passes its return value through.
- */
-static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
-                                     const uint8_t *buf, int nb_sectors)
-{
-    int ret;
-    BDRVVPCState *s = bs->opaque;
-    qemu_co_mutex_lock(&s->lock);
-    ret = vpc_write(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-    return ret;
-}
-
-/*
- * Report the allocation status of the sectors starting at sector_num.
- *
- * Fixed images: the whole request is reported as raw data located at the
- * same offset in bs->file.
- *
- * Other images: if the first sector is allocated, *pnum covers the rest of
- * its block (capped at nb_sectors) and the return value carries the data
- * and offset-valid flags ORed with the file offset of the first sector.
- * If it is unallocated, *pnum is extended block by block for as long as the
- * following blocks are unallocated too, and 0 is returned.
- */
-static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
-        int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
-{
-    BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer = (VHDFooter*) s->footer_buf;
-    int64_t start, offset;
-    bool allocated;
-    int n;
-
-    if (be32_to_cpu(footer->type) == VHD_FIXED) {
-        *pnum = nb_sectors;
-        *file = bs->file->bs;
-        return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
-               (sector_num << BDRV_SECTOR_BITS);
-    }
-
-    /* The status of the first sector decides what kind of run is reported */
-    offset = get_sector_offset(bs, sector_num, 0);
-    start = offset;
-    allocated = (offset != -1);
-    *pnum = 0;
-
-    do {
-        /* All sectors in a block are contiguous (without using the bitmap) */
-        n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
-          - sector_num;
-        n = MIN(n, nb_sectors);
-
-        *pnum += n;
-        sector_num += n;
-        nb_sectors -= n;
-        /* *pnum can't be greater than one block for allocated
-         * sectors since there is always a bitmap in between. */
-        if (allocated) {
-            *file = bs->file->bs;
-            return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-        }
-        if (nb_sectors == 0) {
-            break;
-        }
-        /* Keep extending the unallocated run while the next block is a hole */
-        offset = get_sector_offset(bs, sector_num, 0);
-    } while (offset == -1);
-
-    return 0;
-}
-
-/*
- * Calculates the number of cylinders, heads and sectors per cylinder
- * based on a given number of sectors. This is the algorithm described
- * in the VHD specification.
- *
- * Note that the geometry doesn't always exactly match total_sectors but
- * may round it down.
- *
- * total_sectors is first clamped to VHD_MAX_GEOMETRY, so oversized requests
- * are not rejected here. Sizes of at least 65535 * 16 * 63 sectors get
- * 16 heads and 255 sectors per cylinder; smaller sizes try 17, then 31 and
- * finally 63 sectors per cylinder with between 4 and 16 heads.
- *
- * Always returns 0.
- */
-static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
-    uint8_t* heads, uint8_t* secs_per_cyl)
-{
-    uint32_t cyls_times_heads;
-
-    total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
-
-    if (total_sectors >= 65535LL * 16 * 63) {
-        *secs_per_cyl = 255;
-        *heads = 16;
-        cyls_times_heads = total_sectors / *secs_per_cyl;
-    } else {
-        *secs_per_cyl = 17;
-        cyls_times_heads = total_sectors / *secs_per_cyl;
-        *heads = (cyls_times_heads + 1023) / 1024;
-
-        if (*heads < 4) {
-            *heads = 4;
-        }
-
-        /* Too many cylinders for 17 sectors: retry with 31 */
-        if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
-            *secs_per_cyl = 31;
-            *heads = 16;
-            cyls_times_heads = total_sectors / *secs_per_cyl;
-        }
-
-        /* Still too many cylinders: retry with 63 */
-        if (cyls_times_heads >= (*heads * 1024)) {
-            *secs_per_cyl = 63;
-            *heads = 16;
-            cyls_times_heads = total_sectors / *secs_per_cyl;
-        }
-    }
-
-    *cyls = cyls_times_heads / *heads;
-
-    return 0;
-}
-
-/*
- * Write the metadata of an empty dynamic image to blk.
- *
- * On entry buf holds the prepared footer (HEADER_SIZE bytes). buf is reused
- * as scratch space afterwards and has to provide at least 1024 bytes.
- *
- * Layout written:
- *   offset 0                 copy of the footer
- *   offset 512               dynamic disk header (1024 bytes)
- *   offset 3 * 512           BAT, every byte 0xFF, padded to 512 bytes
- *   directly behind the BAT  footer
- *
- * Returns a negative value if a write failed, otherwise the return value
- * of the last blk_pwrite() call.
- */
-static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
-                               int64_t total_sectors)
-{
-    VHDDynDiskHeader *dyndisk_header =
-        (VHDDynDiskHeader *) buf;
-    size_t block_size, num_bat_entries;
-    int i;
-    int ret;
-    int64_t offset = 0;
-
-    /* Write the footer (twice: at the beginning and at the end) */
-    block_size = 0x200000;
-    num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
-
-    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* 1536 = footer copy (512) + dynamic disk header (1024) */
-    offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
-    ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Write the initial BAT */
-    offset = 3 * 512;
-
-    memset(buf, 0xFF, 512);
-    for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
-        ret = blk_pwrite(blk, offset, buf, 512);
-        if (ret < 0) {
-            goto fail;
-        }
-        offset += 512;
-    }
-
-    /* Prepare the Dynamic Disk Header */
-    memset(buf, 0, 1024);
-
-    memcpy(dyndisk_header->magic, "cxsparse", 8);
-
-    /*
-     * Note: The spec is actually wrong here for data_offset, it says
-     * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
-     */
-    dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
-    dyndisk_header->table_offset = cpu_to_be64(3 * 512);
-    dyndisk_header->version = cpu_to_be32(0x00010000);
-    dyndisk_header->block_size = cpu_to_be32(block_size);
-    dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
-
-    dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
-
-    /* Write the header */
-    offset = 512;
-
-    ret = blk_pwrite(blk, offset, buf, 1024);
-    if (ret < 0) {
-        goto fail;
-    }
-
-    /* Success falls through: the label is the common exit */
- fail:
-    return ret;
-}
-
-/*
- * Create a fixed image: resize the file to the payload size plus one
- * footer and store the footer from buf (HEADER_SIZE bytes) directly behind
- * the payload.
- *
- * Returns a negative value on failure, otherwise the return value of the
- * footer write.
- */
-static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
-                             int64_t total_size)
-{
-    int64_t file_size = total_size + HEADER_SIZE; /* payload + footer */
-    int status;
-
-    status = blk_truncate(blk, file_size);
-    if (status < 0) {
-        return status;
-    }
-
-    status = blk_pwrite(blk, file_size - HEADER_SIZE, buf, HEADER_SIZE);
-    return status;
-}
-
-/*
- * Create a new VHD image called filename.
- *
- * Options read from opts: the size (rounded up to whole sectors), the
- * subformat ("dynamic", which is the default, or "fixed") and the
- * force-size flag. The protocol level file is created and opened, the
- * geometry and the footer are computed, and the metadata is written by
- * create_dynamic_disk() or create_fixed_disk().
- *
- * Returns a negative errno value and sets errp on failure. On success the
- * return value of the disk creation helper is passed through.
- */
-static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
-{
-    uint8_t buf[1024];
-    VHDFooter *footer = (VHDFooter *) buf;
-    char *disk_type_param;
-    int i;
-    uint16_t cyls = 0;
-    uint8_t heads = 0;
-    uint8_t secs_per_cyl = 0;
-    int64_t total_sectors;
-    int64_t total_size;
-    int disk_type;
-    int ret = -EIO;
-    bool force_size;
-    Error *local_err = NULL;
-    BlockBackend *blk = NULL;
-
-    /* Read out options */
-    total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                          BDRV_SECTOR_SIZE);
-    disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
-    if (disk_type_param) {
-        if (!strcmp(disk_type_param, "dynamic")) {
-            disk_type = VHD_DYNAMIC;
-        } else if (!strcmp(disk_type_param, "fixed")) {
-            disk_type = VHD_FIXED;
-        } else {
-            error_setg(errp, "Invalid disk type, %s", disk_type_param);
-            ret = -EINVAL;
-            goto out;
-        }
-    } else {
-        disk_type = VHD_DYNAMIC;
-    }
-
-    force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
-
-    ret = bdrv_create_file(filename, opts, &local_err);
-    if (ret < 0) {
-        error_propagate(errp, local_err);
-        goto out;
-    }
-
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
-    if (blk == NULL) {
-        error_propagate(errp, local_err);
-        ret = -EIO;
-        goto out;
-    }
-
-    /* The file was just created; the writes below extend it */
-    blk_set_allow_write_beyond_eof(blk, true);
-
-    /*
-     * Calculate matching total_size and geometry. Increase the number of
-     * sectors requested until we get enough (or fail). This ensures that
-     * qemu-img convert doesn't truncate images, but rather rounds up.
-     *
-     * If the image size can't be represented by a spec conformant CHS geometry,
-     * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
-     * the image size from the VHD footer to calculate total_sectors.
-     */
-    if (force_size) {
-        /* This will force the use of total_size for sector count, below */
-        cyls         = VHD_CHS_MAX_C;
-        heads        = VHD_CHS_MAX_H;
-        secs_per_cyl = VHD_CHS_MAX_S;
-    } else {
-        total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
-        for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
-            calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
-        }
-    }
-
-    if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
-        /* Maximum geometry: the size is taken from total_size instead */
-        total_sectors = total_size / BDRV_SECTOR_SIZE;
-        /* Allow a maximum disk size of 2040 GiB */
-        if (total_sectors > VHD_MAX_SECTORS) {
-            error_setg(errp, "Disk size is too large, max size is 2040 GiB");
-            ret = -EFBIG;
-            goto out;
-        }
-    } else {
-        /* Otherwise the size is whatever the CHS geometry describes */
-        total_sectors = (int64_t)cyls * heads * secs_per_cyl;
-        total_size = total_sectors * BDRV_SECTOR_SIZE;
-    }
-
-    /* Prepare the Hard Disk Footer */
-    memset(buf, 0, 1024);
-
-    memcpy(footer->creator, "conectix", 8);
-    /* A different creator_app value is stored when force_size was used */
-    if (force_size) {
-        memcpy(footer->creator_app, "qem2", 4);
-    } else {
-        memcpy(footer->creator_app, "qemu", 4);
-    }
-    memcpy(footer->creator_os, "Wi2k", 4);
-
-    footer->features = cpu_to_be32(0x02);
-    footer->version = cpu_to_be32(0x00010000);
-    if (disk_type == VHD_DYNAMIC) {
-        footer->data_offset = cpu_to_be64(HEADER_SIZE);
-    } else {
-        footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
-    }
-    footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
-
-    /* Version of Virtual PC 2007 */
-    footer->major = cpu_to_be16(0x0005);
-    footer->minor = cpu_to_be16(0x0003);
-    footer->orig_size = cpu_to_be64(total_size);
-    footer->current_size = cpu_to_be64(total_size);
-    footer->cyls = cpu_to_be16(cyls);
-    footer->heads = heads;
-    footer->secs_per_cyl = secs_per_cyl;
-
-    footer->type = cpu_to_be32(disk_type);
-
-#if defined(CONFIG_UUID)
-    uuid_generate(footer->uuid);
-#endif
-
-    /* Computed last, after every other footer field has been filled in */
-    footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
-
-    if (disk_type == VHD_DYNAMIC) {
-        ret = create_dynamic_disk(blk, buf, total_sectors);
-    } else {
-        ret = create_fixed_disk(blk, buf, total_size);
-    }
-    if (ret < 0) {
-        error_setg(errp, "Unable to create or write VHD header");
-    }
-
-out:
-    /* blk is still NULL when an early error path jumps here */
-    blk_unref(blk);
-    g_free(disk_type_param);
-    return ret;
-}
-
-/*
- * Tell whether a freshly created image reads back as zeroes. Fixed images
- * defer to the underlying file; every other type answers 1.
- */
-static int vpc_has_zero_init(BlockDriverState *bs)
-{
-    BDRVVPCState *s = bs->opaque;
-    VHDFooter *footer =  (VHDFooter *) s->footer_buf;
-
-    if (be32_to_cpu(footer->type) != VHD_FIXED) {
-        return 1;
-    }
-
-    return bdrv_has_zero_init(bs->file->bs);
-}
-
-/*
- * Release the per-image state: the page table, the page entry cache (only
- * when built with CACHE) and the migration blocker.
- */
-static void vpc_close(BlockDriverState *bs)
-{
-    BDRVVPCState *state = bs->opaque;
-
-    qemu_vfree(state->pagetable);
-#ifdef CACHE
-    g_free(state->pageentry_u8);
-#endif
-
-    /* Unregister the blocker before freeing the error object behind it */
-    migrate_del_blocker(state->migration_blocker);
-    error_free(state->migration_blocker);
-}
-
-/*
- * Options accepted at image creation time: the virtual disk size, the
- * subformat (dynamic or fixed) and the force-size flag.
- */
-static QemuOptsList vpc_create_opts = {
-    .name = "vpc-create-opts",
-    .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
-    .desc = {
-        {
-            .name = BLOCK_OPT_SIZE,
-            .type = QEMU_OPT_SIZE,
-            .help = "Virtual disk size"
-        },
-        {
-            .name = BLOCK_OPT_SUBFMT,
-            .type = QEMU_OPT_STRING,
-            .help =
-                "Type of virtual hard disk format. Supported formats are "
-                "{dynamic (default) | fixed} "
-        },
-        {
-            .name = VPC_OPT_FORCE_SIZE,
-            .type = QEMU_OPT_BOOL,
-            .help = "Force disk size calculation to use the actual size "
-                    "specified, rather than using the nearest CHS-based "
-                    "calculation"
-        },
-        { /* end of list */ }
-    }
-};
-
-/* Driver description of the "vpc" format: wires up the callbacks above. */
-static BlockDriver bdrv_vpc = {
-    .format_name    = "vpc",
-    .instance_size  = sizeof(BDRVVPCState),
-
-    .bdrv_probe             = vpc_probe,
-    .bdrv_open              = vpc_open,
-    .bdrv_close             = vpc_close,
-    .bdrv_reopen_prepare    = vpc_reopen_prepare,
-    .bdrv_create            = vpc_create,
-
-    /* Sector I/O goes through the wrappers that take s->lock */
-    .bdrv_read                  = vpc_co_read,
-    .bdrv_write                 = vpc_co_write,
-    .bdrv_co_get_block_status   = vpc_co_get_block_status,
-
-    .bdrv_get_info          = vpc_get_info,
-
-    .create_opts            = &vpc_create_opts,
-    .bdrv_has_zero_init     = vpc_has_zero_init,
-};
-
-/* Registers the vpc driver; hooked in through block_init() below. */
-static void bdrv_vpc_init(void)
-{
-    bdrv_register(&bdrv_vpc);
-}
-
-block_init(bdrv_vpc_init);
diff --git a/qemu/block/vvfat.c b/qemu/block/vvfat.c
deleted file mode 100644
index 183fc4f04..000000000
--- a/qemu/block/vvfat.c
+++ /dev/null
@@ -1,3050 +0,0 @@
-/* vim:set shiftwidth=4 ts=4: */
-/*
- * QEMU Block driver for virtual VFAT (shadows a local directory)
- *
- * Copyright (c) 2004,2005 Johannes E. Schindelin
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include <dirent.h>
-#include "qapi/error.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include "migration/migration.h"
-#include "qapi/qmp/qint.h"
-#include "qapi/qmp/qbool.h"
-#include "qapi/qmp/qstring.h"
-#include "qemu/cutils.h"
-
-#ifndef S_IWGRP
-#define S_IWGRP 0
-#endif
-#ifndef S_IWOTH
-#define S_IWOTH 0
-#endif
-
-/* TODO: add ":bootsector=blabla.img:" */
-/* LATER TODO: add automatic boot sector generation from
-    BOOTEASY.ASM and Ranish Partition Manager
-    Note that DOS assumes the system files to be the first files in the
-    file system (test if the boot sector still relies on that fact)! */
-/* MAYBE TODO: write block-visofs.c */
-/* TODO: call try_commit() only after a timeout */
-
-/* #define DEBUG */
-
-#ifdef DEBUG
-
-#define DLOG(a) a
-
-static void checkpoint(void);
-
-#ifdef __MINGW32__
-/*
- * Failure handler behind the assert() replacement defined below (DEBUG
- * builds on MinGW only): prints file, line and message to stderr and exits
- * with status -5.
- */
-void nonono(const char* file, int line, const char* msg) {
-    fprintf(stderr, "Nonono! %s:%d %s\n", file, line, msg);
-    exit(-5);
-}
-#undef assert
-#define assert(a) do {if (!(a)) nonono(__FILE__, __LINE__, #a);}while(0)
-#endif
-
-#else
-
-#define DLOG(a)
-
-#endif
-
-/* dynamic array functions */
-/*
- * Growable array of fixed-size items:
- *   pointer    storage (NULL while nothing is allocated)
- *   size       allocated size of the storage in bytes
- *   next       number of items in use, i.e. index of the next free item
- *   item_size  size of one item in bytes
- */
-typedef struct array_t {
-    char* pointer;
-    unsigned int size,next,item_size;
-} array_t;
-
-/* Set up an empty array whose items are item_size bytes each. */
-static inline void array_init(array_t* array,unsigned int item_size)
-{
-    array->item_size = item_size;
-    array->next = 0;
-    array->size = 0;
-    array->pointer = NULL;
-}
-
-/*
- * Release the storage of the array and leave it empty.
- *
- * The pointer is reset as well: with size == 0 the next call to
- * array_ensure_allocated() passes array->pointer to g_realloc(), which
- * must not see a pointer that has already been freed.
- */
-static inline void array_free(array_t* array)
-{
-    g_free(array->pointer);
-    array->pointer = NULL;
-    array->size=array->next=0;
-}
-
-/*
- * Return the address of item number index. The array is never grown here;
- * index has to refer to an item that is already in use.
- */
-static inline void* array_get(array_t* array,unsigned int index) {
-    char *base;
-
-    assert(index < array->next);
-    base = array->pointer;
-    return base + index * array->item_size;
-}
-
-/*
- * Make sure the storage is large enough to hold item number index.
- *
- * When the storage has to grow, room for 31 further items is added and
- * next is set to index + 1. Nothing changes when the item already fits.
- *
- * Returns 0 on success and -1 if g_realloc() returned NULL.
- */
-static inline int array_ensure_allocated(array_t* array, int index)
-{
-    int new_size;
-
-    if ((index + 1) * array->item_size <= array->size) {
-        return 0;
-    }
-
-    new_size = (index + 32) * array->item_size;
-    array->pointer = g_realloc(array->pointer, new_size);
-    if (!array->pointer) {
-        return -1;
-    }
-
-    array->size = new_size;
-    array->next = index + 1;
-    return 0;
-}
-
-/*
- * Append one item to the array, growing the storage if necessary, and
- * return its address. The contents of the new item are not initialized.
- * Returns NULL if the storage could not be grown.
- */
-static inline void* array_get_next(array_t* array) {
-    unsigned int slot = array->next;
-
-    if (array_ensure_allocated(array, slot) < 0) {
-        return NULL;
-    }
-
-    array->next = slot + 1;
-    return array_get(array, slot);
-}
-
-/*
- * Open a gap of count items in front of item number index by moving the
- * items from index onwards up; the storage grows by exactly count items
- * when it is too small. Returns the address of the first item of the gap
- * (its contents are whatever was there before), or NULL if g_realloc()
- * returned NULL.
- */
-static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) {
-    char *gap;
-
-    if ((array->next + count) * array->item_size > array->size) {
-        int increment = count * array->item_size;
-
-        array->pointer = g_realloc(array->pointer, array->size + increment);
-        if (!array->pointer) {
-            return NULL;
-        }
-        array->size += increment;
-    }
-
-    gap = array->pointer + index * array->item_size;
-    memmove(gap + count * array->item_size, gap,
-            (array->next - index) * array->item_size);
-    array->next += count;
-
-    return gap;
-}
-
-/*
- * "Roll" count items: the run that starts at index_from is moved so that it
- * starts at index_to, and the items in between slide over by count to make
- * room. The relative order of all other items is preserved.
- *
- * Only the two start indices are validated against array->next.
- * NOTE(review): for count > 1 the moved run may reach beyond array->next;
- * callers have to make sure the storage covers it.
- *
- * Returns 0 on success and -1 on invalid arguments.
- */
-static inline int array_roll(array_t* array,int index_to,int index_from,int count)
-{
-    char *saved;
-    char *src;
-    char *dst;
-    int item;
-
-    if (!array) {
-        return -1;
-    }
-    if (index_to < 0 || index_to >= array->next) {
-        return -1;
-    }
-    if (index_from < 0 || index_from >= array->next) {
-        return -1;
-    }
-    if (index_to == index_from) {
-        return 0;
-    }
-
-    item = array->item_size;
-    src = array->pointer + index_from * item;
-    dst = array->pointer + index_to * item;
-
-    /* Park the run, slide the items in between, then drop the run in */
-    saved = g_malloc(item * count);
-    memcpy(saved, src, item * count);
-
-    if (index_to < index_from) {
-        memmove(dst + item * count, dst, src - dst);
-    } else {
-        memmove(src, src + item * count, dst - src);
-    }
-
-    memcpy(dst, saved, item * count);
-    g_free(saved);
-
-    return 0;
-}
-
-/*
- * Remove count items starting at index; the items behind them move down
- * to close the gap.
- *
- * The tail is moved directly. Rolling the slice to the end of the array
- * instead would, for count > 1, read and write up to count - 1 items
- * beyond array->next, which is not guaranteed to be allocated.
- *
- * Always returns 0; invalid arguments trip the assertions.
- */
-static inline int array_remove_slice(array_t* array,int index, int count)
-{
-    assert(index >=0);
-    assert(count > 0);
-    assert(index + count <= array->next);
-
-    memmove(array->pointer + index * array->item_size,
-            array->pointer + (index + count) * array->item_size,
-            (array->next - index - count) * array->item_size);
-    array->next -= count;
-
-    return 0;
-}
-
-/* Remove the single item at index; see array_remove_slice(). */
-static int array_remove(array_t* array,int index)
-{
-    return array_remove_slice(array, index, 1);
-}
-
-/*
- * Return the index of the item that pointer refers to. pointer has to be
- * the start address of an item that is in use.
- */
-static int array_index(array_t* array, void* pointer)
-{
-    size_t byte_off = (char*)pointer - array->pointer;
-    size_t slot = byte_off / array->item_size;
-
-    assert((byte_off % array->item_size) == 0);
-    assert(slot < array->next);
-
-    return slot;
-}
-
-/* These structures are used to fake a disk and the VFAT filesystem.
- * For this reason we need to use QEMU_PACKED. */
-
-/*
- * Boot sector of the faked partition. The union holds the fields that
- * differ between the FAT16 style layout (fat16) and the FAT32 layout
- * (fat32).
- */
-typedef struct bootsector_t {
-    uint8_t jump[3];
-    uint8_t name[8];
-    uint16_t sector_size;
-    uint8_t sectors_per_cluster;
-    uint16_t reserved_sectors;
-    uint8_t number_of_fats;
-    uint16_t root_entries;
-    uint16_t total_sectors16;
-    uint8_t media_type;
-    uint16_t sectors_per_fat;
-    uint16_t sectors_per_track;
-    uint16_t number_of_heads;
-    uint32_t hidden_sectors;
-    uint32_t total_sectors;
-    union {
-        struct {
-	    uint8_t drive_number;
-	    uint8_t current_head;
-	    uint8_t signature;
-	    uint32_t id;
-	    uint8_t volume_label[11];
-	} QEMU_PACKED fat16;
-	struct {
-	    uint32_t sectors_per_fat;
-	    uint16_t flags;
-	    uint8_t major,minor;
-	    uint32_t first_cluster_of_root_directory;
-	    uint16_t info_sector;
-	    uint16_t backup_boot_sector;
-	    uint16_t ignored;
-	} QEMU_PACKED fat32;
-    } u;
-    uint8_t fat_type[8];
-    uint8_t ignored[0x1c0];
-    uint8_t magic[2];
-} QEMU_PACKED bootsector_t;
-
-/*
- * Cylinder/head/sector address as stored in a partition table entry.
- * sector2CHS() puts bits 8 and 9 of the cylinder into the top two bits of
- * the sector field.
- */
-typedef struct {
-    uint8_t head;
-    uint8_t sector;
-    uint8_t cylinder;
-} mbr_chs_t;
-
-/* One entry of the partition table inside the MBR (see mbr_t). */
-typedef struct partition_t {
-    uint8_t attributes; /* 0x80 = bootable */
-    mbr_chs_t start_CHS;
-    uint8_t   fs_type; /* 0x1 = FAT12, 0x6 = FAT16, 0xe = FAT16_LBA, 0xb = FAT32, 0xc = FAT32_LBA */
-    mbr_chs_t end_CHS;
-    uint32_t start_sector_long;
-    uint32_t length_sector_long;
-} QEMU_PACKED partition_t;
-
-/*
- * Master boot record: a disk signature (nt_id), four partition entries and
- * the two magic bytes, which init_mbr() sets to 0x55 0xaa.
- */
-typedef struct mbr_t {
-    uint8_t ignored[0x1b8];
-    uint32_t nt_id;
-    uint8_t ignored2[2];
-    partition_t partition[4];
-    uint8_t magic[2];
-} QEMU_PACKED mbr_t;
-
-/*
- * Directory entry. The first cluster is split into begin (low 16 bits) and
- * begin_hi (high 16 bits); see begin_of_direntry(). Multi-byte fields are
- * accessed through the le16/le32 conversion helpers.
- */
-typedef struct direntry_t {
-    uint8_t name[8 + 3];
-    uint8_t attributes;
-    uint8_t reserved[2];
-    uint16_t ctime;
-    uint16_t cdate;
-    uint16_t adate;
-    uint16_t begin_hi;
-    uint16_t mtime;
-    uint16_t mdate;
-    uint16_t begin;
-    uint32_t size;
-} QEMU_PACKED direntry_t;
-
-/* This structure is used to transparently access the files. */
-
-typedef struct mapping_t {
-    /* begin is the first cluster, end is the last+1 */
-    uint32_t begin,end;
-    /* as s->directory is growable, no pointer may be used here */
-    unsigned int dir_index;
-    /* the clusters of a file may be in any order; this points to the first */
-    int first_mapping_index;
-    union {
-	/* offset is
-	 * - the offset in the file (in clusters) for a file, or
-	 * - the next cluster of the directory for a directory, and
-	 * - the address of the buffer for a faked entry
-	 */
-	struct {
-	    uint32_t offset;
-	} file;
-	struct {
-	    int parent_mapping_index;
-	    int first_dir_index;
-	} dir;
-    } info;
-    /* path contains the full path, i.e. it always starts with s->path */
-    char* path;
-
-    /* The mode values are distinct bits, so they can be ORed together */
-    enum { MODE_UNDEFINED = 0, MODE_NORMAL = 1, MODE_MODIFIED = 2,
-	MODE_DIRECTORY = 4, MODE_FAKED = 8,
-	MODE_DELETED = 16, MODE_RENAMED = 32 } mode;
-    int read_only;
-} mapping_t;
-
-#ifdef DEBUG
-static void print_direntry(const struct direntry_t*);
-static void print_mapping(const struct mapping_t* mapping);
-#endif
-
-/* here begins the real VVFAT driver */
-
-/*
- * Per-image state of the vvfat driver: the faked first sectors, the FAT,
- * directory and mapping arrays, the derived filesystem parameters, a cache
- * for the cluster that is currently accessed, and the write support state.
- */
-typedef struct BDRVVVFATState {
-    CoMutex lock;
-    BlockDriverState* bs; /* pointer to parent */
-    unsigned int first_sectors_number; /* 1 for a single partition, 0x40 for a disk with partition table */
-    unsigned char first_sectors[0x40*0x200];
-
-    int fat_type; /* 16 or 32 */
-    array_t fat,directory,mapping;
-    char volume_label[11];
-
-    unsigned int cluster_size;
-    unsigned int sectors_per_cluster;
-    unsigned int sectors_per_fat;
-    unsigned int sectors_of_root_directory;
-    uint32_t last_cluster_of_root_directory;
-    unsigned int faked_sectors; /* how many sectors are faked before file data */
-    uint32_t sector_count; /* total number of sectors of the partition */
-    uint32_t cluster_count; /* total number of clusters of this partition */
-    uint32_t max_fat_value;
-
-    int current_fd;
-    mapping_t* current_mapping;
-    unsigned char* cluster; /* points to current cluster */
-    unsigned char* cluster_buffer; /* points to a buffer to hold temp data */
-    unsigned int current_cluster;
-
-    /* write support */
-    BlockDriverState* write_target;
-    char* qcow_filename;
-    BlockDriverState* qcow;
-    void* fat2;
-    char* used_clusters;
-    array_t commits;
-    const char* path;
-    int downcase_short_names;
-
-    Error *migration_blocker;
-} BDRVVVFATState;
-
-/*
- * Convert the linear sector position spos into a Cylinder/Head/Sector
- * address for the geometry cyls x heads x secs.
- *
- * Returns 0 on success. If the position lies outside the geometry, every
- * CHS field is set to its maximum value (0xFF) and 1 is returned to signal
- * the overflow.
- */
-static int sector2CHS(mbr_chs_t *chs, int spos, int cyls, int heads, int secs)
-{
-    int sector = spos % secs;
-    int track = spos / secs;
-    int head = track % heads;
-    int cylinder = track / heads;
-
-    if (cylinder >= cyls) {
-        /* Overflow: this happens when 32 bit sector positions are used,
-           as CHS only offers 24 bits. Windows/DOS is said to treat
-           1023/255/63 as a CHS address that cannot be represented. */
-        chs->head     = 0xFF;
-        chs->sector   = 0xFF;
-        chs->cylinder = 0xFF;
-        return 1;
-    }
-
-    chs->head     = (uint8_t)head;
-    /* Sectors count from 1; cylinder bits 8-9 go into the top two bits */
-    chs->sector   = (uint8_t)( (sector+1) | ((cylinder>>8)<<6) );
-    chs->cylinder = (uint8_t)cylinder;
-    return 0;
-}
-
-/*
- * Build the faked master boot record in the first 512 bytes of
- * s->first_sectors: a single bootable partition that starts at sector
- * first_sectors_number - 1 and extends to the end of the disk. The
- * partition type depends on s->fat_type and on whether both ends of the
- * partition can be expressed in the given CHS geometry.
- */
-static void init_mbr(BDRVVVFATState *s, int cyls, int heads, int secs)
-{
-    /* TODO: if the files mbr.img and bootsect.img exist, use them */
-    mbr_t* real_mbr=(mbr_t*)s->first_sectors;
-    partition_t* partition = &(real_mbr->partition[0]);
-    int lba;
-
-    memset(s->first_sectors,0,512);
-
-    /* Win NT Disk Signature */
-    real_mbr->nt_id= cpu_to_le32(0xbe1afdfa);
-
-    partition->attributes=0x80; /* bootable */
-
-    /* LBA is used when partition is outside the CHS geometry */
-    lba  = sector2CHS(&partition->start_CHS, s->first_sectors_number - 1,
-                     cyls, heads, secs);
-    lba |= sector2CHS(&partition->end_CHS,   s->bs->total_sectors - 1,
-                     cyls, heads, secs);
-
-    /*LBA partitions are identified only by start/length_sector_long not by CHS*/
-    partition->start_sector_long  = cpu_to_le32(s->first_sectors_number - 1);
-    partition->length_sector_long = cpu_to_le32(s->bs->total_sectors
-                                                - s->first_sectors_number + 1);
-
-    /* FAT12/FAT16/FAT32 */
-    /* DOS uses different types when partition is LBA,
-       probably to prevent older versions from using CHS on them */
-    partition->fs_type= s->fat_type==12 ? 0x1:
-                        s->fat_type==16 ? (lba?0xe:0x06):
-                         /*fat_type==32*/ (lba?0xc:0x0b);
-
-    real_mbr->magic[0]=0x55; real_mbr->magic[1]=0xaa;
-}
-
-/* direntry functions */
-
-/*
- * Expand src into 16 bit characters (low byte first, high byte 0) in dest.
- * At most 129 characters are converted. A 16 bit zero terminator follows,
- * and the remainder up to the next multiple of 26 bytes is padded with
- * 0xff bytes.
- *
- * In the worst case (117 or more characters) bytes up to index 259 are
- * written, so dest has to provide at least 260 bytes.
- *
- * Returns the length of the converted name in bytes, not counting the
- * terminator and the padding.
- */
-static inline int short2long_name(char* dest,const char* src)
-{
-    int chars = 0;
-    int pos;
-    int len;
-
-    while (chars < 129 && src[chars]) {
-        dest[2 * chars] = src[chars];
-        dest[2 * chars + 1] = 0;
-        chars++;
-    }
-    len = 2 * chars;
-
-    dest[len] = 0;
-    dest[len + 1] = 0;
-
-    for (pos = len + 2; (pos % 26) != 0; pos++) {
-        dest[pos] = 0xff;
-    }
-
-    return len;
-}
-
-/*
- * Append the long file name entries for filename to s->directory and
- * return the first of them.
- *
- * Every entry carries 26 bytes of the name, spread over byte offsets 1-10,
- * 14-25 and 28-31 of the entry. The entries are stored in reverse order:
- * the entry appended first holds the end of the name and is the one marked
- * with 0x40.
- *
- * The conversion buffer is 260 bytes large because short2long_name()
- * writes up to index 259 for names of 117 or more characters, and the copy
- * loop below reads up to 26 * 10 bytes.
- */
-static inline direntry_t* create_long_filename(BDRVVVFATState* s,const char* filename)
-{
-    char buffer[260];
-    int length=short2long_name(buffer,filename),
-        number_of_entries=(length+25)/26,i;
-    direntry_t* entry;
-
-    for(i=0;i<number_of_entries;i++) {
-        entry=array_get_next(&(s->directory));
-        entry->attributes=0xf;
-        entry->reserved[0]=0;
-        entry->begin=0;
-        /* sequence number, counting down; 0x40 flags the last part */
-        entry->name[0]=(number_of_entries-i)|(i==0?0x40:0);
-    }
-    for(i=0;i<26*number_of_entries;i++) {
-        /* Map the position within the 26 byte chunk to the entry offset */
-        int offset=(i%26);
-        if(offset<10) offset=1+offset;
-        else if(offset<22) offset=14+offset-10;
-        else offset=28+offset-22;
-        entry=array_get(&(s->directory),s->directory.next-1-(i/26));
-        entry->name[offset]=buffer[i];
-    }
-    return array_get(&(s->directory),s->directory.next-number_of_entries);
-}
-
-/* Non-zero if the first name byte marks the entry as unused (0xe5 or 0x00). */
-static char is_free(const direntry_t* direntry)
-{
-    switch (direntry->name[0]) {
-    case 0xe5:
-    case 0x00:
-        return 1;
-    default:
-        return 0;
-    }
-}
-
-/* Non-zero if the attribute byte is exactly 0x28 (archive | volume label). */
-static char is_volume_label(const direntry_t* direntry)
-{
-    if (direntry->attributes != 0x28) {
-        return 0;
-    }
-    return 1;
-}
-
-/* Non-zero if the attribute byte is exactly 0x0f, as set on long-name entries. */
-static char is_long_name(const direntry_t* direntry)
-{
-    if (direntry->attributes != 0xf) {
-        return 0;
-    }
-    return 1;
-}
-
-/* Non-zero if the entry is neither a volume label, a long name nor free. */
-static char is_short_name(const direntry_t* direntry)
-{
-    if (is_volume_label(direntry) || is_long_name(direntry)
-        || is_free(direntry)) {
-        return 0;
-    }
-    return 1;
-}
-
-/* Non-zero if attribute bit 0x10 is set and the first name byte is not 0xe5. */
-static char is_directory(const direntry_t* direntry)
-{
-    if (!(direntry->attributes & 0x10)) {
-        return 0;
-    }
-    return direntry->name[0] != 0xe5;
-}
-
-/* Non-zero for a short-name entry whose name starts with '.'. */
-static inline char is_dot(const direntry_t* direntry)
-{
-    if (!is_short_name(direntry)) {
-        return 0;
-    }
-    return direntry->name[0] == '.';
-}
-
-/* Non-zero for a short-name entry that is not a directory. */
-static char is_file(const direntry_t* direntry)
-{
-    if (!is_short_name(direntry)) {
-        return 0;
-    }
-    return !is_directory(direntry);
-}
-
-/*
- * Return the first cluster recorded in a directory entry, combining the
- * little-endian 16-bit low half (begin) and high half (begin_hi).
- */
-static inline uint32_t begin_of_direntry(const direntry_t* direntry)
-{
-    uint32_t low = le16_to_cpu(direntry->begin);
-    uint32_t high = le16_to_cpu(direntry->begin_hi);
-
-    /*
-     * Widen before shifting: a 16-bit value is promoted to (signed) int, and
-     * shifting it left by 16 overflows when its top bit is set.
-     */
-    return low | (high << 16);
-}
-
-/* Return the size field of a directory entry converted to host byte order. */
-static inline uint32_t filesize_of_direntry(const direntry_t* direntry)
-{
-    uint32_t size = le32_to_cpu(direntry->size);
-
-    return size;
-}
-
-/*
- * Store the first cluster number in a directory entry: the low 16 bits go
- * to begin, the high 16 bits to begin_hi, both little-endian.
- */
-static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin)
-{
-    uint16_t low_half = begin & 0xffff;
-    uint16_t high_half = (begin >> 16) & 0xffff;
-
-    direntry->begin_hi = cpu_to_le16(high_half);
-    direntry->begin = cpu_to_le16(low_half);
-}
-
-/* fat functions */
-
-/*
- * Checksum over the name bytes of a short directory entry: for every byte,
- * rotate the running 8-bit sum right by one bit and add the byte.
- */
-static inline uint8_t fat_chksum(const direntry_t* entry)
-{
-    uint8_t sum = 0;
-    int idx = 0;
-
-    while (idx < ARRAY_SIZE(entry->name)) {
-        uint8_t rotated = (sum >> 1) | ((sum & 0x01) << 7);
-
-        sum = rotated + entry->name[idx];
-        idx++;
-    }
-
-    return sum;
-}
-
-/*
- * Convert time to local time and pack it into a little-endian 16-bit field.
- * With return_time != 0 the result is the time (seconds/2, minutes << 5,
- * hours << 11); with return_time == 0 it is the date (day of month,
- * month << 5, years since 1980 << 9).
- */
-static uint16_t fat_datetime(time_t time,int return_time) {
-    struct tm local;
-    uint16_t packed;
-
-    localtime_r(&time, &local);
-
-    if (return_time) {
-        packed = (local.tm_sec / 2) | (local.tm_min << 5)
-                 | (local.tm_hour << 11);
-    } else {
-        /* tm_mon is 0-based, tm_year counts from 1900 */
-        packed = (local.tm_mday) | ((local.tm_mon + 1) << 5)
-                 | ((local.tm_year - 80) << 9);
-    }
-
-    return cpu_to_le16(packed);
-}
-
-/*
- * Store value as the FAT entry of cluster.  FAT32 and FAT16 entries are
- * whole little-endian array items; any other fat_type is handled as FAT12,
- * where two 12-bit entries share three bytes.
- */
-static inline void fat_set(BDRVVVFATState* s,unsigned int cluster,uint32_t value)
-{
-    switch (s->fat_type) {
-    case 32: {
-        uint32_t* slot = array_get(&(s->fat), cluster);
-
-        *slot = cpu_to_le32(value);
-        break;
-    }
-    case 16: {
-        uint16_t* slot = array_get(&(s->fat), cluster);
-
-        *slot = cpu_to_le16(value & 0xffff);
-        break;
-    }
-    default: {
-        int byte_index = (cluster * 3 / 2);
-        unsigned char* pair = array_get(&(s->fat), byte_index);
-
-        if (cluster & 1) {
-            /* odd cluster: high nibble of byte 0 plus all of byte 1 */
-            pair[0] = (pair[0] & 0xf) | ((value & 0xf) << 4);
-            pair[1] = (value >> 4);
-        } else {
-            /* even cluster: all of byte 0 plus low nibble of byte 1 */
-            pair[0] = value & 0xff;
-            pair[1] = (pair[1] & 0xf0) | ((value >> 8) & 0xf);
-        }
-        break;
-    }
-    }
-}
-
-/*
- * Return the FAT entry of cluster in host byte order.  Any fat_type other
- * than 32 or 16 is read as FAT12, directly from the raw FAT bytes.
- */
-static inline uint32_t fat_get(BDRVVVFATState* s,unsigned int cluster)
-{
-    switch (s->fat_type) {
-    case 32: {
-        uint32_t* slot = array_get(&(s->fat), cluster);
-
-        return le32_to_cpu(*slot);
-    }
-    case 16: {
-        uint16_t* slot = array_get(&(s->fat), cluster);
-
-        return le16_to_cpu(*slot);
-    }
-    default: {
-        const uint8_t* pair = (uint8_t*)(s->fat.pointer) + cluster * 3 / 2;
-        int word = pair[0] | (pair[1] << 8);
-
-        /* odd clusters occupy the upper 12 bits of the 16-bit window */
-        if (cluster & 1) {
-            word >>= 4;
-        }
-        return word & 0x0fff;
-    }
-    }
-}
-
-/* Return -1 if fat_entry is greater than s->max_fat_value - 8, else 0. */
-static inline int fat_eof(BDRVVVFATState* s,uint32_t fat_entry)
-{
-    return (fat_entry>s->max_fat_value-8) ? -1 : 0;
-}
-
-/*
- * Allocate and zero the in-memory FAT for s->sectors_per_fat sectors and
- * set s->max_fat_value for the FAT type (0 for an unknown type).
- */
-static inline void init_fat(BDRVVVFATState* s)
-{
-    switch (s->fat_type) {
-    case 12:
-        /* byte-sized items: FAT12 entries are packed across byte borders */
-        array_init(&(s->fat), 1);
-        array_ensure_allocated(&(s->fat),
-                               s->sectors_per_fat * 0x200 * 3 / 2 - 1);
-        break;
-    default:
-        array_init(&(s->fat), (s->fat_type == 32 ? 4 : 2));
-        array_ensure_allocated(&(s->fat),
-                               s->sectors_per_fat * 0x200 / s->fat.item_size - 1);
-        break;
-    }
-    memset(s->fat.pointer, 0, s->fat.size);
-
-    if (s->fat_type == 12) {
-        s->max_fat_value = 0xfff;
-    } else if (s->fat_type == 16) {
-        s->max_fat_value = 0xffff;
-    } else if (s->fat_type == 32) {
-        s->max_fat_value = 0x0fffffff;
-    } else {
-        s->max_fat_value = 0; /* error... */
-    }
-}
-
-/* TODO: in create_short_filename, 0xe5->0x05 is not yet handled! */
-/* TODO: in parse_short_filename, 0x05->0xe5 is not yet handled! */
-/*
- * Append the directory entries for filename to s->directory and return the
- * short (8.3) entry.
- *
- * With is_dot set, only a space-padded short entry holding filename verbatim
- * is created.  Otherwise the long-name entries are created first, then a
- * short name is derived from filename, cleaned up, and altered until it
- * differs from every non-long-name entry between index directory_start and
- * the new entry.  Finally the short name's checksum is stored in the
- * long-name entries that precede it.
- */
-static inline direntry_t* create_short_and_long_name(BDRVVVFATState* s,
-	unsigned int directory_start, const char* filename, int is_dot)
-{
-    int i,j,long_index=s->directory.next; /* index of the first entry added here */
-    direntry_t* entry = NULL;
-    direntry_t* entry_long = NULL;
-
-    if(is_dot) {
-	entry=array_get_next(&(s->directory));
-        memset(entry->name, 0x20, sizeof(entry->name));
-	memcpy(entry->name,filename,strlen(filename));
-	return entry;
-    }
-
-    entry_long=create_long_filename(s,filename);
-
-    i = strlen(filename);
-    for(j = i - 1; j>0  && filename[j]!='.';j--); /* j = position of the last '.', or <= 0 if none after index 0 */
-    if (j > 0)
-	i = (j > 8 ? 8 : j); /* base name: text before the last '.', at most 8 bytes */
-    else if (i > 8)
-	i = 8;
-
-    entry=array_get_next(&(s->directory));
-    memset(entry->name, 0x20, sizeof(entry->name));
-    memcpy(entry->name, filename, i);
-
-    if (j > 0) {
-        /* extension: up to 3 bytes following the last '.' */
-        for (i = 0; i < 3 && filename[j + 1 + i]; i++) {
-            entry->name[8 + i] = filename[j + 1 + i];
-        }
-    }
-
-    /* upcase & remove unwanted characters */
-    for(i=10;i>=0;i--) {
-	if(i==10 || i==7) for(;i>0 && entry->name[i]==' ';i--); /* skip trailing padding of extension / base name */
-	if(entry->name[i]<=' ' || entry->name[i]>0x7f
-		|| strchr(".*?<>|\":/\\[];,+='",entry->name[i]))
-	    entry->name[i]='_';
-        else if(entry->name[i]>='a' && entry->name[i]<='z')
-            entry->name[i]+='A'-'a';
-    }
-
-    /* mangle duplicates */
-    while(1) {
-	direntry_t* entry1=array_get(&(s->directory),directory_start);
-	int j; /* shadows the outer j, which is no longer needed */
-
-	for(;entry1<entry;entry1++)
-	    if(!is_long_name(entry1) && !memcmp(entry1->name,entry->name,11))
-		break; /* found dupe */
-	if(entry1==entry) /* no dupe found */
-	    break;
-
-	/* use all 8 characters of name */
-	if(entry->name[7]==' ') {
-	    int j;
-	    for(j=6;j>0 && entry->name[j]==' ';j--)
-		entry->name[j]='~';
-	}
-
-	/* increment number */
-	for(j=7;j>0 && entry->name[j]=='9';j--)
-	    entry->name[j]='0'; /* carry: trailing '9's wrap to '0' */
-	if(j>0) {
-	    if(entry->name[j]<'0' || entry->name[j]>'9')
-	        entry->name[j]='0';
-	    else
-	        entry->name[j]++;
-	}
-    }
-
-    /* calculate checksum; propagate to long name */
-    if(entry_long) {
-        uint8_t chksum=fat_chksum(entry);
-
-	/* calculate anew, because realloc could have taken place */
-	entry_long=array_get(&(s->directory),long_index);
-	while(entry_long<entry && is_long_name(entry_long)) {
-	    entry_long->reserved[1]=chksum;
-	    entry_long++;
-	}
-    }
-
-    return entry;
-}
-
-/*
- * Read a directory. (the index of the corresponding mapping must be passed).
- *
- * Creates a directory entry for every entry of the host directory
- * mapping->path that can be stat()ed, and a new mapping for every
- * subdirectory and every non-empty file.  The directory is then padded with
- * zeroed entries to a whole number of clusters, and mapping->end is set to
- * the first cluster after the directory.
- *
- * Returns 0 on success, -1 if the host directory cannot be opened and -2 if
- * a file is larger than 0x7fffffff bytes.
- */
-static int read_directory(BDRVVVFATState* s, int mapping_index)
-{
-    mapping_t* mapping = array_get(&(s->mapping), mapping_index);
-    direntry_t* direntry;
-    const char* dirname = mapping->path;
-    int first_cluster = mapping->begin;
-    int parent_index = mapping->info.dir.parent_mapping_index;
-    mapping_t* parent_mapping = (mapping_t*)
-        (parent_index >= 0 ? array_get(&(s->mapping), parent_index) : NULL);
-    int first_cluster_of_parent = parent_mapping ? parent_mapping->begin : -1;
-
-    DIR* dir=opendir(dirname);
-    struct dirent* entry;
-    int i;
-
-    assert(mapping->mode & MODE_DIRECTORY);
-
-    if(!dir) {
-	mapping->end = mapping->begin;
-	return -1;
-    }
-
-    /* a directory starting at cluster 0 begins at directory index 0 */
-    i = mapping->info.dir.first_dir_index =
-	    first_cluster == 0 ? 0 : s->directory.next;
-
-    /* actually read the directory, and allocate the mappings */
-    while((entry=readdir(dir))) {
-	unsigned int length=strlen(dirname)+2+strlen(entry->d_name); /* '/' and NUL */
-        char* buffer;
-	direntry_t* direntry; /* shadows the function-level direntry */
-        struct stat st;
-	int is_dot=!strcmp(entry->d_name,".");
-	int is_dotdot=!strcmp(entry->d_name,"..");
-
-	/* no "." and ".." entries in a directory that starts at cluster 0 */
-	if(first_cluster == 0 && (is_dotdot || is_dot))
-	    continue;
-
-	buffer = g_malloc(length);
-	snprintf(buffer,length,"%s/%s",dirname,entry->d_name);
-
-	/* entries that cannot be stat()ed are silently skipped */
-	if(stat(buffer,&st)<0) {
-            g_free(buffer);
-            continue;
-	}
-
-	/* create directory entry for this file */
-	direntry=create_short_and_long_name(s, i, entry->d_name,
-		is_dot || is_dotdot);
-	direntry->attributes=(S_ISDIR(st.st_mode)?0x10:0x20);
-	direntry->reserved[0]=direntry->reserved[1]=0;
-	direntry->ctime=fat_datetime(st.st_ctime,1);
-	direntry->cdate=fat_datetime(st.st_ctime,0);
-	direntry->adate=fat_datetime(st.st_atime,0);
-	direntry->begin_hi=0;
-	direntry->mtime=fat_datetime(st.st_mtime,1);
-	direntry->mdate=fat_datetime(st.st_mtime,0);
-	if(is_dotdot)
-	    set_begin_of_direntry(direntry, first_cluster_of_parent);
-	else if(is_dot)
-	    set_begin_of_direntry(direntry, first_cluster);
-	else
-	    direntry->begin=0; /* do that later */
-        if (st.st_size > 0x7fffffff) {
-	    fprintf(stderr, "File %s is larger than 2GB\n", buffer);
-            g_free(buffer);
-            closedir(dir);
-	    return -2;
-        }
-	direntry->size=cpu_to_le32(S_ISDIR(st.st_mode)?0:st.st_size);
-
-	/* create mapping for this file */
-	if(!is_dot && !is_dotdot && (S_ISDIR(st.st_mode) || st.st_size)) {
-	    s->current_mapping = array_get_next(&(s->mapping));
-	    s->current_mapping->begin=0;
-	    s->current_mapping->end=st.st_size; /* byte size for now, not yet a cluster number */
-	    /*
-	     * we get the direntry of the most recent direntry, which
-	     * contains the short name and all the relevant information.
-	     */
-	    s->current_mapping->dir_index=s->directory.next-1;
-	    s->current_mapping->first_mapping_index = -1;
-	    if (S_ISDIR(st.st_mode)) {
-		s->current_mapping->mode = MODE_DIRECTORY;
-		s->current_mapping->info.dir.parent_mapping_index =
-		    mapping_index;
-	    } else {
-		s->current_mapping->mode = MODE_UNDEFINED;
-		s->current_mapping->info.file.offset = 0;
-	    }
-	    s->current_mapping->path=buffer; /* the mapping now owns buffer */
-	    s->current_mapping->read_only =
-		(st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0;
-        } else {
-            g_free(buffer);
-        }
-    }
-    closedir(dir);
-
-    /* fill with zeroes up to the end of the cluster */
-    while(s->directory.next%(0x10*s->sectors_per_cluster)) {
-	direntry_t* direntry=array_get_next(&(s->directory));
-	memset(direntry,0,sizeof(direntry_t));
-    }
-
-/* TODO: if there are more entries, bootsector has to be adjusted! */
-#define ROOT_ENTRIES (0x02 * 0x10 * s->sectors_per_cluster)
-    if (mapping_index == 0 && s->directory.next < ROOT_ENTRIES) {
-	/* root directory */
-	int cur = s->directory.next;
-	array_ensure_allocated(&(s->directory), ROOT_ENTRIES - 1);
-	s->directory.next = ROOT_ENTRIES;
-	memset(array_get(&(s->directory), cur), 0,
-		(ROOT_ENTRIES - cur) * sizeof(direntry_t));
-    }
-
-     /* reget the mapping, since s->mapping was possibly realloc()ed */
-    mapping = array_get(&(s->mapping), mapping_index);
-    /* 0x20 bytes per directory entry */
-    first_cluster += (s->directory.next - mapping->info.dir.first_dir_index)
-	* 0x20 / s->cluster_size;
-    mapping->end = first_cluster;
-
-    /* record the directory's first cluster in its own directory entry */
-    direntry = array_get(&(s->directory), mapping->dir_index);
-    set_begin_of_direntry(direntry, mapping->begin);
-
-    return 0;
-}
-
-/*
- * Convert an absolute sector number into a cluster number, counting from
- * the first sector after the s->faked_sectors leading sectors.
- */
-static inline uint32_t sector2cluster(BDRVVVFATState* s,off_t sector_num)
-{
-    off_t data_sector = sector_num - s->faked_sectors;
-
-    return data_sector / s->sectors_per_cluster;
-}
-
-/* Inverse of sector2cluster(): absolute number of a cluster's first sector. */
-static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num)
-{
-    return cluster_num * s->sectors_per_cluster + s->faked_sectors;
-}
-
-/*
- * Build the in-memory FAT image for the host directory dirname.
- *
- * Sizes and initialises the FAT, creates the volume label entry and the
- * root mapping, then walks s->mapping: directories are read with
- * read_directory() (which appends further mappings that this same loop
- * visits later), files get their cluster range assigned, and the cluster
- * chains are written to the FAT.  Finally the boot sector is filled in.
- * heads and secs are only stored in the boot sector.
- *
- * Returns 0 on success, or -1 with errp set if a directory cannot be read
- * or the contents do not fit.
- */
-static int init_directories(BDRVVVFATState* s,
-                            const char *dirname, int heads, int secs,
-                            Error **errp)
-{
-    bootsector_t* bootsector;
-    mapping_t* mapping;
-    unsigned int i;
-    unsigned int cluster;
-
-    memset(&(s->first_sectors[0]),0,0x40*0x200);
-
-    s->cluster_size=s->sectors_per_cluster*0x200;
-    s->cluster_buffer=g_malloc(s->cluster_size);
-
-    /*
-     * The formula: sc = spf+1+spf*spc*(512*8/fat_type),
-     * where sc is sector_count,
-     * spf is sectors_per_fat,
-     * spc is sectors_per_clusters, and
-     * fat_type = 12, 16 or 32.
-     */
-    i = 1+s->sectors_per_cluster*0x200*8/s->fat_type;
-    s->sectors_per_fat=(s->sector_count+i)/i; /* round up */
-
-    array_init(&(s->mapping),sizeof(mapping_t));
-    array_init(&(s->directory),sizeof(direntry_t));
-
-    /* add volume label */
-    {
-	direntry_t* entry=array_get_next(&(s->directory));
-	entry->attributes=0x28; /* archive | volume label */
-        memcpy(entry->name, s->volume_label, sizeof(entry->name));
-    }
-
-    /* Now build FAT, and write back information into directory */
-    init_fat(s);
-
-    /* sectors before the first cluster: first_sectors plus two FAT copies */
-    s->faked_sectors=s->first_sectors_number+s->sectors_per_fat*2;
-    s->cluster_count=sector2cluster(s, s->sector_count);
-
-    /* mapping 0 is the root directory */
-    mapping = array_get_next(&(s->mapping));
-    mapping->begin = 0;
-    mapping->dir_index = 0;
-    mapping->info.dir.parent_mapping_index = -1;
-    mapping->first_mapping_index = -1;
-    mapping->path = g_strdup(dirname);
-    i = strlen(mapping->path);
-    if (i > 0 && mapping->path[i - 1] == '/')
-	mapping->path[i - 1] = '\0'; /* strip one trailing slash */
-    mapping->mode = MODE_DIRECTORY;
-    mapping->read_only = 0;
-    s->path = mapping->path;
-
-    /* s->mapping.next grows while read_directory() adds mappings */
-    for (i = 0, cluster = 0; i < s->mapping.next; i++) {
-	/* MS-DOS expects the FAT to be 0 for the root directory
-	 * (except for the media byte). */
-	/* LATER TODO: still true for FAT32? */
-	int fix_fat = (i != 0);
-	mapping = array_get(&(s->mapping), i);
-
-        if (mapping->mode & MODE_DIRECTORY) {
-	    mapping->begin = cluster;
-	    if(read_directory(s, i)) {
-                error_setg(errp, "Could not read directory %s",
-                           mapping->path);
-		return -1;
-	    }
-	    mapping = array_get(&(s->mapping), i); /* re-fetch after read_directory() added mappings */
-	} else {
-	    assert(mapping->mode == MODE_UNDEFINED);
-	    mapping->mode=MODE_NORMAL;
-	    mapping->begin = cluster;
-	    if (mapping->end > 0) {
-		direntry_t* direntry = array_get(&(s->directory),
-			mapping->dir_index);
-
-		/* mapping->end held the size in bytes; turn it into a cluster number */
-		mapping->end = cluster + 1 + (mapping->end-1)/s->cluster_size;
-		set_begin_of_direntry(direntry, mapping->begin);
-	    } else {
-		mapping->end = cluster + 1;
-		fix_fat = 0;
-	    }
-	}
-
-	assert(mapping->begin < mapping->end);
-
-	/* next free cluster */
-	cluster = mapping->end;
-
-	if(cluster > s->cluster_count) {
-            error_setg(errp,
-                       "Directory does not fit in FAT%d (capacity %.2f MB)",
-                       s->fat_type, s->sector_count / 2000.0);
-            return -1;
-	}
-
-	/* fix fat for entry */
-	if (fix_fat) {
-	    int j;
-	    for(j = mapping->begin; j < mapping->end - 1; j++)
-		fat_set(s, j, j+1); /* each cluster points to the next one */
-	    fat_set(s, mapping->end - 1, s->max_fat_value); /* last cluster of the chain */
-	}
-    }
-
-    mapping = array_get(&(s->mapping), 0);
-    s->sectors_of_root_directory = mapping->end * s->sectors_per_cluster;
-    s->last_cluster_of_root_directory = mapping->end;
-
-    /* the FAT signature */
-    fat_set(s,0,s->max_fat_value);
-    fat_set(s,1,s->max_fat_value);
-
-    s->current_mapping = NULL;
-
-    /* the boot sector is the last of the first_sectors_number leading sectors */
-    bootsector=(bootsector_t*)(s->first_sectors+(s->first_sectors_number-1)*0x200);
-    bootsector->jump[0]=0xeb;
-    bootsector->jump[1]=0x3e;
-    bootsector->jump[2]=0x90;
-    memcpy(bootsector->name,"QEMU    ",8);
-    bootsector->sector_size=cpu_to_le16(0x200);
-    bootsector->sectors_per_cluster=s->sectors_per_cluster;
-    bootsector->reserved_sectors=cpu_to_le16(1);
-    bootsector->number_of_fats=0x2; /* number of FATs */
-    bootsector->root_entries=cpu_to_le16(s->sectors_of_root_directory*0x10);
-    bootsector->total_sectors16=s->sector_count>0xffff?0:cpu_to_le16(s->sector_count);
-    bootsector->media_type=(s->first_sectors_number>1?0xf8:0xf0); /* media descriptor (f8=hd, f0=3.5 fd)*/
-    s->fat.pointer[0] = bootsector->media_type;
-    bootsector->sectors_per_fat=cpu_to_le16(s->sectors_per_fat);
-    bootsector->sectors_per_track = cpu_to_le16(secs);
-    bootsector->number_of_heads = cpu_to_le16(heads);
-    bootsector->hidden_sectors=cpu_to_le32(s->first_sectors_number==1?0:0x3f);
-    bootsector->total_sectors=cpu_to_le32(s->sector_count>0xffff?s->sector_count:0);
-
-    /* LATER TODO: if FAT32, this is wrong */
-    bootsector->u.fat16.drive_number=s->first_sectors_number==1?0:0x80; /* fda=0, hda=0x80 */
-    bootsector->u.fat16.current_head=0;
-    bootsector->u.fat16.signature=0x29;
-    bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd);
-
-    memcpy(bootsector->u.fat16.volume_label, s->volume_label,
-           sizeof(bootsector->u.fat16.volume_label));
-    memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12   ":s->fat_type==16?"FAT16   ":"FAT32   "),8);
-    bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa;
-
-    return 0;
-}
-
-#ifdef DEBUG
-static BDRVVVFATState *vvv = NULL;
-#endif
-
-static int enable_write_target(BDRVVVFATState *s, Error **errp);
-static int is_consistent(BDRVVVFATState *s);
-
-/*
- * Options understood by the vvfat driver.  vvfat_open() reads all five of
- * them; vvfat_parse_filename() fills in "dir", "fat-type", "floppy" and
- * "rw" from a "fat:..." file name.
- */
-static QemuOptsList runtime_opts = {
-    .name = "vvfat",
-    .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
-    .desc = {
-        {
-            .name = "dir",
-            .type = QEMU_OPT_STRING,
-            .help = "Host directory to map to the vvfat device",
-        },
-        {
-            .name = "fat-type",
-            .type = QEMU_OPT_NUMBER,
-            .help = "FAT type (12, 16 or 32)",
-        },
-        {
-            .name = "floppy",
-            .type = QEMU_OPT_BOOL,
-            .help = "Create a floppy rather than a hard disk image",
-        },
-        {
-            .name = "label",
-            .type = QEMU_OPT_STRING,
-            .help = "Use a volume label other than QEMU VVFAT",
-        },
-        {
-            .name = "rw",
-            .type = QEMU_OPT_BOOL,
-            .help = "Make the image writable",
-        },
-        { /* end of list */ }
-    },
-};
-
-/*
- * Translate a legacy "fat:[opt:...]dir" file name into driver options.
- *
- * The markers ":32:", ":16:", ":12:", ":floppy:" and ":rw:" are searched
- * for anywhere in the string; the directory is whatever follows the last
- * ':' (or, for a DOS style "X:" drive prefix, the drive letter onwards).
- * Sets errp and leaves options untouched if the "fat:" prefix is missing.
- */
-static void vvfat_parse_filename(const char *filename, QDict *options,
-                                 Error **errp)
-{
-    int fat_type;
-    bool floppy;
-    bool rw;
-    int last_colon;
-
-    if (!strstart(filename, "fat:", NULL)) {
-        error_setg(errp, "File name string must start with 'fat:'");
-        return;
-    }
-
-    /* Parse options; 0 means "no FAT type given" */
-    fat_type = strstr(filename, ":32:") ? 32
-             : strstr(filename, ":16:") ? 16
-             : strstr(filename, ":12:") ? 12
-             : 0;
-    floppy = strstr(filename, ":floppy:") != NULL;
-    rw = strstr(filename, ":rw:") != NULL;
-
-    /* Get the directory name without options */
-    last_colon = strrchr(filename, ':') - filename;
-    assert(last_colon >= 3);
-    if (filename[last_colon - 2] == ':'
-        && qemu_isalpha(filename[last_colon - 1])) {
-        /* workaround for DOS drive names */
-        filename += last_colon - 1;
-    } else {
-        filename += last_colon + 1;
-    }
-
-    /* Fill in the options QDict */
-    qdict_put(options, "dir", qstring_from_str(filename));
-    qdict_put(options, "fat-type", qint_from_int(fat_type));
-    qdict_put(options, "floppy", qbool_from_bool(floppy));
-    qdict_put(options, "rw", qbool_from_bool(rw));
-}
-
-/*
- * Set up the vvfat state in bs->opaque from the given options: reads "dir",
- * "fat-type", "floppy", "label" and "rw", chooses the disk geometry, builds
- * the FAT image with init_directories() and, for the hard disk layout,
- * writes an MBR with init_mbr().
- *
- * Returns 0 on success.  On failure errp is set and -EINVAL (bad options),
- * -EIO (init_directories() failed) or the negative value returned by
- * enable_write_target() is returned.
- */
-static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
-                      Error **errp)
-{
-    BDRVVVFATState *s = bs->opaque;
-    int cyls, heads, secs;
-    bool floppy;
-    const char *dirname, *label;
-    QemuOpts *opts;
-    Error *local_err = NULL;
-    int ret;
-
-#ifdef DEBUG
-    vvv = s;
-#endif
-
-    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
-    qemu_opts_absorb_qdict(opts, options, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    dirname = qemu_opt_get(opts, "dir");
-    if (!dirname) {
-        error_setg(errp, "vvfat block driver requires a 'dir' option");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-    s->fat_type = qemu_opt_get_number(opts, "fat-type", 0); /* 0 = pick a default below */
-    floppy = qemu_opt_get_bool(opts, "floppy", false);
-
-    /* the label is space padded, not NUL terminated */
-    memset(s->volume_label, ' ', sizeof(s->volume_label));
-    label = qemu_opt_get(opts, "label");
-    if (label) {
-        size_t label_length = strlen(label);
-        if (label_length > 11) {
-            error_setg(errp, "vvfat label cannot be longer than 11 bytes");
-            ret = -EINVAL;
-            goto fail;
-        }
-        memcpy(s->volume_label, label, label_length);
-    } else {
-        memcpy(s->volume_label, "QEMU VVFAT", 10);
-    }
-
-    if (floppy) {
-        /* 1.44MB or 2.88MB floppy.  2.88MB can be FAT12 (default) or FAT16. */
-        if (!s->fat_type) {
-            s->fat_type = 12;
-            secs = 36;
-            s->sectors_per_cluster = 2;
-        } else {
-            secs = s->fat_type == 12 ? 18 : 36;
-            s->sectors_per_cluster = 1;
-        }
-        s->first_sectors_number = 1;
-        cyls = 80;
-        heads = 2;
-    } else {
-        /* 32MB or 504MB disk*/
-        if (!s->fat_type) {
-            s->fat_type = 16;
-        }
-        s->first_sectors_number = 0x40;
-        cyls = s->fat_type == 12 ? 64 : 1024;
-        heads = 16;
-        secs = 63;
-    }
-
-    switch (s->fat_type) {
-    case 32:
-	    fprintf(stderr, "Big fat greek warning: FAT32 has not been tested. "
-                "You are welcome to do so!\n");
-        break;
-    case 16:
-    case 12:
-        break;
-    default:
-        error_setg(errp, "Valid FAT types are only 12, 16 and 32");
-        ret = -EINVAL;
-        goto fail;
-    }
-
-
-    s->bs = bs;
-
-    /* LATER TODO: if FAT32, adjust */
-    /* NOTE(review): this overrides the sectors_per_cluster values chosen in
-     * the floppy branch above -- confirm that is intended */
-    s->sectors_per_cluster=0x10;
-
-    s->current_cluster=0xffffffff;
-
-    /* read only is the default for safety */
-    bs->read_only = 1;
-    s->qcow = s->write_target = NULL;
-    s->qcow_filename = NULL;
-    s->fat2 = NULL;
-    s->downcase_short_names = 1;
-
-    fprintf(stderr, "vvfat %s chs %d,%d,%d\n",
-            dirname, cyls, heads, secs);
-
-    /* sectors available from the boot sector onwards */
-    s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
-
-    if (qemu_opt_get_bool(opts, "rw", false)) {
-        ret = enable_write_target(s, errp);
-        if (ret < 0) {
-            goto fail;
-        }
-        bs->read_only = 0;
-    }
-
-    bs->total_sectors = cyls * heads * secs;
-
-    /* NOTE(review): the fail path only deletes opts; nothing set up by
-     * enable_write_target() or init_directories() is released here */
-    if (init_directories(s, dirname, heads, secs, errp)) {
-        ret = -EIO;
-        goto fail;
-    }
-
-    /* recompute so that sector_count ends on a cluster boundary */
-    s->sector_count = s->faked_sectors + s->sectors_per_cluster*s->cluster_count;
-
-    /* only the hard disk layout (0x40 leading sectors) gets an MBR */
-    if (s->first_sectors_number == 0x40) {
-        init_mbr(s, cyls, heads, secs);
-    }
-
-    //    assert(is_consistent(s));
-    qemu_co_mutex_init(&s->lock);
-
-    /* Disable migration when vvfat is used rw */
-    if (s->qcow) {
-        error_setg(&s->migration_blocker,
-                   "The vvfat (rw) format used by node '%s' "
-                   "does not support live migration",
-                   bdrv_get_device_or_node_name(bs));
-        migrate_add_blocker(s->migration_blocker);
-    }
-
-    ret = 0;
-fail:
-    /* reached on success as well: opts is always released */
-    qemu_opts_del(opts);
-    return ret;
-}
-
-/*
- * Forget the current mapping, closing s->current_fd if it is non-zero, and
- * invalidate the cached cluster.  The descriptor is only looked at when a
- * current mapping is set.
- */
-static inline void vvfat_close_current_file(BDRVVVFATState *s)
-{
-    if (s->current_mapping != NULL) {
-        int fd = s->current_fd;
-
-        s->current_mapping = NULL;
-        if (fd != 0) {
-            qemu_close(fd);
-            s->current_fd = 0;
-        }
-    }
-    s->current_cluster = -1;
-}
-
-/* mappings between index1 and index2-1 are supposed to be ordered
- * return value is the index of the last mapping for which end>cluster_num
- *
- * Binary search over s->mapping: index1 and index2 are moved towards each
- * other by halving the range until the midpoint coincides with one of them.
- */
-static inline int find_mapping_for_cluster_aux(BDRVVVFATState* s,int cluster_num,int index1,int index2)
-{
-    while(1) {
-        int index3; /* midpoint of the current range */
-	mapping_t* mapping;
-	index3=(index1+index2)/2;
-	mapping=array_get(&(s->mapping),index3);
-	assert(mapping->begin < mapping->end);
-	if(mapping->begin>=cluster_num) {
-	    /* midpoint starts at or after cluster_num: continue in the lower half */
-	    assert(index2!=index3 || index2==0);
-	    if(index2==index3)
-		return index1;
-	    index2=index3;
-	} else {
-	    /* midpoint starts before cluster_num: continue in the upper half */
-	    if(index1==index3)
-		return mapping->end<=cluster_num ? index2 : index1;
-	    index1=index3;
-	}
-	assert(index1<=index2);
-	DLOG(mapping=array_get(&(s->mapping),index1);
-	assert(mapping->begin<=cluster_num);
-	assert(index2 >= s->mapping.next ||
-		((mapping = array_get(&(s->mapping),index2)) &&
-		mapping->end>cluster_num)));
-    }
-}
-
-/*
- * Return the mapping whose cluster range [begin, end) contains cluster_num,
- * or NULL if the search ends past the last mapping or on a mapping that
- * begins after cluster_num.
- */
-static inline mapping_t* find_mapping_for_cluster(BDRVVVFATState* s,int cluster_num)
-{
-    mapping_t* mapping = NULL;
-    int index=find_mapping_for_cluster_aux(s,cluster_num,0,s->mapping.next);
-
-    if (!(index>=s->mapping.next)) {
-        mapping_t* candidate = array_get(&(s->mapping),index);
-
-        if (!(candidate->begin>cluster_num)) {
-            mapping = candidate;
-            assert(mapping->begin<=cluster_num && mapping->end>cluster_num);
-        }
-    }
-    return mapping;
-}
-
-/*
- * Make mapping the current mapping with its file open read-only in
- * s->current_fd.  Nothing is done if the current mapping already has the
- * same path.  Returns 0 on success, -1 if mapping is NULL or the file
- * cannot be opened (the previous file then stays open).
- */
-static int open_file(BDRVVVFATState* s,mapping_t* mapping)
-{
-    int fd;
-
-    if (mapping == NULL) {
-        return -1;
-    }
-
-    /* same path as the current mapping: keep using the open descriptor */
-    if (s->current_mapping != NULL &&
-        strcmp(s->current_mapping->path, mapping->path) == 0) {
-        return 0;
-    }
-
-    /* open file */
-    fd = qemu_open(mapping->path, O_RDONLY | O_BINARY | O_LARGEFILE);
-    if (fd < 0) {
-        return -1;
-    }
-
-    vvfat_close_current_file(s);
-    s->current_fd = fd;
-    s->current_mapping = mapping;
-    return 0;
-}
-
-/*
- * Make s->cluster point at the contents of cluster cluster_num.
- *
- * For a directory, s->cluster points straight into s->directory; for a
- * file, the data is read from the host file into s->cluster_buffer.
- * Nothing is done if cluster_num is already the current cluster.
- *
- * Returns 0 on success, -2 if no file could be opened for the cluster,
- * -3 if seeking failed and -1 if reading failed.
- */
-static inline int read_cluster(BDRVVVFATState *s,int cluster_num)
-{
-    if(s->current_cluster != cluster_num) {
-	int result=0;
-	off_t offset;
-	assert(!s->current_mapping || s->current_fd || (s->current_mapping->mode & MODE_DIRECTORY));
-	/* look up a new mapping unless the current one covers cluster_num */
-	if(!s->current_mapping
-		|| s->current_mapping->begin>cluster_num
-		|| s->current_mapping->end<=cluster_num) {
-	    /* binary search of mappings for file */
-	    mapping_t* mapping=find_mapping_for_cluster(s,cluster_num);
-
-	    assert(!mapping || (cluster_num>=mapping->begin && cluster_num<mapping->end));
-
-	    if (mapping && mapping->mode & MODE_DIRECTORY) {
-		vvfat_close_current_file(s);
-		s->current_mapping = mapping;
-read_cluster_directory: /* also entered by the goto below for the current directory mapping */
-		offset = s->cluster_size*(cluster_num-s->current_mapping->begin);
-		/* 0x20 bytes per directory entry */
-		s->cluster = (unsigned char*)s->directory.pointer+offset
-			+ 0x20*s->current_mapping->info.dir.first_dir_index;
-		assert(((s->cluster-(unsigned char*)s->directory.pointer)%s->cluster_size)==0);
-		assert((char*)s->cluster+s->cluster_size <= s->directory.pointer+s->directory.next*s->directory.item_size);
-		s->current_cluster = cluster_num;
-		return 0;
-	    }
-
-	    if(open_file(s,mapping))
-		return -2;
-	} else if (s->current_mapping->mode & MODE_DIRECTORY)
-	    goto read_cluster_directory;
-
-	assert(s->current_fd);
-
-	offset=s->cluster_size*(cluster_num-s->current_mapping->begin)+s->current_mapping->info.file.offset;
-	if(lseek(s->current_fd, offset, SEEK_SET)!=offset)
-	    return -3;
-	s->cluster=s->cluster_buffer;
-	/* NOTE(review): a short read (result < cluster_size) is accepted; the
-	 * rest of cluster_buffer then keeps whatever it held before */
-	result=read(s->current_fd,s->cluster,s->cluster_size);
-	if(result<0) {
-	    s->current_cluster = -1;
-	    return -1;
-	}
-	s->current_cluster = cluster_num;
-    }
-    return 0;
-}
-
-#ifdef DEBUG
-/*
- * Debug helper: print a directory entry to stderr.  For a long-name entry
- * the low byte of each name character is printed; for any other entry the
- * 11 name bytes, attributes, first cluster and size are printed.
- */
-static void print_direntry(const direntry_t* direntry)
-{
-    int j = 0; /* write position in buffer, advanced by ADD_CHAR */
-    char buffer[1024];
-
-    fprintf(stderr, "direntry %p: ", direntry);
-    if(!direntry)
-	return;
-    if(is_long_name(direntry)) {
-	unsigned char* c=(unsigned char*)direntry;
-	int i;
-	/* name characters live at byte offsets 1-10, 14-25 and 28-31 */
-	for(i=1;i<11 && c[i] && c[i]!=0xff;i+=2)
-#define ADD_CHAR(c) {buffer[j] = (c); if (buffer[j] < ' ') buffer[j] = 0xb0; j++;}
-	    ADD_CHAR(c[i]);
-	for(i=14;i<26 && c[i] && c[i]!=0xff;i+=2)
-	    ADD_CHAR(c[i]);
-	for(i=28;i<32 && c[i] && c[i]!=0xff;i+=2)
-	    ADD_CHAR(c[i]);
-	buffer[j] = 0;
-	fprintf(stderr, "%s\n", buffer);
-    } else {
-	int i;
-	for(i=0;i<11;i++)
-	    ADD_CHAR(direntry->name[i]);
-	buffer[j] = 0;
-	fprintf(stderr,"%s attributes=0x%02x begin=%d size=%d\n",
-		buffer,
-		direntry->attributes,
-		begin_of_direntry(direntry),le32_to_cpu(direntry->size));
-    }
-}
-
-/*
- * Debug helper: print the fields of a mapping to stderr, followed by the
- * directory specific or file specific part of its info union.
- */
-static void print_mapping(const mapping_t* mapping)
-{
-    fprintf(stderr,
-            "mapping (%p): begin, end = %d, %d, dir_index = %d, "
-            "first_mapping_index = %d, name = %s, mode = 0x%x, " ,
-            mapping, mapping->begin, mapping->end, mapping->dir_index,
-            mapping->first_mapping_index, mapping->path, mapping->mode);
-
-    if (!(mapping->mode & MODE_DIRECTORY)) {
-        fprintf(stderr, "offset = %d\n", mapping->info.file.offset);
-        return;
-    }
-
-    fprintf(stderr, "parent_mapping_index = %d, first_dir_index = %d\n",
-            mapping->info.dir.parent_mapping_index,
-            mapping->info.dir.first_dir_index);
-}
-#endif
-
-/*
- * Read nb_sectors 512 byte sectors starting at sector_num into buf.
- *
- * For each sector, in order of preference: data allocated in s->qcow (if
- * set) is read from there; sectors below s->faked_sectors come from
- * s->first_sectors or from s->fat (both FAT copies are served from the
- * same array); everything else is looked up cluster by cluster.  Clusters
- * that cannot be read are returned as zeroes.
- *
- * Returns 0 on success, -1 if a sector lies beyond bs->total_sectors or
- * reading from s->qcow fails.
- */
-static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
-                    uint8_t *buf, int nb_sectors)
-{
-    BDRVVVFATState *s = bs->opaque;
-    int i;
-
-    for(i=0;i<nb_sectors;i++,sector_num++) {
-	if (sector_num >= bs->total_sectors)
-	   return -1;
-	if (s->qcow) {
-	    int n;
-            if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, &n)) {
-DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
-                if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) {
-                    return -1;
-                }
-                /* n sectors were handled; the loop header adds the final +1 */
-                i += n - 1;
-                sector_num += n - 1;
-                continue;
-            }
-DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num));
-	}
-	if(sector_num<s->faked_sectors) {
-	    if(sector_num<s->first_sectors_number)
-		memcpy(buf+i*0x200,&(s->first_sectors[sector_num*0x200]),0x200);
-	    else if(sector_num-s->first_sectors_number<s->sectors_per_fat)
-		memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number)*0x200]),0x200);
-	    else if(sector_num-s->first_sectors_number-s->sectors_per_fat<s->sectors_per_fat)
-		memcpy(buf+i*0x200,&(s->fat.pointer[(sector_num-s->first_sectors_number-s->sectors_per_fat)*0x200]),0x200);
-	} else {
-	    uint32_t sector=sector_num-s->faked_sectors,
-	    sector_offset_in_cluster=(sector%s->sectors_per_cluster),
-	    cluster_num=sector/s->sectors_per_cluster;
-	    /* NOTE(review): cluster_num == s->cluster_count passes this check --
-	     * confirm whether ">=" was intended */
-	    if(cluster_num > s->cluster_count || read_cluster(s, cluster_num) != 0) {
-		/* LATER TODO: strict: return -1; */
-		memset(buf+i*0x200,0,0x200);
-		continue;
-	    }
-	    memcpy(buf+i*0x200,s->cluster+sector_offset_in_cluster*0x200,0x200);
-	}
-    }
-    return 0;
-}
-
-/* Coroutine wrapper: run vvfat_read() with s->lock held, return its result. */
-static coroutine_fn int vvfat_co_read(BlockDriverState *bs, int64_t sector_num,
-                                      uint8_t *buf, int nb_sectors)
-{
-    BDRVVVFATState *state = bs->opaque;
-    int rc;
-
-    qemu_co_mutex_lock(&state->lock);
-    rc = vvfat_read(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&state->lock);
-
-    return rc;
-}
-
-/* LATER TODO: statify all functions */
-
-/*
- * Idea of the write support (use snapshot):
- *
- * 1. check if all data is consistent, recording renames, modifications,
- *    new files and directories (in s->commits).
- *
- * 2. if the data is not consistent, stop committing
- *
- * 3. handle renames, and create new files and directories (do not yet
- *    write their contents)
- *
- * 4. walk the directories, fixing the mapping and direntries, and marking
- *    the handled mappings as not deleted
- *
- * 5. commit the contents of the files
- *
- * 6. handle deleted files and directories
- *
- */
-
-/*
- * One scheduled change, queued in s->commits by the schedule_*() helpers.
- * Which member of param is valid depends on action.
- */
-typedef struct commit_t {
-    char* path; /* freed by clear_commits(); NULL for ACTION_WRITEOUT */
-    union {
-	struct { uint32_t cluster; } rename;
-	struct { int dir_index; uint32_t modified_offset; } writeout;
-	struct { uint32_t first_cluster; } new_file;
-	struct { uint32_t cluster; } mkdir;
-    } param;
-    /* DELETEs and RMDIRs are handled differently: see handle_deletes() */
-    enum {
-	ACTION_RENAME, ACTION_WRITEOUT, ACTION_NEW_FILE, ACTION_MKDIR
-    } action;
-} commit_t;
-
-/*
- * Drop all scheduled commits: free the path of every commit that owns one
- * (every action except ACTION_WRITEOUT) and reset the list to empty.
- */
-static void clear_commits(BDRVVVFATState* s)
-{
-    int i = 0;
-DLOG(fprintf(stderr, "clear_commits (%d commits)\n", s->commits.next));
-    while (i < s->commits.next) {
-        commit_t* commit = array_get(&(s->commits), i);
-
-        assert(commit->path || commit->action == ACTION_WRITEOUT);
-        switch (commit->action) {
-        case ACTION_WRITEOUT:
-            /* writeouts never carry a path */
-            assert(commit->path == NULL);
-            break;
-        default:
-            assert(commit->path);
-            g_free(commit->path);
-            break;
-        }
-        i++;
-    }
-    s->commits.next = 0;
-}
-
-static void schedule_rename(BDRVVVFATState* s,
-	uint32_t cluster, char* new_path)
-{
-    commit_t* commit = array_get_next(&(s->commits));
-    commit->path = new_path;
-    commit->param.rename.cluster = cluster;
-    commit->action = ACTION_RENAME;
-}
-
-static void schedule_writeout(BDRVVVFATState* s,
-	int dir_index, uint32_t modified_offset)
-{
-    commit_t* commit = array_get_next(&(s->commits));
-    commit->path = NULL;
-    commit->param.writeout.dir_index = dir_index;
-    commit->param.writeout.modified_offset = modified_offset;
-    commit->action = ACTION_WRITEOUT;
-}
-
-static void schedule_new_file(BDRVVVFATState* s,
-	char* path, uint32_t first_cluster)
-{
-    commit_t* commit = array_get_next(&(s->commits));
-    commit->path = path;
-    commit->param.new_file.first_cluster = first_cluster;
-    commit->action = ACTION_NEW_FILE;
-}
-
-static void schedule_mkdir(BDRVVVFATState* s, uint32_t cluster, char* path)
-{
-    commit_t* commit = array_get_next(&(s->commits));
-    commit->path = path;
-    commit->param.mkdir.cluster = cluster;
-    commit->action = ACTION_MKDIR;
-}
-
-typedef struct {
-    /*
-     * Since the sequence number is at most 0x3f, and the filename
-     * length is at most 13 times the sequence number, the maximal
-     * filename length is 0x3f * 13 bytes.
-     */
-    unsigned char name[0x3f * 13 + 1];
-    int checksum, len;
-    int sequence_number;
-} long_file_name;
-
-static void lfn_init(long_file_name* lfn)
-{
-   lfn->sequence_number = lfn->len = 0;
-   lfn->checksum = 0x100;
-}
-
-/* return 0 if parsed successfully, > 0 if no long name, < 0 if error */
-static int parse_long_name(long_file_name* lfn,
-	const direntry_t* direntry)
-{
-    int i, j, offset;
-    const unsigned char* pointer = (const unsigned char*)direntry;
-
-    if (!is_long_name(direntry))
-	return 1;
-
-    if (pointer[0] & 0x40) {
-	lfn->sequence_number = pointer[0] & 0x3f;
-	lfn->checksum = pointer[13];
-	lfn->name[0] = 0;
-	lfn->name[lfn->sequence_number * 13] = 0;
-    } else if ((pointer[0] & 0x3f) != --lfn->sequence_number)
-	return -1;
-    else if (pointer[13] != lfn->checksum)
-	return -2;
-    else if (pointer[12] || pointer[26] || pointer[27])
-	return -3;
-
-    offset = 13 * (lfn->sequence_number - 1);
-    for (i = 0, j = 1; i < 13; i++, j+=2) {
-	if (j == 11)
-	    j = 14;
-	else if (j == 26)
-	    j = 28;
-
-	if (pointer[j+1] == 0)
-	    lfn->name[offset + i] = pointer[j];
-	else if (pointer[j+1] != 0xff || (pointer[0] & 0x40) == 0)
-	    return -4;
-	else
-	    lfn->name[offset + i] = 0;
-    }
-
-    if (pointer[0] & 0x40)
-	lfn->len = offset + strlen((char*)lfn->name + offset);
-
-    return 0;
-}
-
-/* returns 0 if successful, >0 if no short_name, and <0 on error */
-static int parse_short_name(BDRVVVFATState* s,
-	long_file_name* lfn, direntry_t* direntry)
-{
-    int i, j;
-
-    if (!is_short_name(direntry))
-	return 1;
-
-    for (j = 7; j >= 0 && direntry->name[j] == ' '; j--);
-    for (i = 0; i <= j; i++) {
-	if (direntry->name[i] <= ' ' || direntry->name[i] > 0x7f)
-	    return -1;
-	else if (s->downcase_short_names)
-	    lfn->name[i] = qemu_tolower(direntry->name[i]);
-	else
-	    lfn->name[i] = direntry->name[i];
-    }
-
-    for (j = 2; j >= 0 && direntry->name[8 + j] == ' '; j--) {
-    }
-    if (j >= 0) {
-	lfn->name[i++] = '.';
-	lfn->name[i + j + 1] = '\0';
-	for (;j >= 0; j--) {
-            uint8_t c = direntry->name[8 + j];
-            if (c <= ' ' || c > 0x7f) {
-                return -2;
-            } else if (s->downcase_short_names) {
-                lfn->name[i + j] = qemu_tolower(c);
-            } else {
-                lfn->name[i + j] = c;
-            }
-	}
-    } else
-	lfn->name[i + j + 1] = '\0';
-
-    lfn->len = strlen((char*)lfn->name);
-
-    return 0;
-}
-
-static inline uint32_t modified_fat_get(BDRVVVFATState* s,
-	unsigned int cluster)
-{
-    if (cluster < s->last_cluster_of_root_directory) {
-	if (cluster + 1 == s->last_cluster_of_root_directory)
-	    return s->max_fat_value;
-	else
-	    return cluster + 1;
-    }
-
-    if (s->fat_type==32) {
-        uint32_t* entry=((uint32_t*)s->fat2)+cluster;
-        return le32_to_cpu(*entry);
-    } else if (s->fat_type==16) {
-        uint16_t* entry=((uint16_t*)s->fat2)+cluster;
-        return le16_to_cpu(*entry);
-    } else {
-        const uint8_t* x=s->fat2+cluster*3/2;
-        return ((x[0]|(x[1]<<8))>>(cluster&1?4:0))&0x0fff;
-    }
-}
-
-static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
-{
-    int was_modified = 0;
-    int i, dummy;
-
-    if (s->qcow == NULL)
-	return 0;
-
-    for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
-	was_modified = bdrv_is_allocated(s->qcow,
-		cluster2sector(s, cluster_num) + i, 1, &dummy);
-
-    return was_modified;
-}
-
-static const char* get_basename(const char* path)
-{
-    char* basename = strrchr(path, '/');
-    if (basename == NULL)
-	return path;
-    else
-	return basename + 1; /* strip '/' */
-}
-
-/*
- * The array s->used_clusters holds the states of the clusters. If it is
- * part of a file, it has bit 2 set, in case of a directory, bit 1. If it
- * was modified, bit 3 is set.
- * If any cluster is allocated, but not part of a file or directory, this
- * driver refuses to commit.
- */
-typedef enum {
-     USED_DIRECTORY = 1, USED_FILE = 2, USED_ANY = 3, USED_ALLOCATED = 4
-} used_t;
-
-/*
- * get_cluster_count_for_direntry() not only determines how many clusters
- * are occupied by direntry, but also if it was renamed or modified.
- *
- * A file is thought to be renamed *only* if there already was a file with
- * exactly the same first cluster, but a different name.
- *
- * Further, the files/directories handled by this function are
- * assumed to be *not* deleted (and *only* those).
- */
-static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
-	direntry_t* direntry, const char* path)
-{
-    /*
-     * This is a little bit tricky:
-     * IF the guest OS just inserts a cluster into the file chain,
-     * and leaves the rest alone, (i.e. the original file had clusters
-     * 15 -> 16, but now has 15 -> 32 -> 16), then the following happens:
-     *
-     * - do_commit will write the cluster into the file at the given
-     *   offset, but
-     *
-     * - the cluster which is overwritten should be moved to a later
-     *   position in the file.
-     *
-     * I am not aware that any OS does something as braindead, but this
-     * situation could happen anyway when not committing for a long time.
-     * Just to be sure that this does not bite us, detect it, and copy the
-     * contents of the clusters to-be-overwritten into the qcow.
-     */
-    int copy_it = 0;
-    int was_modified = 0;
-    int32_t ret = 0;
-
-    uint32_t cluster_num = begin_of_direntry(direntry);
-    uint32_t offset = 0;
-    int first_mapping_index = -1;
-    mapping_t* mapping = NULL;
-    const char* basename2 = NULL;
-
-    vvfat_close_current_file(s);
-
-    /* the root directory */
-    if (cluster_num == 0)
-	return 0;
-
-    /* write support */
-    if (s->qcow) {
-	basename2 = get_basename(path);
-
-	mapping = find_mapping_for_cluster(s, cluster_num);
-
-	if (mapping) {
-	    const char* basename;
-
-	    assert(mapping->mode & MODE_DELETED);
-	    mapping->mode &= ~MODE_DELETED;
-
-	    basename = get_basename(mapping->path);
-
-	    assert(mapping->mode & MODE_NORMAL);
-
-	    /* rename */
-	    if (strcmp(basename, basename2))
-		schedule_rename(s, cluster_num, g_strdup(path));
-	} else if (is_file(direntry))
-	    /* new file */
-	    schedule_new_file(s, g_strdup(path), cluster_num);
-	else {
-            abort();
-	    return 0;
-	}
-    }
-
-    while(1) {
-	if (s->qcow) {
-	    if (!copy_it && cluster_was_modified(s, cluster_num)) {
-		if (mapping == NULL ||
-			mapping->begin > cluster_num ||
-			mapping->end <= cluster_num)
-		mapping = find_mapping_for_cluster(s, cluster_num);
-
-
-		if (mapping &&
-			(mapping->mode & MODE_DIRECTORY) == 0) {
-
-		    /* was modified in qcow */
-		    if (offset != mapping->info.file.offset + s->cluster_size
-			    * (cluster_num - mapping->begin)) {
-			/* offset of this cluster in file chain has changed */
-                        abort();
-			copy_it = 1;
-		    } else if (offset == 0) {
-			const char* basename = get_basename(mapping->path);
-
-			if (strcmp(basename, basename2))
-			    copy_it = 1;
-			first_mapping_index = array_index(&(s->mapping), mapping);
-		    }
-
-		    if (mapping->first_mapping_index != first_mapping_index
-			    && mapping->info.file.offset > 0) {
-                        abort();
-			copy_it = 1;
-		    }
-
-		    /* need to write out? */
-		    if (!was_modified && is_file(direntry)) {
-			was_modified = 1;
-			schedule_writeout(s, mapping->dir_index, offset);
-		    }
-		}
-	    }
-
-	    if (copy_it) {
-		int i, dummy;
-		/*
-		 * This is horribly inefficient, but that is okay, since
-		 * it is rarely executed, if at all.
-		 */
-		int64_t offset = cluster2sector(s, cluster_num);
-
-		vvfat_close_current_file(s);
-                for (i = 0; i < s->sectors_per_cluster; i++) {
-                    if (!bdrv_is_allocated(s->qcow, offset + i, 1, &dummy)) {
-                        if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) {
-                            return -1;
-                        }
-                        if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) {
-                            return -2;
-                        }
-                    }
-                }
-	    }
-	}
-
-	ret++;
-	if (s->used_clusters[cluster_num] & USED_ANY)
-	    return 0;
-	s->used_clusters[cluster_num] = USED_FILE;
-
-	cluster_num = modified_fat_get(s, cluster_num);
-
-	if (fat_eof(s, cluster_num))
-	    return ret;
-	else if (cluster_num < 2 || cluster_num > s->max_fat_value - 16)
-	    return -1;
-
-	offset += s->cluster_size;
-    }
-}
-
-/*
- * This function looks at the modified data (qcow).
- * It returns 0 upon inconsistency or error, and the number of clusters
- * used by the directory, its subdirectories and their files.
- */
-static int check_directory_consistency(BDRVVVFATState *s,
-	int cluster_num, const char* path)
-{
-    int ret = 0;
-    unsigned char* cluster = g_malloc(s->cluster_size);
-    direntry_t* direntries = (direntry_t*)cluster;
-    mapping_t* mapping = find_mapping_for_cluster(s, cluster_num);
-
-    long_file_name lfn;
-    int path_len = strlen(path);
-    char path2[PATH_MAX + 1];
-
-    assert(path_len < PATH_MAX); /* len was tested before! */
-    pstrcpy(path2, sizeof(path2), path);
-    path2[path_len] = '/';
-    path2[path_len + 1] = '\0';
-
-    if (mapping) {
-	const char* basename = get_basename(mapping->path);
-	const char* basename2 = get_basename(path);
-
-	assert(mapping->mode & MODE_DIRECTORY);
-
-	assert(mapping->mode & MODE_DELETED);
-	mapping->mode &= ~MODE_DELETED;
-
-	if (strcmp(basename, basename2))
-	    schedule_rename(s, cluster_num, g_strdup(path));
-    } else
-	/* new directory */
-	schedule_mkdir(s, cluster_num, g_strdup(path));
-
-    lfn_init(&lfn);
-    do {
-	int i;
-	int subret = 0;
-
-	ret++;
-
-	if (s->used_clusters[cluster_num] & USED_ANY) {
-	    fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num);
-            goto fail;
-	}
-	s->used_clusters[cluster_num] = USED_DIRECTORY;
-
-DLOG(fprintf(stderr, "read cluster %d (sector %d)\n", (int)cluster_num, (int)cluster2sector(s, cluster_num)));
-	subret = vvfat_read(s->bs, cluster2sector(s, cluster_num), cluster,
-		s->sectors_per_cluster);
-	if (subret) {
-	    fprintf(stderr, "Error fetching direntries\n");
-	fail:
-            g_free(cluster);
-	    return 0;
-	}
-
-	for (i = 0; i < 0x10 * s->sectors_per_cluster; i++) {
-	    int cluster_count = 0;
-
-DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i));
-	    if (is_volume_label(direntries + i) || is_dot(direntries + i) ||
-		    is_free(direntries + i))
-		continue;
-
-	    subret = parse_long_name(&lfn, direntries + i);
-	    if (subret < 0) {
-		fprintf(stderr, "Error in long name\n");
-		goto fail;
-	    }
-	    if (subret == 0 || is_free(direntries + i))
-		continue;
-
-	    if (fat_chksum(direntries+i) != lfn.checksum) {
-		subret = parse_short_name(s, &lfn, direntries + i);
-		if (subret < 0) {
-		    fprintf(stderr, "Error in short name (%d)\n", subret);
-		    goto fail;
-		}
-		if (subret > 0 || !strcmp((char*)lfn.name, ".")
-			|| !strcmp((char*)lfn.name, ".."))
-		    continue;
-	    }
-	    lfn.checksum = 0x100; /* cannot use long name twice */
-
-	    if (path_len + 1 + lfn.len >= PATH_MAX) {
-		fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name);
-		goto fail;
-	    }
-            pstrcpy(path2 + path_len + 1, sizeof(path2) - path_len - 1,
-                    (char*)lfn.name);
-
-	    if (is_directory(direntries + i)) {
-		if (begin_of_direntry(direntries + i) == 0) {
-		    DLOG(fprintf(stderr, "invalid begin for directory: %s\n", path2); print_direntry(direntries + i));
-		    goto fail;
-		}
-		cluster_count = check_directory_consistency(s,
-			begin_of_direntry(direntries + i), path2);
-		if (cluster_count == 0) {
-		    DLOG(fprintf(stderr, "problem in directory %s:\n", path2); print_direntry(direntries + i));
-		    goto fail;
-		}
-	    } else if (is_file(direntries + i)) {
-		/* check file size with FAT */
-		cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2);
-		if (cluster_count !=
-			(le32_to_cpu(direntries[i].size) + s->cluster_size
-			 - 1) / s->cluster_size) {
-		    DLOG(fprintf(stderr, "Cluster count mismatch\n"));
-		    goto fail;
-		}
-	    } else
-                abort(); /* cluster_count = 0; */
-
-	    ret += cluster_count;
-	}
-
-	cluster_num = modified_fat_get(s, cluster_num);
-    } while(!fat_eof(s, cluster_num));
-
-    g_free(cluster);
-    return ret;
-}
-
-/* returns 1 on success */
-static int is_consistent(BDRVVVFATState* s)
-{
-    int i, check;
-    int used_clusters_count = 0;
-
-DLOG(checkpoint());
-    /*
-     * - get modified FAT
-     * - compare the two FATs (TODO)
-     * - get buffer for marking used clusters
-     * - recurse direntries from root (using bs->bdrv_read to make
-     *    sure to get the new data)
-     *   - check that the FAT agrees with the size
-     *   - count the number of clusters occupied by this directory and
-     *     its files
-     * - check that the cumulative used cluster count agrees with the
-     *   FAT
-     * - if all is fine, return number of used clusters
-     */
-    if (s->fat2 == NULL) {
-	int size = 0x200 * s->sectors_per_fat;
-	s->fat2 = g_malloc(size);
-	memcpy(s->fat2, s->fat.pointer, size);
-    }
-    check = vvfat_read(s->bs,
-	    s->first_sectors_number, s->fat2, s->sectors_per_fat);
-    if (check) {
-	fprintf(stderr, "Could not copy fat\n");
-	return 0;
-    }
-    assert (s->used_clusters);
-    for (i = 0; i < sector2cluster(s, s->sector_count); i++)
-	s->used_clusters[i] &= ~USED_ANY;
-
-    clear_commits(s);
-
-    /* mark every mapped file/directory as deleted.
-     * (check_directory_consistency() will unmark those still present). */
-    if (s->qcow)
-	for (i = 0; i < s->mapping.next; i++) {
-	    mapping_t* mapping = array_get(&(s->mapping), i);
-	    if (mapping->first_mapping_index < 0)
-		mapping->mode |= MODE_DELETED;
-	}
-
-    used_clusters_count = check_directory_consistency(s, 0, s->path);
-    if (used_clusters_count <= 0) {
-	DLOG(fprintf(stderr, "problem in directory\n"));
-	return 0;
-    }
-
-    check = s->last_cluster_of_root_directory;
-    for (i = check; i < sector2cluster(s, s->sector_count); i++) {
-	if (modified_fat_get(s, i)) {
-	    if(!s->used_clusters[i]) {
-		DLOG(fprintf(stderr, "FAT was modified (%d), but cluster is not used?\n", i));
-		return 0;
-	    }
-	    check++;
-	}
-
-	if (s->used_clusters[i] == USED_ALLOCATED) {
-	    /* allocated, but not used... */
-	    DLOG(fprintf(stderr, "unused, modified cluster: %d\n", i));
-	    return 0;
-	}
-    }
-
-    if (check != used_clusters_count)
-	return 0;
-
-    return used_clusters_count;
-}
-
-static inline void adjust_mapping_indices(BDRVVVFATState* s,
-	int offset, int adjust)
-{
-    int i;
-
-    for (i = 0; i < s->mapping.next; i++) {
-	mapping_t* mapping = array_get(&(s->mapping), i);
-
-#define ADJUST_MAPPING_INDEX(name) \
-	if (mapping->name >= offset) \
-	    mapping->name += adjust
-
-	ADJUST_MAPPING_INDEX(first_mapping_index);
-	if (mapping->mode & MODE_DIRECTORY)
-	    ADJUST_MAPPING_INDEX(info.dir.parent_mapping_index);
-    }
-}
-
-/* insert or update mapping */
-static mapping_t* insert_mapping(BDRVVVFATState* s,
-	uint32_t begin, uint32_t end)
-{
-    /*
-     * - find mapping where mapping->begin >= begin,
-     * - if mapping->begin > begin: insert
-     *   - adjust all references to mappings!
-     * - else: adjust
-     * - replace name
-     */
-    int index = find_mapping_for_cluster_aux(s, begin, 0, s->mapping.next);
-    mapping_t* mapping = NULL;
-    mapping_t* first_mapping = array_get(&(s->mapping), 0);
-
-    if (index < s->mapping.next && (mapping = array_get(&(s->mapping), index))
-	    && mapping->begin < begin) {
-	mapping->end = begin;
-	index++;
-	mapping = array_get(&(s->mapping), index);
-    }
-    if (index >= s->mapping.next || mapping->begin > begin) {
-	mapping = array_insert(&(s->mapping), index, 1);
-	mapping->path = NULL;
-	adjust_mapping_indices(s, index, +1);
-    }
-
-    mapping->begin = begin;
-    mapping->end = end;
-
-DLOG(mapping_t* next_mapping;
-assert(index + 1 >= s->mapping.next ||
-((next_mapping = array_get(&(s->mapping), index + 1)) &&
- next_mapping->begin >= end)));
-
-    if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer)
-	s->current_mapping = array_get(&(s->mapping),
-		s->current_mapping - first_mapping);
-
-    return mapping;
-}
-
-static int remove_mapping(BDRVVVFATState* s, int mapping_index)
-{
-    mapping_t* mapping = array_get(&(s->mapping), mapping_index);
-    mapping_t* first_mapping = array_get(&(s->mapping), 0);
-
-    /* free mapping */
-    if (mapping->first_mapping_index < 0) {
-        g_free(mapping->path);
-    }
-
-    /* remove from s->mapping */
-    array_remove(&(s->mapping), mapping_index);
-
-    /* adjust all references to mappings */
-    adjust_mapping_indices(s, mapping_index, -1);
-
-    if (s->current_mapping && first_mapping != (mapping_t*)s->mapping.pointer)
-	s->current_mapping = array_get(&(s->mapping),
-		s->current_mapping - first_mapping);
-
-    return 0;
-}
-
-static void adjust_dirindices(BDRVVVFATState* s, int offset, int adjust)
-{
-    int i;
-    for (i = 0; i < s->mapping.next; i++) {
-	mapping_t* mapping = array_get(&(s->mapping), i);
-	if (mapping->dir_index >= offset)
-	    mapping->dir_index += adjust;
-	if ((mapping->mode & MODE_DIRECTORY) &&
-		mapping->info.dir.first_dir_index >= offset)
-	    mapping->info.dir.first_dir_index += adjust;
-    }
-}
-
-static direntry_t* insert_direntries(BDRVVVFATState* s,
-	int dir_index, int count)
-{
-    /*
-     * make room in s->directory,
-     * adjust_dirindices
-     */
-    direntry_t* result = array_insert(&(s->directory), dir_index, count);
-    if (result == NULL)
-	return NULL;
-    adjust_dirindices(s, dir_index, count);
-    return result;
-}
-
-static int remove_direntries(BDRVVVFATState* s, int dir_index, int count)
-{
-    int ret = array_remove_slice(&(s->directory), dir_index, count);
-    if (ret)
-	return ret;
-    adjust_dirindices(s, dir_index, -count);
-    return 0;
-}
-
-/*
- * Adapt the mappings of the cluster chain starting at first cluster
- * (i.e. if a file starts at first_cluster, the chain is followed according
- * to the modified fat, and the corresponding entries in s->mapping are
- * adjusted)
- */
-static int commit_mappings(BDRVVVFATState* s,
-	uint32_t first_cluster, int dir_index)
-{
-    mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
-    direntry_t* direntry = array_get(&(s->directory), dir_index);
-    uint32_t cluster = first_cluster;
-
-    vvfat_close_current_file(s);
-
-    assert(mapping);
-    assert(mapping->begin == first_cluster);
-    mapping->first_mapping_index = -1;
-    mapping->dir_index = dir_index;
-    mapping->mode = (dir_index <= 0 || is_directory(direntry)) ?
-	MODE_DIRECTORY : MODE_NORMAL;
-
-    while (!fat_eof(s, cluster)) {
-	uint32_t c, c1;
-
-	for (c = cluster, c1 = modified_fat_get(s, c); c + 1 == c1;
-		c = c1, c1 = modified_fat_get(s, c1));
-
-	c++;
-	if (c > mapping->end) {
-	    int index = array_index(&(s->mapping), mapping);
-	    int i, max_i = s->mapping.next - index;
-	    for (i = 1; i < max_i && mapping[i].begin < c; i++);
-	    while (--i > 0)
-		remove_mapping(s, index + 1);
-	}
-	assert(mapping == array_get(&(s->mapping), s->mapping.next - 1)
-		|| mapping[1].begin >= c);
-	mapping->end = c;
-
-	if (!fat_eof(s, c1)) {
-	    int i = find_mapping_for_cluster_aux(s, c1, 0, s->mapping.next);
-	    mapping_t* next_mapping = i >= s->mapping.next ? NULL :
-		array_get(&(s->mapping), i);
-
-	    if (next_mapping == NULL || next_mapping->begin > c1) {
-		int i1 = array_index(&(s->mapping), mapping);
-
-		next_mapping = insert_mapping(s, c1, c1+1);
-
-		if (c1 < c)
-		    i1++;
-		mapping = array_get(&(s->mapping), i1);
-	    }
-
-	    next_mapping->dir_index = mapping->dir_index;
-	    next_mapping->first_mapping_index =
-		mapping->first_mapping_index < 0 ?
-		array_index(&(s->mapping), mapping) :
-		mapping->first_mapping_index;
-	    next_mapping->path = mapping->path;
-	    next_mapping->mode = mapping->mode;
-	    next_mapping->read_only = mapping->read_only;
-	    if (mapping->mode & MODE_DIRECTORY) {
-		next_mapping->info.dir.parent_mapping_index =
-			mapping->info.dir.parent_mapping_index;
-		next_mapping->info.dir.first_dir_index =
-			mapping->info.dir.first_dir_index +
-			0x10 * s->sectors_per_cluster *
-			(mapping->end - mapping->begin);
-	    } else
-		next_mapping->info.file.offset = mapping->info.file.offset +
-			mapping->end - mapping->begin;
-
-	    mapping = next_mapping;
-	}
-
-	cluster = c1;
-    }
-
-    return 0;
-}
-
-static int commit_direntries(BDRVVVFATState* s,
-	int dir_index, int parent_mapping_index)
-{
-    direntry_t* direntry = array_get(&(s->directory), dir_index);
-    uint32_t first_cluster = dir_index == 0 ? 0 : begin_of_direntry(direntry);
-    mapping_t* mapping = find_mapping_for_cluster(s, first_cluster);
-
-    int factor = 0x10 * s->sectors_per_cluster;
-    int old_cluster_count, new_cluster_count;
-    int current_dir_index = mapping->info.dir.first_dir_index;
-    int first_dir_index = current_dir_index;
-    int ret, i;
-    uint32_t c;
-
-DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapping->path, parent_mapping_index));
-
-    assert(direntry);
-    assert(mapping);
-    assert(mapping->begin == first_cluster);
-    assert(mapping->info.dir.first_dir_index < s->directory.next);
-    assert(mapping->mode & MODE_DIRECTORY);
-    assert(dir_index == 0 || is_directory(direntry));
-
-    mapping->info.dir.parent_mapping_index = parent_mapping_index;
-
-    if (first_cluster == 0) {
-	old_cluster_count = new_cluster_count =
-	    s->last_cluster_of_root_directory;
-    } else {
-	for (old_cluster_count = 0, c = first_cluster; !fat_eof(s, c);
-		c = fat_get(s, c))
-	    old_cluster_count++;
-
-	for (new_cluster_count = 0, c = first_cluster; !fat_eof(s, c);
-		c = modified_fat_get(s, c))
-	    new_cluster_count++;
-    }
-
-    if (new_cluster_count > old_cluster_count) {
-	if (insert_direntries(s,
-		current_dir_index + factor * old_cluster_count,
-		factor * (new_cluster_count - old_cluster_count)) == NULL)
-	    return -1;
-    } else if (new_cluster_count < old_cluster_count)
-	remove_direntries(s,
-		current_dir_index + factor * new_cluster_count,
-		factor * (old_cluster_count - new_cluster_count));
-
-    for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
-        direntry_t *first_direntry;
-	void* direntry = array_get(&(s->directory), current_dir_index);
-	int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
-		s->sectors_per_cluster);
-	if (ret)
-	    return ret;
-
-        /* The first directory entry on the filesystem is the volume name */
-        first_direntry = (direntry_t*) s->directory.pointer;
-        assert(!memcmp(first_direntry->name, s->volume_label, 11));
-
-	current_dir_index += factor;
-    }
-
-    ret = commit_mappings(s, first_cluster, dir_index);
-    if (ret)
-	return ret;
-
-    /* recurse */
-    for (i = 0; i < factor * new_cluster_count; i++) {
-	direntry = array_get(&(s->directory), first_dir_index + i);
-	if (is_directory(direntry) && !is_dot(direntry)) {
-	    mapping = find_mapping_for_cluster(s, first_cluster);
-	    assert(mapping->mode & MODE_DIRECTORY);
-	    ret = commit_direntries(s, first_dir_index + i,
-		array_index(&(s->mapping), mapping));
-	    if (ret)
-		return ret;
-	}
-    }
-
-    return 0;
-}
-
-/* commit one file (adjust contents, adjust mapping),
-   return first_mapping_index */
-static int commit_one_file(BDRVVVFATState* s,
-	int dir_index, uint32_t offset)
-{
-    direntry_t* direntry = array_get(&(s->directory), dir_index);
-    uint32_t c = begin_of_direntry(direntry);
-    uint32_t first_cluster = c;
-    mapping_t* mapping = find_mapping_for_cluster(s, c);
-    uint32_t size = filesize_of_direntry(direntry);
-    char* cluster = g_malloc(s->cluster_size);
-    uint32_t i;
-    int fd = 0;
-
-    assert(offset < size);
-    assert((offset % s->cluster_size) == 0);
-
-    for (i = s->cluster_size; i < offset; i += s->cluster_size)
-	c = modified_fat_get(s, c);
-
-    fd = qemu_open(mapping->path, O_RDWR | O_CREAT | O_BINARY, 0666);
-    if (fd < 0) {
-	fprintf(stderr, "Could not open %s... (%s, %d)\n", mapping->path,
-		strerror(errno), errno);
-        g_free(cluster);
-	return fd;
-    }
-    if (offset > 0) {
-        if (lseek(fd, offset, SEEK_SET) != offset) {
-            qemu_close(fd);
-            g_free(cluster);
-            return -3;
-        }
-    }
-
-    while (offset < size) {
-	uint32_t c1;
-	int rest_size = (size - offset > s->cluster_size ?
-		s->cluster_size : size - offset);
-	int ret;
-
-	c1 = modified_fat_get(s, c);
-
-	assert((size - offset == 0 && fat_eof(s, c)) ||
-		(size > offset && c >=2 && !fat_eof(s, c)));
-
-	ret = vvfat_read(s->bs, cluster2sector(s, c),
-	    (uint8_t*)cluster, (rest_size + 0x1ff) / 0x200);
-
-        if (ret < 0) {
-            qemu_close(fd);
-            g_free(cluster);
-            return ret;
-        }
-
-        if (write(fd, cluster, rest_size) < 0) {
-            qemu_close(fd);
-            g_free(cluster);
-            return -2;
-        }
-
-	offset += rest_size;
-	c = c1;
-    }
-
-    if (ftruncate(fd, size)) {
-        perror("ftruncate()");
-        qemu_close(fd);
-        g_free(cluster);
-        return -4;
-    }
-    qemu_close(fd);
-    g_free(cluster);
-
-    return commit_mappings(s, first_cluster, dir_index);
-}
-
-#ifdef DEBUG
-/* test, if all mappings point to valid direntries */
-static void check1(BDRVVVFATState* s)
-{
-    int i;
-    for (i = 0; i < s->mapping.next; i++) {
-	mapping_t* mapping = array_get(&(s->mapping), i);
-	if (mapping->mode & MODE_DELETED) {
-	    fprintf(stderr, "deleted\n");
-	    continue;
-	}
-	assert(mapping->dir_index < s->directory.next);
-	direntry_t* direntry = array_get(&(s->directory), mapping->dir_index);
-	assert(mapping->begin == begin_of_direntry(direntry) || mapping->first_mapping_index >= 0);
-	if (mapping->mode & MODE_DIRECTORY) {
-	    assert(mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster * (mapping->end - mapping->begin) <= s->directory.next);
-	    assert((mapping->info.dir.first_dir_index % (0x10 * s->sectors_per_cluster)) == 0);
-	}
-    }
-}
-
-/* test, if all direntries have mappings */
-static void check2(BDRVVVFATState* s)
-{
-    int i;
-    int first_mapping = -1;
-
-    for (i = 0; i < s->directory.next; i++) {
-	direntry_t* direntry = array_get(&(s->directory), i);
-
-	if (is_short_name(direntry) && begin_of_direntry(direntry)) {
-	    mapping_t* mapping = find_mapping_for_cluster(s, begin_of_direntry(direntry));
-	    assert(mapping);
-	    assert(mapping->dir_index == i || is_dot(direntry));
-	    assert(mapping->begin == begin_of_direntry(direntry) || is_dot(direntry));
-	}
-
-	if ((i % (0x10 * s->sectors_per_cluster)) == 0) {
-	    /* cluster start */
-	    int j, count = 0;
-
-	    for (j = 0; j < s->mapping.next; j++) {
-		mapping_t* mapping = array_get(&(s->mapping), j);
-		if (mapping->mode & MODE_DELETED)
-		    continue;
-		if (mapping->mode & MODE_DIRECTORY) {
-		    if (mapping->info.dir.first_dir_index <= i && mapping->info.dir.first_dir_index + 0x10 * s->sectors_per_cluster > i) {
-			assert(++count == 1);
-			if (mapping->first_mapping_index == -1)
-			    first_mapping = array_index(&(s->mapping), mapping);
-			else
-			    assert(first_mapping == mapping->first_mapping_index);
-			if (mapping->info.dir.parent_mapping_index < 0)
-			    assert(j == 0);
-			else {
-			    mapping_t* parent = array_get(&(s->mapping), mapping->info.dir.parent_mapping_index);
-			    assert(parent->mode & MODE_DIRECTORY);
-			    assert(parent->info.dir.first_dir_index < mapping->info.dir.first_dir_index);
-			}
-		    }
-		}
-	    }
-	    if (count == 0)
-		first_mapping = -1;
-	}
-    }
-}
-#endif
-
-static int handle_renames_and_mkdirs(BDRVVVFATState* s)
-{
-    int i;
-
-#ifdef DEBUG
-    fprintf(stderr, "handle_renames\n");
-    for (i = 0; i < s->commits.next; i++) {
-	commit_t* commit = array_get(&(s->commits), i);
-	fprintf(stderr, "%d, %s (%d, %d)\n", i, commit->path ? commit->path : "(null)", commit->param.rename.cluster, commit->action);
-    }
-#endif
-
-    for (i = 0; i < s->commits.next;) {
-	commit_t* commit = array_get(&(s->commits), i);
-	if (commit->action == ACTION_RENAME) {
-	    mapping_t* mapping = find_mapping_for_cluster(s,
-		    commit->param.rename.cluster);
-	    char* old_path = mapping->path;
-
-	    assert(commit->path);
-	    mapping->path = commit->path;
-	    if (rename(old_path, mapping->path))
-		return -2;
-
-	    if (mapping->mode & MODE_DIRECTORY) {
-		int l1 = strlen(mapping->path);
-		int l2 = strlen(old_path);
-		int diff = l1 - l2;
-		direntry_t* direntry = array_get(&(s->directory),
-			mapping->info.dir.first_dir_index);
-		uint32_t c = mapping->begin;
-		int i = 0;
-
-		/* recurse */
-		while (!fat_eof(s, c)) {
-		    do {
-			direntry_t* d = direntry + i;
-
-			if (is_file(d) || (is_directory(d) && !is_dot(d))) {
-			    mapping_t* m = find_mapping_for_cluster(s,
-				    begin_of_direntry(d));
-			    int l = strlen(m->path);
-			    char* new_path = g_malloc(l + diff + 1);
-
-			    assert(!strncmp(m->path, mapping->path, l2));
-
-                            pstrcpy(new_path, l + diff + 1, mapping->path);
-                            pstrcpy(new_path + l1, l + diff + 1 - l1,
-                                    m->path + l2);
-
-			    schedule_rename(s, m->begin, new_path);
-			}
-			i++;
-		    } while((i % (0x10 * s->sectors_per_cluster)) != 0);
-		    c = fat_get(s, c);
-		}
-	    }
-
-            g_free(old_path);
-	    array_remove(&(s->commits), i);
-	    continue;
-	} else if (commit->action == ACTION_MKDIR) {
-	    mapping_t* mapping;
-	    int j, parent_path_len;
-
-#ifdef __MINGW32__
-            if (mkdir(commit->path))
-                return -5;
-#else
-            if (mkdir(commit->path, 0755))
-                return -5;
-#endif
-
-	    mapping = insert_mapping(s, commit->param.mkdir.cluster,
-		    commit->param.mkdir.cluster + 1);
-	    if (mapping == NULL)
-		return -6;
-
-	    mapping->mode = MODE_DIRECTORY;
-	    mapping->read_only = 0;
-	    mapping->path = commit->path;
-	    j = s->directory.next;
-	    assert(j);
-	    insert_direntries(s, s->directory.next,
-		    0x10 * s->sectors_per_cluster);
-	    mapping->info.dir.first_dir_index = j;
-
-	    parent_path_len = strlen(commit->path)
-		- strlen(get_basename(commit->path)) - 1;
-	    for (j = 0; j < s->mapping.next; j++) {
-		mapping_t* m = array_get(&(s->mapping), j);
-		if (m->first_mapping_index < 0 && m != mapping &&
-			!strncmp(m->path, mapping->path, parent_path_len) &&
-			strlen(m->path) == parent_path_len)
-		    break;
-	    }
-	    assert(j < s->mapping.next);
-	    mapping->info.dir.parent_mapping_index = j;
-
-	    array_remove(&(s->commits), i);
-	    continue;
-	}
-
-	i++;
-    }
-    return 0;
-}
-
-/*
- * Replay the queued WRITEOUT and NEW_FILE commits.
- *
- * RENAME and MKDIR entries are not expected here (do_commit() runs
- * handle_renames_and_mkdirs() first); meeting one aborts the process.
- *
- * The commits that were visited are removed from the queue afterwards,
- * whether or not one of them failed.
- *
- * Returns 0 on success, -1 if the visited commits cannot be removed from
- * the queue, otherwise the code of the failed step:
- *   -3  commit_one_file() failed for a WRITEOUT
- *   -6  no direntry, or no mapping could be inserted, for a NEW_FILE
- *   -7  commit_one_file() failed for a NEW_FILE
- *
- * TODO: make sure that the short name is not matching *another* file
- */
-static int handle_commits(BDRVVVFATState* s)
-{
-    int i, fail = 0;
-
-    vvfat_close_current_file(s);
-
-    for (i = 0; !fail && i < s->commits.next; i++) {
-        commit_t* commit = array_get(&(s->commits), i);
-        switch(commit->action) {
-        case ACTION_RENAME: case ACTION_MKDIR:
-            abort();
-            fail = -2;
-            break;
-        case ACTION_WRITEOUT: {
-#ifndef NDEBUG
-            /* these variables are only used by assert() below */
-            direntry_t* entry = array_get(&(s->directory),
-                    commit->param.writeout.dir_index);
-            uint32_t begin = begin_of_direntry(entry);
-            mapping_t* mapping = find_mapping_for_cluster(s, begin);
-#endif
-
-            assert(mapping);
-            assert(mapping->begin == begin);
-            assert(commit->path == NULL);
-
-            if (commit_one_file(s, commit->param.writeout.dir_index,
-                        commit->param.writeout.modified_offset))
-                fail = -3;
-
-            break;
-        }
-        case ACTION_NEW_FILE: {
-            int begin = commit->param.new_file.first_cluster;
-            mapping_t* mapping = find_mapping_for_cluster(s, begin);
-            direntry_t* entry;
-            int dir_index;
-
-            /* find direntry */
-            for (dir_index = 0; dir_index < s->directory.next; dir_index++) {
-                entry = array_get(&(s->directory), dir_index);
-                if (is_file(entry) && begin_of_direntry(entry) == begin)
-                    break;
-            }
-
-            if (dir_index >= s->directory.next) {
-                fail = -6;
-                continue;
-            }
-
-            /* make sure there exists an initial mapping */
-            if (mapping && mapping->begin != begin) {
-                mapping->end = begin;
-                mapping = NULL;
-            }
-            if (mapping == NULL) {
-                mapping = insert_mapping(s, begin, begin+1);
-                /* insert_mapping() can fail; do not dereference NULL below */
-                if (mapping == NULL) {
-                    fail = -6;
-                    continue;
-                }
-            }
-            /* most members will be fixed in commit_mappings() */
-            assert(commit->path);
-            mapping->path = commit->path;
-            mapping->read_only = 0;
-            mapping->mode = MODE_NORMAL;
-            mapping->info.file.offset = 0;
-
-            if (commit_one_file(s, dir_index, 0))
-                fail = -7;
-
-            break;
-        }
-        default:
-            abort();
-        }
-    }
-    if (i > 0 && array_remove_slice(&(s->commits), 0, i))
-        return -1;
-    return fail;
-}
-
-/*
- * Remove from the host what the mappings flagged MODE_DELETED refer to,
- * and drop those mappings.
- *
- * A directory that rmdir() reports as ENOTEMPTY is deferred; the scan is
- * repeated as long as the previous pass both deferred and deleted something.
- *
- * NOTE(review): a deleted non-directory mapping whose direntry is free is
- * dropped without touching the host file - confirm this is intended.
- *
- * Returns 0 on success, -4 if unlink() fails, -5 if rmdir() fails for any
- * reason other than ENOTEMPTY.
- */
-static int handle_deletes(BDRVVVFATState* s)
-{
-    int i;
-    int deferred = 1;
-    int deleted = 1;
-
-    /* delete files corresponding to mappings marked as deleted */
-    /* handle DELETEs and unused mappings (modified_fat_get(s, mapping->begin) == 0) */
-    while (deferred && deleted) {
-        deferred = 0;
-        deleted = 0;
-
-        for (i = 1; i < s->mapping.next; i++) {
-            mapping_t* mapping = array_get(&(s->mapping), i);
-            direntry_t* entry;
-
-            if (!(mapping->mode & MODE_DELETED)) {
-                continue;
-            }
-
-            entry = array_get(&(s->directory), mapping->dir_index);
-
-            if (!is_free(entry)) {
-                if (unlink(mapping->path)) {
-                    return -4;
-                }
-                deleted++;
-            } else if (mapping->mode & MODE_DIRECTORY) {
-                /* remove file/directory */
-                int j;
-                int next_dir_index = s->directory.next;
-                int first_dir_index = mapping->info.dir.first_dir_index;
-
-                if (rmdir(mapping->path) < 0) {
-                    if (errno != ENOTEMPTY) {
-                        return -5;
-                    }
-                    /* retry on a later pass; the mapping stays in place */
-                    deferred++;
-                    continue;
-                }
-
-                /*
-                 * Lowest first_dir_index above ours among the directory
-                 * mappings marks where this directory's entries stop.
-                 */
-                for (j = 1; j < s->mapping.next; j++) {
-                    mapping_t* m = array_get(&(s->mapping), j);
-
-                    if (m->mode & MODE_DIRECTORY &&
-                            m->info.dir.first_dir_index >
-                            first_dir_index &&
-                            m->info.dir.first_dir_index <
-                            next_dir_index) {
-                        next_dir_index = m->info.dir.first_dir_index;
-                    }
-                }
-                remove_direntries(s, first_dir_index,
-                        next_dir_index - first_dir_index);
-
-                deleted++;
-            }
-            DLOG(fprintf(stderr, "DELETE (%d)\n", i); print_mapping(mapping); print_direntry(entry));
-            remove_mapping(s, i);
-        }
-    }
-
-    return 0;
-}
-
-/*
- * Synchronize the host directory with the state the guest wrote.
- *
- * Steps, in this order:
- * - replay queued renames and mkdirs
- * - copy the FAT from s->fat2 into s->fat
- * - walk the direntries from the root (commit_direntries)
- * - replay the remaining commits
- * - delete what the mappings mark as deleted
- * - empty the qcow overlay when its driver offers bdrv_make_empty, and
- *   clear s->used_clusters
- *
- * A failing step prints a message to stderr and aborts the process.
- * Returns 0, including when no commit is queued.
- */
-static int do_commit(BDRVVVFATState* s)
-{
-    int ret;
-
-    /* the real meat are the commits. Nothing to do? Move along! */
-    if (!s->commits.next) {
-        return 0;
-    }
-
-    vvfat_close_current_file(s);
-
-    ret = handle_renames_and_mkdirs(s);
-    if (ret != 0) {
-        fprintf(stderr, "Error handling renames (%d)\n", ret);
-        abort();
-        return ret;
-    }
-
-    /* take over the FAT kept in s->fat2 */
-    memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat);
-
-    /* recurse direntries from root */
-    ret = commit_direntries(s, 0, -1);
-    if (ret != 0) {
-        fprintf(stderr, "Fatal: error while committing (%d)\n", ret);
-        abort();
-        return ret;
-    }
-
-    ret = handle_commits(s);
-    if (ret != 0) {
-        fprintf(stderr, "Error handling commits (%d)\n", ret);
-        abort();
-        return ret;
-    }
-
-    ret = handle_deletes(s);
-    if (ret != 0) {
-        fprintf(stderr, "Error deleting\n");
-        abort();
-        return ret;
-    }
-
-    if (s->qcow->drv->bdrv_make_empty) {
-        s->qcow->drv->bdrv_make_empty(s->qcow);
-    }
-
-    memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
-
-DLOG(checkpoint());
-    return 0;
-}
-
-/*
- * Commit only if is_consistent() accepts the current state.
- * Returns -1 when it does not, otherwise whatever do_commit() returns.
- */
-static int try_commit(BDRVVVFATState* s)
-{
-    vvfat_close_current_file(s);
-DLOG(checkpoint());
-    return is_consistent(s) ? do_commit(s) : -1;
-}
-
-/*
- * Write nb_sectors sectors from buf, starting at sector_num.
- *
- * The request is rejected before anything is written when
- * - no qcow overlay is attached (-EACCES),
- * - it starts below s->first_sectors_number (-1),
- * - it touches a mapping flagged read_only (-1),
- * - it carries a directory entry that parse_long_name() rejects, or
- *   changes the short-name entry of a file with attribute bit 0 set (-1).
- *
- * Otherwise the data goes to the qcow overlay, the clusters touched are
- * flagged USED_ALLOCATED, and a commit is attempted. The result of that
- * commit attempt is ignored. Returns 0, or the bdrv_write() error.
- */
-static int vvfat_write(BlockDriverState *bs, int64_t sector_num,
-                    const uint8_t *buf, int nb_sectors)
-{
-    BDRVVVFATState *s = bs->opaque;
-    int i, ret;
-
-DLOG(checkpoint());
-
-    /* Check if we're operating in read-only mode */
-    if (!s->qcow) {
-        return -EACCES;
-    }
-
-    vvfat_close_current_file(s);
-
-    /*
-     * Some sanity checks:
-     * - do not allow writing to the boot sector
-     * - do not allow to write non-ASCII filenames
-     */
-
-    if (sector_num < s->first_sectors_number) {
-        return -1;
-    }
-
-    i = sector2cluster(s, sector_num);
-    while (i <= sector2cluster(s, sector_num + nb_sectors - 1)) {
-        mapping_t* mapping = find_mapping_for_cluster(s, i);
-
-        if (!mapping) {
-            i++;
-            continue;
-        }
-
-        if (mapping->read_only) {
-            fprintf(stderr, "Tried to write to write-protected file %s\n",
-                    mapping->path);
-            return -1;
-        }
-
-        if (mapping->mode & MODE_DIRECTORY) {
-            int begin = cluster2sector(s, i);
-            int end = begin + s->sectors_per_cluster;
-            int k;
-            int dir_index;
-            const direntry_t* direntries;
-            long_file_name lfn;
-
-            lfn_init(&lfn);
-
-            /* clamp the cluster's sector range to the request */
-            if (begin < sector_num) {
-                begin = sector_num;
-            }
-            if (end > sector_num + nb_sectors) {
-                end = sector_num + nb_sectors;
-            }
-            dir_index  = mapping->dir_index +
-                0x10 * (begin - mapping->begin * s->sectors_per_cluster);
-            direntries = (direntry_t*)(buf + 0x200 * (begin - sector_num));
-
-            for (k = 0; k < (end - begin) * 0x10; k++) {
-                const direntry_t* incoming = direntries + k;
-
-                /* do not allow non-ASCII filenames */
-                if (parse_long_name(&lfn, incoming) < 0) {
-                    fprintf(stderr, "Warning: non-ASCII filename\n");
-                    return -1;
-                }
-                /* no access to the direntry of a read-only file */
-                if (is_short_name(incoming) &&
-                        (incoming->attributes & 1) &&
-                        memcmp(incoming,
-                            array_get(&(s->directory), dir_index + k),
-                            sizeof(direntry_t))) {
-                    fprintf(stderr, "Warning: tried to write to write-protected file\n");
-                    return -1;
-                }
-            }
-        }
-        /* continue after this mapping */
-        i = mapping->end;
-    }
-
-    /*
-     * Use qcow backend. Commit later.
-     */
-DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, nb_sectors));
-    ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors);
-    if (ret < 0) {
-        fprintf(stderr, "Error writing to qcow backend\n");
-        return ret;
-    }
-
-    for (i = sector2cluster(s, sector_num);
-            i <= sector2cluster(s, sector_num + nb_sectors - 1); i++) {
-        if (i >= 0) {
-            s->used_clusters[i] |= USED_ALLOCATED;
-        }
-    }
-
-DLOG(checkpoint());
-    /* TODO: add timeout */
-    try_commit(s);
-
-DLOG(checkpoint());
-    return 0;
-}
-
-/* Run vvfat_write() with s->lock held and pass its result through. */
-static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
-                                       const uint8_t *buf, int nb_sectors)
-{
-    BDRVVVFATState *s = bs->opaque;
-    int status;
-
-    qemu_co_mutex_lock(&s->lock);
-    status = vvfat_write(bs, sector_num, buf, nb_sectors);
-    qemu_co_mutex_unlock(&s->lock);
-
-    return status;
-}
-
-/*
- * Report sectors from sector_num up to s->sector_count as data.
- *
- * *n receives the number of sectors left, capped at nb_sectors.
- * Returns 0 when that number is negative, BDRV_BLOCK_DATA otherwise.
- */
-static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
-	int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
-{
-    BDRVVVFATState* s = bs->opaque;
-    int left = s->sector_count - sector_num;
-
-    if (left > nb_sectors) {
-        *n = nb_sectors;
-        return BDRV_BLOCK_DATA;
-    }
-
-    *n = left;
-    return left < 0 ? 0 : BDRV_BLOCK_DATA;
-}
-
-/*
- * bdrv_write hook of vvfat_write_target: the data is ignored, the write
- * only triggers try_commit() on the vvfat state stored behind bs->opaque.
- */
-static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
-	const uint8_t* buffer, int nb_sectors) {
-    BDRVVVFATState** owner = bs->opaque;
-
-    return try_commit(*owner);
-}
-
-/*
- * bdrv_close hook of vvfat_write_target: drop the reference on the qcow
- * overlay and free the stored overlay file name.
- */
-static void write_target_close(BlockDriverState *bs) {
-    BDRVVVFATState** owner = bs->opaque;
-    BDRVVVFATState* s = *owner;
-
-    bdrv_unref(s->qcow);
-    g_free(s->qcow_filename);
-}
-
-/*
- * Driver installed by enable_write_target() on the backing node of the
- * vvfat BlockDriverState; it only provides write and close hooks.
- */
-static BlockDriver vvfat_write_target = {
-    .format_name = "vvfat_write_target",
-    .bdrv_write  = write_target_commit,
-    .bdrv_close  = write_target_close,
-};
-
-/*
- * Set up everything needed to accept writes:
- * - allocate the per-cluster usage map and the commit queue
- * - create a temporary qcow image with backing file "fat:" and open it
- *   read-write as s->qcow (the file is unlinked right away except on
- *   Windows)
- * - give s->bs a fresh backing node driven by vvfat_write_target whose
- *   opaque points back at s
- *
- * Returns 0 on success. On failure returns a negative errno value, sets
- * *errp, and releases the usage map and the temporary file name.
- */
-static int enable_write_target(BDRVVVFATState *s, Error **errp)
-{
-    BlockDriver *bdrv_qcow = NULL;
-    BlockDriverState *backing;
-    QemuOpts *opts = NULL;
-    int ret;
-    int size = sector2cluster(s, s->sector_count);
-    QDict *options;
-
-    s->used_clusters = calloc(size, 1);
-    /* vvfat_write() indexes this map, so a failed allocation must not pass */
-    if (size > 0 && s->used_clusters == NULL) {
-        error_setg(errp, "can't allocate cluster usage map");
-        return -ENOMEM;
-    }
-
-    array_init(&(s->commits), sizeof(commit_t));
-
-    s->qcow_filename = g_malloc(PATH_MAX);
-    ret = get_tmp_filename(s->qcow_filename, PATH_MAX);
-    if (ret < 0) {
-        error_setg_errno(errp, -ret, "can't create temporary file");
-        goto err;
-    }
-
-    bdrv_qcow = bdrv_find_format("qcow");
-    if (!bdrv_qcow) {
-        error_setg(errp, "Failed to locate qcow driver");
-        ret = -ENOENT;
-        goto err;
-    }
-
-    opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
-    qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512,
-                        &error_abort);
-    qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort);
-
-    ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp);
-    qemu_opts_del(opts);
-    if (ret < 0) {
-        goto err;
-    }
-
-    s->qcow = NULL;
-    options = qdict_new();
-    qdict_put(options, "driver", qstring_from_str("qcow"));
-    ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
-                    BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
-    if (ret < 0) {
-        goto err;
-    }
-
-#ifndef _WIN32
-    unlink(s->qcow_filename);
-#endif
-
-    backing = bdrv_new();
-    bdrv_set_backing_hd(s->bs, backing);
-    bdrv_unref(backing);
-
-    s->bs->backing->bs->drv = &vvfat_write_target;
-    s->bs->backing->bs->opaque = g_new(void *, 1);
-    *(void**)s->bs->backing->bs->opaque = s;
-
-    return 0;
-
-err:
-    /* allocated with calloc(), so released with free() */
-    free(s->used_clusters);
-    s->used_clusters = NULL;
-    g_free(s->qcow_filename);
-    s->qcow_filename = NULL;
-    return ret;
-}
-
-/*
- * Release the FAT, directory and mapping arrays and the cluster buffer.
- * When a qcow overlay is attached, the migration blocker is removed and
- * freed as well.
- */
-static void vvfat_close(BlockDriverState *bs)
-{
-    BDRVVVFATState *s = bs->opaque;
-
-    vvfat_close_current_file(s);
-
-    array_free(&(s->fat));
-    array_free(&(s->directory));
-    array_free(&(s->mapping));
-    g_free(s->cluster_buffer);
-
-    if (!s->qcow) {
-        return;
-    }
-
-    migrate_del_blocker(s->migration_blocker);
-    error_free(s->migration_blocker);
-}
-
-/* Driver table for the "vvfat" format, reachable as protocol "fat". */
-static BlockDriver bdrv_vvfat = {
-    .format_name              = "vvfat",
-    .protocol_name            = "fat",
-    .instance_size            = sizeof(BDRVVVFATState),
-
-    /* open / close */
-    .bdrv_parse_filename      = vvfat_parse_filename,
-    .bdrv_file_open           = vvfat_open,
-    .bdrv_close               = vvfat_close,
-
-    /* I/O */
-    .bdrv_read                = vvfat_co_read,
-    .bdrv_write               = vvfat_co_write,
-    .bdrv_co_get_block_status = vvfat_co_get_block_status,
-};
-
-/* Register the vvfat driver; hooked in through block_init() below. */
-static void bdrv_vvfat_init(void)
-{
-    bdrv_register(&bdrv_vvfat);
-}
-
-block_init(bdrv_vvfat_init);
-
-#ifdef DEBUG
-/*
- * Debug-only consistency check on the state reachable through vvv:
- * the first mapping must end at 2, check1()/check2() are run, and a
- * current mapping must have an open fd unless it is a directory.
- */
-static void checkpoint(void)
-{
-    assert(((mapping_t*)array_get(&(vvv->mapping), 0))->end == 2);
-    check1(vvv);
-    check2(vvv);
-    assert(!vvv->current_mapping || vvv->current_fd ||
-           (vvv->current_mapping->mode & MODE_DIRECTORY));
-#if 0
-    if (((direntry_t*)vvv->directory.pointer)[1].attributes != 0xf)
-	fprintf(stderr, "Nonono!\n");
-    mapping_t* mapping;
-    direntry_t* direntry;
-    assert(vvv->mapping.size >= vvv->mapping.item_size * vvv->mapping.next);
-    assert(vvv->directory.size >= vvv->directory.item_size * vvv->directory.next);
-    if (vvv->mapping.next<47)
-	return;
-    assert((mapping = array_get(&(vvv->mapping), 47)));
-    assert(mapping->dir_index < vvv->directory.next);
-    direntry = array_get(&(vvv->directory), mapping->dir_index);
-    assert(!memcmp(direntry->name, "USB     H  ", 11) || direntry->name[0]==0);
-#endif
-}
-#endif
diff --git a/qemu/block/win32-aio.c b/qemu/block/win32-aio.c
deleted file mode 100644
index 2d509a9a7..000000000
--- a/qemu/block/win32-aio.c
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Block driver for RAW files (win32)
- *
- * Copyright (c) 2006 Fabrice Bellard
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include "qemu/osdep.h"
-#include "qemu-common.h"
-#include "qemu/timer.h"
-#include "block/block_int.h"
-#include "qemu/module.h"
-#include "block/aio.h"
-#include "raw-aio.h"
-#include "qemu/event_notifier.h"
-#include "qemu/iov.h"
-#include <windows.h>
-#include <winioctl.h>
-
-/* File type codes. */
-#define FTYPE_FILE     0
-#define FTYPE_CD       1
-#define FTYPE_HARDDISK 2
-
-/* State shared by all requests submitted through one win32 AIO instance. */
-struct QEMUWin32AIOState {
-    HANDLE hIOCP;                 /* I/O completion port the files attach to */
-    EventNotifier e;              /* its handle is put into each OVERLAPPED */
-    int count;                    /* requests submitted and not yet completed */
-    bool is_aio_context_attached; /* toggled by the attach/detach helpers */
-};
-
-/* One in-flight request; recovered from its OVERLAPPED on completion. */
-typedef struct QEMUWin32AIOCB {
-    BlockAIOCB common;              /* carries the callback and its opaque */
-    struct QEMUWin32AIOState *ctx;
-    int nbytes;                     /* request length in bytes */
-    OVERLAPPED ov;                  /* passed to ReadFile/WriteFile */
-    QEMUIOVector *qiov;             /* the caller's vector */
-    void *buf;                      /* bounce buffer, or the single iov base */
-    bool is_read;
-    bool is_linear;                 /* true: buf aliases the caller's memory */
-} QEMUWin32AIOCB;
-
-/*
- * Completes an AIO request (calls the callback and frees the ACB).
- *
- * count is the number of bytes transferred. The callback receives
- *   -EIO     when the OVERLAPPED carries a non-zero status,
- *   -EINVAL  for a short write,
- *   0        otherwise; a short read is padded with zeros.
- *
- * For a request that went through a bounce buffer, read data is copied
- * into the caller's vector before the zero padding is applied, so the
- * padding is not overwritten by the unread tail of the bounce buffer.
- */
-static void win32_aio_process_completion(QEMUWin32AIOState *s,
-    QEMUWin32AIOCB *waiocb, DWORD count)
-{
-    int ret;
-    s->count--;
-
-    if (waiocb->ov.Internal != 0) {
-        ret = -EIO;
-    } else {
-        ret = 0;
-
-        /* hand bounce-buffer data back first ... */
-        if (!waiocb->is_linear && waiocb->is_read) {
-            QEMUIOVector *qiov = waiocb->qiov;
-            iov_from_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
-        }
-
-        if (count < waiocb->nbytes) {
-            /* Short reads mean EOF, pad with zeros. */
-            if (waiocb->is_read) {
-                /* ... then zero everything past what was actually read */
-                qemu_iovec_memset(waiocb->qiov, count, 0,
-                    waiocb->qiov->size - count);
-            } else {
-                ret = -EINVAL;
-            }
-        }
-    }
-
-    if (!waiocb->is_linear) {
-        qemu_vfree(waiocb->buf);
-    }
-
-    waiocb->common.cb(waiocb->common.opaque, ret);
-    qemu_aio_unref(waiocb);
-}
-
-/*
- * Event notifier handler: clear the notifier, then complete every request
- * the completion port hands out. The zero timeout makes the dequeue call
- * return at once when nothing is queued.
- */
-static void win32_aio_completion_cb(EventNotifier *e)
-{
-    QEMUWin32AIOState *s = container_of(e, QEMUWin32AIOState, e);
-    OVERLAPPED *ov;
-    ULONG_PTR key;
-    DWORD count;
-
-    event_notifier_test_and_clear(&s->e);
-
-    for (;;) {
-        QEMUWin32AIOCB *waiocb;
-
-        if (!GetQueuedCompletionStatus(s->hIOCP, &count, &key, &ov, 0)) {
-            break;
-        }
-
-        waiocb = container_of(ov, QEMUWin32AIOCB, ov);
-        win32_aio_process_completion(s, waiocb, count);
-    }
-}
-
-/* Tells qemu_aio_get() how large a QEMUWin32AIOCB is. */
-static const AIOCBInfo win32_aiocb_info = {
-    .aiocb_size = sizeof(QEMUWin32AIOCB),
-};
-
-/*
- * Start an overlapped read or write of nb_sectors 512-byte sectors at
- * sector_num on hfile.
- *
- * A vector with more than one element goes through a bounce buffer
- * (filled from the vector up front for writes); a single-element vector
- * is used in place.
- *
- * Returns the new request, or NULL when the bounce buffer cannot be
- * allocated or the I/O call fails with anything but ERROR_IO_PENDING.
- * On the NULL paths the request and its bounce buffer are released and
- * cb is not called.
- */
-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
-        QEMUWin32AIOState *aio, HANDLE hfile,
-        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-        BlockCompletionFunc *cb, void *opaque, int type)
-{
-    struct QEMUWin32AIOCB *waiocb;
-    uint64_t offset = sector_num * 512;
-    DWORD rc;
-
-    waiocb = qemu_aio_get(&win32_aiocb_info, bs, cb, opaque);
-    waiocb->nbytes = nb_sectors * 512;
-    waiocb->qiov = qiov;
-    waiocb->is_read = (type == QEMU_AIO_READ);
-
-    if (qiov->niov > 1) {
-        waiocb->buf = qemu_try_blockalign(bs, qiov->size);
-        if (waiocb->buf == NULL) {
-            goto out;
-        }
-        if (type & QEMU_AIO_WRITE) {
-            iov_to_buf(qiov->iov, qiov->niov, 0, waiocb->buf, qiov->size);
-        }
-        waiocb->is_linear = false;
-    } else {
-        waiocb->buf = qiov->iov[0].iov_base;
-        waiocb->is_linear = true;
-    }
-
-    memset(&waiocb->ov, 0, sizeof(waiocb->ov));
-    waiocb->ov.Offset = (DWORD)offset;
-    waiocb->ov.OffsetHigh = (DWORD)(offset >> 32);
-    waiocb->ov.hEvent = event_notifier_get_handle(&aio->e);
-
-    aio->count++;
-
-    if (type & QEMU_AIO_READ) {
-        rc = ReadFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
-    } else {
-        rc = WriteFile(hfile, waiocb->buf, waiocb->nbytes, NULL, &waiocb->ov);
-    }
-    if(rc == 0 && GetLastError() != ERROR_IO_PENDING) {
-        goto out_dec_count;
-    }
-    return &waiocb->common;
-
-out_dec_count:
-    aio->count--;
-    /* no completion will arrive for this request, so free the bounce buffer */
-    if (!waiocb->is_linear) {
-        qemu_vfree(waiocb->buf);
-    }
-out:
-    qemu_aio_unref(waiocb);
-    return NULL;
-}
-
-/*
- * Associate hfile with the completion port of aio.
- * Returns 0 on success, -EINVAL when the association fails.
- */
-int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
-{
-    HANDLE port = CreateIoCompletionPort(hfile, aio->hIOCP, (ULONG_PTR) 0, 0);
-
-    return port == NULL ? -EINVAL : 0;
-}
-
-/* Unhook the event notifier from old_context and mark aio as detached. */
-void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
-                                  AioContext *old_context)
-{
-    EventNotifier *notifier = &aio->e;
-
-    aio_set_event_notifier(old_context, notifier, false, NULL);
-    aio->is_aio_context_attached = false;
-}
-
-/*
- * Mark aio as attached and let new_context run win32_aio_completion_cb
- * for its event notifier.
- */
-void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
-                                  AioContext *new_context)
-{
-    EventNotifier *notifier = &aio->e;
-
-    aio->is_aio_context_attached = true;
-    aio_set_event_notifier(new_context, notifier, false,
-                           win32_aio_completion_cb);
-}
-
-/*
- * Allocate a zeroed AIO state with an event notifier and a fresh I/O
- * completion port. Returns NULL, with everything released, when either
- * of the two cannot be created.
- */
-QEMUWin32AIOState *win32_aio_init(void)
-{
-    QEMUWin32AIOState *s = g_malloc0(sizeof(*s));
-
-    if (event_notifier_init(&s->e, false) < 0) {
-        g_free(s);
-        return NULL;
-    }
-
-    s->hIOCP = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
-    if (s->hIOCP == NULL) {
-        event_notifier_cleanup(&s->e);
-        g_free(s);
-        return NULL;
-    }
-
-    return s;
-}
-
-/*
- * Close the completion port, clean up the notifier and free aio.
- * The state must have been detached from its AioContext beforehand.
- */
-void win32_aio_cleanup(QEMUWin32AIOState *aio)
-{
-    assert(!aio->is_aio_context_attached);
-
-    CloseHandle(aio->hIOCP);
-    event_notifier_cleanup(&aio->e);
-
-    g_free(aio);
-}
diff --git a/qemu/block/write-threshold.c b/qemu/block/write-threshold.c
deleted file mode 100644
index cc2ca7183..000000000
--- a/qemu/block/write-threshold.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * QEMU System Emulator block write threshold notification
- *
- * Copyright Red Hat, Inc. 2014
- *
- * Authors:
- *  Francesco Romani <fromani@redhat.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- */
-
-#include "qemu/osdep.h"
-#include "block/block_int.h"
-#include "qemu/coroutine.h"
-#include "block/write-threshold.h"
-#include "qemu/notify.h"
-#include "qapi-event.h"
-#include "qmp-commands.h"
-
-
-/* Return the write threshold stored on bs; 0 stands for "not set". */
-uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
-{
-    uint64_t threshold = bs->write_threshold_offset;
-
-    return threshold;
-}
-
-/* True when bs carries a write threshold greater than zero. */
-bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
-{
-    if (bs->write_threshold_offset > 0) {
-        return true;
-    }
-    return false;
-}
-
-/* Remove the notifier and reset the threshold; no-op when none is set. */
-static void write_threshold_disable(BlockDriverState *bs)
-{
-    if (!bdrv_write_threshold_is_set(bs)) {
-        return;
-    }
-
-    notifier_with_return_remove(&bs->write_threshold_notifier);
-    bs->write_threshold_offset = 0;
-}
-
-/*
- * Tell by how many bytes the request goes past the threshold of bs.
- *
- * Returns 0 when no threshold is set or the request ends at or below it.
- * A request starting above the threshold yields its distance from the
- * threshold plus its length; one that merely crosses it yields the part
- * of the request lying beyond the threshold.
- */
-uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
-                                       const BdrvTrackedRequest *req)
-{
-    if (!bdrv_write_threshold_is_set(bs)) {
-        return 0;
-    }
-
-    if (req->offset > bs->write_threshold_offset) {
-        return (req->offset - bs->write_threshold_offset) + req->bytes;
-    }
-
-    if ((req->offset + req->bytes) > bs->write_threshold_offset) {
-        return (req->offset + req->bytes) - bs->write_threshold_offset;
-    }
-
-    return 0;
-}
-
-/*
- * Before-write notifier: when the tracked request passed in opaque
- * exceeds the threshold, emit the write-threshold event once and switch
- * the threshold off again. Always returns 0.
- */
-static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
-                                            void *opaque)
-{
-    BdrvTrackedRequest *req = opaque;
-    BlockDriverState *bs = req->bs;
-    uint64_t amount = bdrv_write_threshold_exceeded(bs, req);
-
-    if (amount == 0) {
-        return 0; /* should always let other notifiers run */
-    }
-
-    qapi_event_send_block_write_threshold(bs->node_name,
-                                          amount,
-                                          bs->write_threshold_offset,
-                                          &error_abort);
-
-    /* autodisable to avoid flooding the monitor */
-    write_threshold_disable(bs);
-
-    return 0; /* should always let other notifiers run */
-}
-
-/* Hook before_write_notify() into the before-write notifier list of bs. */
-static void write_threshold_register_notifier(BlockDriverState *bs)
-{
-    NotifierWithReturn *notifier = &bs->write_threshold_notifier;
-
-    notifier->notify = before_write_notify;
-    notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
-
-/* Store threshold_bytes as the new threshold of bs. */
-static void write_threshold_update(BlockDriverState *bs,
-                                   int64_t threshold_bytes)
-{
-    bs->write_threshold_offset = threshold_bytes;
-}
-
-/*
- * Set, change or clear the write threshold of bs.
- *
- * A zero threshold_bytes clears an existing threshold and is ignored when
- * none is set. A non-zero value is stored; the notifier is registered
- * only if no threshold was set before.
- */
-void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
-{
-    bool armed = bdrv_write_threshold_is_set(bs);
-
-    if (threshold_bytes == 0) {
-        if (armed) {
-            write_threshold_disable(bs);
-        }
-        /* discard bogus disable request */
-        return;
-    }
-
-    if (!armed) {
-        /* avoid multiple registration */
-        write_threshold_register_notifier(bs);
-    }
-    write_threshold_update(bs, threshold_bytes);
-}
-
-/*
- * QMP handler: look up the node called node_name and apply
- * threshold_bytes to it while holding the node's AioContext.
- * Sets *errp when no such node exists.
- */
-void qmp_block_set_write_threshold(const char *node_name,
-                                   uint64_t threshold_bytes,
-                                   Error **errp)
-{
-    BlockDriverState *bs = bdrv_find_node(node_name);
-    AioContext *aio_context;
-
-    if (bs == NULL) {
-        error_setg(errp, "Device '%s' not found", node_name);
-        return;
-    }
-
-    aio_context = bdrv_get_aio_context(bs);
-
-    aio_context_acquire(aio_context);
-    bdrv_write_threshold_set(bs, threshold_bytes);
-    aio_context_release(aio_context);
-}
author	RajithaY <rajithax.yerrumsetty@intel.com>	2017-04-25 03:31:15 -0700
committer	Rajitha Yerrumchetty <rajithax.yerrumsetty@intel.com>	2017-05-22 06:48:08 +0000
commit	bb756eebdac6fd24e8919e2c43f7d2c8c4091f59 (patch)
tree	ca11e03542edf2d8f631efeca5e1626d211107e3 /qemu/block
parent	a14b48d18a9ed03ec191cf16b162206998a895ce (diff)