summaryrefslogtreecommitdiffstats
path: root/qemu/block
diff options
context:
space:
mode:
authorDon Dugger <n0ano@n0ano.com>2016-06-03 03:33:22 +0000
committerGerrit Code Review <gerrit@172.30.200.206>2016-06-03 03:33:23 +0000
commitda27230f80795d0028333713f036d44c53cb0e68 (patch)
treeb3d379eaf000adf72b36cb01cdf4d79c3e3f064c /qemu/block
parent0e68cb048bb8aadb14675f5d4286d8ab2fc35449 (diff)
parent437fd90c0250dee670290f9b714253671a990160 (diff)
Merge "These changes are the raw update to qemu-2.6."
Diffstat (limited to 'qemu/block')
-rw-r--r--qemu/block/Makefile.objs6
-rw-r--r--qemu/block/accounting.c124
-rw-r--r--qemu/block/archipelago.c4
-rw-r--r--qemu/block/backup.c182
-rw-r--r--qemu/block/blkdebug.c150
-rwxr-xr-xqemu/block/blkreplay.c160
-rw-r--r--qemu/block/blkverify.c77
-rw-r--r--qemu/block/block-backend.c1089
-rw-r--r--qemu/block/bochs.c10
-rw-r--r--qemu/block/cloop.c12
-rw-r--r--qemu/block/commit.c15
-rw-r--r--qemu/block/crypto.c586
-rw-r--r--qemu/block/curl.c83
-rw-r--r--qemu/block/dirty-bitmap.c387
-rw-r--r--qemu/block/dmg.c22
-rw-r--r--qemu/block/gluster.c159
-rw-r--r--qemu/block/io.c452
-rw-r--r--qemu/block/iscsi.c253
-rw-r--r--qemu/block/linux-aio.c6
-rw-r--r--qemu/block/mirror.c465
-rw-r--r--qemu/block/nbd-client.c113
-rw-r--r--qemu/block/nbd-client.h12
-rw-r--r--qemu/block/nbd.c203
-rw-r--r--qemu/block/nfs.c71
-rw-r--r--qemu/block/null.c44
-rw-r--r--qemu/block/parallels.c72
-rw-r--r--qemu/block/qapi.c237
-rw-r--r--qemu/block/qcow.c90
-rw-r--r--qemu/block/qcow2-cache.c85
-rw-r--r--qemu/block/qcow2-cluster.c150
-rw-r--r--qemu/block/qcow2-refcount.c584
-rw-r--r--qemu/block/qcow2-snapshot.c54
-rw-r--r--qemu/block/qcow2.c1032
-rw-r--r--qemu/block/qcow2.h41
-rw-r--r--qemu/block/qed-check.c1
-rw-r--r--qemu/block/qed-cluster.c1
-rw-r--r--qemu/block/qed-gencb.c1
-rw-r--r--qemu/block/qed-l2-cache.c1
-rw-r--r--qemu/block/qed-table.c5
-rw-r--r--qemu/block/qed.c102
-rw-r--r--qemu/block/qed.h1
-rw-r--r--qemu/block/quorum.c138
-rw-r--r--qemu/block/raw-aio.h2
-rw-r--r--qemu/block/raw-posix.c484
-rw-r--r--qemu/block/raw-win32.c12
-rw-r--r--qemu/block/raw_bsd.c69
-rw-r--r--qemu/block/rbd.c52
-rw-r--r--qemu/block/sheepdog.c393
-rw-r--r--qemu/block/snapshot.c169
-rw-r--r--qemu/block/ssh.c15
-rw-r--r--qemu/block/stream.c50
-rw-r--r--qemu/block/throttle-groups.c58
-rw-r--r--qemu/block/vdi.c47
-rw-r--r--qemu/block/vhdx-endian.c1
-rw-r--r--qemu/block/vhdx-log.c40
-rw-r--r--qemu/block/vhdx.c85
-rw-r--r--qemu/block/vmdk.c358
-rw-r--r--qemu/block/vpc.c306
-rw-r--r--qemu/block/vvfat.c44
-rw-r--r--qemu/block/win32-aio.c6
-rw-r--r--qemu/block/write-threshold.c3
61 files changed, 6908 insertions, 2566 deletions
diff --git a/qemu/block/Makefile.objs b/qemu/block/Makefile.objs
index 58ef2ef3f..44a541622 100644
--- a/qemu/block/Makefile.objs
+++ b/qemu/block/Makefile.objs
@@ -4,7 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
@@ -20,9 +20,11 @@ block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
+block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
+block-obj-y += crypto.o
+
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += backup.o
diff --git a/qemu/block/accounting.c b/qemu/block/accounting.c
index 01d594ffd..3f457c4e7 100644
--- a/qemu/block/accounting.c
+++ b/qemu/block/accounting.c
@@ -2,6 +2,7 @@
* QEMU System Emulator block accounting
*
* Copyright (c) 2011 Christoph Hellwig
+ * Copyright (c) 2015 Igalia, S.L.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,9 +23,58 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/accounting.h"
#include "block/block_int.h"
#include "qemu/timer.h"
+#include "sysemu/qtest.h"
+
+static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
+static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
+
+void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+ bool account_failed)
+{
+ stats->account_invalid = account_invalid;
+ stats->account_failed = account_failed;
+
+ if (qtest_enabled()) {
+ clock_type = QEMU_CLOCK_VIRTUAL;
+ }
+}
+
+void block_acct_cleanup(BlockAcctStats *stats)
+{
+ BlockAcctTimedStats *s, *next;
+ QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
+ g_free(s);
+ }
+}
+
+void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
+{
+ BlockAcctTimedStats *s;
+ unsigned i;
+
+ s = g_new0(BlockAcctTimedStats, 1);
+ s->interval_length = interval_length;
+ QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
+
+ for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
+ timed_average_init(&s->latency[i], clock_type,
+ (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
+ }
+}
+
+BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
+ BlockAcctTimedStats *s)
+{
+ if (s == NULL) {
+ return QSLIST_FIRST(&stats->intervals);
+ } else {
+ return QSLIST_NEXT(s, entries);
+ }
+}
void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
int64_t bytes, enum BlockAcctType type)
@@ -32,26 +82,69 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
assert(type < BLOCK_MAX_IOTYPE);
cookie->bytes = bytes;
- cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ cookie->start_time_ns = qemu_clock_get_ns(clock_type);
cookie->type = type;
}
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
+ BlockAcctTimedStats *s;
+ int64_t time_ns = qemu_clock_get_ns(clock_type);
+ int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+ if (qtest_enabled()) {
+ latency_ns = qtest_latency_ns;
+ }
+
assert(cookie->type < BLOCK_MAX_IOTYPE);
stats->nr_bytes[cookie->type] += cookie->bytes;
stats->nr_ops[cookie->type]++;
- stats->total_time_ns[cookie->type] +=
- qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns;
+ stats->total_time_ns[cookie->type] += latency_ns;
+ stats->last_access_time_ns = time_ns;
+
+ QSLIST_FOREACH(s, &stats->intervals, entries) {
+ timed_average_account(&s->latency[cookie->type], latency_ns);
+ }
}
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+ assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+ stats->failed_ops[cookie->type]++;
+
+ if (stats->account_failed) {
+ BlockAcctTimedStats *s;
+ int64_t time_ns = qemu_clock_get_ns(clock_type);
+ int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+ if (qtest_enabled()) {
+ latency_ns = qtest_latency_ns;
+ }
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
- unsigned int nb_sectors)
+ stats->total_time_ns[cookie->type] += latency_ns;
+ stats->last_access_time_ns = time_ns;
+
+ QSLIST_FOREACH(s, &stats->intervals, entries) {
+ timed_average_account(&s->latency[cookie->type], latency_ns);
+ }
+ }
+}
+
+void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
{
- if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
- stats->wr_highest_sector = sector_num + nb_sectors - 1;
+ assert(type < BLOCK_MAX_IOTYPE);
+
+ /* block_acct_done() and block_acct_failed() update
+ * total_time_ns[], but this one does not. The reason is that
+ * invalid requests are accounted during their submission,
+ * therefore there's no actual I/O involved. */
+
+ stats->invalid_ops[type]++;
+
+ if (stats->account_invalid) {
+ stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
}
}
@@ -61,3 +154,20 @@ void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
assert(type < BLOCK_MAX_IOTYPE);
stats->merged[type] += num_requests;
}
+
+int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
+{
+ return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns;
+}
+
+double block_acct_queue_depth(BlockAcctTimedStats *stats,
+ enum BlockAcctType type)
+{
+ uint64_t sum, elapsed;
+
+ assert(type < BLOCK_MAX_IOTYPE);
+
+ sum = timed_average_sum(&stats->latency[type], &elapsed);
+
+ return (double) sum / elapsed;
+}
diff --git a/qemu/block/archipelago.c b/qemu/block/archipelago.c
index 855655c6b..b9f5e69d4 100644
--- a/qemu/block/archipelago.c
+++ b/qemu/block/archipelago.c
@@ -50,7 +50,8 @@
*
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qemu/error-report.h"
#include "qemu/thread.h"
@@ -59,7 +60,6 @@
#include "qapi/qmp/qjson.h"
#include "qemu/atomic.h"
-#include <inttypes.h>
#include <xseg/xseg.h>
#include <xseg/protocol.h>
diff --git a/qemu/block/backup.c b/qemu/block/backup.c
index 965654d52..491fd1406 100644
--- a/qemu/block/backup.c
+++ b/qemu/block/backup.c
@@ -11,21 +11,20 @@
*
*/
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "qemu/cutils.h"
+#include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
-#define BACKUP_CLUSTER_BITS 16
-#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
-#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
-
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CowRequest {
@@ -46,10 +45,17 @@ typedef struct BackupBlockJob {
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
- HBitmap *bitmap;
+ unsigned long *done_bitmap;
+ int64_t cluster_size;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
+/* Size of a cluster in sectors, instead of bytes. */
+static inline int64_t cluster_size_sectors(BackupBlockJob *job)
+{
+ return job->cluster_size / BDRV_SECTOR_SIZE;
+}
+
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
@@ -89,7 +95,8 @@ static void cow_request_end(CowRequest *req)
static int coroutine_fn backup_do_cow(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
- bool *error_is_read)
+ bool *error_is_read,
+ bool is_write_notifier)
{
BackupBlockJob *job = (BackupBlockJob *)bs->job;
CowRequest cow_request;
@@ -97,13 +104,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
int64_t start, end;
int n;
qemu_co_rwlock_rdlock(&job->flush_rwlock);
- start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
- end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
+ start = sector_num / sectors_per_cluster;
+ end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
@@ -111,26 +119,32 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
- if (hbitmap_get(job->bitmap, start)) {
+ if (test_bit(start, job->done_bitmap)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
- n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+ n = MIN(sectors_per_cluster,
job->common.len / BDRV_SECTOR_SIZE -
- start * BACKUP_SECTORS_PER_CLUSTER);
+ start * sectors_per_cluster);
if (!bounce_buffer) {
- bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+ bounce_buffer = qemu_blockalign(bs, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
- &bounce_qiov);
+ if (is_write_notifier) {
+ ret = bdrv_co_readv_no_serialising(bs,
+ start * sectors_per_cluster,
+ n, &bounce_qiov);
+ } else {
+ ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
+ &bounce_qiov);
+ }
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
@@ -141,11 +155,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER,
+ start * sectors_per_cluster,
n, BDRV_REQ_MAY_UNMAP);
} else {
ret = bdrv_co_writev(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER, n,
+ start * sectors_per_cluster, n,
&bounce_qiov);
}
if (ret < 0) {
@@ -156,7 +170,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
goto out;
}
- hbitmap_set(job->bitmap, start, 1);
+ set_bit(start, job->done_bitmap);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
@@ -190,7 +204,7 @@ static int coroutine_fn backup_before_write_notify(
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
+ return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -208,7 +222,41 @@ static void backup_iostatus_reset(BlockJob *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
- bdrv_iostatus_reset(s->target);
+ if (s->target->blk) {
+ blk_iostatus_reset(s->target->blk);
+ }
+}
+
+static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDriverState *bs = job->common.bs;
+
+ if (ret < 0 || block_job_is_cancelled(&job->common)) {
+ /* Merge the successor back into the parent, delete nothing. */
+ bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+ assert(bm);
+ } else {
+ /* Everything is fine, delete this bitmap and install the backup. */
+ bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+ assert(bm);
+ }
+}
+
+static void backup_commit(BlockJob *job)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ if (s->sync_bitmap) {
+ backup_cleanup_sync_bitmap(s, 0);
+ }
+}
+
+static void backup_abort(BlockJob *job)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ if (s->sync_bitmap) {
+ backup_cleanup_sync_bitmap(s, -1);
+ }
}
static const BlockJobDriver backup_job_driver = {
@@ -216,6 +264,8 @@ static const BlockJobDriver backup_job_driver = {
.job_type = BLOCK_JOB_TYPE_BACKUP,
.set_speed = backup_set_speed,
.iostatus_reset = backup_iostatus_reset,
+ .commit = backup_commit,
+ .abort = backup_abort,
};
static BlockErrorAction backup_error_action(BackupBlockJob *job,
@@ -280,21 +330,22 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
int64_t cluster;
int64_t end;
int64_t last_cluster = -1;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
BlockDriverState *bs = job->common.bs;
HBitmapIter hbi;
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
- clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
+ clusters_per_iter = MAX((granularity / job->cluster_size), 1);
bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
/* Find the next dirty sector(s) */
while ((sector = hbitmap_iter_next(&hbi)) != -1) {
- cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+ cluster = sector / sectors_per_cluster;
/* Fake progress updates for any clusters we skipped */
if (cluster != last_cluster + 1) {
job->common.offset += ((cluster - last_cluster - 1) *
- BACKUP_CLUSTER_SIZE);
+ job->cluster_size);
}
for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
@@ -302,8 +353,9 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
if (yield_and_check(job)) {
return ret;
}
- ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+ ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+ sectors_per_cluster, &error_is_read,
+ false);
if ((ret < 0) &&
backup_error_action(job, error_is_read, -ret) ==
BLOCK_ERROR_ACTION_REPORT) {
@@ -314,17 +366,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* If the bitmap granularity is smaller than the backup granularity,
* we need to advance the iterator pointer to the next cluster. */
- if (granularity < BACKUP_CLUSTER_SIZE) {
- bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
+ if (granularity < job->cluster_size) {
+ bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
}
last_cluster = cluster - 1;
}
/* Play some final catchup with the progress meter */
- end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+ end = DIV_ROUND_UP(job->common.len, job->cluster_size);
if (last_cluster + 1 < end) {
- job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
+ job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
}
return ret;
@@ -341,19 +393,21 @@ static void coroutine_fn backup_run(void *opaque)
.notify = backup_before_write_notify,
};
int64_t start, end;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
- end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+ end = DIV_ROUND_UP(job->common.len, job->cluster_size);
- job->bitmap = hbitmap_alloc(end, 0);
+ job->done_bitmap = bitmap_new(end);
- bdrv_set_enable_write_cache(target, true);
- bdrv_set_on_error(target, on_target_error, on_target_error);
- bdrv_iostatus_enable(target);
+ if (target->blk) {
+ blk_set_on_error(target->blk, on_target_error, on_target_error);
+ blk_iostatus_enable(target->blk);
+ }
bdrv_add_before_write_notifier(bs, &before_write);
@@ -382,7 +436,7 @@ static void coroutine_fn backup_run(void *opaque)
/* Check to see if these blocks are already in the
* backing file. */
- for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
+ for (i = 0; i < sectors_per_cluster;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
@@ -391,8 +445,8 @@ static void coroutine_fn backup_run(void *opaque)
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
- start * BACKUP_SECTORS_PER_CLUSTER + i,
- BACKUP_SECTORS_PER_CLUSTER - i, &n);
+ start * sectors_per_cluster + i,
+ sectors_per_cluster - i, &n);
i += n;
if (alloced == 1 || n == 0) {
@@ -407,8 +461,8 @@ static void coroutine_fn backup_run(void *opaque)
}
}
/* FULL sync mode we copy the whole drive. */
- ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+ ret = backup_do_cow(bs, start * sectors_per_cluster,
+ sectors_per_cluster, &error_is_read, false);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
@@ -428,22 +482,11 @@ static void coroutine_fn backup_run(void *opaque)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
+ g_free(job->done_bitmap);
- if (job->sync_bitmap) {
- BdrvDirtyBitmap *bm;
- if (ret < 0 || block_job_is_cancelled(&job->common)) {
- /* Merge the successor back into the parent, delete nothing. */
- bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
- assert(bm);
- } else {
- /* Everything is fine, delete this bitmap and install the backup. */
- bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
- assert(bm);
- }
+ if (target->blk) {
+ blk_iostatus_disable(target->blk);
}
- hbitmap_free(job->bitmap);
-
- bdrv_iostatus_disable(target);
bdrv_op_unblock_all(target, job->common.blocker);
data = g_malloc(sizeof(*data));
@@ -457,9 +500,11 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
- Error **errp)
+ BlockJobTxn *txn, Error **errp)
{
int64_t len;
+ BlockDriverInfo bdi;
+ int ret;
assert(bs);
assert(target);
@@ -472,7 +517,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
@@ -529,16 +574,35 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
goto error;
}
- bdrv_op_block_all(target, job->common.blocker);
-
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->target = target;
job->sync_mode = sync_mode;
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
+
+ /* If there is no backing file on the target, we cannot rely on COW if our
+ * backup cluster size is smaller than the target cluster size. Even for
+ * targets with a backing file, try to avoid COW if possible. */
+ ret = bdrv_get_info(job->target, &bdi);
+ if (ret < 0 && !target->backing) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+ "which has no backing file");
+ error_append_hint(errp,
+ "Aborting, since this may create an unusable destination image\n");
+ goto error;
+ } else if (ret < 0 && target->backing) {
+ /* Not fatal; just trudge on ahead. */
+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+ } else {
+ job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+ }
+
+ bdrv_op_block_all(target, job->common.blocker);
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
+ block_job_txn_add_job(txn, &job->common);
qemu_coroutine_enter(job->common.co, job);
return;
diff --git a/qemu/block/blkdebug.c b/qemu/block/blkdebug.c
index bc247f46f..20d25bda6 100644
--- a/qemu/block/blkdebug.c
+++ b/qemu/block/blkdebug.c
@@ -22,7 +22,9 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/config-file.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -30,12 +32,13 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
+#include "sysemu/qtest.h"
typedef struct BDRVBlkdebugState {
int state;
int new_state;
- QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
+ QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
} BDRVBlkdebugState;
@@ -63,7 +66,7 @@ enum {
};
typedef struct BlkdebugRule {
- BlkDebugEvent event;
+ BlkdebugEvent event;
int action;
int state;
union {
@@ -142,69 +145,12 @@ static QemuOptsList *config_groups[] = {
NULL
};
-static const char *event_names[BLKDBG_EVENT_MAX] = {
- [BLKDBG_L1_UPDATE] = "l1_update",
- [BLKDBG_L1_GROW_ALLOC_TABLE] = "l1_grow.alloc_table",
- [BLKDBG_L1_GROW_WRITE_TABLE] = "l1_grow.write_table",
- [BLKDBG_L1_GROW_ACTIVATE_TABLE] = "l1_grow.activate_table",
-
- [BLKDBG_L2_LOAD] = "l2_load",
- [BLKDBG_L2_UPDATE] = "l2_update",
- [BLKDBG_L2_UPDATE_COMPRESSED] = "l2_update_compressed",
- [BLKDBG_L2_ALLOC_COW_READ] = "l2_alloc.cow_read",
- [BLKDBG_L2_ALLOC_WRITE] = "l2_alloc.write",
-
- [BLKDBG_READ_AIO] = "read_aio",
- [BLKDBG_READ_BACKING_AIO] = "read_backing_aio",
- [BLKDBG_READ_COMPRESSED] = "read_compressed",
-
- [BLKDBG_WRITE_AIO] = "write_aio",
- [BLKDBG_WRITE_COMPRESSED] = "write_compressed",
-
- [BLKDBG_VMSTATE_LOAD] = "vmstate_load",
- [BLKDBG_VMSTATE_SAVE] = "vmstate_save",
-
- [BLKDBG_COW_READ] = "cow_read",
- [BLKDBG_COW_WRITE] = "cow_write",
-
- [BLKDBG_REFTABLE_LOAD] = "reftable_load",
- [BLKDBG_REFTABLE_GROW] = "reftable_grow",
- [BLKDBG_REFTABLE_UPDATE] = "reftable_update",
-
- [BLKDBG_REFBLOCK_LOAD] = "refblock_load",
- [BLKDBG_REFBLOCK_UPDATE] = "refblock_update",
- [BLKDBG_REFBLOCK_UPDATE_PART] = "refblock_update_part",
- [BLKDBG_REFBLOCK_ALLOC] = "refblock_alloc",
- [BLKDBG_REFBLOCK_ALLOC_HOOKUP] = "refblock_alloc.hookup",
- [BLKDBG_REFBLOCK_ALLOC_WRITE] = "refblock_alloc.write",
- [BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS] = "refblock_alloc.write_blocks",
- [BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE] = "refblock_alloc.write_table",
- [BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE] = "refblock_alloc.switch_table",
-
- [BLKDBG_CLUSTER_ALLOC] = "cluster_alloc",
- [BLKDBG_CLUSTER_ALLOC_BYTES] = "cluster_alloc_bytes",
- [BLKDBG_CLUSTER_FREE] = "cluster_free",
-
- [BLKDBG_FLUSH_TO_OS] = "flush_to_os",
- [BLKDBG_FLUSH_TO_DISK] = "flush_to_disk",
-
- [BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head",
- [BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head",
- [BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail",
- [BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail",
- [BLKDBG_PWRITEV] = "pwritev",
- [BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
- [BLKDBG_PWRITEV_DONE] = "pwritev_done",
-
- [BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare",
-};
-
-static int get_event_by_name(const char *name, BlkDebugEvent *event)
+static int get_event_by_name(const char *name, BlkdebugEvent *event)
{
int i;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
- if (!strcmp(event_names[i], name)) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
+ if (!strcmp(BlkdebugEvent_lookup[i], name)) {
*event = i;
return 0;
}
@@ -223,7 +169,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
struct add_rule_data *d = opaque;
BDRVBlkdebugState *s = d->s;
const char* event_name;
- BlkDebugEvent event;
+ BlkdebugEvent event;
struct BlkdebugRule *rule;
/* Find the right event for the rule */
@@ -426,11 +372,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
/* Set initial state */
s->state = 1;
- /* Open the backing file */
- assert(bs->file == NULL);
- ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
- bs, &child_file, false, &local_err);
- if (ret < 0) {
+ /* Open the image file */
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
goto out;
}
@@ -449,7 +395,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
fail_unref:
- bdrv_unref(bs->file);
+ bdrv_unref_child(bs, bs->file);
out:
qemu_opts_del(opts);
return ret;
@@ -510,7 +456,8 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+ return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
}
static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
@@ -532,7 +479,8 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+ return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
}
static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
@@ -551,7 +499,7 @@ static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_flush(bs->file, cb, opaque);
+ return bdrv_aio_flush(bs->file->bs, cb, opaque);
}
@@ -561,7 +509,7 @@ static void blkdebug_close(BlockDriverState *bs)
BlkdebugRule *rule, *next;
int i;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
remove_rule(rule);
}
@@ -581,9 +529,13 @@ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
remove_rule(rule);
QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
- printf("blkdebug: Suspended request '%s'\n", r.tag);
+ if (!qtest_enabled()) {
+ printf("blkdebug: Suspended request '%s'\n", r.tag);
+ }
qemu_coroutine_yield();
- printf("blkdebug: Resuming request '%s'\n", r.tag);
+ if (!qtest_enabled()) {
+ printf("blkdebug: Resuming request '%s'\n", r.tag);
+ }
QLIST_REMOVE(&r, next);
g_free(r.tag);
@@ -620,13 +572,13 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
return injected;
}
-static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule, *next;
bool injected;
- assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
+ assert((int)event >= 0 && event < BLKDBG__MAX);
injected = false;
s->new_state = s->state;
@@ -641,7 +593,7 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule;
- BlkDebugEvent blkdebug_event;
+ BlkdebugEvent blkdebug_event;
if (get_event_by_name(event, &blkdebug_event) < 0) {
return -ENOENT;
@@ -683,7 +635,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
BlkdebugRule *rule, *next;
int i, ret = -ENOENT;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
if (rule->action == ACTION_SUSPEND &&
!strcmp(rule->options.suspend.tag, tag)) {
@@ -716,55 +668,50 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
static int64_t blkdebug_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file);
+ return bdrv_getlength(bs->file->bs);
}
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
{
- return bdrv_truncate(bs->file, offset);
+ return bdrv_truncate(bs->file->bs, offset);
}
-static void blkdebug_refresh_filename(BlockDriverState *bs)
+static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
QDict *opts;
const QDictEntry *e;
bool force_json = false;
- for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
+ for (e = qdict_first(options); e; e = qdict_next(options, e)) {
if (strcmp(qdict_entry_key(e), "config") &&
- strcmp(qdict_entry_key(e), "x-image") &&
- strcmp(qdict_entry_key(e), "image") &&
- strncmp(qdict_entry_key(e), "image.", strlen("image.")))
+ strcmp(qdict_entry_key(e), "x-image"))
{
force_json = true;
break;
}
}
- if (force_json && !bs->file->full_open_options) {
+ if (force_json && !bs->file->bs->full_open_options) {
/* The config file cannot be recreated, so creating a plain filename
* is impossible */
return;
}
- if (!force_json && bs->file->exact_filename[0]) {
+ if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s",
- qdict_get_try_str(bs->options, "config") ?: "",
- bs->file->exact_filename);
+ qdict_get_try_str(options, "config") ?: "",
+ bs->file->bs->exact_filename);
}
opts = qdict_new();
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
- for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
- if (strcmp(qdict_entry_key(e), "x-image") &&
- strcmp(qdict_entry_key(e), "image") &&
- strncmp(qdict_entry_key(e), "image.", strlen("image.")))
- {
+ for (e = qdict_first(options); e; e = qdict_next(options, e)) {
+ if (strcmp(qdict_entry_key(e), "x-image")) {
qobject_incref(qdict_entry_value(e));
qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
}
@@ -773,6 +720,12 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
bs->full_open_options = opts;
}
+static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static BlockDriver bdrv_blkdebug = {
.format_name = "blkdebug",
.protocol_name = "blkdebug",
@@ -781,6 +734,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_parse_filename = blkdebug_parse_filename,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
+ .bdrv_reopen_prepare = blkdebug_reopen_prepare,
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
diff --git a/qemu/block/blkreplay.c b/qemu/block/blkreplay.c
new file mode 100755
index 000000000..42f1813af
--- /dev/null
+++ b/qemu/block/blkreplay.c
@@ -0,0 +1,160 @@
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+#include "qapi/error.h"
+
+typedef struct Request {
+ Coroutine *co;
+ QEMUBH *bh;
+} Request;
+
+/* Next request id.
+ This counter is global, because requests from different
+ block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ Error *local_err = NULL;
+ int ret;
+
+ /* Open the image file */
+ bs->file = bdrv_open_child(NULL, options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
+ return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+/* This bh is used for synchronization of return from coroutines.
+ It continues yielded coroutine which then finishes its execution.
+ BH is called adjusted to some replay checkpoint, therefore
+ record and replay will always finish coroutines deterministically.
+*/
+static void blkreplay_bh_cb(void *opaque)
+{
+ Request *req = opaque;
+ qemu_coroutine_enter(req->co, NULL);
+ qemu_bh_delete(req->bh);
+ g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+ Coroutine *co)
+{
+ Request *req = g_new(Request, 1);
+ *req = (Request) {
+ .co = co,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
+ };
+ replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_flush(bs->file->bs);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static BlockDriver bdrv_blkreplay = {
+ .format_name = "blkreplay",
+ .protocol_name = "blkreplay",
+ .instance_size = 0,
+
+ .bdrv_file_open = blkreplay_open,
+ .bdrv_close = blkreplay_close,
+ .bdrv_getlength = blkreplay_getlength,
+
+ .bdrv_co_readv = blkreplay_co_readv,
+ .bdrv_co_writev = blkreplay_co_writev,
+
+ .bdrv_co_write_zeroes = blkreplay_co_write_zeroes,
+ .bdrv_co_discard = blkreplay_co_discard,
+ .bdrv_co_flush = blkreplay_co_flush,
+};
+
+static void bdrv_blkreplay_init(void)
+{
+ bdrv_register(&bdrv_blkreplay);
+}
+
+block_init(bdrv_blkreplay_init);
diff --git a/qemu/block/blkverify.c b/qemu/block/blkverify.c
index d277e6322..9414b7a84 100644
--- a/qemu/block/blkverify.c
+++ b/qemu/block/blkverify.c
@@ -7,14 +7,16 @@
* See the COPYING file in the top-level directory.
*/
-#include <stdarg.h>
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "block/block_int.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
typedef struct {
- BlockDriverState *test_file;
+ BdrvChild *test_file;
} BDRVBlkverifyState;
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
@@ -123,26 +125,29 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Open the raw file */
- assert(bs->file == NULL);
- ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
- "raw", bs, &child_file, false, &local_err);
- if (ret < 0) {
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
goto fail;
}
/* Open the test file */
- assert(s->test_file == NULL);
- ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
- "test", bs, &child_format, false, &local_err);
- if (ret < 0) {
+ s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
+ "test", bs, &child_format, false,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
- s->test_file = NULL;
goto fail;
}
ret = 0;
fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
qemu_opts_del(opts);
return ret;
}
@@ -151,7 +156,7 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_unref(s->test_file);
+ bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
}
@@ -159,7 +164,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- return bdrv_getlength(s->test_file);
+ return bdrv_getlength(s->test_file->bs);
}
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
@@ -238,13 +243,13 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
nb_sectors, cb, opaque);
acb->verify = blkverify_verify_readv;
- acb->buf = qemu_blockalign(bs->file, qiov->size);
+ acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
- bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+ bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
- bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+ bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -257,9 +262,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
nb_sectors, cb, opaque);
- bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+ bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
- bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -271,7 +276,7 @@ static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
- return bdrv_aio_flush(s->test_file, cb, opaque);
+ return bdrv_aio_flush(s->test_file->bs, cb, opaque);
}
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -279,13 +284,13 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
{
BDRVBlkverifyState *s = bs->opaque;
- bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+ bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
if (perm) {
return true;
}
- return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
+ return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
}
/* Propagate AioContext changes to ->test_file */
@@ -293,7 +298,7 @@ static void blkverify_detach_aio_context(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_detach_aio_context(s->test_file);
+ bdrv_detach_aio_context(s->test_file->bs);
}
static void blkverify_attach_aio_context(BlockDriverState *bs,
@@ -301,32 +306,38 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_attach_aio_context(s->test_file, new_context);
+ bdrv_attach_aio_context(s->test_file->bs, new_context);
}
-static void blkverify_refresh_filename(BlockDriverState *bs)
+static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkverifyState *s = bs->opaque;
- /* bs->file has already been refreshed */
- bdrv_refresh_filename(s->test_file);
+ /* bs->file->bs has already been refreshed */
+ bdrv_refresh_filename(s->test_file->bs);
- if (bs->file->full_open_options && s->test_file->full_open_options) {
+ if (bs->file->bs->full_open_options
+ && s->test_file->bs->full_open_options)
+ {
QDict *opts = qdict_new();
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options));
- QINCREF(s->test_file->full_open_options);
- qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+ QINCREF(s->test_file->bs->full_open_options);
+ qdict_put_obj(opts, "test",
+ QOBJECT(s->test_file->bs->full_open_options));
bs->full_open_options = opts;
}
- if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) {
+ if (bs->file->bs->exact_filename[0]
+ && s->test_file->bs->exact_filename[0])
+ {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkverify:%s:%s",
- bs->file->exact_filename, s->test_file->exact_filename);
+ bs->file->bs->exact_filename,
+ s->test_file->bs->exact_filename);
}
}
diff --git a/qemu/block/block-backend.c b/qemu/block/block-backend.c
index aee8a1202..16c9d5e0f 100644
--- a/qemu/block/block-backend.c
+++ b/qemu/block/block-backend.c
@@ -10,74 +10,105 @@
* or later. See the COPYING.LIB file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
#include "qapi-event.h"
+#include "qemu/id.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
struct BlockBackend {
char *name;
int refcnt;
- BlockDriverState *bs;
+ BdrvChild *root;
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
- QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
+ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
void *dev_opaque;
+
+ /* the block size for which the guest device expects atomicity */
+ int guest_block_size;
+
+ /* If the BDS tree is removed, some of its options are stored here (which
+ * can be used to restore those options in the new BDS on insert) */
+ BlockBackendRootState root_state;
+
+ bool enable_write_cache;
+
+ /* I/O stats (display with "info blockstats"). */
+ BlockAcctStats stats;
+
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+
+ bool allow_write_beyond_eof;
+
+ NotifierList remove_bs_notifiers, insert_bs_notifiers;
};
typedef struct BlockBackendAIOCB {
BlockAIOCB common;
QEMUBH *bh;
+ BlockBackend *blk;
int ret;
} BlockBackendAIOCB;
static const AIOCBInfo block_backend_aiocb_info = {
+ .get_aio_context = blk_aiocb_get_aio_context,
.aiocb_size = sizeof(BlockBackendAIOCB),
};
static void drive_info_del(DriveInfo *dinfo);
-/* All the BlockBackends (except for hidden ones) */
-static QTAILQ_HEAD(, BlockBackend) blk_backends =
- QTAILQ_HEAD_INITIALIZER(blk_backends);
+/* All BlockBackends */
+static QTAILQ_HEAD(, BlockBackend) block_backends =
+ QTAILQ_HEAD_INITIALIZER(block_backends);
+
+/* All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next() */
+static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
+ QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
+
+static void blk_root_inherit_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
+{
+ /* We're not supposed to call this function for root nodes */
+ abort();
+}
+
+static const BdrvChildRole child_root = {
+ .inherit_options = blk_root_inherit_options,
+};
/*
- * Create a new BlockBackend with @name, with a reference count of one.
- * @name must not be null or empty.
- * Fail if a BlockBackend with this name already exists.
+ * Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(const char *name, Error **errp)
+BlockBackend *blk_new(Error **errp)
{
BlockBackend *blk;
- assert(name && name[0]);
- if (!id_wellformed(name)) {
- error_setg(errp, "Invalid device name");
- return NULL;
- }
- if (blk_by_name(name)) {
- error_setg(errp, "Device with id '%s' already exists", name);
- return NULL;
- }
- if (bdrv_find_node(name)) {
- error_setg(errp,
- "Device name '%s' conflicts with an existing node name",
- name);
- return NULL;
- }
-
blk = g_new0(BlockBackend, 1);
- blk->name = g_strdup(name);
blk->refcnt = 1;
- QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
+ notifier_list_init(&blk->remove_bs_notifiers);
+ notifier_list_init(&blk->insert_bs_notifiers);
+ QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
@@ -85,18 +116,18 @@ BlockBackend *blk_new(const char *name, Error **errp)
* Create a new BlockBackend with a new BlockDriverState attached.
* Otherwise just like blk_new(), which see.
*/
-BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+BlockBackend *blk_new_with_bs(Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
- blk = blk_new(name, errp);
+ blk = blk_new(errp);
if (!blk) {
return NULL;
}
bs = bdrv_new_root();
- blk->bs = bs;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root);
bs->blk = blk;
return blk;
}
@@ -113,44 +144,46 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp)
* though, so callers of this function have to be able to specify @filename and
* @flags.
*/
-BlockBackend *blk_new_open(const char *name, const char *filename,
- const char *reference, QDict *options, int flags,
- Error **errp)
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp)
{
BlockBackend *blk;
int ret;
- blk = blk_new_with_bs(name, errp);
+ blk = blk_new_with_bs(errp);
if (!blk) {
QDECREF(options);
return NULL;
}
- ret = bdrv_open(&blk->bs, filename, reference, options, flags, NULL, errp);
+ ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
if (ret < 0) {
blk_unref(blk);
return NULL;
}
+ blk_set_enable_write_cache(blk, true);
+
return blk;
}
static void blk_delete(BlockBackend *blk)
{
assert(!blk->refcnt);
+ assert(!blk->name);
assert(!blk->dev);
- if (blk->bs) {
- assert(blk->bs->blk == blk);
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
- blk->bs = NULL;
+ if (blk->root) {
+ blk_remove_bs(blk);
}
- /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
- if (blk->name[0]) {
- QTAILQ_REMOVE(&blk_backends, blk, link);
+ assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
+ assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
+ if (blk->root_state.throttle_state) {
+ g_free(blk->root_state.throttle_group);
+ throttle_group_unref(blk->root_state.throttle_state);
}
- g_free(blk->name);
+ QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
+ block_acct_cleanup(&blk->stats);
g_free(blk);
}
@@ -164,6 +197,11 @@ static void drive_info_del(DriveInfo *dinfo)
g_free(dinfo);
}
+int blk_get_refcnt(BlockBackend *blk)
+{
+ return blk ? blk->refcnt : 0;
+}
+
/*
* Increment @blk's reference count.
* @blk must not be null.
@@ -189,7 +227,32 @@ void blk_unref(BlockBackend *blk)
}
/*
- * Return the BlockBackend after @blk.
+ * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
+ * ones which are hidden (i.e. are not referenced by the monitor).
+ */
+static BlockBackend *blk_all_next(BlockBackend *blk)
+{
+ return blk ? QTAILQ_NEXT(blk, link)
+ : QTAILQ_FIRST(&block_backends);
+}
+
+void blk_remove_all_bs(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *ctx = blk_get_aio_context(blk);
+
+ aio_context_acquire(ctx);
+ if (blk->root) {
+ blk_remove_bs(blk);
+ }
+ aio_context_release(ctx);
+ }
+}
+
+/*
+ * Return the monitor-owned BlockBackend after @blk.
* If @blk is null, return the first one.
* Else, return @blk's next sibling, which may be null.
*
@@ -200,17 +263,91 @@ void blk_unref(BlockBackend *blk)
*/
BlockBackend *blk_next(BlockBackend *blk)
{
- return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
+ return blk ? QTAILQ_NEXT(blk, monitor_link)
+ : QTAILQ_FIRST(&monitor_block_backends);
+}
+
+/*
+ * Iterates over all BlockDriverStates which are attached to a BlockBackend.
+ * This function is for use by bdrv_next().
+ *
+ * @bs must be NULL or a BDS that is attached to a BB.
+ */
+BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
+{
+ BlockBackend *blk;
+
+ if (bs) {
+ assert(bs->blk);
+ blk = bs->blk;
+ } else {
+ blk = NULL;
+ }
+
+ do {
+ blk = blk_all_next(blk);
+ } while (blk && !blk->root);
+
+ return blk ? blk->root->bs : NULL;
+}
+
+/*
+ * Add a BlockBackend into the list of backends referenced by the monitor, with
+ * the given @name acting as the handle for the monitor.
+ * Strictly for use by blockdev.c.
+ *
+ * @name must not be null or empty.
+ *
+ * Returns true on success and false on failure. In the latter case, an Error
+ * object is returned through @errp.
+ */
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
+{
+ assert(!blk->name);
+ assert(name && name[0]);
+
+ if (!id_wellformed(name)) {
+ error_setg(errp, "Invalid device name");
+ return false;
+ }
+ if (blk_by_name(name)) {
+ error_setg(errp, "Device with id '%s' already exists", name);
+ return false;
+ }
+ if (bdrv_find_node(name)) {
+ error_setg(errp,
+ "Device name '%s' conflicts with an existing node name",
+ name);
+ return false;
+ }
+
+ blk->name = g_strdup(name);
+ QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
+ return true;
+}
+
+/*
+ * Remove a BlockBackend from the list of backends referenced by the monitor.
+ * Strictly for use by blockdev.c.
+ */
+void monitor_remove_blk(BlockBackend *blk)
+{
+ if (!blk->name) {
+ return;
+ }
+
+ QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
+ g_free(blk->name);
+ blk->name = NULL;
}
/*
* Return @blk's name, a non-null string.
- * Wart: the name is empty iff @blk has been hidden with
- * blk_hide_on_behalf_of_hmp_drive_del().
+ * Returns an empty string iff @blk is not referenced by the monitor.
*/
const char *blk_name(BlockBackend *blk)
{
- return blk->name;
+ return blk->name ?: "";
}
/*
@@ -219,10 +356,10 @@ const char *blk_name(BlockBackend *blk)
*/
BlockBackend *blk_by_name(const char *name)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
assert(name);
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
return blk;
}
@@ -235,7 +372,7 @@ BlockBackend *blk_by_name(const char *name)
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
- return blk->bs;
+ return blk->root ? blk->root->bs : NULL;
}
/*
@@ -263,9 +400,9 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
*/
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
return blk;
}
@@ -274,21 +411,32 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
}
/*
- * Hide @blk.
- * @blk must not have been hidden already.
- * Make attached BlockDriverState, if any, anonymous.
- * Once hidden, @blk is invisible to all functions that don't receive
- * it as argument. For example, blk_by_name() won't return it.
- * Strictly for use by do_drive_del().
- * TODO get rid of it!
+ * Disassociates the currently associated BlockDriverState from @blk.
*/
-void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
+void blk_remove_bs(BlockBackend *blk)
{
- QTAILQ_REMOVE(&blk_backends, blk, link);
- blk->name[0] = 0;
- if (blk->bs) {
- bdrv_make_anon(blk->bs);
- }
+ assert(blk->root->bs->blk == blk);
+
+ notifier_list_notify(&blk->remove_bs_notifiers, blk);
+
+ blk_update_root_state(blk);
+
+ blk->root->bs->blk = NULL;
+ bdrv_root_unref_child(blk->root);
+ blk->root = NULL;
+}
+
+/*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+ assert(!blk->root && !bs->blk);
+ bdrv_ref(bs);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root);
+ bs->blk = blk;
+
+ notifier_list_notify(&blk->insert_bs_notifiers, blk);
}
/*
@@ -303,7 +451,7 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
}
blk_ref(blk);
blk->dev = dev;
- bdrv_iostatus_reset(blk->bs);
+ blk_iostatus_reset(blk);
return 0;
}
@@ -330,7 +478,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
- bdrv_set_guest_block_size(blk->bs, 512);
+ blk->guest_block_size = 512;
blk_unref(blk);
}
@@ -364,18 +512,15 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
- bool tray_was_closed = !blk_dev_is_tray_open(blk);
+ bool tray_was_open, tray_is_open;
+ tray_was_open = blk_dev_is_tray_open(blk);
blk->dev_ops->change_media_cb(blk->dev_opaque, load);
- if (tray_was_closed) {
- /* tray open */
- qapi_event_send_device_tray_moved(blk_name(blk),
- true, &error_abort);
- }
- if (load) {
- /* tray close */
- qapi_event_send_device_tray_moved(blk_name(blk),
- false, &error_abort);
+ tray_is_open = blk_dev_is_tray_open(blk);
+
+ if (tray_was_open != tray_is_open) {
+ qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
+ &error_abort);
}
}
}
@@ -390,6 +535,14 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
}
/*
+ * Does @blk's attached device model have a tray?
+ */
+bool blk_dev_has_tray(BlockBackend *blk)
+{
+ return blk->dev_ops && blk->dev_ops->is_tray_open;
+}
+
+/*
* Notify @blk's attached device model of a media eject request.
* If @force is true, the medium is about to be yanked out forcefully.
*/
@@ -405,7 +558,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
- if (blk->dev_ops && blk->dev_ops->is_tray_open) {
+ if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
return false;
@@ -435,7 +588,53 @@ void blk_dev_resize_cb(BlockBackend *blk)
void blk_iostatus_enable(BlockBackend *blk)
{
- bdrv_iostatus_enable(blk->bs);
+ blk->iostatus_enabled = true;
+ blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool blk_iostatus_is_enabled(const BlockBackend *blk)
+{
+ return (blk->iostatus_enabled &&
+ (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+ blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
+ blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
+{
+ return blk->iostatus;
+}
+
+void blk_iostatus_disable(BlockBackend *blk)
+{
+ blk->iostatus_enabled = false;
+}
+
+void blk_iostatus_reset(BlockBackend *blk)
+{
+ if (blk_iostatus_is_enabled(blk)) {
+ BlockDriverState *bs = blk_bs(blk);
+ blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+ if (bs && bs->job) {
+ block_job_iostatus_reset(bs->job);
+ }
+ }
+}
+
+void blk_iostatus_set_err(BlockBackend *blk, int error)
+{
+ assert(blk_iostatus_is_enabled(blk));
+ if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+ BLOCK_DEVICE_IO_STATUS_FAILED;
+ }
+}
+
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+{
+ blk->allow_write_beyond_eof = allow;
}
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
@@ -447,21 +646,23 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return -EIO;
}
- if (!blk_is_inserted(blk)) {
+ if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- len = blk_getlength(blk);
- if (len < 0) {
- return len;
- }
-
if (offset < 0) {
return -EIO;
}
- if (offset > len || len - offset < size) {
- return -EIO;
+ if (!blk->allow_write_beyond_eof) {
+ len = blk_getlength(blk);
+ if (len < 0) {
+ return len;
+ }
+
+ if (offset > len || len - offset < size) {
+ return -EIO;
+ }
}
return 0;
@@ -482,48 +683,144 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num,
nb_sectors * BDRV_SECTOR_SIZE);
}
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_read(blk->bs, sector_num, buf, nb_sectors);
+ return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags);
}
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret;
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors);
+ if (!blk->enable_write_cache) {
+ flags |= BDRV_REQ_FUA;
+ }
+
+ return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+typedef struct BlkRwCo {
+ BlockBackend *blk;
+ int64_t offset;
+ QEMUIOVector *qiov;
+ int ret;
+ BdrvRequestFlags flags;
+} BlkRwCo;
+
+static void blk_read_entry(void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static void blk_write_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int64_t bytes, CoroutineEntry co_entry,
+ BdrvRequestFlags flags)
+{
+ AioContext *aio_context;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ Coroutine *co;
+ BlkRwCo rwco;
+
+ iov = (struct iovec) {
+ .iov_base = buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = &qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+
+ co = qemu_coroutine_create(co_entry);
+ qemu_coroutine_enter(co, &rwco);
+
+ aio_context = blk_get_aio_context(blk);
+ while (rwco.ret == NOT_DONE) {
+ aio_poll(aio_context, true);
}
- return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
+ return rwco.ret;
}
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+ int nb_sectors, CoroutineEntry co_entry,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return -EINVAL;
+ }
+
+ return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
+ nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
+}
+
+int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+ int nb_sectors)
+{
+ return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
+}
+
+int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+ int nb_sectors)
+{
+ BlockDriverState *bs = blk_bs(blk);
+ bool enabled;
+ int ret;
+
+ ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
return ret;
}
- return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+ enabled = bs->io_limits_enabled;
+ bs->io_limits_enabled = false;
+ ret = blk_read(blk, sector_num, buf, nb_sectors);
+ bs->io_limits_enabled = enabled;
+ return ret;
+}
+
+int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
+ int nb_sectors)
+{
+ return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
+ blk_write_entry, 0);
+}
+
+int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags)
+{
+ return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
}
static void error_callback_bh(void *opaque)
@@ -534,13 +831,15 @@ static void error_callback_bh(void *opaque)
qemu_aio_unref(acb);
}
-static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb,
- void *opaque, int ret)
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+ BlockCompletionFunc *cb,
+ void *opaque, int ret)
{
struct BlockBackendAIOCB *acb;
QEMUBH *bh;
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+ acb->blk = blk;
acb->ret = ret;
bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -550,82 +849,182 @@ static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb,
return &acb->common;
}
+typedef struct BlkAioEmAIOCB {
+ BlockAIOCB common;
+ BlkRwCo rwco;
+ int bytes;
+ bool has_returned;
+ QEMUBH* bh;
+} BlkAioEmAIOCB;
+
+static const AIOCBInfo blk_aio_em_aiocb_info = {
+ .aiocb_size = sizeof(BlkAioEmAIOCB),
+};
+
+static void blk_aio_complete(BlkAioEmAIOCB *acb)
+{
+ if (acb->bh) {
+ assert(acb->has_returned);
+ qemu_bh_delete(acb->bh);
+ }
+ if (acb->has_returned) {
+ acb->common.cb(acb->common.opaque, acb->rwco.ret);
+ qemu_aio_unref(acb);
+ }
+}
+
+static void blk_aio_complete_bh(void *opaque)
+{
+ blk_aio_complete(opaque);
+}
+
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
+ QEMUIOVector *qiov, CoroutineEntry co_entry,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = bytes;
+ acb->bh = NULL;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ }
+
+ return &acb->common;
+}
+
+static void blk_aio_read_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+static void blk_aio_write_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
}
- return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
- cb, opaque);
+ return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, NULL,
+ blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
+ cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
if (ret < 0) {
return ret;
}
-
- return bdrv_pread(blk->bs, offset, buf, count);
+ return count;
}
int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
if (ret < 0) {
return ret;
}
-
- return bdrv_pwrite(blk->bs, offset, buf, count);
+ return count;
}
int64_t blk_getlength(BlockBackend *blk)
{
- return bdrv_getlength(blk->bs);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_getlength(blk_bs(blk));
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
- bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+ if (!blk_bs(blk)) {
+ *nb_sectors_ptr = 0;
+ } else {
+ bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
+ }
}
int64_t blk_nb_sectors(BlockBackend *blk)
{
- return bdrv_nb_sectors(blk->bs);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_nb_sectors(blk_bs(blk));
}
BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
}
- return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
+ return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
+ blk_aio_read_entry, 0, cb, opaque);
}
BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
}
- return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
+ return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
+ blk_aio_write_entry, 0, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
- return bdrv_aio_flush(blk->bs, cb, opaque);
+ if (!blk_is_available(blk)) {
+ return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+ }
+
+ return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
BlockAIOCB *blk_aio_discard(BlockBackend *blk,
@@ -634,10 +1033,10 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk,
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ return blk_abort_aio_request(blk, cb, opaque, ret);
}
- return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
+ return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -661,18 +1060,26 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
}
}
- return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs);
+ return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
}
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
- return bdrv_ioctl(blk->bs, req, buf);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_ioctl(blk_bs(blk), req, buf);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
+ if (!blk_is_available(blk)) {
+ return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+ }
+
+ return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}
int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
@@ -682,27 +1089,32 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
return ret;
}
- return bdrv_co_discard(blk->bs, sector_num, nb_sectors);
+ return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
}
int blk_co_flush(BlockBackend *blk)
{
- return bdrv_co_flush(blk->bs);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_co_flush(blk_bs(blk));
}
int blk_flush(BlockBackend *blk)
{
- return bdrv_flush(blk->bs);
-}
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
-int blk_flush_all(void)
-{
- return bdrv_flush_all();
+ return bdrv_flush(blk_bs(blk));
}
void blk_drain(BlockBackend *blk)
{
- bdrv_drain(blk->bs);
+ if (blk_bs(blk)) {
+ bdrv_drain(blk_bs(blk));
+ }
}
void blk_drain_all(void)
@@ -710,119 +1122,273 @@ void blk_drain_all(void)
bdrv_drain_all();
}
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error)
+{
+ blk->on_read_error = on_read_error;
+ blk->on_write_error = on_write_error;
+}
+
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
- return bdrv_get_on_error(blk->bs, is_read);
+ return is_read ? blk->on_read_error : blk->on_write_error;
}
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
- return bdrv_get_error_action(blk->bs, is_read, error);
+ BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ return (error == ENOSPC) ?
+ BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_STOP:
+ return BLOCK_ERROR_ACTION_STOP;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ return BLOCK_ERROR_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ return BLOCK_ERROR_ACTION_IGNORE;
+ default:
+ abort();
+ }
+}
+
+static void send_qmp_error_event(BlockBackend *blk,
+ BlockErrorAction action,
+ bool is_read, int error)
+{
+ IoOperationType optype;
+
+ optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+ qapi_event_send_block_io_error(blk_name(blk), optype, action,
+ blk_iostatus_is_enabled(blk),
+ error == ENOSPC, strerror(error),
+ &error_abort);
}
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
- bdrv_error_action(blk->bs, action, is_read, error);
+ assert(error >= 0);
+
+ if (action == BLOCK_ERROR_ACTION_STOP) {
+ /* First set the iostatus, so that "info block" returns an iostatus
+ * that matches the events raised so far (an additional error iostatus
+ * is fine, but not a lost one).
+ */
+ blk_iostatus_set_err(blk, error);
+
+ /* Then raise the request to stop the VM and the event.
+ * qemu_system_vmstop_request_prepare has two effects. First,
+ * it ensures that the STOP event always comes after the
+ * BLOCK_IO_ERROR event. Second, it ensures that even if management
+ * can observe the STOP event and do a "cont" before the STOP
+ * event is issued, the VM will not stop. In this case, vm_start()
+ * also ensures that the STOP/RESUME pair of events is emitted.
+ */
+ qemu_system_vmstop_request_prepare();
+ send_qmp_error_event(blk, action, is_read, error);
+ qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+ } else {
+ send_qmp_error_event(blk, action, is_read, error);
+ }
}
int blk_is_read_only(BlockBackend *blk)
{
- return bdrv_is_read_only(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_is_read_only(bs);
+ } else {
+ return blk->root_state.read_only;
+ }
}
int blk_is_sg(BlockBackend *blk)
{
- return bdrv_is_sg(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
+ return 0;
+ }
+
+ return bdrv_is_sg(bs);
}
int blk_enable_write_cache(BlockBackend *blk)
{
- return bdrv_enable_write_cache(blk->bs);
+ return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- bdrv_set_enable_write_cache(blk->bs, wce);
+ blk->enable_write_cache = wce;
}
void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
- bdrv_invalidate_cache(blk->bs, errp);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
+ error_setg(errp, "Device '%s' has no medium", blk->name);
+ return;
+ }
+
+ bdrv_invalidate_cache(bs, errp);
+}
+
+bool blk_is_inserted(BlockBackend *blk)
+{
+ BlockDriverState *bs = blk_bs(blk);
+
+ return bs && bdrv_is_inserted(bs);
}
-int blk_is_inserted(BlockBackend *blk)
+bool blk_is_available(BlockBackend *blk)
{
- return bdrv_is_inserted(blk->bs);
+ return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void blk_lock_medium(BlockBackend *blk, bool locked)
{
- bdrv_lock_medium(blk->bs, locked);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_lock_medium(bs, locked);
+ }
}
void blk_eject(BlockBackend *blk, bool eject_flag)
{
- bdrv_eject(blk->bs, eject_flag);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_eject(bs, eject_flag);
+ }
}
int blk_get_flags(BlockBackend *blk)
{
- return bdrv_get_flags(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_flags(bs);
+ } else {
+ return blk->root_state.open_flags;
+ }
}
int blk_get_max_transfer_length(BlockBackend *blk)
{
- return blk->bs->bl.max_transfer_length;
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bs->bl.max_transfer_length;
+ } else {
+ return 0;
+ }
+}
+
+int blk_get_max_iov(BlockBackend *blk)
+{
+ return blk->root->bs->bl.max_iov;
}
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
- bdrv_set_guest_block_size(blk->bs, align);
+ blk->guest_block_size = align;
+}
+
+void *blk_try_blockalign(BlockBackend *blk, size_t size)
+{
+ return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}
void *blk_blockalign(BlockBackend *blk, size_t size)
{
- return qemu_blockalign(blk ? blk->bs : NULL, size);
+ return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
- return bdrv_op_is_blocked(blk->bs, op, errp);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
+ return false;
+ }
+
+ return bdrv_op_is_blocked(bs, op, errp);
}
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
- bdrv_op_unblock(blk->bs, op, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock(bs, op, reason);
+ }
}
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
- bdrv_op_block_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_block_all(bs, reason);
+ }
}
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
- bdrv_op_unblock_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock_all(bs, reason);
+ }
}
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- return bdrv_get_aio_context(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_aio_context(bs);
+ } else {
+ return qemu_get_aio_context();
+ }
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+ BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+ return blk_get_aio_context(blk_acb->blk);
}
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
- bdrv_set_aio_context(blk->bs, new_context);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_set_aio_context(bs, new_context);
+ }
}
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
- bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
- detach_aio_context, opaque);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_add_aio_context_notifier(bs, attached_aio_context,
+ detach_aio_context, opaque);
+ }
}
void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -831,28 +1397,45 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*detach_aio_context)(void *),
void *opaque)
{
- bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
- detach_aio_context, opaque);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_remove_aio_context_notifier(bs, attached_aio_context,
+ detach_aio_context, opaque);
+ }
}
-void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
- bdrv_add_close_notifier(blk->bs, notify);
+ notifier_list_add(&blk->remove_bs_notifiers, notify);
+}
+
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
+{
+ notifier_list_add(&blk->insert_bs_notifiers, notify);
}
void blk_io_plug(BlockBackend *blk)
{
- bdrv_io_plug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_plug(bs);
+ }
}
void blk_io_unplug(BlockBackend *blk)
{
- bdrv_io_unplug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_unplug(bs);
+ }
}
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
- return bdrv_get_stats(blk->bs);
+ return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -864,12 +1447,13 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return -EINVAL;
}
- return bdrv_co_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+ return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, NULL,
+ flags | BDRV_REQ_ZERO_WRITE);
}
int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
@@ -880,12 +1464,16 @@ int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
return ret;
}
- return bdrv_write_compressed(blk->bs, sector_num, buf, nb_sectors);
+ return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
}
int blk_truncate(BlockBackend *blk, int64_t offset)
{
- return bdrv_truncate(blk->bs, offset);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_truncate(blk_bs(blk), offset);
}
int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
@@ -895,26 +1483,153 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
return ret;
}
- return bdrv_discard(blk->bs, sector_num, nb_sectors);
+ return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
- return bdrv_save_vmstate(blk->bs, buf, pos, size);
+ int ret;
+
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret == size && !blk->enable_write_cache) {
+ ret = bdrv_flush(blk_bs(blk));
+ }
+
+ return ret < 0 ? ret : size;
}
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
- return bdrv_load_vmstate(blk->bs, buf, pos, size);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
- return bdrv_probe_blocksizes(blk->bs, bsz);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
- return bdrv_probe_geometry(blk->bs, geo);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_probe_geometry(blk_bs(blk), geo);
+}
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+ assert(blk->root);
+
+ blk->root_state.open_flags = blk->root->bs->open_flags;
+ blk->root_state.read_only = blk->root->bs->read_only;
+ blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
+
+ if (blk->root_state.throttle_group) {
+ g_free(blk->root_state.throttle_group);
+ throttle_group_unref(blk->root_state.throttle_state);
+ }
+ if (blk->root->bs->throttle_state) {
+ const char *name = throttle_group_get_name(blk->root->bs);
+ blk->root_state.throttle_group = g_strdup(name);
+ blk->root_state.throttle_state = throttle_group_incref(name);
+ } else {
+ blk->root_state.throttle_group = NULL;
+ blk->root_state.throttle_state = NULL;
+ }
+}
+
+/*
+ * Applies the information in the root state to the given BlockDriverState. This
+ * does not include the flags which have to be specified for bdrv_open(), use
+ * blk_get_open_flags_from_root_state() to inquire them.
+ */
+void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
+{
+ bs->detect_zeroes = blk->root_state.detect_zeroes;
+ if (blk->root_state.throttle_group) {
+ bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
+ }
+}
+
+/*
+ * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
+ * supposed to inherit the root state.
+ */
+int blk_get_open_flags_from_root_state(BlockBackend *blk)
+{
+ int bs_flags;
+
+ bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
+ bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
+
+ return bs_flags;
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+ return &blk->root_state;
+}
+
+int blk_commit_all(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk) && blk->root->bs->backing) {
+ int ret = bdrv_commit(blk->root->bs);
+ if (ret < 0) {
+ aio_context_release(aio_context);
+ return ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+ return 0;
+}
+
+int blk_flush_all(void)
+{
+ BlockBackend *blk = NULL;
+ int result = 0;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+ int ret;
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk)) {
+ ret = blk_flush(blk);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+
+ return result;
}
diff --git a/qemu/block/bochs.c b/qemu/block/bochs.c
index 199ac2b9a..af8b7abdf 100644
--- a/qemu/block/bochs.c
+++ b/qemu/block/bochs.c
@@ -22,6 +22,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -103,7 +105,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = 1; // no write support yet
- ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+ ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
if (ret < 0) {
return ret;
}
@@ -137,7 +139,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
+ ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);
if (ret < 0) {
goto fail;
@@ -206,7 +208,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
- ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+ ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
&bitmap_entry, 1);
if (ret < 0) {
return ret;
@@ -229,7 +231,7 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num,
if (block_offset < 0) {
return block_offset;
} else if (block_offset > 0) {
- ret = bdrv_pread(bs->file, block_offset, buf, 512);
+ ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
if (ret < 0) {
return ret;
}
diff --git a/qemu/block/cloop.c b/qemu/block/cloop.c
index f328be06f..a84f14019 100644
--- a/qemu/block/cloop.c
+++ b/qemu/block/cloop.c
@@ -21,6 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -66,7 +68,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = 1;
/* read header */
- ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
+ ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
if (ret < 0) {
return ret;
}
@@ -92,7 +94,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
+ ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
return ret;
}
@@ -123,7 +125,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
+ ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
goto fail;
}
@@ -203,8 +205,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
int ret;
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
- ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
- bytes);
+ ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+ s->compressed_block, bytes);
if (ret != bytes) {
return -1;
}
diff --git a/qemu/block/commit.c b/qemu/block/commit.c
index 7312a5bdc..cba0e8c1e 100644
--- a/qemu/block/commit.c
+++ b/qemu/block/commit.c
@@ -12,11 +12,14 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
enum {
/*
@@ -213,7 +216,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, "Invalid parameter combination");
return;
}
@@ -235,14 +238,14 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
orig_overlay_flags = bdrv_get_flags(overlay_bs);
/* convert base & overlay_bs to r/w, if necessary */
- if (!(orig_base_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, base,
- orig_base_flags | BDRV_O_RDWR);
- }
if (!(orig_overlay_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+ reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
orig_overlay_flags | BDRV_O_RDWR);
}
+ if (!(orig_base_flags & BDRV_O_RDWR)) {
+ reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
+ orig_base_flags | BDRV_O_RDWR);
+ }
if (reopen_queue) {
bdrv_reopen_multiple(reopen_queue, &local_err);
if (local_err != NULL) {
diff --git a/qemu/block/crypto.c b/qemu/block/crypto.c
new file mode 100644
index 000000000..1903e84fb
--- /dev/null
+++ b/qemu/block/crypto.c
@@ -0,0 +1,586 @@
+/*
+ * QEMU block full disk encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "crypto/block.h"
+#include "qapi/opts-visitor.h"
+#include "qapi-visit.h"
+#include "qapi/error.h"
+
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+
+typedef struct BlockCrypto BlockCrypto;
+
+struct BlockCrypto {
+ QCryptoBlock *block;
+};
+
+
+static int block_crypto_probe_generic(QCryptoBlockFormat format,
+ const uint8_t *buf,
+ int buf_size,
+ const char *filename)
+{
+ if (qcrypto_block_has_format(format, buf, buf_size)) {
+ return 100;
+ } else {
+ return 0;
+ }
+}
+
+
+static ssize_t block_crypto_read_func(QCryptoBlock *block,
+ size_t offset,
+ uint8_t *buf,
+ size_t buflen,
+ Error **errp,
+ void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ ssize_t ret;
+
+ ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not read encryption header");
+ return ret;
+ }
+ return ret;
+}
+
+
+struct BlockCryptoCreateData {
+ const char *filename;
+ QemuOpts *opts;
+ BlockBackend *blk;
+ uint64_t size;
+};
+
+
+static ssize_t block_crypto_write_func(QCryptoBlock *block,
+ size_t offset,
+ const uint8_t *buf,
+ size_t buflen,
+ Error **errp,
+ void *opaque)
+{
+ struct BlockCryptoCreateData *data = opaque;
+ ssize_t ret;
+
+ ret = blk_pwrite(data->blk, offset, buf, buflen);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not write encryption header");
+ return ret;
+ }
+ return ret;
+}
+
+
+static ssize_t block_crypto_init_func(QCryptoBlock *block,
+ size_t headerlen,
+ Error **errp,
+ void *opaque)
+{
+ struct BlockCryptoCreateData *data = opaque;
+ int ret;
+
+ /* User provided size should reflect amount of space made
+ * available to the guest, so we must take account of that
+ * which will be used by the crypto header
+ */
+ data->size += headerlen;
+
+ qemu_opt_set_number(data->opts, BLOCK_OPT_SIZE, data->size, &error_abort);
+ ret = bdrv_create_file(data->filename, data->opts, errp);
+ if (ret < 0) {
+ return -1;
+ }
+
+ data->blk = blk_new_open(data->filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ if (!data->blk) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+static QemuOptsList block_crypto_runtime_opts_luks = {
+ .name = "crypto",
+ .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
+ .desc = {
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret that provides the encryption key",
+ },
+ { /* end of list */ }
+ },
+};
+
+
+static QemuOptsList block_crypto_create_opts_luks = {
+ .name = "crypto",
+ .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret that provides the encryption key",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption cipher algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption cipher mode",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of IV generator algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of IV generator hash algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption hash algorithm",
+ },
+ { /* end of list */ }
+ },
+};
+
+
+static QCryptoBlockOpenOptions *
+block_crypto_open_opts_init(QCryptoBlockFormat format,
+ QemuOpts *opts,
+ Error **errp)
+{
+ OptsVisitor *ov;
+ QCryptoBlockOpenOptions *ret = NULL;
+ Error *local_err = NULL;
+ Error *end_err = NULL;
+
+ ret = g_new0(QCryptoBlockOpenOptions, 1);
+ ret->format = format;
+
+ ov = opts_visitor_new(opts);
+
+ visit_start_struct(opts_get_visitor(ov),
+ NULL, NULL, 0, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ switch (format) {
+ case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+ visit_type_QCryptoBlockOptionsLUKS_members(
+ opts_get_visitor(ov), &ret->u.luks, &local_err);
+ break;
+
+ default:
+ error_setg(&local_err, "Unsupported block format %d", format);
+ break;
+ }
+
+ visit_end_struct(opts_get_visitor(ov), &end_err);
+ error_propagate(&local_err, end_err);
+
+ out:
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_QCryptoBlockOpenOptions(ret);
+ ret = NULL;
+ }
+ opts_visitor_cleanup(ov);
+ return ret;
+}
+
+
+static QCryptoBlockCreateOptions *
+block_crypto_create_opts_init(QCryptoBlockFormat format,
+ QemuOpts *opts,
+ Error **errp)
+{
+ OptsVisitor *ov;
+ QCryptoBlockCreateOptions *ret = NULL;
+ Error *local_err = NULL;
+ Error *end_err = NULL;
+
+ ret = g_new0(QCryptoBlockCreateOptions, 1);
+ ret->format = format;
+
+ ov = opts_visitor_new(opts);
+
+ visit_start_struct(opts_get_visitor(ov),
+ NULL, NULL, 0, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ switch (format) {
+ case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+ visit_type_QCryptoBlockCreateOptionsLUKS_members(
+ opts_get_visitor(ov), &ret->u.luks, &local_err);
+ break;
+
+ default:
+ error_setg(&local_err, "Unsupported block format %d", format);
+ break;
+ }
+
+ visit_end_struct(opts_get_visitor(ov), &end_err);
+ error_propagate(&local_err, end_err);
+
+ out:
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_QCryptoBlockCreateOptions(ret);
+ ret = NULL;
+ }
+ opts_visitor_cleanup(ov);
+ return ret;
+}
+
+
+static int block_crypto_open_generic(QCryptoBlockFormat format,
+ QemuOptsList *opts_spec,
+ BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ Error **errp)
+{
+ BlockCrypto *crypto = bs->opaque;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ int ret = -EINVAL;
+ QCryptoBlockOpenOptions *open_opts = NULL;
+ unsigned int cflags = 0;
+
+ opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto cleanup;
+ }
+
+ open_opts = block_crypto_open_opts_init(format, opts, errp);
+ if (!open_opts) {
+ goto cleanup;
+ }
+
+ if (flags & BDRV_O_NO_IO) {
+ cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
+ }
+ crypto->block = qcrypto_block_open(open_opts,
+ block_crypto_read_func,
+ bs,
+ cflags,
+ errp);
+
+ if (!crypto->block) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ bs->encrypted = 1;
+ bs->valid_key = 1;
+
+ ret = 0;
+ cleanup:
+ qapi_free_QCryptoBlockOpenOptions(open_opts);
+ return ret;
+}
+
+
+static int block_crypto_create_generic(QCryptoBlockFormat format,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp)
+{
+ int ret = -EINVAL;
+ QCryptoBlockCreateOptions *create_opts = NULL;
+ QCryptoBlock *crypto = NULL;
+ struct BlockCryptoCreateData data = {
+ .size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+ BDRV_SECTOR_SIZE),
+ .opts = opts,
+ .filename = filename,
+ };
+
+ create_opts = block_crypto_create_opts_init(format, opts, errp);
+ if (!create_opts) {
+ return -1;
+ }
+
+ crypto = qcrypto_block_create(create_opts,
+ block_crypto_init_func,
+ block_crypto_write_func,
+ &data,
+ errp);
+
+ if (!crypto) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ qcrypto_block_free(crypto);
+ blk_unref(data.blk);
+ qapi_free_QCryptoBlockCreateOptions(create_opts);
+ return ret;
+}
+
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BlockCrypto *crypto = bs->opaque;
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block);
+
+ offset += payload_offset;
+
+ return bdrv_truncate(bs->file->bs, offset);
+}
+
+static void block_crypto_close(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ qcrypto_block_free(crypto->block);
+}
+
+
+#define BLOCK_CRYPTO_MAX_SECTORS 32
+
+static coroutine_fn int
+block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done = 0;
+ uint8_t *cipher_data = NULL;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ /* Bounce buffer so we have a linear mem region for
+ * entire sector. XXX optimize so we avoid bounce
+ * buffer in case that qiov->niov == 1
+ */
+ cipher_data =
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qiov->size));
+ if (cipher_data == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ while (remaining_sectors) {
+ cur_nr_sectors = remaining_sectors;
+
+ if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+ cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+ }
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+ ret = bdrv_co_readv(bs->file->bs,
+ payload_offset + sector_num,
+ cur_nr_sectors, &hd_qiov);
+ if (ret < 0) {
+ goto cleanup;
+ }
+
+ if (qcrypto_block_decrypt(crypto->block,
+ sector_num,
+ cipher_data, cur_nr_sectors * 512,
+ NULL) < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ qemu_iovec_from_buf(qiov, bytes_done,
+ cipher_data, cur_nr_sectors * 512);
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+
+ cleanup:
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cipher_data);
+
+ return ret;
+}
+
+
+static coroutine_fn int
+block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done = 0;
+ uint8_t *cipher_data = NULL;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ /* Bounce buffer so we have a linear mem region for
+ * entire sector. XXX optimize so we avoid bounce
+ * buffer in case that qiov->niov == 1
+ */
+ cipher_data =
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qiov->size));
+ if (cipher_data == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ while (remaining_sectors) {
+ cur_nr_sectors = remaining_sectors;
+
+ if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+ cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+ }
+
+ qemu_iovec_to_buf(qiov, bytes_done,
+ cipher_data, cur_nr_sectors * 512);
+
+ if (qcrypto_block_encrypt(crypto->block,
+ sector_num,
+ cipher_data, cur_nr_sectors * 512,
+ NULL) < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+ ret = bdrv_co_writev(bs->file->bs,
+ payload_offset + sector_num,
+ cur_nr_sectors, &hd_qiov);
+ if (ret < 0) {
+ goto cleanup;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+
+ cleanup:
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cipher_data);
+
+ return ret;
+}
+
+
+static int64_t block_crypto_getlength(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int64_t len = bdrv_getlength(bs->file->bs);
+
+ ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
+
+ len -= offset;
+
+ return len;
+}
+
+
+static int block_crypto_probe_luks(const uint8_t *buf,
+ int buf_size,
+ const char *filename) {
+ return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ buf, buf_size, filename);
+}
+
+static int block_crypto_open_luks(BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ Error **errp)
+{
+ return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ &block_crypto_runtime_opts_luks,
+ bs, options, flags, errp);
+}
+
+static int block_crypto_create_luks(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
+{
+ return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ filename, opts, errp);
+}
+
+BlockDriver bdrv_crypto_luks = {
+ .format_name = "luks",
+ .instance_size = sizeof(BlockCrypto),
+ .bdrv_probe = block_crypto_probe_luks,
+ .bdrv_open = block_crypto_open_luks,
+ .bdrv_close = block_crypto_close,
+ .bdrv_create = block_crypto_create_luks,
+ .bdrv_truncate = block_crypto_truncate,
+ .create_opts = &block_crypto_create_opts_luks,
+
+ .bdrv_co_readv = block_crypto_co_readv,
+ .bdrv_co_writev = block_crypto_co_writev,
+ .bdrv_getlength = block_crypto_getlength,
+};
+
+static void block_crypto_init(void)
+{
+ bdrv_register(&bdrv_crypto_luks);
+}
+
+block_init(block_crypto_init);
diff --git a/qemu/block/curl.c b/qemu/block/curl.c
index 032cc8ae2..5a8f8b623 100644
--- a/qemu/block/curl.c
+++ b/qemu/block/curl.c
@@ -21,12 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
#include <curl/curl.h>
+#include "qemu/cutils.h"
// #define DEBUG_CURL
// #define DEBUG_VERBOSE
@@ -77,6 +81,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
#define CURL_BLOCK_OPT_TIMEOUT "timeout"
#define CURL_BLOCK_OPT_COOKIE "cookie"
+#define CURL_BLOCK_OPT_USERNAME "username"
+#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
+#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
+#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
struct BDRVCURLState;
@@ -119,6 +127,10 @@ typedef struct BDRVCURLState {
char *cookie;
bool accept_range;
AioContext *aio_context;
+ char *username;
+ char *password;
+ char *proxyusername;
+ char *proxypassword;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
@@ -154,18 +166,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
- aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
- NULL, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ curl_multi_read, NULL, state);
break;
case CURL_POLL_OUT:
- aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ NULL, curl_multi_do, state);
break;
case CURL_POLL_INOUT:
- aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
- curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ curl_multi_read, curl_multi_do, state);
break;
case CURL_POLL_REMOVE:
- aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ NULL, NULL, NULL);
break;
}
@@ -416,6 +430,21 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
+ if (s->username) {
+ curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
+ }
+ if (s->password) {
+ curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
+ }
+ if (s->proxyusername) {
+ curl_easy_setopt(state->curl,
+ CURLOPT_PROXYUSERNAME, s->proxyusername);
+ }
+ if (s->proxypassword) {
+ curl_easy_setopt(state->curl,
+ CURLOPT_PROXYPASSWORD, s->proxypassword);
+ }
+
/* Restrict supported protocols to avoid security issues in the more
* obscure protocols. For example, do not allow POP3/SMTP/IMAP see
* CVE-2013-0249.
@@ -522,10 +551,31 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Pass the cookie or list of cookies with each request"
},
+ {
+ .name = CURL_BLOCK_OPT_USERNAME,
+ .type = QEMU_OPT_STRING,
+ .help = "Username for HTTP auth"
+ },
+ {
+ .name = CURL_BLOCK_OPT_PASSWORD_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret used as password for HTTP auth",
+ },
+ {
+ .name = CURL_BLOCK_OPT_PROXY_USERNAME,
+ .type = QEMU_OPT_STRING,
+ .help = "Username for HTTP proxy auth"
+ },
+ {
+ .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret used as password for HTTP proxy auth",
+ },
{ /* end of list */ }
},
};
+
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -536,6 +586,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
const char *file;
const char *cookie;
double d;
+ const char *secretid;
static int inited = 0;
@@ -577,6 +628,26 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
goto out_noclean;
}
+ s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
+ secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
+
+ if (secretid) {
+ s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!s->password) {
+ goto out_noclean;
+ }
+ }
+
+ s->proxyusername = g_strdup(
+ qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
+ secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
+ if (secretid) {
+ s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!s->proxypassword) {
+ goto out_noclean;
+ }
+ }
+
if (!inited) {
curl_global_init(CURL_GLOBAL_ALL);
inited = 1;
diff --git a/qemu/block/dirty-bitmap.c b/qemu/block/dirty-bitmap.c
new file mode 100644
index 000000000..4902ca557
--- /dev/null
+++ b/qemu/block/dirty-bitmap.c
@@ -0,0 +1,387 @@
+/*
+ * Block Dirty Bitmap
+ *
+ * Copyright (c) 2016 Red Hat. Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ * or enabled. A frozen bitmap can only abdicate() or reclaim().
+ */
+struct BdrvDirtyBitmap {
+ HBitmap *bitmap; /* Dirty sector bitmap implementation */
+ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+ char *name; /* Optional non-empty unique ID */
+ int64_t size; /* Size of the bitmap (Number of sectors) */
+ bool disabled; /* Bitmap is read-only */
+ QLIST_ENTRY(BdrvDirtyBitmap) list;
+};
+
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+ BdrvDirtyBitmap *bm;
+
+ assert(name);
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ if (bm->name && !strcmp(name, bm->name)) {
+ return bm;
+ }
+ }
+ return NULL;
+}
+
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ g_free(bitmap->name);
+ bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ uint32_t granularity,
+ const char *name,
+ Error **errp)
+{
+ int64_t bitmap_size;
+ BdrvDirtyBitmap *bitmap;
+ uint32_t sector_granularity;
+
+ assert((granularity & (granularity - 1)) == 0);
+
+ if (name && bdrv_find_dirty_bitmap(bs, name)) {
+ error_setg(errp, "Bitmap already exists: %s", name);
+ return NULL;
+ }
+ sector_granularity = granularity >> BDRV_SECTOR_BITS;
+ assert(sector_granularity);
+ bitmap_size = bdrv_nb_sectors(bs);
+ if (bitmap_size < 0) {
+ error_setg_errno(errp, -bitmap_size, "could not get length of device");
+ errno = -bitmap_size;
+ return NULL;
+ }
+ bitmap = g_new0(BdrvDirtyBitmap, 1);
+ bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+ bitmap->size = bitmap_size;
+ bitmap->name = g_strdup(name);
+ bitmap->disabled = false;
+ QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+ return bitmap;
+}
+
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+ return bitmap->successor;
+}
+
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+ return !(bitmap->disabled || bitmap->successor);
+}
+
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ return DIRTY_BITMAP_STATUS_FROZEN;
+ } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ return DIRTY_BITMAP_STATUS_DISABLED;
+ } else {
+ return DIRTY_BITMAP_STATUS_ACTIVE;
+ }
+}
+
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, Error **errp)
+{
+ uint64_t granularity;
+ BdrvDirtyBitmap *child;
+
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ error_setg(errp, "Cannot create a successor for a bitmap that is "
+ "currently frozen");
+ return -1;
+ }
+ assert(!bitmap->successor);
+
+ /* Create an anonymous successor */
+ granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+ if (!child) {
+ return -1;
+ }
+
+ /* Successor will be on or off based on our current state. */
+ child->disabled = bitmap->disabled;
+
+ /* Install the successor and freeze the parent */
+ bitmap->successor = child;
+ return 0;
+}
+
+/**
+ * For a bitmap with a successor, yield our name to the successor,
+ * delete the old bitmap, and return a handle to the new bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp)
+{
+ char *name;
+ BdrvDirtyBitmap *successor = bitmap->successor;
+
+ if (successor == NULL) {
+ error_setg(errp, "Cannot relinquish control if "
+ "there's no successor present");
+ return NULL;
+ }
+
+ name = bitmap->name;
+ bitmap->name = NULL;
+ successor->name = name;
+ bitmap->successor = NULL;
+ bdrv_release_dirty_bitmap(bs, bitmap);
+
+ return successor;
+}
+
+/**
+ * In cases of failure where we can no longer safely delete the parent,
+ * we may wish to re-join the parent and child/successor.
+ * The merged parent will be un-frozen, but not explicitly re-enabled.
+ */
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *parent,
+ Error **errp)
+{
+ BdrvDirtyBitmap *successor = parent->successor;
+
+ if (!successor) {
+ error_setg(errp, "Cannot reclaim a successor when none is present");
+ return NULL;
+ }
+
+ if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+ error_setg(errp, "Merging of parent and successor bitmap failed");
+ return NULL;
+ }
+ bdrv_release_dirty_bitmap(bs, successor);
+ parent->successor = NULL;
+
+ return parent;
+}
+
+/**
+ * Truncates _all_ bitmaps attached to a BDS.
+ */
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bitmap;
+ uint64_t size = bdrv_nb_sectors(bs);
+
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ hbitmap_truncate(bitmap->bitmap, size);
+ bitmap->size = size;
+ }
+}
+
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ bool only_named)
+{
+ BdrvDirtyBitmap *bm, *next;
+ QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+ if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+ assert(!bdrv_dirty_bitmap_frozen(bm));
+ QLIST_REMOVE(bm, list);
+ hbitmap_free(bm->bitmap);
+ g_free(bm->name);
+ g_free(bm);
+
+ if (bitmap) {
+ return;
+ }
+ }
+ }
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+ bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+}
+
+/**
+ * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
+ * There must not be any frozen bitmaps attached.
+ */
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
+{
+ bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+}
+
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ bitmap->disabled = true;
+}
+
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ bitmap->disabled = false;
+}
+
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDirtyInfoList *list = NULL;
+ BlockDirtyInfoList **plist = &list;
+
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
+ BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
+ info->count = bdrv_get_dirty_count(bm);
+ info->granularity = bdrv_dirty_bitmap_granularity(bm);
+ info->has_name = !!bm->name;
+ info->name = g_strdup(bm->name);
+ info->status = bdrv_dirty_bitmap_status(bm);
+ entry->value = info;
+ *plist = entry;
+ plist = &entry->next;
+ }
+
+ return list;
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector)
+{
+ if (bitmap) {
+ return hbitmap_get(bitmap->bitmap, sector);
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * Chooses a default granularity based on the existing cluster size,
+ * but clamped between [4K, 64K]. Defaults to 64K in the case that there
+ * is no cluster size information available.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+ uint32_t granularity;
+
+ if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
+ granularity = MAX(4096, bdi.cluster_size);
+ granularity = MIN(65536, granularity);
+ } else {
+ granularity = 65536;
+ }
+
+ return granularity;
+}
+
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+ return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+}
+
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+{
+ hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+}
+
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ if (!out) {
+ hbitmap_reset_all(bitmap->bitmap);
+ } else {
+ HBitmap *backup = bitmap->bitmap;
+ bitmap->bitmap = hbitmap_alloc(bitmap->size,
+ hbitmap_granularity(backup));
+ *out = backup;
+ }
+}
+
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+ HBitmap *tmp = bitmap->bitmap;
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ bitmap->bitmap = in;
+ hbitmap_free(tmp);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ BdrvDirtyBitmap *bitmap;
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ continue;
+ }
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ }
+}
+
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+ assert(hbi->hb);
+ hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
+{
+ return hbitmap_count(bitmap->bitmap);
+}
diff --git a/qemu/block/dmg.c b/qemu/block/dmg.c
index 9f2528169..a496eb7c9 100644
--- a/qemu/block/dmg.c
+++ b/qemu/block/dmg.c
@@ -21,6 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/bswap.h"
@@ -85,7 +87,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
uint64_t buffer;
int ret;
- ret = bdrv_pread(bs->file, offset, &buffer, 8);
+ ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
if (ret < 0) {
return ret;
}
@@ -99,7 +101,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
uint32_t buffer;
int ret;
- ret = bdrv_pread(bs->file, offset, &buffer, 4);
+ ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
if (ret < 0) {
return ret;
}
@@ -354,7 +356,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
offset += 4;
buffer = g_realloc(buffer, count);
- ret = bdrv_pread(bs->file, offset, buffer, count);
+ ret = bdrv_pread(bs->file->bs, offset, buffer, count);
if (ret < 0) {
goto fail;
}
@@ -391,7 +393,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
buffer = g_malloc(info_length + 1);
buffer[info_length] = '\0';
- ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
+ ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
if (ret != info_length) {
ret = -EINVAL;
goto fail;
@@ -446,7 +448,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
ds.max_sectors_per_chunk = 1;
/* locate the UDIF trailer */
- offset = dmg_find_koly_offset(bs->file, errp);
+ offset = dmg_find_koly_offset(bs->file->bs, errp);
if (offset < 0) {
ret = offset;
goto fail;
@@ -514,9 +516,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
- s->compressed_chunk = qemu_try_blockalign(bs->file,
+ s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
ds.max_compressed_size + 1);
- s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+ s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
512 * ds.max_sectors_per_chunk);
if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
ret = -ENOMEM;
@@ -592,7 +594,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
- ret = bdrv_pread(bs->file, s->offsets[chunk],
+ ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -616,7 +618,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000006: /* bzip2 compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
- ret = bdrv_pread(bs->file, s->offsets[chunk],
+ ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -641,7 +643,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
break;
#endif /* CONFIG_BZIP2 */
case 1: /* copy */
- ret = bdrv_pread(bs->file, s->offsets[chunk],
+ ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
diff --git a/qemu/block/gluster.c b/qemu/block/gluster.c
index 1eb3a8c39..a8aaacf64 100644
--- a/qemu/block/gluster.c
+++ b/qemu/block/gluster.c
@@ -7,8 +7,10 @@
* See the COPYING file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
typedef struct GlusterAIOCB {
@@ -245,7 +247,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
if (!ret || ret == acb->size) {
acb->ret = 0; /* Success */
} else if (ret < 0) {
- acb->ret = ret; /* Read/Write failed */
+ acb->ret = -errno; /* Read/Write failed */
} else {
acb->ret = -EIO; /* Partial read/write - fail it */
}
@@ -312,6 +314,23 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
goto out;
}
+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+ /* Without this, if fsync fails for a recoverable reason (for instance,
+ * ENOSPC), gluster will dump its cache, preventing retries. This means
+ * almost certain data loss. Not all gluster versions support the
+ * 'resync-failed-syncs-after-fsync' key value, but there is no way to
+ * discover during runtime if it is supported (this api returns success for
+ * unknown key/value pairs) */
+ ret = glfs_set_xlator_option(s->glfs, "*-write-behind",
+ "resync-failed-syncs-after-fsync",
+ "on");
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+ ret = -errno;
+ goto out;
+ }
+#endif
+
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
s->fd = glfs_open(s->glfs, gconf->image, open_flags);
@@ -364,6 +383,16 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
goto exit;
}
+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+ ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
+ "resync-failed-syncs-after-fsync", "on");
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+ ret = -errno;
+ goto exit;
+ }
+#endif
+
reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
@@ -429,28 +458,23 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
off_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = size;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = size;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
static inline bool gluster_supports_zerofill(void)
@@ -541,35 +565,30 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = size;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = size;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- &gluster_finish_aiocb, acb);
+ gluster_finish_aiocb, &acb);
} else {
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- &gluster_finish_aiocb, acb);
+ gluster_finish_aiocb, &acb);
}
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -597,28 +616,58 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+
+ if (s->fd) {
+ glfs_close(s->fd);
+ s->fd = NULL;
+ }
+ glfs_fini(s->glfs);
+}
+
static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- acb->size = 0;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = 0;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+ ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
ret = -errno;
- goto out;
+ goto error;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
+ if (acb.ret < 0) {
+ ret = acb.ret;
+ goto error;
+ }
+
+ return acb.ret;
+
+error:
+ /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain its cache
+ * after a fsync failure, so we have no way of allowing the guest to safely
+ * continue. Gluster versions prior to 3.5.6 don't retain the cache
+ * either, but will invalidate the fd on error, so this is again our only
+ * option.
+ *
+ * The 'resync-failed-syncs-after-fsync' xlator option for the
+ * write-behind cache will cause later gluster versions to retain its
+ * cache after error, so long as the fd remains open. However, we
+ * currently have no way of knowing if this option is supported.
+ *
+ * TODO: Once gluster provides a way for us to determine if the option
+ * is supported, bypass the closure and setting drv to NULL. */
+ qemu_gluster_close(bs);
+ bs->drv = NULL;
return ret;
}
@@ -627,28 +676,23 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = 0;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = 0;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
#endif
@@ -679,17 +723,6 @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
}
}
-static void qemu_gluster_close(BlockDriverState *bs)
-{
- BDRVGlusterState *s = bs->opaque;
-
- if (s->fd) {
- glfs_close(s->fd);
- s->fd = NULL;
- }
- glfs_fini(s->glfs);
-}
-
static int qemu_gluster_has_zero_init(BlockDriverState *bs)
{
/* GlusterFS volume could be backed by a block device */
diff --git a/qemu/block/io.c b/qemu/block/io.c
index d4bc83b33..a7dbf85b1 100644
--- a/qemu/block/io.c
+++ b/qemu/block/io.c
@@ -22,10 +22,14 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "trace.h"
+#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "block/throttle-groups.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
@@ -42,12 +46,6 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -156,38 +154,45 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
/* Take some limits from the children as a default */
if (bs->file) {
- bdrv_refresh_limits(bs->file, &local_err);
+ bdrv_refresh_limits(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
- bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
- bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
- bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment;
- bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+ bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
+ bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
+ bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
+ bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
+ bs->bl.max_iov = bs->file->bs->bl.max_iov;
} else {
bs->bl.min_mem_alignment = 512;
bs->bl.opt_mem_alignment = getpagesize();
+
+ /* Safe default since most protocols use readv()/writev()/etc */
+ bs->bl.max_iov = IOV_MAX;
}
- if (bs->backing_hd) {
- bdrv_refresh_limits(bs->backing_hd, &local_err);
+ if (bs->backing) {
+ bdrv_refresh_limits(bs->backing->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
bs->bl.opt_transfer_length =
MAX(bs->bl.opt_transfer_length,
- bs->backing_hd->bl.opt_transfer_length);
+ bs->backing->bs->bl.opt_transfer_length);
bs->bl.max_transfer_length =
MIN_NON_ZERO(bs->bl.max_transfer_length,
- bs->backing_hd->bl.max_transfer_length);
+ bs->backing->bs->bl.max_transfer_length);
bs->bl.opt_mem_alignment =
MAX(bs->bl.opt_mem_alignment,
- bs->backing_hd->bl.opt_mem_alignment);
+ bs->backing->bs->bl.opt_mem_alignment);
bs->bl.min_mem_alignment =
MAX(bs->bl.min_mem_alignment,
- bs->backing_hd->bl.min_mem_alignment);
+ bs->backing->bs->bl.min_mem_alignment);
+ bs->bl.max_iov =
+ MIN(bs->bl.max_iov,
+ bs->backing->bs->bl.max_iov);
}
/* Then let the driver override it */
@@ -213,8 +218,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
}
/* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
+bool bdrv_requests_pending(BlockDriverState *bs)
{
+ BdrvChild *child;
+
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
@@ -224,17 +231,72 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
return true;
}
- if (bs->file && bdrv_requests_pending(bs->file)) {
- return true;
- }
- if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
- return true;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ if (bdrv_requests_pending(child->bs)) {
+ return true;
+ }
}
+
return false;
}
+static void bdrv_drain_recurse(BlockDriverState *bs)
+{
+ BdrvChild *child;
+
+ if (bs->drv && bs->drv->bdrv_drain) {
+ bs->drv->bdrv_drain(bs);
+ }
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_drain_recurse(child->bs);
+ }
+}
+
+typedef struct {
+ Coroutine *co;
+ BlockDriverState *bs;
+ QEMUBH *bh;
+ bool done;
+} BdrvCoDrainData;
+
+static void bdrv_co_drain_bh_cb(void *opaque)
+{
+ BdrvCoDrainData *data = opaque;
+ Coroutine *co = data->co;
+
+ qemu_bh_delete(data->bh);
+ bdrv_drain(data->bs);
+ data->done = true;
+ qemu_coroutine_enter(co, NULL);
+}
+
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+{
+ BdrvCoDrainData data;
+
+ /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
+ * other coroutines run if they were queued from
+ * qemu_co_queue_run_restart(). */
+
+ assert(qemu_in_coroutine());
+ data = (BdrvCoDrainData) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .done = false,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data),
+ };
+ qemu_bh_schedule(data.bh);
+
+ qemu_coroutine_yield();
+ /* If we are resumed from some other event (such as an aio completion or a
+ * timer callback), it is a bug in the caller that should be fixed. */
+ assert(data.done);
+}
+
/*
- * Wait for pending requests to complete on a single BlockDriverState subtree
+ * Wait for pending requests to complete on a single BlockDriverState subtree,
+ * and suspend block driver's internal I/O until next request arrives.
*
* Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
* AioContext.
@@ -247,6 +309,11 @@ void bdrv_drain(BlockDriverState *bs)
{
bool busy = true;
+ bdrv_drain_recurse(bs);
+ if (qemu_in_coroutine()) {
+ bdrv_co_drain(bs);
+ return;
+ }
while (busy) {
/* Keep iterating */
bdrv_flush_io_queue(bs);
@@ -275,6 +342,7 @@ void bdrv_drain_all(void)
if (bs->job) {
block_job_pause(bs->job);
}
+ bdrv_drain_recurse(bs);
aio_context_release(aio_context);
if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -344,13 +412,14 @@ static void tracked_request_end(BdrvTrackedRequest *req)
static void tracked_request_begin(BdrvTrackedRequest *req,
BlockDriverState *bs,
int64_t offset,
- unsigned int bytes, bool is_write)
+ unsigned int bytes,
+ enum BdrvTrackedRequestType type)
{
*req = (BdrvTrackedRequest){
.bs = bs,
.offset = offset,
.bytes = bytes,
- .is_write = is_write,
+ .type = type,
.co = qemu_coroutine_self(),
.serialising = false,
.overlap_offset = offset,
@@ -593,20 +662,6 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
-/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- bool enabled;
- int ret;
-
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- bs->io_limits_enabled = enabled;
- return ret;
-}
-
/* Return < 0 if error. Important errors are:
-EIO generic I/O error (may happen for all errors)
-ENOMEDIUM No media inserted.
@@ -637,6 +692,7 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+ BlockDriverState *file;
int n;
target_sectors = bdrv_nb_sectors(bs);
@@ -649,7 +705,7 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
if (nb_sectors <= 0) {
return 0;
}
- ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
+ ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file);
if (ret < 0) {
error_report("error getting block status at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
@@ -736,9 +792,9 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return ret;
}
- /* No flush needed for cache modes that already do it */
- if (bs->enable_write_cache) {
- bdrv_flush(bs);
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
}
return 0;
@@ -833,6 +889,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
+ assert((bs->open_flags & BDRV_O_NO_IO) == 0);
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -844,7 +901,9 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
mark_request_serialising(req, bdrv_get_cluster_size(bs));
}
- wait_serialising_requests(req);
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
+ wait_serialising_requests(req);
+ }
if (flags & BDRV_REQ_COPY_ON_READ) {
int pnum;
@@ -908,7 +967,7 @@ out:
/*
* Handle a read request in coroutine context
*/
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -932,7 +991,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
return ret;
}
- if (bs->copy_on_read) {
+ /* Don't do copy-on-read if we read data before write operation */
+ if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
flags |= BDRV_REQ_COPY_ON_READ;
}
@@ -966,7 +1026,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
bytes = ROUND_UP(bytes, align);
}
- tracked_request_begin(&req, bs, offset, bytes, false);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
@@ -1001,6 +1061,15 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}
+int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+ BDRV_REQ_NO_SERIALISING);
+}
+
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
@@ -1107,6 +1176,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
+ assert((bs->open_flags & BDRV_O_NO_IO) == 0);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
@@ -1127,21 +1197,30 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
+ } else if (drv->bdrv_co_writev_flags) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV);
+ ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+ flags);
} else {
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
+ assert(drv->supported_write_flags == 0);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- if (ret == 0 && !bs->enable_write_cache) {
+ if (ret == 0 && (flags & BDRV_REQ_FUA) &&
+ !(drv->supported_write_flags & BDRV_REQ_FUA))
+ {
ret = bdrv_co_flush(bs);
}
bdrv_set_dirty(bs, sector_num, nb_sectors);
- block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+ if (bs->wr_highest_offset < offset + bytes) {
+ bs->wr_highest_offset = offset + bytes;
+ }
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
@@ -1182,13 +1261,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
/* RMW the unaligned part before head. */
mark_request_serialising(req, align);
wait_serialising_requests(req);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
@@ -1220,13 +1299,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
/* RMW the unaligned part after tail. */
mark_request_serialising(req, align);
wait_serialising_requests(req);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
ret = bdrv_aligned_pwritev(bs, req, offset, align,
@@ -1241,7 +1320,7 @@ fail:
/*
* Handle a write request in coroutine context
*/
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1260,6 +1339,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
if (bs->read_only) {
return -EPERM;
}
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
ret = bdrv_check_byte_request(bs, offset, bytes);
if (ret < 0) {
@@ -1276,7 +1356,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
* Pad qiov with the read parts and be sure to have a tracked request not
* only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
*/
- tracked_request_begin(&req, bs, offset, bytes, true);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
@@ -1297,13 +1377,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
};
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
qemu_iovec_init(&local_qiov, qiov->niov + 2);
qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
@@ -1331,13 +1411,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
};
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
if (!use_local_qiov) {
qemu_iovec_init(&local_qiov, qiov->niov + 1);
@@ -1401,29 +1481,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
BDRV_REQ_ZERO_WRITE | flags);
}
-int bdrv_flush_all(void)
-{
- BlockDriverState *bs = NULL;
- int result = 0;
-
- while ((bs = bdrv_next(bs))) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
- int ret;
-
- aio_context_acquire(aio_context);
- ret = bdrv_flush(bs);
- if (ret < 0 && !result) {
- result = ret;
- }
- aio_context_release(aio_context);
- }
-
- return result;
-}
-
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
+ BlockDriverState **file;
int64_t sector_num;
int nb_sectors;
int *pnum;
@@ -1445,10 +1506,14 @@ typedef struct BdrvCoGetBlockStatusData {
*
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
+ *
+ * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
+ * points to the BDS which the sector range is allocated in.
*/
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
int64_t total_sectors;
int64_t n;
@@ -1478,7 +1543,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
return ret;
}
- ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+ *file = NULL;
+ ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
+ file);
if (ret < 0) {
*pnum = 0;
return ret;
@@ -1486,8 +1553,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID);
- return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
- *pnum, pnum);
+ return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+ *pnum, pnum, file);
}
if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
@@ -1495,8 +1562,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
} else {
if (bdrv_unallocated_blocks_are_zero(bs)) {
ret |= BDRV_BLOCK_ZERO;
- } else if (bs->backing_hd) {
- BlockDriverState *bs2 = bs->backing_hd;
+ } else if (bs->backing) {
+ BlockDriverState *bs2 = bs->backing->bs;
int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
@@ -1504,13 +1571,14 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
}
}
- if (bs->file &&
+ if (*file && *file != bs &&
(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
(ret & BDRV_BLOCK_OFFSET_VALID)) {
+ BlockDriverState *file2;
int file_pnum;
- ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
- *pnum, &file_pnum);
+ ret2 = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
+ *pnum, &file_pnum, &file2);
if (ret2 >= 0) {
/* Ignore errors. This is just providing extra information, it
* is useful but not necessary.
@@ -1535,14 +1603,15 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
int nb_sectors,
- int *pnum)
+ int *pnum,
+ BlockDriverState **file)
{
BlockDriverState *p;
int64_t ret = 0;
assert(bs != base);
- for (p = bs; p != base; p = p->backing_hd) {
- ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum);
+ for (p = bs; p != base; p = backing_bs(p)) {
+ ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
break;
}
@@ -1561,7 +1630,8 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
data->sector_num,
data->nb_sectors,
- data->pnum);
+ data->pnum,
+ data->file);
data->done = true;
}
@@ -1573,12 +1643,14 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
int64_t bdrv_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
Coroutine *co;
BdrvCoGetBlockStatusData data = {
.bs = bs,
.base = base,
+ .file = file,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
.pnum = pnum,
@@ -1602,16 +1674,19 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
int64_t bdrv_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
- return bdrv_get_block_status_above(bs, bs->backing_hd,
- sector_num, nb_sectors, pnum);
+ return bdrv_get_block_status_above(bs, backing_bs(bs),
+ sector_num, nb_sectors, pnum, file);
}
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
- int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+ BlockDriverState *file;
+ int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
+ &file);
if (ret < 0) {
return ret;
}
@@ -1662,7 +1737,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
n = pnum_inter;
}
- intermediate = intermediate->backing_hd;
+ intermediate = backing_bs(intermediate);
}
*pnum = n;
@@ -1713,7 +1788,7 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
} else if (drv->bdrv_save_vmstate) {
return drv->bdrv_save_vmstate(bs, qiov, pos);
} else if (bs->file) {
- return bdrv_writev_vmstate(bs->file, qiov, pos);
+ return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
}
return -ENOTSUP;
@@ -1728,7 +1803,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
if (drv->bdrv_load_vmstate)
return drv->bdrv_load_vmstate(bs, buf, pos, size);
if (bs->file)
- return bdrv_load_vmstate(bs->file, buf, pos, size);
+ return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
return -ENOTSUP;
}
@@ -1849,7 +1924,8 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
merge = 1;
}
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+ if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
+ bs->bl.max_iov) {
merge = 0;
}
@@ -1893,7 +1969,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
}
}
- block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+ if (bs->blk) {
+ block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
+ num_reqs - outidx - 1);
+ }
return outidx + 1;
}
@@ -2208,7 +2287,7 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
{
BlockAIOCB *acb;
- acb = g_slice_alloc(aiocb_info->aiocb_size);
+ acb = g_malloc(aiocb_info->aiocb_size);
acb->aiocb_info = aiocb_info;
acb->bs = bs;
acb->cb = cb;
@@ -2228,7 +2307,7 @@ void qemu_aio_unref(void *p)
BlockAIOCB *acb = p;
assert(acb->refcnt > 0);
if (--acb->refcnt == 0) {
- g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+ g_free(acb);
}
}
@@ -2298,18 +2377,27 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
int ret;
+ BdrvTrackedRequest req;
if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
bdrv_is_sg(bs)) {
return 0;
}
+ tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+
+ /* Write back all layers by calling one driver function */
+ if (bs->drv->bdrv_co_flush) {
+ ret = bs->drv->bdrv_co_flush(bs);
+ goto out;
+ }
+
/* Write back cached data to the OS even with cache=unsafe */
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) {
ret = bs->drv->bdrv_co_flush_to_os(bs);
if (ret < 0) {
- return ret;
+ goto out;
}
}
@@ -2349,14 +2437,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
ret = 0;
}
if (ret < 0) {
- return ret;
+ goto out;
}
/* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
* in the case of cache=unsafe, so there are no useless flushes.
*/
flush_parent:
- return bdrv_co_flush(bs->file);
+ ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
+out:
+ tracked_request_end(&req);
+ return ret;
}
int bdrv_flush(BlockDriverState *bs)
@@ -2399,6 +2490,7 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
+ BdrvTrackedRequest req;
int max_discard, ret;
if (!bs->drv) {
@@ -2411,6 +2503,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
} else if (bs->read_only) {
return -EPERM;
}
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
/* Do nothing if disabled. */
if (!(bs->open_flags & BDRV_O_UNMAP)) {
@@ -2421,6 +2514,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
+ tracked_request_begin(&req, bs, sector_num, nb_sectors,
+ BDRV_TRACKED_DISCARD);
bdrv_set_dirty(bs, sector_num, nb_sectors);
max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
@@ -2454,20 +2549,24 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
bdrv_co_io_em_complete, &co);
if (acb == NULL) {
- return -EIO;
+ ret = -EIO;
+ goto out;
} else {
qemu_coroutine_yield();
ret = co.ret;
}
}
if (ret && ret != -ENOTSUP) {
- return ret;
+ goto out;
}
sector_num += num;
nb_sectors -= num;
}
- return 0;
+ ret = 0;
+out:
+ tracked_request_end(&req);
+ return ret;
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
@@ -2496,26 +2595,110 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
return rwco.ret;
}
-/* needed for generic scsi interface */
+typedef struct {
+ CoroutineIOCompletion *co;
+ QEMUBH *bh;
+} BdrvIoctlCompletionData;
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+static void bdrv_ioctl_bh_cb(void *opaque)
+{
+ BdrvIoctlCompletionData *data = opaque;
+
+ bdrv_co_io_em_complete(data->co, -ENOTSUP);
+ qemu_bh_delete(data->bh);
+}
+
+static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest tracked_req;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BlockAIOCB *acb;
- if (drv && drv->bdrv_ioctl)
- return drv->bdrv_ioctl(bs, req, buf);
- return -ENOTSUP;
+ tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
+ if (!drv || !drv->bdrv_aio_ioctl) {
+ co.ret = -ENOTSUP;
+ goto out;
+ }
+
+ acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
+ if (!acb) {
+ BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
+ data->bh = aio_bh_new(bdrv_get_aio_context(bs),
+ bdrv_ioctl_bh_cb, data);
+ data->co = &co;
+ qemu_bh_schedule(data->bh);
+ }
+ qemu_coroutine_yield();
+out:
+ tracked_request_end(&tracked_req);
+ return co.ret;
+}
+
+typedef struct {
+ BlockDriverState *bs;
+ int req;
+ void *buf;
+ int ret;
+} BdrvIoctlCoData;
+
+static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
+{
+ BdrvIoctlCoData *data = opaque;
+ data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf);
+}
+
+/* needed for generic scsi interface */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+ BdrvIoctlCoData data = {
+ .bs = bs,
+ .req = req,
+ .buf = buf,
+ .ret = -EINPROGRESS,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_co_ioctl_entry(&data);
+ } else {
+ Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
+
+ qemu_coroutine_enter(co, &data);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(bs), true);
+ }
+ }
+ return data.ret;
+}
+
+static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
+{
+ BlockAIOCBCoroutine *acb = opaque;
+ acb->req.error = bdrv_co_do_ioctl(acb->common.bs,
+ acb->req.req, acb->req.buf);
+ bdrv_co_complete(acb);
}
BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- BlockDriver *drv = bs->drv;
+ BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info,
+ bs, cb, opaque);
+ Coroutine *co;
+
+ acb->need_bh = true;
+ acb->req.error = -EINPROGRESS;
+ acb->req.req = req;
+ acb->req.buf = buf;
+ co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
+ qemu_coroutine_enter(co, acb);
- if (drv && drv->bdrv_aio_ioctl)
- return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
- return NULL;
+ bdrv_co_maybe_schedule_bh(acb);
+ return &acb->common;
}
void *qemu_blockalign(BlockDriverState *bs, size_t size)
@@ -2584,7 +2767,7 @@ void bdrv_io_plug(BlockDriverState *bs)
if (drv && drv->bdrv_io_plug) {
drv->bdrv_io_plug(bs);
} else if (bs->file) {
- bdrv_io_plug(bs->file);
+ bdrv_io_plug(bs->file->bs);
}
}
@@ -2594,7 +2777,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
if (drv && drv->bdrv_io_unplug) {
drv->bdrv_io_unplug(bs);
} else if (bs->file) {
- bdrv_io_unplug(bs->file);
+ bdrv_io_unplug(bs->file->bs);
}
}
@@ -2604,7 +2787,24 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
if (drv && drv->bdrv_flush_io_queue) {
drv->bdrv_flush_io_queue(bs);
} else if (bs->file) {
- bdrv_flush_io_queue(bs->file);
+ bdrv_flush_io_queue(bs->file->bs);
}
bdrv_start_throttled_reqs(bs);
}
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+ if (!bs->quiesce_counter++) {
+ aio_disable_external(bdrv_get_aio_context(bs));
+ }
+ bdrv_drain(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+ assert(bs->quiesce_counter > 0);
+ if (--bs->quiesce_counter > 0) {
+ return;
+ }
+ aio_enable_external(bdrv_get_aio_context(bs));
+}
diff --git a/qemu/block/iscsi.c b/qemu/block/iscsi.c
index 50029168e..302baf84c 100644
--- a/qemu/block/iscsi.c
+++ b/qemu/block/iscsi.c
@@ -23,7 +23,7 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include <poll.h>
#include <math.h>
@@ -39,6 +39,7 @@
#include "sysemu/sysemu.h"
#include "qmp-commands.h"
#include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
@@ -69,7 +70,6 @@ typedef struct IscsiLun {
bool lbprz;
bool dpofua;
bool has_write_same;
- bool force_next_flush;
bool request_timed_out;
} IscsiLun;
@@ -83,7 +83,7 @@ typedef struct IscsiTask {
QEMUBH *bh;
IscsiLun *iscsilun;
QEMUTimer retry_timer;
- bool force_next_flush;
+ int err_code;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -96,6 +96,7 @@ typedef struct IscsiAIOCB {
int status;
int64_t sector_num;
int nb_sectors;
+ int ret;
#ifdef __linux__
sg_io_hdr_t *ioh;
#endif
@@ -169,19 +170,70 @@ static inline unsigned exp_random(double mean)
return -mean * log((double)rand() / RAND_MAX);
}
-/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced
- * in libiscsi 1.10.0 as part of an enum. The LIBISCSI_API_VERSION
- * macro was introduced in 1.11.0. So use the API_VERSION macro as
- * a hint that the macros are defined and define them ourselves
- * otherwise to keep the required libiscsi version at 1.9.0 */
-#if !defined(LIBISCSI_API_VERSION)
-#define QEMU_SCSI_STATUS_TASK_SET_FULL 0x28
-#define QEMU_SCSI_STATUS_TIMEOUT 0x0f000002
-#else
-#define QEMU_SCSI_STATUS_TASK_SET_FULL SCSI_STATUS_TASK_SET_FULL
-#define QEMU_SCSI_STATUS_TIMEOUT SCSI_STATUS_TIMEOUT
+/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
+ * libiscsi 1.10.0, together with other constants we need. Use it as
+ * a hint that we have to define them ourselves if needed, to keep the
+ * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for
+ * the test because SCSI_STATUS_* is an enum.
+ *
+ * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
+ * an enum, check against the LIBISCSI_API_VERSION macro, which was
+ * introduced in 1.11.0. If it is present, there is no need to define
+ * anything.
+ */
+#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
+ !defined(LIBISCSI_API_VERSION)
+#define SCSI_STATUS_TASK_SET_FULL 0x28
+#define SCSI_STATUS_TIMEOUT 0x0f000002
+#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600
+#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00
#endif
+static int iscsi_translate_sense(struct scsi_sense *sense)
+{
+ int ret;
+
+ switch (sense->key) {
+ case SCSI_SENSE_NOT_READY:
+ return -EBUSY;
+ case SCSI_SENSE_DATA_PROTECTION:
+ return -EACCES;
+ case SCSI_SENSE_COMMAND_ABORTED:
+ return -ECANCELED;
+ case SCSI_SENSE_ILLEGAL_REQUEST:
+ /* Parse ASCQ */
+ break;
+ default:
+ return -EIO;
+ }
+ switch (sense->ascq) {
+ case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
+ case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
+ case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
+ case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
+ ret = -EINVAL;
+ break;
+ case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
+ ret = -ENOSPC;
+ break;
+ case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
+ ret = -ENOTSUP;
+ break;
+ case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
+ case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
+ case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
+ ret = -ENOMEDIUM;
+ break;
+ case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
+ ret = -EACCES;
+ break;
+ default:
+ ret = -EIO;
+ break;
+ }
+ return ret;
+}
+
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -203,11 +255,11 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
goto out;
}
if (status == SCSI_STATUS_BUSY ||
- status == QEMU_SCSI_STATUS_TIMEOUT ||
- status == QEMU_SCSI_STATUS_TASK_SET_FULL) {
+ status == SCSI_STATUS_TIMEOUT ||
+ status == SCSI_STATUS_TASK_SET_FULL) {
unsigned retry_time =
exp_random(iscsi_retry_times[iTask->retries - 1]);
- if (status == QEMU_SCSI_STATUS_TIMEOUT) {
+ if (status == SCSI_STATUS_TIMEOUT) {
/* make sure the request is rescheduled AFTER the
* reconnect is initiated */
retry_time = EVENT_INTERVAL * 2;
@@ -226,9 +278,8 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
return;
}
}
+ iTask->err_code = iscsi_translate_sense(&task->sense);
error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
- } else {
- iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
}
out:
@@ -291,8 +342,8 @@ iscsi_set_events(IscsiLun *iscsilun)
int ev = iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
- aio_set_fd_handler(iscsilun->aio_context,
- iscsi_get_fd(iscsi),
+ aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
+ false,
(ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun);
@@ -397,15 +448,15 @@ static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
}
}
-static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
+static int coroutine_fn
+iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov, int flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
- int fua;
+ bool fua;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
@@ -421,8 +472,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- fua = iscsilun->dpofua && !bs->enable_write_cache;
- iTask.force_next_flush = !fua;
+ fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA);
if (iscsilun->use_16_for_rw) {
iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
@@ -455,7 +505,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
@@ -463,6 +513,13 @@ retry:
return 0;
}
+static int coroutine_fn
+iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0);
+}
+
static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
int64_t sector_num, int nb_sectors)
@@ -478,7 +535,8 @@ static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
IscsiLun *iscsilun = bs->opaque;
struct scsi_get_lba_status *lbas = NULL;
@@ -570,6 +628,9 @@ out:
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
}
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ *file = bs;
+ }
return ret;
}
@@ -596,7 +657,8 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
!iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int64_t ret;
int pnum;
- ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
+ BlockDriverState *file;
+ ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file);
if (ret < 0) {
return ret;
}
@@ -644,7 +706,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
return 0;
@@ -655,11 +717,6 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
- if (!iscsilun->force_next_flush) {
- return 0;
- }
- iscsilun->force_next_flush = false;
-
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
@@ -683,7 +740,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
return 0;
@@ -703,7 +760,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
if (status < 0) {
error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
iscsi_get_error(iscsi));
- acb->status = -EIO;
+ acb->status = iscsi_translate_sense(&acb->task->sense);
}
acb->ioh->driver_status = 0;
@@ -726,6 +783,38 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
iscsi_schedule_bh(acb);
}
+static void iscsi_ioctl_bh_completion(void *opaque)
+{
+ IscsiAIOCB *acb = opaque;
+
+ qemu_bh_delete(acb->bh);
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_unref(acb);
+}
+
+static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
+{
+ BlockDriverState *bs = acb->common.bs;
+ IscsiLun *iscsilun = bs->opaque;
+ int ret = 0;
+
+ switch (req) {
+ case SG_GET_VERSION_NUM:
+ *(int *)buf = 30000;
+ break;
+ case SG_GET_SCSI_ID:
+ ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ assert(!acb->bh);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
+ iscsi_ioctl_bh_completion, acb);
+ acb->ret = ret;
+ qemu_bh_schedule(acb->bh);
+}
+
static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
@@ -735,8 +824,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
struct iscsi_data data;
IscsiAIOCB *acb;
- assert(req == SG_IO);
-
acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
acb->iscsilun = iscsilun;
@@ -745,6 +832,11 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb->buf = NULL;
acb->ioh = buf;
+ if (req != SG_IO) {
+ iscsi_ioctl_handle_emulated(acb, req, buf);
+ return &acb->common;
+ }
+
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
@@ -809,38 +901,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return &acb->common;
}
-static void ioctl_cb(void *opaque, int status)
-{
- int *p_status = opaque;
- *p_status = status;
-}
-
-static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- IscsiLun *iscsilun = bs->opaque;
- int status;
-
- switch (req) {
- case SG_GET_VERSION_NUM:
- *(int *)buf = 30000;
- break;
- case SG_GET_SCSI_ID:
- ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
- break;
- case SG_IO:
- status = -EINPROGRESS;
- iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
-
- while (status == -EINPROGRESS) {
- aio_poll(iscsilun->aio_context, true);
- }
-
- return 0;
- default:
- return -1;
- }
- return 0;
-}
#endif
static int64_t
@@ -905,7 +965,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
@@ -956,7 +1016,6 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
}
iscsi_co_init_iscsitask(iscsilun, &iTask);
- iTask.force_next_flush = true;
retry:
if (use_16_for_ws) {
iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
@@ -999,7 +1058,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1018,6 +1077,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
+ const char *secretid;
+ char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
@@ -1037,8 +1098,20 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
return;
}
+ secretid = qemu_opt_get(opts, "password-secret");
password = qemu_opt_get(opts, "password");
- if (!password) {
+ if (secretid && password) {
+ error_setg(errp, "'password' and 'password-secret' properties are "
+ "mutually exclusive");
+ return;
+ }
+ if (secretid) {
+ secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!secret) {
+ return;
+ }
+ password = secret;
+ } else if (!password) {
error_setg(errp, "CHAP username specified but no password was given");
return;
}
@@ -1046,6 +1119,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
error_setg(errp, "Failed to set initiator username and password");
}
+
+ g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
@@ -1186,8 +1261,13 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
iscsilun->lbprz = !!rc16->lbprz;
iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
}
+ break;
}
- break;
+ if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
+ && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+ break;
+ }
+ /* Fall through and try READ CAPACITY(10) instead. */
case TYPE_ROM:
task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
@@ -1213,7 +1293,11 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
&& retries-- > 0);
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
- error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
+ error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
+ } else if (!iscsilun->block_size ||
+ iscsilun->block_size % BDRV_SECTOR_SIZE) {
+ error_setg(errp, "iSCSI: the target returned an invalid "
+ "block size of %d.", iscsilun->block_size);
}
if (task) {
scsi_free_scsi_task(task);
@@ -1276,9 +1360,8 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
- aio_set_fd_handler(iscsilun->aio_context,
- iscsi_get_fd(iscsilun->iscsi),
- NULL, NULL, NULL);
+ aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
+ false, NULL, NULL, NULL);
iscsilun->events = 0;
if (iscsilun->nop_timer) {
@@ -1765,10 +1848,11 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
+ .bdrv_co_writev_flags = iscsi_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
- .bdrv_ioctl = iscsi_ioctl,
.bdrv_aio_ioctl = iscsi_aio_ioctl,
#endif
@@ -1789,6 +1873,11 @@ static QemuOptsList qemu_iscsi_opts = {
.type = QEMU_OPT_STRING,
.help = "password for CHAP authentication to target",
},{
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret providing password for CHAP "
+ "authentication to target",
+ },{
.name = "header-digest",
.type = QEMU_OPT_STRING,
.help = "HeaderDigest setting. "
diff --git a/qemu/block/linux-aio.c b/qemu/block/linux-aio.c
index c991443c5..805757e02 100644
--- a/qemu/block/linux-aio.c
+++ b/qemu/block/linux-aio.c
@@ -7,6 +7,7 @@
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
@@ -287,7 +288,7 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
{
struct qemu_laio_state *s = s_;
- aio_set_event_notifier(old_context, &s->e, NULL);
+ aio_set_event_notifier(old_context, &s->e, false, NULL);
qemu_bh_delete(s->completion_bh);
}
@@ -296,7 +297,8 @@ void laio_attach_aio_context(void *s_, AioContext *new_context)
struct qemu_laio_state *s = s_;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
- aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+ aio_set_event_notifier(new_context, &s->e, false,
+ qemu_laio_completion_cb);
}
void *laio_init(void)
diff --git a/qemu/block/mirror.c b/qemu/block/mirror.c
index fc4d8f561..039f48125 100644
--- a/qemu/block/mirror.c
+++ b/qemu/block/mirror.c
@@ -11,12 +11,16 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
@@ -44,7 +48,6 @@ typedef struct MirrorBlockJob {
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
- int64_t sector_num;
int64_t granularity;
size_t buf_size;
int64_t bdev_length;
@@ -60,6 +63,9 @@ typedef struct MirrorBlockJob {
int sectors_in_flight;
int ret;
bool unmap;
+ bool waiting_for_io;
+ int target_cluster_sectors;
+ int max_iov;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -102,7 +108,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = op->sector_num / sectors_per_chunk;
- nb_chunks = op->nb_sectors / sectors_per_chunk;
+ nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
if (ret >= 0) {
if (s->cow_bitmap) {
@@ -112,13 +118,9 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
}
qemu_iovec_destroy(&op->qiov);
- g_slice_free(MirrorOp, op);
+ g_free(op);
- /* Enter coroutine when it is not sleeping. The coroutine sleeps to
- * rate-limit itself. The coroutine will eventually resume since there is
- * a sleep timeout so don't wake it early.
- */
- if (s->common.busy) {
+ if (s->waiting_for_io) {
qemu_coroutine_enter(s->common.co, NULL);
}
}
@@ -159,107 +161,97 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_write_complete, op);
}
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static inline void mirror_clip_sectors(MirrorBlockJob *s,
+ int64_t sector_num,
+ int *nb_sectors)
{
- BlockDriverState *source = s->common.bs;
- int nb_sectors, sectors_per_chunk, nb_chunks;
- int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
- uint64_t delay_ns = 0;
- MirrorOp *op;
- int pnum;
- int64_t ret;
-
- s->sector_num = hbitmap_iter_next(&s->hbi);
- if (s->sector_num < 0) {
- bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
- s->sector_num = hbitmap_iter_next(&s->hbi);
- trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
- assert(s->sector_num >= 0);
- }
-
- hbitmap_next_sector = s->sector_num;
- sector_num = s->sector_num;
- sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- end = s->bdev_length / BDRV_SECTOR_SIZE;
-
- /* Extend the QEMUIOVector to include all adjacent blocks that will
- * be copied in this operation.
- *
- * We have to do this if we have no backing file yet in the destination,
- * and the cluster size is very large. Then we need to do COW ourselves.
- * The first time a cluster is copied, copy it entirely. Note that,
- * because both the granularity and the cluster size are powers of two,
- * the number of sectors to copy cannot exceed one cluster.
- *
- * We also want to extend the QEMUIOVector to include more adjacent
- * dirty blocks if possible, to limit the number of I/O operations and
- * run efficiently even with a small granularity.
- */
- nb_chunks = 0;
- nb_sectors = 0;
- next_sector = sector_num;
- next_chunk = sector_num / sectors_per_chunk;
+ *nb_sectors = MIN(*nb_sectors,
+ s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
+}
- /* Wait for I/O to this cluster (from a previous iteration) to be done. */
- while (test_bit(next_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
- qemu_coroutine_yield();
+/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
+ * return the offset of the adjusted tail sector against original. */
+static int mirror_cow_align(MirrorBlockJob *s,
+ int64_t *sector_num,
+ int *nb_sectors)
+{
+ bool need_cow;
+ int ret = 0;
+ int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
+ int64_t align_sector_num = *sector_num;
+ int align_nb_sectors = *nb_sectors;
+ int max_sectors = chunk_sectors * s->max_iov;
+
+ need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
+ need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
+ s->cow_bitmap);
+ if (need_cow) {
+ bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+ &align_sector_num, &align_nb_sectors);
+ }
+
+ if (align_nb_sectors > max_sectors) {
+ align_nb_sectors = max_sectors;
+ if (need_cow) {
+ align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
+ s->target_cluster_sectors);
+ }
}
+ /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but
+ * that doesn't matter because it's already the end of source image. */
+ mirror_clip_sectors(s, align_sector_num, &align_nb_sectors);
- do {
- int added_sectors, added_chunks;
-
- if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
- test_bit(next_chunk, s->in_flight_bitmap)) {
- assert(nb_sectors > 0);
- break;
- }
+ ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
+ *sector_num = align_sector_num;
+ *nb_sectors = align_nb_sectors;
+ assert(ret >= 0);
+ return ret;
+}
- added_sectors = sectors_per_chunk;
- if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
- bdrv_round_to_clusters(s->target,
- next_sector, added_sectors,
- &next_sector, &added_sectors);
+static inline void mirror_wait_for_io(MirrorBlockJob *s)
+{
+ assert(!s->waiting_for_io);
+ s->waiting_for_io = true;
+ qemu_coroutine_yield();
+ s->waiting_for_io = false;
+}
- /* On the first iteration, the rounding may make us copy
- * sectors before the first dirty one.
- */
- if (next_sector < sector_num) {
- assert(nb_sectors == 0);
- sector_num = next_sector;
- next_chunk = next_sector / sectors_per_chunk;
- }
- }
+/* Submit async read while handling COW.
+ * Returns: nb_sectors if no alignment is necessary, or
+ * (new_end - sector_num) if tail is rounded up or down due to
+ * alignment or buffer limit.
+ */
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+ int nb_sectors)
+{
+ BlockDriverState *source = s->common.bs;
+ int sectors_per_chunk, nb_chunks;
+ int ret = nb_sectors;
+ MirrorOp *op;
- added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
- added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+ sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- /* When doing COW, it may happen that there is not enough space for
- * a full cluster. Wait if that is the case.
- */
- while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
- trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
- qemu_coroutine_yield();
- }
- if (s->buf_free_count < nb_chunks + added_chunks) {
- trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
- break;
- }
+ /* We can only handle as much as buf_size at a time. */
+ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+ assert(nb_sectors);
- /* We have enough free space to copy these sectors. */
- bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
+ if (s->cow_bitmap) {
+ ret += mirror_cow_align(s, &sector_num, &nb_sectors);
+ }
+ assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
+ /* The sector range must meet granularity because:
+ * 1) Caller passes in aligned values;
+ * 2) mirror_cow_align is used only when target cluster is larger. */
+ assert(!(sector_num % sectors_per_chunk));
+ nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
- nb_sectors += added_sectors;
- nb_chunks += added_chunks;
- next_sector += added_sectors;
- next_chunk += added_chunks;
- if (!s->synced && s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
- }
- } while (delay_ns == 0 && next_sector < end);
+ while (s->buf_free_count < nb_chunks) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ mirror_wait_for_io(s);
+ }
/* Allocate a MirrorOp that is used as an AIO callback. */
- op = g_slice_new(MirrorOp);
+ op = g_new(MirrorOp, 1);
op->s = s;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
@@ -268,47 +260,158 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
- next_sector = sector_num;
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
- size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+ size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-
- /* Advance the HBitmapIter in parallel, so that we do not examine
- * the same sector twice.
- */
- if (next_sector > hbitmap_next_sector
- && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
- hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
- }
-
- next_sector += sectors_per_chunk;
}
- bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
-
/* Copy the dirty cluster. */
s->in_flight++;
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
- ret = bdrv_get_block_status_above(source, NULL, sector_num,
- nb_sectors, &pnum);
- if (ret < 0 || pnum < nb_sectors ||
- (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
- bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
- mirror_read_complete, op);
- } else if (ret & BDRV_BLOCK_ZERO) {
+ bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+ mirror_read_complete, op);
+ return ret;
+}
+
+static void mirror_do_zero_or_discard(MirrorBlockJob *s,
+ int64_t sector_num,
+ int nb_sectors,
+ bool is_discard)
+{
+ MirrorOp *op;
+
+ /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+ * so the freeing in mirror_iteration_done is nop. */
+ op = g_new0(MirrorOp, 1);
+ op->s = s;
+ op->sector_num = sector_num;
+ op->nb_sectors = nb_sectors;
+
+ s->in_flight++;
+ s->sectors_in_flight += nb_sectors;
+ if (is_discard) {
+ bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+ mirror_write_complete, op);
+ } else {
bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
mirror_write_complete, op);
- } else {
- assert(!(ret & BDRV_BLOCK_DATA));
- bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
- mirror_write_complete, op);
+ }
+}
+
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+ BlockDriverState *source = s->common.bs;
+ int64_t sector_num, first_chunk;
+ uint64_t delay_ns = 0;
+ /* At least the first dirty chunk is mirrored in one iteration. */
+ int nb_chunks = 1;
+ int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
+ int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+
+ sector_num = hbitmap_iter_next(&s->hbi);
+ if (sector_num < 0) {
+ bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+ sector_num = hbitmap_iter_next(&s->hbi);
+ trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+ assert(sector_num >= 0);
+ }
+
+ first_chunk = sector_num / sectors_per_chunk;
+ while (test_bit(first_chunk, s->in_flight_bitmap)) {
+ trace_mirror_yield_in_flight(s, first_chunk, s->in_flight);
+ mirror_wait_for_io(s);
+ }
+
+ /* Find the number of consective dirty chunks following the first dirty
+ * one, and wait for in flight requests in them. */
+ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+ int64_t hbitmap_next;
+ int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
+ int64_t next_chunk = next_sector / sectors_per_chunk;
+ if (next_sector >= end ||
+ !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+ break;
+ }
+ if (test_bit(next_chunk, s->in_flight_bitmap)) {
+ break;
+ }
+
+ hbitmap_next = hbitmap_iter_next(&s->hbi);
+ if (hbitmap_next > next_sector || hbitmap_next < 0) {
+ /* The bitmap iterator's cache is stale, refresh it */
+ bdrv_set_dirty_iter(&s->hbi, next_sector);
+ hbitmap_next = hbitmap_iter_next(&s->hbi);
+ }
+ assert(hbitmap_next == next_sector);
+ nb_chunks++;
+ }
+
+ /* Clear dirty bits before querying the block status, because
+ * calling bdrv_get_block_status_above could yield - if some blocks are
+ * marked dirty in this window, we need to know.
+ */
+ bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+ nb_chunks * sectors_per_chunk);
+ bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
+ while (nb_chunks > 0 && sector_num < end) {
+ int ret;
+ int io_sectors;
+ BlockDriverState *file;
+ enum MirrorMethod {
+ MIRROR_METHOD_COPY,
+ MIRROR_METHOD_ZERO,
+ MIRROR_METHOD_DISCARD
+ } mirror_method = MIRROR_METHOD_COPY;
+
+ assert(!(sector_num % sectors_per_chunk));
+ ret = bdrv_get_block_status_above(source, NULL, sector_num,
+ nb_chunks * sectors_per_chunk,
+ &io_sectors, &file);
+ if (ret < 0) {
+ io_sectors = nb_chunks * sectors_per_chunk;
+ }
+
+ io_sectors -= io_sectors % sectors_per_chunk;
+ if (io_sectors < sectors_per_chunk) {
+ io_sectors = sectors_per_chunk;
+ } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
+ int64_t target_sector_num;
+ int target_nb_sectors;
+ bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+ &target_sector_num, &target_nb_sectors);
+ if (target_sector_num == sector_num &&
+ target_nb_sectors == io_sectors) {
+ mirror_method = ret & BDRV_BLOCK_ZERO ?
+ MIRROR_METHOD_ZERO :
+ MIRROR_METHOD_DISCARD;
+ }
+ }
+
+ mirror_clip_sectors(s, sector_num, &io_sectors);
+ switch (mirror_method) {
+ case MIRROR_METHOD_COPY:
+ io_sectors = mirror_do_read(s, sector_num, io_sectors);
+ break;
+ case MIRROR_METHOD_ZERO:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
+ break;
+ case MIRROR_METHOD_DISCARD:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+ break;
+ default:
+ abort();
+ }
+ assert(io_sectors);
+ sector_num += io_sectors;
+ nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
+ delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
}
return delay_ns;
}
@@ -333,7 +436,7 @@ static void mirror_free_init(MirrorBlockJob *s)
static void mirror_drain(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
- qemu_coroutine_yield();
+ mirror_wait_for_io(s);
}
}
@@ -346,6 +449,11 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
+ BlockDriverState *src = s->common.bs;
+
+ /* Make sure that the source BDS doesn't go away before we called
+ * block_job_completed(). */
+ bdrv_ref(src);
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -357,18 +465,22 @@ static void mirror_exit(BlockJob *job, void *opaque)
if (s->to_replace) {
to_replace = s->to_replace;
}
+
+ /* This was checked in mirror_start_job(), but meanwhile one of the
+ * nodes could have been newly attached to a BlockBackend. */
+ if (to_replace->blk && s->target->blk) {
+ error_report("block job: Can't create node with two BlockBackends");
+ data->ret = -EINVAL;
+ goto out;
+ }
+
if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
}
- bdrv_swap(s->target, to_replace);
- if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
- /* drop the bs loop chain formed by the swap: break the loop then
- * trigger the unref from the top one */
- BlockDriverState *p = s->base->backing_hd;
- bdrv_set_backing_hd(s->base, NULL);
- bdrv_unref(p);
- }
+ bdrv_replace_in_backing_chain(to_replace, s->target);
}
+
+out:
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
error_free(s->replace_blocker);
@@ -378,9 +490,15 @@ static void mirror_exit(BlockJob *job, void *opaque)
aio_context_release(replace_aio_context);
}
g_free(s->replaces);
+ bdrv_op_unblock_all(s->target, s->common.blocker);
bdrv_unref(s->target);
block_job_completed(&s->common, data->ret);
g_free(data);
+ bdrv_drained_end(src);
+ if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
+ aio_enable_external(iohandler_get_aio_context());
+ }
+ bdrv_unref(src);
}
static void coroutine_fn mirror_run(void *opaque)
@@ -395,6 +513,7 @@ static void coroutine_fn mirror_run(void *opaque)
checking for a NULL string */
int ret = 0;
int n;
+ int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
@@ -424,16 +543,16 @@ static void coroutine_fn mirror_run(void *opaque)
*/
bdrv_get_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
- if (backing_filename[0] && !s->target->backing_hd) {
- ret = bdrv_get_info(s->target, &bdi);
- if (ret < 0) {
- goto immediate_exit;
- }
- if (s->granularity < bdi.cluster_size) {
- s->buf_size = MAX(s->buf_size, bdi.cluster_size);
- s->cow_bitmap = bitmap_new(length);
- }
+ if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
+ target_cluster_size = bdi.cluster_size;
}
+ if (backing_filename[0] && !s->target->backing
+ && s->granularity < target_cluster_size) {
+ s->buf_size = MAX(s->buf_size, target_cluster_size);
+ s->cow_bitmap = bitmap_new(length);
+ }
+ s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
+ s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -448,6 +567,8 @@ static void coroutine_fn mirror_run(void *opaque)
if (!s->is_none_mode) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base = s->base;
+ bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
+
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
@@ -470,7 +591,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
assert(n > 0);
- if (ret == 1) {
+ if (ret == 1 || mark_all_dirty) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
sector_num += n;
@@ -506,7 +627,7 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
- qemu_coroutine_yield();
+ mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
@@ -549,7 +670,7 @@ static void coroutine_fn mirror_run(void *opaque)
* mirror_populate runs.
*/
trace_mirror_before_drain(s, cnt);
- bdrv_drain(bs);
+ bdrv_co_drain(bs);
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
}
@@ -589,10 +710,21 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
- bdrv_iostatus_disable(s->target);
+ if (s->target->blk) {
+ blk_iostatus_disable(s->target->blk);
+ }
data = g_malloc(sizeof(*data));
data->ret = ret;
+ /* Before we switch to target in mirror_exit, make sure data doesn't
+ * change. */
+ bdrv_drained_begin(s->common.bs);
+ if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
+ /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
+ * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
+ * need a block layer API change to achieve this. */
+ aio_disable_external(iohandler_get_aio_context());
+ }
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
@@ -611,7 +743,9 @@ static void mirror_iostatus_reset(BlockJob *job)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- bdrv_iostatus_reset(s->target);
+ if (s->target->blk) {
+ blk_iostatus_reset(s->target->blk);
+ }
}
static void mirror_complete(BlockJob *job, Error **errp)
@@ -620,14 +754,13 @@ static void mirror_complete(BlockJob *job, Error **errp)
Error *local_err = NULL;
int ret;
- ret = bdrv_open_backing_file(s->target, NULL, &local_err);
+ ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return;
}
if (!s->synced) {
- error_setg(errp, QERR_BLOCK_JOB_NOT_READY,
- bdrv_get_device_name(job->bs));
+ error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
return;
}
@@ -635,9 +768,9 @@ static void mirror_complete(BlockJob *job, Error **errp)
if (s->replaces) {
AioContext *replace_aio_context;
- s->to_replace = check_to_replace_node(s->replaces, &local_err);
+ s->to_replace = bdrv_find_node(s->replaces);
if (!s->to_replace) {
- error_propagate(errp, local_err);
+ error_setg(errp, "Node name '%s' not found", s->replaces);
return;
}
@@ -686,6 +819,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
bool is_none_mode, BlockDriverState *base)
{
MirrorBlockJob *s;
+ BlockDriverState *replaced_bs;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -695,7 +829,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
@@ -709,6 +843,21 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
+ /* We can't support this case as long as the block layer can't handle
+ * multiple BlockBackends per BlockDriverState. */
+ if (replaces) {
+ replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
+ if (replaced_bs == NULL) {
+ return;
+ }
+ } else {
+ replaced_bs = bs;
+ }
+ if (replaced_bs->blk && target->blk) {
+ error_setg(errp, "Can't create node with two BlockBackends");
+ return;
+ }
+
s = block_job_create(driver, bs, speed, cb, opaque, errp);
if (!s) {
return;
@@ -727,12 +876,16 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
g_free(s->replaces);
- block_job_release(bs);
+ block_job_unref(&s->common);
return;
}
- bdrv_set_enable_write_cache(s->target, true);
- bdrv_set_on_error(s->target, on_target_error, on_target_error);
- bdrv_iostatus_enable(s->target);
+
+ bdrv_op_block_all(s->target, s->common.blocker);
+
+ if (s->target->blk) {
+ blk_set_on_error(s->target->blk, on_target_error, on_target_error);
+ blk_iostatus_enable(s->target->blk);
+ }
s->common.co = qemu_coroutine_create(mirror_run);
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
@@ -755,7 +908,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
return;
}
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
- base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
+ base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
mirror_start_job(bs, target, replaces,
speed, granularity, buf_size,
on_source_error, on_target_error, unmap, cb, opaque, errp,
diff --git a/qemu/block/nbd-client.c b/qemu/block/nbd-client.c
index e1bb9198c..878e879ac 100644
--- a/qemu/block/nbd-client.c
+++ b/qemu/block/nbd-client.c
@@ -26,8 +26,8 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "nbd-client.h"
-#include "qemu/sockets.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
@@ -47,13 +47,21 @@ static void nbd_teardown_connection(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
+ if (!client->ioc) { /* Already closed */
+ return;
+ }
+
/* finish any pending coroutines */
- shutdown(client->sock, 2);
+ qio_channel_shutdown(client->ioc,
+ QIO_CHANNEL_SHUTDOWN_BOTH,
+ NULL);
nbd_recv_coroutines_enter_all(client);
nbd_client_detach_aio_context(bs);
- closesocket(client->sock);
- client->sock = -1;
+ object_unref(OBJECT(client->sioc));
+ client->sioc = NULL;
+ object_unref(OBJECT(client->ioc));
+ client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
@@ -63,12 +71,16 @@ static void nbd_reply_ready(void *opaque)
uint64_t i;
int ret;
+ if (!s->ioc) { /* Already closed */
+ return;
+ }
+
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
- ret = nbd_receive_reply(s->sock, &s->reply);
+ ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
@@ -119,32 +131,36 @@ static int nbd_co_send_request(BlockDriverState *bs,
}
}
+ g_assert(qemu_in_coroutine());
assert(i < MAX_NBD_REQUESTS);
request->handle = INDEX_TO_HANDLE(s, i);
+
+ if (!s->ioc) {
+ qemu_co_mutex_unlock(&s->send_mutex);
+ return -EPIPE;
+ }
+
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
- aio_set_fd_handler(aio_context, s->sock,
+ aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, nbd_restart_write, bs);
if (qiov) {
- if (!s->is_unix) {
- socket_set_cork(s->sock, 1);
- }
- rc = nbd_send_request(s->sock, request);
+ qio_channel_set_cork(s->ioc, true);
+ rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
- ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
- offset, request->len);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
+ offset, request->len, 0);
if (ret != request->len) {
rc = -EIO;
}
}
- if (!s->is_unix) {
- socket_set_cork(s->sock, 0);
- }
+ qio_channel_set_cork(s->ioc, false);
} else {
- rc = nbd_send_request(s->sock, request);
+ rc = nbd_send_request(s->ioc, request);
}
- aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
+ aio_set_fd_handler(aio_context, s->sioc->fd, false,
+ nbd_reply_ready, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -160,12 +176,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
- if (reply->handle != request->handle) {
+ if (reply->handle != request->handle ||
+ !s->ioc) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
- ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
- offset, request->len);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
+ offset, request->len, 1);
if (ret != request->len) {
reply->error = EIO;
}
@@ -226,15 +243,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
- int offset)
+ int offset, int *flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_reply reply;
ssize_t ret;
- if (!bdrv_enable_write_cache(bs) &&
- (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+ if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+ *flags &= ~BDRV_REQ_FUA;
request.type |= NBD_CMD_FLAG_FUA;
}
@@ -274,12 +291,13 @@ int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
}
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+ int nb_sectors, QEMUIOVector *qiov, int *flags)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
- ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+ ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
+ flags);
if (ret < 0) {
return ret;
}
@@ -287,7 +305,7 @@ int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
- return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+ return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
}
int nbd_client_co_flush(BlockDriverState *bs)
@@ -301,10 +319,6 @@ int nbd_client_co_flush(BlockDriverState *bs)
return 0;
}
- if (client->nbdflags & NBD_FLAG_SEND_FUA) {
- request.type |= NBD_CMD_FLAG_FUA;
- }
-
request.from = 0;
request.len = 0;
@@ -348,14 +362,15 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
- nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
+ nbd_get_client_session(bs)->sioc->fd,
+ false, NULL, NULL, NULL);
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
- aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
- nbd_reply_ready, NULL, bs);
+ aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
+ false, nbd_reply_ready, NULL, bs);
}
void nbd_client_close(BlockDriverState *bs)
@@ -367,16 +382,20 @@ void nbd_client_close(BlockDriverState *bs)
.len = 0
};
- if (client->sock == -1) {
+ if (client->ioc == NULL) {
return;
}
- nbd_send_request(client->sock, &request);
+ nbd_send_request(client->ioc, &request);
nbd_teardown_connection(bs);
}
-int nbd_client_init(BlockDriverState *bs, int sock, const char *export,
+int nbd_client_init(BlockDriverState *bs,
+ QIOChannelSocket *sioc,
+ const char *export,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname,
Error **errp)
{
NbdClientSession *client = nbd_get_client_session(bs);
@@ -384,22 +403,32 @@ int nbd_client_init(BlockDriverState *bs, int sock, const char *export,
/* NBD handshake */
logout("session init %s\n", export);
- qemu_set_block(sock);
- ret = nbd_receive_negotiate(sock, export,
- &client->nbdflags, &client->size, errp);
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
+
+ ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
+ &client->nbdflags,
+ tlscreds, hostname,
+ &client->ioc,
+ &client->size, errp);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
- closesocket(sock);
return ret;
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
- client->sock = sock;
+ client->sioc = sioc;
+ object_ref(OBJECT(client->sioc));
+
+ if (!client->ioc) {
+ client->ioc = QIO_CHANNEL(sioc);
+ object_ref(OBJECT(client->ioc));
+ }
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
- qemu_set_nonblock(sock);
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
+
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");
diff --git a/qemu/block/nbd-client.h b/qemu/block/nbd-client.h
index e8413408b..bc7aec079 100644
--- a/qemu/block/nbd-client.h
+++ b/qemu/block/nbd-client.h
@@ -4,6 +4,7 @@
#include "qemu-common.h"
#include "block/nbd.h"
#include "block/block_int.h"
+#include "io/channel-socket.h"
/* #define DEBUG_NBD */
@@ -17,7 +18,8 @@
#define MAX_NBD_REQUESTS 16
typedef struct NbdClientSession {
- int sock;
+ QIOChannelSocket *sioc; /* The master data channel */
+ QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint32_t nbdflags;
off_t size;
@@ -34,7 +36,11 @@ typedef struct NbdClientSession {
NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
-int nbd_client_init(BlockDriverState *bs, int sock, const char *export_name,
+int nbd_client_init(BlockDriverState *bs,
+ QIOChannelSocket *sock,
+ const char *export_name,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname,
Error **errp);
void nbd_client_close(BlockDriverState *bs);
@@ -42,7 +48,7 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
+ int nb_sectors, QEMUIOVector *qiov, int *flags);
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
diff --git a/qemu/block/nbd.c b/qemu/block/nbd.c
index 217618612..f7ea3b360 100644
--- a/qemu/block/nbd.c
+++ b/qemu/block/nbd.c
@@ -26,24 +26,22 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/nbd-client.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
#include "block/block_int.h"
#include "qemu/module.h"
-#include "qemu/sockets.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
-
-#include <sys/types.h>
-#include <unistd.h>
+#include "qemu/cutils.h"
#define EN_OPTSTR ":exportname="
typedef struct BDRVNBDState {
NbdClientSession client;
- QemuOpts *socket_opts;
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -190,10 +188,10 @@ out:
g_free(file);
}
-static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
- Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
+ Error **errp)
{
- Error *local_err = NULL;
+ SocketAddress *saddr;
if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
if (qdict_haskey(options, "path")) {
@@ -201,28 +199,39 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
} else {
error_setg(errp, "one of path and host must be specified.");
}
- return;
+ return NULL;
}
- s->client.is_unix = qdict_haskey(options, "path");
- s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
- &error_abort);
+ saddr = g_new0(SocketAddress, 1);
- qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
+ if (qdict_haskey(options, "path")) {
+ UnixSocketAddress *q_unix;
+ saddr->type = SOCKET_ADDRESS_KIND_UNIX;
+ q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+ q_unix->path = g_strdup(qdict_get_str(options, "path"));
+ qdict_del(options, "path");
+ } else {
+ InetSocketAddress *inet;
+ saddr->type = SOCKET_ADDRESS_KIND_INET;
+ inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
+ inet->host = g_strdup(qdict_get_str(options, "host"));
+ if (!qdict_get_try_str(options, "port")) {
+ inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+ } else {
+ inet->port = g_strdup(qdict_get_str(options, "port"));
+ }
+ qdict_del(options, "host");
+ qdict_del(options, "port");
}
- if (!qemu_opt_get(s->socket_opts, "port")) {
- qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT,
- &error_abort);
- }
+ s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {
qdict_del(options, "export");
}
+
+ return saddr;
}
NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
@@ -231,57 +240,113 @@ NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
return &s->client;
}
-static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
+static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+ Error **errp)
{
- BDRVNBDState *s = bs->opaque;
- int sock;
+ QIOChannelSocket *sioc;
+ Error *local_err = NULL;
- if (s->client.is_unix) {
- sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
- } else {
- sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
- if (sock >= 0) {
- socket_set_nodelay(sock);
- }
+ sioc = qio_channel_socket_new();
+
+ qio_channel_socket_connect_sync(sioc,
+ saddr,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return NULL;
}
- /* Failed to establish connection */
- if (sock < 0) {
- logout("Failed to establish connection to NBD server\n");
- return -EIO;
+ qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+
+ return sioc;
+}
+
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
+{
+ Object *obj;
+ QCryptoTLSCreds *creds;
+
+ obj = object_resolve_path_component(
+ object_get_objects_root(), id);
+ if (!obj) {
+ error_setg(errp, "No TLS credentials with id '%s'",
+ id);
+ return NULL;
+ }
+ creds = (QCryptoTLSCreds *)
+ object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+ if (!creds) {
+ error_setg(errp, "Object with id '%s' is not TLS credentials",
+ id);
+ return NULL;
}
- return sock;
+ if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+ error_setg(errp,
+ "Expecting TLS credentials with a client endpoint");
+ return NULL;
+ }
+ object_ref(obj);
+ return creds;
}
+
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVNBDState *s = bs->opaque;
char *export = NULL;
- int result, sock;
- Error *local_err = NULL;
+ QIOChannelSocket *sioc = NULL;
+ SocketAddress *saddr;
+ const char *tlscredsid;
+ QCryptoTLSCreds *tlscreds = NULL;
+ const char *hostname = NULL;
+ int ret = -EINVAL;
/* Pop the config into our state object. Exit if invalid. */
- nbd_config(s, options, &export, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return -EINVAL;
+ saddr = nbd_config(s, options, &export, errp);
+ if (!saddr) {
+ goto error;
+ }
+
+ tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
+ if (tlscredsid) {
+ qdict_del(options, "tls-creds");
+ tlscreds = nbd_get_tls_creds(tlscredsid, errp);
+ if (!tlscreds) {
+ goto error;
+ }
+
+ if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
+ error_setg(errp, "TLS only supported over IP sockets");
+ goto error;
+ }
+ hostname = saddr->u.inet.data->host;
}
/* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
- sock = nbd_establish_connection(bs, errp);
- if (sock < 0) {
- g_free(export);
- return sock;
+ sioc = nbd_establish_connection(saddr, errp);
+ if (!sioc) {
+ ret = -ECONNREFUSED;
+ goto error;
}
/* NBD handshake */
- result = nbd_client_init(bs, sock, export, errp);
+ ret = nbd_client_init(bs, sioc, export,
+ tlscreds, hostname, errp);
+ error:
+ if (sioc) {
+ object_unref(OBJECT(sioc));
+ }
+ if (tlscreds) {
+ object_unref(OBJECT(tlscreds));
+ }
+ qapi_free_SocketAddress(saddr);
g_free(export);
- return result;
+ return ret;
}
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
@@ -290,10 +355,29 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
}
+static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov, int flags)
+{
+ int ret;
+
+ ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* The flag wasn't sent to the server, so we need to emulate it with an
+ * explicit flush */
+ if (flags & BDRV_REQ_FUA) {
+ ret = nbd_client_co_flush(bs);
+ }
+
+ return ret;
+}
+
static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov);
+ return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
}
static int nbd_co_flush(BlockDriverState *bs)
@@ -315,9 +399,6 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
static void nbd_close(BlockDriverState *bs)
{
- BDRVNBDState *s = bs->opaque;
-
- qemu_opts_del(s->socket_opts);
nbd_client_close(bs);
}
@@ -339,13 +420,14 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
nbd_client_attach_aio_context(bs, new_context);
}
-static void nbd_refresh_filename(BlockDriverState *bs)
+static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
{
QDict *opts = qdict_new();
- const char *path = qdict_get_try_str(bs->options, "path");
- const char *host = qdict_get_try_str(bs->options, "host");
- const char *port = qdict_get_try_str(bs->options, "port");
- const char *export = qdict_get_try_str(bs->options, "export");
+ const char *path = qdict_get_try_str(options, "path");
+ const char *host = qdict_get_try_str(options, "host");
+ const char *port = qdict_get_try_str(options, "port");
+ const char *export = qdict_get_try_str(options, "export");
+ const char *tlscreds = qdict_get_try_str(options, "tls-creds");
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
@@ -380,6 +462,9 @@ static void nbd_refresh_filename(BlockDriverState *bs)
if (export) {
qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
}
+ if (tlscreds) {
+ qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
+ }
bs->full_open_options = opts;
}
@@ -392,6 +477,8 @@ static BlockDriver bdrv_nbd = {
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_writev_flags = nbd_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
@@ -410,6 +497,8 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_writev_flags = nbd_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
@@ -428,6 +517,8 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_writev_flags = nbd_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
diff --git a/qemu/block/nfs.c b/qemu/block/nfs.c
index c026ff688..9f51cc3f1 100644
--- a/qemu/block/nfs.c
+++ b/qemu/block/nfs.c
@@ -22,20 +22,23 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/uri.h"
+#include "qemu/cutils.h"
#include "sysemu/sysemu.h"
#include <nfsc/libnfs.h>
#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
+#define QEMU_NFS_MAX_DEBUG_LEVEL 2
typedef struct NFSClient {
struct nfs_context *context;
@@ -43,6 +46,7 @@ typedef struct NFSClient {
int events;
bool has_zero_init;
AioContext *aio_context;
+ blkcnt_t st_blocks;
} NFSClient;
typedef struct NFSRPC {
@@ -62,11 +66,10 @@ static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false,
(ev & POLLIN) ? nfs_process_read : NULL,
- (ev & POLLOUT) ? nfs_process_write : NULL,
- client);
+ (ev & POLLOUT) ? nfs_process_write : NULL, client);
}
client->events = ev;
@@ -241,9 +244,8 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
- NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false, NULL, NULL, NULL);
client->events = 0;
}
@@ -262,9 +264,8 @@ static void nfs_client_close(NFSClient *client)
if (client->fh) {
nfs_close(client->context, client->fh);
}
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
- NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false, NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
@@ -336,6 +337,17 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
}
nfs_set_readahead(client->context, val);
#endif
+#ifdef LIBNFS_FEATURE_DEBUG
+ } else if (!strcmp(qp->p[i].name, "debug")) {
+ /* limit the maximum debug level to avoid potential flooding
+ * of our log files. */
+ if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
+ error_report("NFS Warning: Limiting NFS debug level"
+ " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+ val = QEMU_NFS_MAX_DEBUG_LEVEL;
+ }
+ nfs_set_debug(client->context, val);
+#endif
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
qp->p[i].name);
@@ -374,6 +386,7 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
}
ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+ client->st_blocks = st.st_blocks;
client->has_zero_init = S_ISREG(st.st_mode);
goto out;
fail:
@@ -464,6 +477,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
NFSRPC task = {0};
struct stat st;
+ if (bdrv_is_read_only(bs) &&
+ !(bs->open_flags & BDRV_O_NOCACHE)) {
+ return client->st_blocks * 512;
+ }
+
task.st = &st;
if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
@@ -475,7 +493,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
aio_poll(client->aio_context, true);
}
- return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
+ return (task.ret < 0 ? task.ret : st.st_blocks * 512);
}
static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
@@ -484,6 +502,34 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
return nfs_ftruncate(client->context, client->fh, offset);
}
+/* Note that this will not re-establish a connection with the NFS server
+ * - it is effectively a NOP. */
+static int nfs_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ NFSClient *client = state->bs->opaque;
+ struct stat st;
+ int ret = 0;
+
+ if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
+ error_setg(errp, "Cannot open a read-only mount as read-write");
+ return -EACCES;
+ }
+
+ /* Update cache for read-only reopens */
+ if (!(state->flags & BDRV_O_RDWR)) {
+ ret = nfs_fstat(client->context, client->fh, &st);
+ if (ret < 0) {
+ error_setg(errp, "Failed to fstat file: %s",
+ nfs_get_error(client->context));
+ return ret;
+ }
+ client->st_blocks = st.st_blocks;
+ }
+
+ return 0;
+}
+
static BlockDriver bdrv_nfs = {
.format_name = "nfs",
.protocol_name = "nfs",
@@ -499,6 +545,7 @@ static BlockDriver bdrv_nfs = {
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
.bdrv_create = nfs_file_create,
+ .bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
diff --git a/qemu/block/null.c b/qemu/block/null.c
index 7d083233f..396500bab 100644
--- a/qemu/block/null.c
+++ b/qemu/block/null.c
@@ -10,13 +10,17 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#define NULL_OPT_LATENCY "latency-ns"
+#define NULL_OPT_ZEROES "read-zeroes"
typedef struct {
int64_t length;
int64_t latency_ns;
+ bool read_zeroes;
} BDRVNullState;
static QemuOptsList runtime_opts = {
@@ -39,6 +43,11 @@ static QemuOptsList runtime_opts = {
.help = "nanoseconds (approximated) to wait "
"before completing request",
},
+ {
+ .name = NULL_OPT_ZEROES,
+ .type = QEMU_OPT_BOOL,
+ .help = "return zeroes when read",
+ },
{ /* end of list */ }
},
};
@@ -60,6 +69,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
error_setg(errp, "latency-ns is invalid");
ret = -EINVAL;
}
+ s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
qemu_opts_del(opts);
return ret;
}
@@ -89,6 +99,12 @@ static coroutine_fn int null_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
{
+ BDRVNullState *s = bs->opaque;
+
+ if (s->read_zeroes) {
+ qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
return null_co_common(bs);
}
@@ -158,6 +174,12 @@ static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
+ BDRVNullState *s = bs->opaque;
+
+ if (s->read_zeroes) {
+ qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
return null_aio_common(bs, cb, opaque);
}
@@ -183,6 +205,24 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
+static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
+{
+ BDRVNullState *s = bs->opaque;
+ off_t start = sector_num * BDRV_SECTOR_SIZE;
+
+ *pnum = nb_sectors;
+ *file = bs;
+
+ if (s->read_zeroes) {
+ return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
+ } else {
+ return BDRV_BLOCK_OFFSET_VALID | start;
+ }
+}
+
static BlockDriver bdrv_null_co = {
.format_name = "null-co",
.protocol_name = "null-co",
@@ -196,6 +236,8 @@ static BlockDriver bdrv_null_co = {
.bdrv_co_writev = null_co_writev,
.bdrv_co_flush_to_disk = null_co_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
+
+ .bdrv_co_get_block_status = null_co_get_block_status,
};
static BlockDriver bdrv_null_aio = {
@@ -211,6 +253,8 @@ static BlockDriver bdrv_null_aio = {
.bdrv_aio_writev = null_aio_writev,
.bdrv_aio_flush = null_aio_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
+
+ .bdrv_co_get_block_status = null_co_get_block_status,
};
static void bdrv_null_init(void)
diff --git a/qemu/block/parallels.c b/qemu/block/parallels.c
index 046b56844..324ed43ac 100644
--- a/qemu/block/parallels.c
+++ b/qemu/block/parallels.c
@@ -27,8 +27,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/bitmap.h"
#include "qapi/util.h"
@@ -61,7 +64,7 @@ typedef struct ParallelsHeader {
typedef enum ParallelsPreallocMode {
PRL_PREALLOC_MODE_FALLOCATE = 0,
PRL_PREALLOC_MODE_TRUNCATE = 1,
- PRL_PREALLOC_MODE_MAX = 2,
+ PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;
static const char *prealloc_mode_lookup[] = {
@@ -202,13 +205,13 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
space = to_allocate * s->tracks;
- if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
+ if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
int ret;
space += s->prealloc_size;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
- ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
+ ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
} else {
- ret = bdrv_truncate(bs->file,
+ ret = bdrv_truncate(bs->file->bs,
(s->data_end + space) << BDRV_SECTOR_BITS);
}
if (ret < 0) {
@@ -220,7 +223,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
s->data_end += s->tracks;
bitmap_set(s->bat_dirty_bmap,
- bat_entry_off(idx) / s->bat_dirty_block, 1);
+ bat_entry_off(idx + i) / s->bat_dirty_block, 1);
}
return bat2sect(s, idx) + sector_num % s->tracks;
@@ -244,7 +247,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
if (off + to_write > s->header_size) {
to_write = s->header_size - off;
}
- ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write);
+ ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+ to_write);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -259,7 +263,7 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVParallelsState *s = bs->opaque;
int64_t offset;
@@ -272,6 +276,7 @@ static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
return 0;
}
+ *file = bs->file->bs;
return (offset << BDRV_SECTOR_BITS) |
BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
@@ -303,7 +308,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
- ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
+ ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -343,7 +348,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
- ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
+ ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -369,7 +374,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
bool flush_bat = false;
int cluster_size = s->tracks << BDRV_SECTOR_BITS;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -424,7 +429,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
}
if (flush_bat) {
- ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
+ ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -440,7 +445,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
size - res->image_end_offset);
res->leaks += count;
if (fix & BDRV_FIX_LEAKS) {
- ret = bdrv_truncate(bs->file, res->image_end_offset);
+ ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -458,7 +463,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size, cl_size;
uint8_t tmp[BDRV_SECTOR_SIZE];
Error *local_err = NULL;
- BlockDriverState *file;
+ BlockBackend *file;
uint32_t bat_entries, bat_sectors;
ParallelsHeader header;
int ret;
@@ -474,14 +479,16 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
return ret;
}
- file = NULL;
- ret = bdrv_open(&file, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
- if (ret < 0) {
+ file = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (file == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
- ret = bdrv_truncate(file, 0);
+
+ blk_set_allow_write_beyond_eof(file, true);
+
+ ret = blk_truncate(file, 0);
if (ret < 0) {
goto exit;
}
@@ -505,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memset(tmp, 0, sizeof(tmp));
memcpy(tmp, &header, sizeof(header));
- ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
- ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
+ ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
if (ret < 0) {
goto exit;
}
ret = 0;
done:
- bdrv_unref(file);
+ blk_unref(file);
return ret;
exit:
@@ -546,12 +553,13 @@ static int parallels_probe(const uint8_t *buf, int buf_size,
static int parallels_update_header(BlockDriverState *bs)
{
BDRVParallelsState *s = bs->opaque;
- unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader));
+ unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
+ sizeof(ParallelsHeader));
if (size > s->header_size) {
size = s->header_size;
}
- return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+ return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
}
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -564,7 +572,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
- ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
+ ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
}
@@ -603,8 +611,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
}
size = bat_entry_off(s->bat_size);
- s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file));
- s->header = qemu_try_blockalign(bs->file, s->header_size);
+ s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
+ s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
if (s->header == NULL) {
ret = -ENOMEM;
goto fail;
@@ -619,7 +627,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->header_size = size;
}
- ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
+ ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
if (ret < 0) {
goto fail;
}
@@ -658,13 +666,13 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
- PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
+ PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
g_free(buf);
if (local_err != NULL) {
goto fail_options;
}
- if (!bdrv_has_zero_init(bs->file) ||
- bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) {
+ if (!bdrv_has_zero_init(bs->file->bs) ||
+ bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
@@ -707,7 +715,7 @@ static void parallels_close(BlockDriverState *bs)
}
if (bs->open_flags & BDRV_O_RDWR) {
- bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+ bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
}
g_free(s->bat_dirty_bmap);
diff --git a/qemu/block/qapi.c b/qemu/block/qapi.c
index 2ce509711..c5f6ba643 100644
--- a/qemu/block/qapi.c
+++ b/qemu/block/qapi.c
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/qapi.h"
#include "block/block_int.h"
#include "block/throttle-groups.h"
@@ -31,8 +32,10 @@
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
+BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
+ BlockDriverState *bs, Error **errp)
{
ImageInfo **p_image_info;
BlockDriverState *bs0;
@@ -46,7 +49,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->cache = g_new(BlockdevCacheInfo, 1);
*info->cache = (BlockdevCacheInfo) {
- .writeback = bdrv_enable_write_cache(bs),
+ .writeback = blk ? blk_enable_write_cache(blk) : true,
.direct = !!(bs->open_flags & BDRV_O_NOCACHE),
.no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH),
};
@@ -64,7 +67,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
info->detect_zeroes = bs->detect_zeroes;
- if (bs->io_limits_enabled) {
+ if (bs->throttle_state) {
ThrottleConfig cfg;
throttle_group_get_config(bs, &cfg);
@@ -91,6 +94,26 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
+ info->has_bps_max_length = info->has_bps_max;
+ info->bps_max_length =
+ cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
+ info->has_bps_rd_max_length = info->has_bps_rd_max;
+ info->bps_rd_max_length =
+ cfg.buckets[THROTTLE_BPS_READ].burst_length;
+ info->has_bps_wr_max_length = info->has_bps_wr_max;
+ info->bps_wr_max_length =
+ cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
+
+ info->has_iops_max_length = info->has_iops_max;
+ info->iops_max_length =
+ cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
+ info->has_iops_rd_max_length = info->has_iops_rd_max;
+ info->iops_rd_max_length =
+ cfg.buckets[THROTTLE_OPS_READ].burst_length;
+ info->has_iops_wr_max_length = info->has_iops_wr_max;
+ info->iops_wr_max_length =
+ cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
+
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
@@ -110,8 +133,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
qapi_free_BlockDeviceInfo(info);
return NULL;
}
- if (bs0->drv && bs0->backing_hd) {
- bs0 = bs0->backing_hd;
+ if (bs0->drv && bs0->backing) {
+ bs0 = bs0->backing->bs;
(*p_image_info)->has_backing_image = true;
p_image_info = &((*p_image_info)->backing_image);
} else {
@@ -210,11 +233,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
Error *err = NULL;
ImageInfo *info;
+ aio_context_acquire(bdrv_get_aio_context(bs));
+
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Can't get size of device '%s'",
bdrv_get_device_name(bs));
- return;
+ goto out;
}
info = g_new0(ImageInfo, 1);
@@ -245,15 +270,18 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->has_backing_filename = true;
bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err);
if (err) {
- error_propagate(errp, err);
- qapi_free_ImageInfo(info);
+ /* Can't reconstruct the full backing filename, so we must omit
+ * this field and apply a Best Effort to this query. */
g_free(backing_filename2);
- return;
+ backing_filename2 = NULL;
+ error_free(err);
+ err = NULL;
}
- if (strcmp(backing_filename, backing_filename2) != 0) {
- info->full_backing_filename =
- g_strdup(backing_filename2);
+ /* Always report the full_backing_filename if present, even if it's the
+ * same as backing_filename. That they are same is useful info. */
+ if (backing_filename2) {
+ info->full_backing_filename = g_strdup(backing_filename2);
info->has_full_backing_filename = true;
}
@@ -279,10 +307,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
default:
error_propagate(errp, err);
qapi_free_ImageInfo(info);
- return;
+ goto out;
}
*p_info = info;
+
+out:
+ aio_context_release(bdrv_get_aio_context(bs));
}
/* @p_info will be set only on success. */
@@ -296,24 +327,24 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
info->locked = blk_dev_is_medium_locked(blk);
info->removable = blk_dev_has_removable_media(blk);
- if (blk_dev_has_removable_media(blk)) {
+ if (blk_dev_has_tray(blk)) {
info->has_tray_open = true;
info->tray_open = blk_dev_is_tray_open(blk);
}
- if (bdrv_iostatus_is_enabled(bs)) {
+ if (blk_iostatus_is_enabled(blk)) {
info->has_io_status = true;
- info->io_status = bs->iostatus;
+ info->io_status = blk_iostatus(blk);
}
- if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+ if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
info->has_dirty_bitmaps = true;
info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
}
- if (bs->drv) {
+ if (bs && bs->drv) {
info->has_inserted = true;
- info->inserted = bdrv_block_device_info(bs, errp);
+ info->inserted = bdrv_block_device_info(blk, bs, errp);
if (info->inserted == NULL) {
goto err;
}
@@ -326,45 +357,115 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
qapi_free_BlockInfo(info);
}
-static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
- bool query_backing)
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing);
+
+static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
{
- BlockStats *s;
+ BlockAcctStats *stats = blk_get_stats(blk);
+ BlockAcctTimedStats *ts = NULL;
- s = g_malloc0(sizeof(*s));
+ ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+ ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+ ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+ ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
- if (bdrv_get_device_name(bs)[0]) {
- s->has_device = true;
- s->device = g_strdup(bdrv_get_device_name(bs));
+ ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+ ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+ ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+ ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+ ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+ ds->invalid_flush_operations =
+ stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+ ds->rd_merged = stats->merged[BLOCK_ACCT_READ];
+ ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+ ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+ ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+ ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+ ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+ ds->has_idle_time_ns = stats->last_access_time_ns > 0;
+ if (ds->has_idle_time_ns) {
+ ds->idle_time_ns = block_acct_idle_time_ns(stats);
}
+ ds->account_invalid = stats->account_invalid;
+ ds->account_failed = stats->account_failed;
+
+ while ((ts = block_acct_interval_next(stats, ts))) {
+ BlockDeviceTimedStatsList *timed_stats =
+ g_malloc0(sizeof(*timed_stats));
+ BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+ timed_stats->next = ds->timed_stats;
+ timed_stats->value = dev_stats;
+ ds->timed_stats = timed_stats;
+
+ TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+ TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+ TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+
+ dev_stats->interval_length = ts->interval_length;
+
+ dev_stats->min_rd_latency_ns = timed_average_min(rd);
+ dev_stats->max_rd_latency_ns = timed_average_max(rd);
+ dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+
+ dev_stats->min_wr_latency_ns = timed_average_min(wr);
+ dev_stats->max_wr_latency_ns = timed_average_max(wr);
+ dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+
+ dev_stats->min_flush_latency_ns = timed_average_min(fl);
+ dev_stats->max_flush_latency_ns = timed_average_max(fl);
+ dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+
+ dev_stats->avg_rd_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+ dev_stats->avg_wr_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+ }
+}
+
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+ bool query_backing)
+{
if (bdrv_get_node_name(bs)[0]) {
s->has_node_name = true;
s->node_name = g_strdup(bdrv_get_node_name(bs));
}
- s->stats = g_malloc0(sizeof(*s->stats));
- s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
- s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
- s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
- s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
- s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
- s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
- s->stats->wr_highest_offset =
- bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
- s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
- s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
- s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
- s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+ s->stats->wr_highest_offset = bs->wr_highest_offset;
if (bs->file) {
s->has_parent = true;
- s->parent = bdrv_query_stats(bs->file, query_backing);
+ s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
}
- if (query_backing && bs->backing_hd) {
+ if (query_backing && bs->backing) {
s->has_backing = true;
- s->backing = bdrv_query_stats(bs->backing_hd, query_backing);
+ s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+ }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing)
+{
+ BlockStats *s;
+
+ s = g_malloc0(sizeof(*s));
+ s->stats = g_malloc0(sizeof(*s->stats));
+
+ if (blk) {
+ s->has_device = true;
+ s->device = g_strdup(blk_name(blk));
+ bdrv_query_blk_stats(s->stats, blk);
+ }
+ if (bs) {
+ bdrv_query_bds_stats(s, bs, query_backing);
}
return s;
@@ -381,7 +482,9 @@ BlockInfoList *qmp_query_block(Error **errp)
bdrv_query_info(blk, &info->value, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- goto err;
+ g_free(info);
+ qapi_free_BlockInfoList(head);
+ return NULL;
}
*p_next = info;
@@ -389,10 +492,20 @@ BlockInfoList *qmp_query_block(Error **errp)
}
return head;
+}
- err:
- qapi_free_BlockInfoList(head);
- return NULL;
+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+ bool query_nodes)
+{
+ if (query_nodes) {
+ *bs = bdrv_next_node(*bs);
+ return !!*bs;
+ }
+
+ *blk = blk_next(*blk);
+ *bs = *blk ? blk_bs(*blk) : NULL;
+
+ return !!*blk;
}
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
@@ -400,17 +513,19 @@ BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
+ BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
/* Just to be safe if query_nodes is not always initialized */
query_nodes = has_query_nodes && query_nodes;
- while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
+ while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
- AioContext *ctx = bdrv_get_aio_context(bs);
+ AioContext *ctx = blk ? blk_get_aio_context(blk)
+ : bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
- info->value = bdrv_query_stats(bs, !query_nodes);
+ info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;
@@ -535,11 +650,10 @@ static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
int i = 0;
for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
- qtype_code type = qobject_type(entry->value);
+ QType type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
- const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";
-
- func_fprintf(f, format, indentation * 4, "", i);
+ func_fprintf(f, "%*s[%i]:%c", indentation * 4, "", i,
+ composite ? '\n' : ' ');
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
@@ -553,10 +667,9 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
const QDictEntry *entry;
for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
- qtype_code type = qobject_type(entry->value);
+ QType type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
- const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
- char key[strlen(entry->key) + 1];
+ char *key = g_malloc(strlen(entry->key) + 1);
int i;
/* replace dashes with spaces in key (variable) names */
@@ -564,12 +677,13 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
}
key[i] = 0;
-
- func_fprintf(f, format, indentation * 4, "", key);
+ func_fprintf(f, "%*s%s:%c", indentation * 4, "", key,
+ composite ? '\n' : ' ');
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
+ g_free(key);
}
}
@@ -579,7 +693,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
- visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
+ visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
&error_abort);
obj = qmp_output_get_qobject(ov);
assert(qobject_type(obj) == QTYPE_QDICT);
@@ -623,7 +737,10 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
if (info->has_backing_filename) {
func_fprintf(f, "backing file: %s", info->backing_filename);
- if (info->has_full_backing_filename) {
+ if (!info->has_full_backing_filename) {
+ func_fprintf(f, " (cannot determine actual path)");
+ } else if (strcmp(info->backing_filename,
+ info->full_backing_filename) != 0) {
func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
}
func_fprintf(f, "\n");
diff --git a/qemu/block/qcow.c b/qemu/block/qcow.c
index 01fba54ce..60ddb12ec 100644
--- a/qemu/block/qcow.c
+++ b/qemu/block/qcow.c
@@ -21,8 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
+#include "qemu/error-report.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
@@ -100,7 +104,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
QCowHeader header;
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
}
@@ -119,11 +123,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %" PRIu32,
- header.version);
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "qcow", version);
+ error_setg(errp, "Unsupported qcow version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -159,6 +159,14 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
}
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
+ if (bdrv_uses_whitelist() &&
+ s->crypt_method_header == QCOW_CRYPT_AES) {
+ error_report("qcow built-in AES encryption is deprecated");
+ error_printf("Support for it will be removed in a future release.\n"
+ "You can use 'qemu-img convert' to switch to an\n"
+ "unencrypted qcow image, or a LUKS raw image.\n");
+ }
+
bs->encrypted = 1;
}
s->cluster_bits = header.cluster_bits;
@@ -193,7 +201,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -205,7 +213,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
/* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
s->l2_cache =
- qemu_try_blockalign(bs->file,
+ qemu_try_blockalign(bs->file->bs,
s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
if (s->l2_cache == NULL) {
error_setg(errp, "Could not allocate L2 table cache");
@@ -224,7 +232,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- ret = bdrv_pread(bs->file, header.backing_file_offset,
+ ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
goto fail;
@@ -369,13 +377,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
if (!allocate)
return 0;
/* allocate a new l2 entry */
- l2_offset = bdrv_getlength(bs->file);
+ l2_offset = bdrv_getlength(bs->file->bs);
/* round to cluster size */
l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite_sync(bs->file,
+ if (bdrv_pwrite_sync(bs->file->bs,
s->l1_table_offset + l1_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
@@ -405,11 +413,12 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l2_table = s->l2_cache + (min_index << s->l2_bits);
if (new_l2_table) {
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
- if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
}
@@ -430,20 +439,21 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
overwritten */
if (decompress_cluster(bs, cluster_offset) < 0)
return 0;
- cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = bdrv_getlength(bs->file->bs);
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
- if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
+ s->cluster_size) !=
s->cluster_size)
return -1;
} else {
- cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = bdrv_getlength(bs->file->bs);
if (allocate == 1) {
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
- bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+ bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
@@ -463,7 +473,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
errno = EIO;
return -1;
}
- if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+ if (bdrv_pwrite(bs->file->bs,
+ cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
}
@@ -477,7 +488,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
- if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+ if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
}
@@ -485,7 +496,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
}
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n;
@@ -506,6 +517,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
@@ -546,7 +558,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
if (s->cluster_cache_offset != coffset) {
csize = cluster_offset >> (63 - s->cluster_bits);
csize &= (s->cluster_size - 1);
- ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
+ ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
if (ret != csize)
return -1;
if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -594,13 +606,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
}
if (!cluster_offset) {
- if (bs->backing_hd) {
+ if (bs->backing) {
/* read from the base image */
hd_iov.iov_base = (void *)buf;
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ ret = bdrv_co_readv(bs->backing->bs, sector_num,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
@@ -625,7 +637,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
+ ret = bdrv_co_readv(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -727,7 +739,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
+ ret = bdrv_co_writev(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -775,7 +787,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
int flags = 0;
Error *local_err = NULL;
int ret;
- BlockDriverState *qcow_bs;
+ BlockBackend *qcow_blk;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -791,15 +803,17 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
goto cleanup;
}
- qcow_bs = NULL;
- ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
- if (ret < 0) {
+ qcow_blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (qcow_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto cleanup;
}
- ret = bdrv_truncate(qcow_bs, 0);
+ blk_set_allow_write_beyond_eof(qcow_blk, true);
+
+ ret = blk_truncate(qcow_blk, 0);
if (ret < 0) {
goto exit;
}
@@ -839,13 +853,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
+ ret = blk_pwrite(qcow_blk, sizeof(header),
backing_file, backing_filename_len);
if (ret != backing_filename_len) {
goto exit;
@@ -855,7 +869,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
tmp = g_malloc0(BDRV_SECTOR_SIZE);
for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
+ ret = blk_pwrite(qcow_blk, header_size +
BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
@@ -866,7 +880,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
g_free(tmp);
ret = 0;
exit:
- bdrv_unref(qcow_bs);
+ blk_unref(qcow_blk);
cleanup:
g_free(backing_file);
return ret;
@@ -879,10 +893,10 @@ static int qcow_make_empty(BlockDriverState *bs)
int ret;
memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+ if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
@@ -962,7 +976,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
}
cluster_offset &= s->cluster_offset_mask;
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
if (ret < 0) {
goto fail;
}
diff --git a/qemu/block/qcow2-cache.c b/qemu/block/qcow2-cache.c
index 53b8afc3d..0fe8edae4 100644
--- a/qemu/block/qcow2-cache.c
+++ b/qemu/block/qcow2-cache.c
@@ -22,6 +22,13 @@
* THE SOFTWARE.
*/
+/* Needed for CONFIG_MADVISE */
+#include "qemu/osdep.h"
+
+#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
+#include <sys/mman.h>
+#endif
+
#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
@@ -29,9 +36,9 @@
typedef struct Qcow2CachedTable {
int64_t offset;
- bool dirty;
uint64_t lru_counter;
int ref;
+ bool dirty;
} Qcow2CachedTable;
struct Qcow2Cache {
@@ -41,34 +48,85 @@ struct Qcow2Cache {
bool depends_on_flush;
void *table_array;
uint64_t lru_counter;
+ uint64_t cache_clean_lru_counter;
};
static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
Qcow2Cache *c, int table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
}
static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
Qcow2Cache *c, void *table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
int idx = table_offset / s->cluster_size;
assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
return idx;
}
+static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
+ int i, int num_tables)
+{
+#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
+ BDRVQcow2State *s = bs->opaque;
+ void *t = qcow2_cache_get_table_addr(bs, c, i);
+ int align = getpagesize();
+ size_t mem_size = (size_t) s->cluster_size * num_tables;
+ size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
+ size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
+ if (length > 0) {
+ qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
+ }
+#endif
+}
+
+static inline bool can_clean_entry(Qcow2Cache *c, int i)
+{
+ Qcow2CachedTable *t = &c->entries[i];
+ return t->ref == 0 && !t->dirty && t->offset != 0 &&
+ t->lru_counter <= c->cache_clean_lru_counter;
+}
+
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int i = 0;
+ while (i < c->size) {
+ int to_clean = 0;
+
+ /* Skip the entries that we don't need to clean */
+ while (i < c->size && !can_clean_entry(c, i)) {
+ i++;
+ }
+
+ /* And count how many we can clean in a row */
+ while (i < c->size && can_clean_entry(c, i)) {
+ c->entries[i].offset = 0;
+ c->entries[i].lru_counter = 0;
+ i++;
+ to_clean++;
+ }
+
+ if (to_clean > 0) {
+ qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+ }
+ }
+
+ c->cache_clean_lru_counter = c->lru_counter;
+}
+
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2Cache *c;
c = g_new0(Qcow2Cache, 1);
c->size = num_tables;
c->entries = g_try_new0(Qcow2CachedTable, num_tables);
- c->table_array = qemu_try_blockalign(bs->file,
+ c->table_array = qemu_try_blockalign(bs->file->bs,
(size_t) num_tables * s->cluster_size);
if (!c->entries || !c->table_array) {
@@ -113,7 +171,7 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret = 0;
if (!c->entries[i].dirty || !c->entries[i].offset) {
@@ -126,7 +184,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
if (c->depends) {
ret = qcow2_cache_flush_dependency(bs, c);
} else if (c->depends_on_flush) {
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret >= 0) {
c->depends_on_flush = false;
}
@@ -157,7 +215,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
- ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+ ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
if (ret < 0) {
return ret;
@@ -170,7 +228,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int result = 0;
int ret;
int i;
@@ -185,7 +243,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
}
if (result == 0) {
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
result = ret;
}
@@ -237,6 +295,8 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
c->entries[i].lru_counter = 0;
}
+ qcow2_cache_table_release(bs, c, 0, c->size);
+
c->lru_counter = 0;
return 0;
@@ -245,7 +305,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
uint64_t offset, void **table, bool read_from_disk)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
int ret;
int lookup_index;
@@ -295,7 +355,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
- ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+ ret = bdrv_pread(bs->file->bs, offset,
+ qcow2_cache_get_table_addr(bs, c, i),
s->cluster_size);
if (ret < 0) {
return ret;
diff --git a/qemu/block/qcow2-cluster.c b/qemu/block/qcow2-cluster.c
index b43f186eb..31ecc1030 100644
--- a/qemu/block/qcow2-cluster.c
+++ b/qemu/block/qcow2-cluster.c
@@ -22,8 +22,10 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include <zlib.h>
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
@@ -32,7 +34,7 @@
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
int64_t old_l1_table_offset, old_l1_size;
@@ -72,7 +74,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = qemu_try_blockalign(bs->file,
+ new_l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(new_l1_size2, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
@@ -105,7 +107,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+ ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+ new_l1_table, new_l1_size2);
if (ret < 0)
goto fail;
for(i = 0; i < s->l1_size; i++)
@@ -115,7 +118,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
stq_be_p(data + 4, new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+ data, sizeof(data));
if (ret < 0) {
goto fail;
}
@@ -148,7 +152,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
uint64_t **l2_table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
@@ -163,7 +167,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
int l1_start_index;
int i, ret;
@@ -182,8 +186,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf));
+ ret = bdrv_pwrite_sync(bs->file->bs,
+ s->l1_table_offset + 8 * l1_start_index,
+ buf, sizeof(buf));
if (ret < 0) {
return ret;
}
@@ -203,7 +208,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t old_l2_offset;
uint64_t *l2_table = NULL;
int64_t l2_offset;
@@ -298,7 +303,7 @@ fail:
* as contiguous. (This allows it, for example, to stop at the first compressed
* cluster which may require a different handling)
*/
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+static int count_contiguous_clusters(int nb_clusters, int cluster_size,
uint64_t *l2_table, uint64_t stop_flags)
{
int i;
@@ -309,7 +314,7 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
if (!offset)
return 0;
- assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);
+ assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
for (i = 0; i < nb_clusters; i++) {
uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -321,14 +326,16 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
return i;
}
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+static int count_contiguous_clusters_by_type(int nb_clusters,
+ uint64_t *l2_table,
+ int wanted_type)
{
int i;
for (i = 0; i < nb_clusters; i++) {
int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
- if (type != QCOW2_CLUSTER_UNALLOCATED) {
+ if (type != wanted_type) {
break;
}
}
@@ -339,7 +346,7 @@ static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_tab
/* The crypt function is compatible with the linux cryptoloop
algorithm for < 4 GB images. NOTE: out_buf == in_buf is
supported */
-int qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc,
Error **errp)
@@ -387,7 +394,7 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
uint64_t cluster_offset,
int n_start, int n_end)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
int n, ret;
@@ -440,7 +447,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+ ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
+ &qiov);
if (ret < 0) {
goto out;
}
@@ -469,7 +477,7 @@ out:
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
@@ -495,10 +503,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
if (nb_needed > nb_available) {
nb_needed = nb_available;
}
+ assert(nb_needed <= INT_MAX);
*cluster_offset = 0;
- /* seek the the l2 offset in the l1 table */
+ /* seek to the l2 offset in the l1 table */
l1_index = offset >> l1_bits;
if (l1_index >= s->l1_size) {
@@ -530,6 +539,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+ /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
nb_clusters = size_to_clusters(s, nb_needed << 9);
ret = qcow2_get_cluster_type(*cluster_offset);
@@ -547,13 +558,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
ret = -EIO;
goto fail;
}
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], QCOW_OFLAG_ZERO);
+ c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+ QCOW2_CLUSTER_ZERO);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_UNALLOCATED:
/* how many empty clusters ? */
- c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+ c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+ QCOW2_CLUSTER_UNALLOCATED);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_NORMAL:
@@ -606,13 +618,13 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
uint64_t **new_l2_table,
int *new_l2_index)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset;
uint64_t *l2_table = NULL;
int ret;
- /* seek the the l2 offset in the l1 table */
+ /* seek to the l2 offset in the l1 table */
l1_index = offset >> (s->l2_bits + s->cluster_bits);
if (l1_index >= s->l1_size) {
@@ -680,7 +692,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index, ret;
uint64_t *l2_table;
int64_t cluster_offset;
@@ -725,7 +737,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
if (r->nb_sectors == 0) {
@@ -754,7 +766,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, *l2_table;
uint64_t cluster_offset = m->alloc_offset;
@@ -814,7 +826,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
/*
* If this was a COW, we need to decrease the refcount of the old cluster.
- * Also flush bs->file to get the right order for L2 and refcount update.
*
* Don't discard clusters that reach a refcount of 0 (e.g. compressed
* clusters), the next write will reuse them anyway.
@@ -837,7 +848,7 @@ err:
* write, but require COW to be performed (this includes yet unallocated space,
* which must copy from the backing file)
*/
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
uint64_t *l2_table, int l2_index)
{
int i;
@@ -883,7 +894,7 @@ out:
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *cur_bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowL2Meta *old_alloc;
uint64_t bytes = *cur_bytes;
@@ -956,11 +967,11 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t cluster_offset;
uint64_t *l2_table;
- unsigned int nb_clusters;
+ uint64_t nb_clusters;
unsigned int keep_clusters;
int ret;
@@ -979,6 +990,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
l2_index = offset_to_l2_index(s, guest_offset);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
@@ -1061,9 +1073,9 @@ out:
* restarted, but the whole request should not be failed.
*/
static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, unsigned int *nb_clusters)
+ uint64_t *host_offset, uint64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
*host_offset, *nb_clusters);
@@ -1079,7 +1091,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
*host_offset = cluster_offset;
return 0;
} else {
- int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+ int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
if (ret < 0) {
return ret;
}
@@ -1111,11 +1123,11 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t *l2_table;
uint64_t entry;
- unsigned int nb_clusters;
+ uint64_t nb_clusters;
int ret;
uint64_t alloc_cluster_offset;
@@ -1133,6 +1145,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
l2_index = offset_to_l2_index(s, guest_offset);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
@@ -1263,7 +1276,7 @@ fail:
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *host_offset, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start, remaining;
uint64_t cluster_offset;
uint64_t cur_bytes;
@@ -1397,7 +1410,7 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret, csize, nb_csectors, sector_offset;
uint64_t coffset;
@@ -1407,7 +1420,8 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+ ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+ nb_csectors);
if (ret < 0) {
return ret;
}
@@ -1426,9 +1440,10 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
* clusters.
*/
static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
+ uint64_t nb_clusters, enum qcow2_discard_type type,
+ bool full_discard)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table;
int l2_index;
int ret;
@@ -1441,6 +1456,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
/* Limit nb_clusters to one L2 table */
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
for (i = 0; i < nb_clusters; i++) {
uint64_t old_l2_entry;
@@ -1462,7 +1478,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
*/
switch (qcow2_get_cluster_type(old_l2_entry)) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (full_discard || !bs->backing_hd) {
+ if (full_discard || !bs->backing) {
continue;
}
break;
@@ -1501,9 +1517,9 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type, bool full_discard)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t end_offset;
- unsigned int nb_clusters;
+ uint64_t nb_clusters;
int ret;
end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
@@ -1545,9 +1561,9 @@ fail:
* clusters.
*/
static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
+ uint64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table;
int l2_index;
int ret;
@@ -1560,6 +1576,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
/* Limit nb_clusters to one L2 table */
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
for (i = 0; i < nb_clusters; i++) {
uint64_t old_offset;
@@ -1583,8 +1600,8 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
{
- BDRVQcowState *s = bs->opaque;
- unsigned int nb_clusters;
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t nb_clusters;
int ret;
/* The zero flag is only supported by version 3 and newer */
@@ -1626,9 +1643,10 @@ fail:
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
int l1_size, int64_t *visited_l1_entries,
int64_t l1_entries,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
uint64_t *l2_table = NULL;
int ret;
@@ -1637,7 +1655,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+ l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
if (l2_table == NULL) {
return -ENOMEM;
}
@@ -1652,7 +1670,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
/* unallocated */
(*visited_l1_entries)++;
if (status_cb) {
- status_cb(bs, *visited_l1_entries, l1_entries);
+ status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
}
continue;
}
@@ -1671,8 +1689,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
(void **)&l2_table);
} else {
/* load inactive L2 tables from disk */
- ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
+ ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
}
if (ret < 0) {
goto fail;
@@ -1695,7 +1713,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
}
if (!preallocated) {
- if (!bs->backing_hd) {
+ if (!bs->backing) {
/* not backed; therefore we can simply deallocate the
* cluster */
l2_table[j] = 0;
@@ -1746,7 +1764,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
s->cluster_sectors, 0);
if (ret < 0) {
if (!preallocated) {
@@ -1779,8 +1797,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
+ ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
@@ -1789,7 +1807,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
(*visited_l1_entries)++;
if (status_cb) {
- status_cb(bs, *visited_l1_entries, l1_entries);
+ status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
}
}
@@ -1813,9 +1831,10 @@ fail:
* qcow2 version which doesn't yet support metadata zero clusters.
*/
int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL;
int64_t l1_entries = 0, visited_l1_entries = 0;
int ret;
@@ -1830,7 +1849,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&visited_l1_entries, l1_entries,
- status_cb);
+ status_cb, cb_opaque);
if (ret < 0) {
goto fail;
}
@@ -1853,8 +1872,9 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
- ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
- BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+ ret = bdrv_read(bs->file->bs,
+ s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
+ (void *)l1_table, l1_sectors);
if (ret < 0) {
goto fail;
}
@@ -1865,7 +1885,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
&visited_l1_entries, l1_entries,
- status_cb);
+ status_cb, cb_opaque);
if (ret < 0) {
goto fail;
}
diff --git a/qemu/block/qcow2-refcount.c b/qemu/block/qcow2-refcount.c
index b0ee42d81..ca6094ff5 100644
--- a/qemu/block/qcow2-refcount.c
+++ b/qemu/block/qcow2-refcount.c
@@ -22,6 +22,8 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
@@ -82,7 +84,7 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
int qcow2_refcount_init(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_size2, i;
int ret;
@@ -101,7 +103,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file, s->refcount_table_offset,
+ ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
if (ret < 0) {
goto fail;
@@ -116,7 +118,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
void qcow2_refcount_close(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
g_free(s->refcount_table);
}
@@ -214,7 +216,7 @@ static int load_refcount_block(BlockDriverState *bs,
int64_t refcount_block_offset,
void **refcount_block)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
@@ -231,7 +233,7 @@ static int load_refcount_block(BlockDriverState *bs,
int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
uint64_t *refcount)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t refcount_table_index, block_index;
int64_t refcount_block_offset;
int ret;
@@ -274,7 +276,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
* Rounds the refcount table size up to avoid growing the table for each single
* refcount block that is allocated.
*/
-static unsigned int next_refcount_table_size(BDRVQcowState *s,
+static unsigned int next_refcount_table_size(BDRVQcow2State *s,
unsigned int min_size)
{
unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
@@ -290,7 +292,7 @@ static unsigned int next_refcount_table_size(BDRVQcowState *s,
/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
uint64_t offset_b)
{
uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
@@ -308,7 +310,7 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
static int alloc_refcount_block(BlockDriverState *bs,
int64_t cluster_index, void **refcount_block)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_index;
int ret;
@@ -431,7 +433,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
if (refcount_table_index < s->refcount_table_size) {
uint64_t data64 = cpu_to_be64(new_block);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
- ret = bdrv_pwrite_sync(bs->file,
+ ret = bdrv_pwrite_sync(bs->file->bs,
s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
&data64, sizeof(data64));
if (ret < 0) {
@@ -535,7 +537,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Write refcount blocks to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
- ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+ ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
blocks_clusters * s->cluster_size);
g_free(new_blocks);
new_blocks = NULL;
@@ -549,7 +551,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+ ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
table_size * sizeof(uint64_t));
if (ret < 0) {
goto fail_table;
@@ -560,12 +562,16 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
/* Hook up the new refcount table in the qcow2 header */
- uint8_t data[12];
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ struct QEMU_PACKED {
+ uint64_t d64;
+ uint32_t d32;
+ } data;
+ cpu_to_be64w(&data.d64, table_offset);
+ cpu_to_be32w(&data.d32, table_clusters);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data));
+ ret = bdrv_pwrite_sync(bs->file->bs,
+ offsetof(QCowHeader, refcount_table_offset),
+ &data, sizeof(data));
if (ret < 0) {
goto fail_table;
}
@@ -605,7 +611,7 @@ fail_block:
void qcow2_process_discards(BlockDriverState *bs, int ret)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2DiscardRegion *d, *next;
QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
@@ -613,7 +619,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
/* Discard is optional, ignore the return value */
if (ret >= 0) {
- bdrv_discard(bs->file,
+ bdrv_discard(bs->file->bs,
d->offset >> BDRV_SECTOR_BITS,
d->bytes >> BDRV_SECTOR_BITS);
}
@@ -625,7 +631,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
static void update_refcount_discard(BlockDriverState *bs,
uint64_t offset, uint64_t length)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2DiscardRegion *d, *p, *next;
QTAILQ_FOREACH(d, &s->discards, next) {
@@ -682,7 +688,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
bool decrease,
enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t start, last, cluster_offset;
void *refcount_block = NULL;
int64_t old_table_index = -1;
@@ -793,7 +799,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
uint64_t addend, bool decrease,
enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
@@ -815,7 +821,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
/* return < 0 if error */
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t i, nb_clusters, refcount;
int ret;
@@ -875,10 +881,10 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
return offset;
}
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters)
+int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t cluster_index, refcount;
uint64_t i;
int ret;
@@ -916,7 +922,7 @@ int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
contiguous sectors. size must be <= cluster_size */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t offset;
size_t free_in_cluster;
int ret;
@@ -949,11 +955,17 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
offset = new_cluster;
+ free_in_cluster = s->cluster_size;
+ } else {
+ free_in_cluster += s->cluster_size;
}
}
assert(offset);
ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ offset = 0;
+ }
} while (ret == -EAGAIN);
if (ret < 0) {
return ret;
@@ -992,7 +1004,7 @@ void qcow2_free_clusters(BlockDriverState *bs,
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int nb_clusters, enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
switch (qcow2_get_cluster_type(l2_entry)) {
case QCOW2_CLUSTER_COMPRESSED:
@@ -1036,7 +1048,7 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
bool l1_allocated = false;
int64_t old_offset, old_l2_offset;
@@ -1062,7 +1074,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
l1_allocated = true;
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
goto fail;
}
@@ -1215,7 +1227,8 @@ fail:
cpu_to_be64s(&l1_table[i]);
}
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
+ l1_table, l1_size2);
for (i = 0; i < l1_size; i++) {
be64_to_cpus(&l1_table[i]);
@@ -1233,7 +1246,7 @@ fail:
/* refcount checking functions */
-static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries)
+static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
{
/* This assertion holds because there is no way we can address more than
* 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because
@@ -1256,10 +1269,10 @@ static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries)
* refcount array buffer will be aligned to a cluster boundary, and the newly
* allocated area will be zeroed.
*/
-static int realloc_refcount_array(BDRVQcowState *s, void **array,
+static int realloc_refcount_array(BDRVQcow2State *s, void **array,
int64_t *size, int64_t new_size)
{
- size_t old_byte_size, new_byte_size;
+ int64_t old_byte_size, new_byte_size;
void *new_ptr;
/* Round to clusters so the array can be directly written to disk */
@@ -1275,13 +1288,17 @@ static int realloc_refcount_array(BDRVQcowState *s, void **array,
assert(new_byte_size > 0);
+ if (new_byte_size > SIZE_MAX) {
+ return -ENOMEM;
+ }
+
new_ptr = g_try_realloc(*array, new_byte_size);
if (!new_ptr) {
return -ENOMEM;
}
if (new_byte_size > old_byte_size) {
- memset((void *)((uintptr_t)new_ptr + old_byte_size), 0,
+ memset((char *)new_ptr + old_byte_size, 0,
new_byte_size - old_byte_size);
}
@@ -1294,7 +1311,7 @@ static int realloc_refcount_array(BDRVQcowState *s, void **array,
/*
* Increases the refcount for a range of clusters in a given refcount table.
* This is used to construct a temporary refcount table out of L1 and L2 tables
- * which can be compared the the refcount table saved in the image.
+ * which can be compared to the refcount table saved in the image.
*
* Modifies the number of errors in res.
*/
@@ -1304,7 +1321,7 @@ static int inc_refcounts(BlockDriverState *bs,
int64_t *refcount_table_size,
int64_t offset, int64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start, last, cluster_offset, k, refcount;
int ret;
@@ -1330,6 +1347,9 @@ static int inc_refcounts(BlockDriverState *bs,
if (refcount == s->refcount_max) {
fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
"\n", cluster_offset);
+ fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
+ "width or qemu-img convert to create a clean copy if the "
+ "image cannot be opened for writing\n");
res->corruptions++;
continue;
}
@@ -1357,7 +1377,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
int64_t *refcount_table_size, int64_t l2_offset,
int flags)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table, l2_entry;
uint64_t next_contiguous_offset = 0;
int i, l2_size, nb_csectors, ret;
@@ -1366,7 +1386,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
l2_size = s->l2_size * sizeof(uint64_t);
l2_table = g_malloc(l2_size);
- ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
+ ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
res->check_errors++;
@@ -1477,7 +1497,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size,
int flags)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL, l2_offset, l1_size2;
int i, ret;
@@ -1498,7 +1518,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
res->check_errors++;
goto fail;
}
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
res->check_errors++;
@@ -1554,7 +1574,7 @@ fail:
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
int ret;
uint64_t refcount;
@@ -1596,7 +1616,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
}
}
- ret = bdrv_pread(bs->file, l2_offset, l2_table,
+ ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1648,7 +1668,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
goto fail;
}
- ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
+ ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
+ s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
strerror(-ret));
@@ -1673,7 +1694,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
void **refcount_table, int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i, size;
int ret;
@@ -1703,11 +1724,11 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
goto resize_fail;
}
- ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+ ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
if (ret < 0) {
goto resize_fail;
}
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
ret = size;
goto resize_fail;
@@ -1776,7 +1797,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
void **refcount_table, int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i;
QCowSnapshot *sn;
int ret;
@@ -1840,7 +1861,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
int64_t *highest_cluster,
void *refcount_table, int64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i;
uint64_t refcount1, refcount2;
int ret;
@@ -1917,7 +1938,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
int64_t *imrt_nb_clusters,
int64_t *first_free_cluster)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t cluster = *first_free_cluster, i;
bool first_gap = true;
int contiguous_free_clusters;
@@ -1987,7 +2008,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
void **refcount_table,
int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
int64_t refblock_offset, refblock_start, refblock_index;
uint32_t reftable_size = 0;
@@ -2081,7 +2102,7 @@ write_refblocks:
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
- ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
on_disk_refblock, s->cluster_sectors);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2130,7 +2151,7 @@ write_refblocks:
}
assert(reftable_size < INT_MAX / sizeof(uint64_t));
- ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+ ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
reftable_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2142,8 +2163,8 @@ write_refblocks:
reftable_offset);
cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
size_to_clusters(s, reftable_size * sizeof(uint64_t)));
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
- refcount_table_offset),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
+ refcount_table_offset),
&reftable_offset_and_clusters,
sizeof(reftable_offset_and_clusters));
if (ret < 0) {
@@ -2174,14 +2195,14 @@ fail:
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
BdrvCheckResult pre_compare_res;
int64_t size, highest_cluster, nb_clusters;
void *refcount_table = NULL;
bool rebuild = false;
int ret;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -2311,7 +2332,7 @@ fail:
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int chk = s->overlap_check & ~ign;
int i, j;
@@ -2390,7 +2411,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
+ ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);
return ret;
@@ -2451,3 +2472,450 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
return 0;
}
+
+/* A pointer to a function of this type is given to walk_over_reftable(). That
+ * function will create refblocks and pass them to a RefblockFinishOp once they
+ * are completed (@refblock). @refblock_empty is set if the refblock is
+ * completely empty.
+ *
+ * Along with the refblock, a corresponding reftable entry is passed, in the
+ * reftable @reftable (which may be reallocated) at @reftable_index.
+ *
+ * @allocated should be set to true if a new cluster has been allocated.
+ */
+typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty,
+ bool *allocated, Error **errp);
+
+/**
+ * This "operation" for walk_over_reftable() allocates the refblock on disk (if
+ * it is not empty) and inserts its offset into the new reftable. The size of
+ * this new reftable is increased as required.
+ */
+static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+
+ if (!refblock_empty && reftable_index >= *reftable_size) {
+ uint64_t *new_reftable;
+ uint64_t new_reftable_size;
+
+ new_reftable_size = ROUND_UP(reftable_index + 1,
+ s->cluster_size / sizeof(uint64_t));
+ if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
+ error_setg(errp,
+ "This operation would make the refcount table grow "
+ "beyond the maximum size supported by QEMU, aborting");
+ return -ENOTSUP;
+ }
+
+ new_reftable = g_try_realloc(*reftable, new_reftable_size *
+ sizeof(uint64_t));
+ if (!new_reftable) {
+ error_setg(errp, "Failed to increase reftable buffer size");
+ return -ENOMEM;
+ }
+
+ memset(new_reftable + *reftable_size, 0,
+ (new_reftable_size - *reftable_size) * sizeof(uint64_t));
+
+ *reftable = new_reftable;
+ *reftable_size = new_reftable_size;
+ }
+
+ if (!refblock_empty && !(*reftable)[reftable_index]) {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ error_setg_errno(errp, -offset, "Failed to allocate refblock");
+ return offset;
+ }
+ (*reftable)[reftable_index] = offset;
+ *allocated = true;
+ }
+
+ return 0;
+}
+
+/**
+ * This "operation" for walk_over_reftable() writes the refblock to disk at the
+ * offset specified by the new reftable's entry. It does not modify the new
+ * reftable or change any refcounts.
+ */
+static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+ int ret;
+
+ if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
+ offset = (*reftable)[reftable_index];
+
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Overlap check failed");
+ return ret;
+ }
+
+ ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to write refblock");
+ return ret;
+ }
+ } else {
+ assert(refblock_empty);
+ }
+
+ return 0;
+}
+
+/**
+ * This function walks over the existing reftable and every referenced refblock;
+ * if @new_set_refcount is non-NULL, it is called for every refcount entry to
+ * create an equal new entry in the passed @new_refblock. Once that
+ * @new_refblock is completely filled, @operation will be called.
+ *
+ * @status_cb and @cb_opaque are used for the amend operation's status callback.
+ * @index is the index of the walk_over_reftable() calls and @total is the total
+ * number of walk_over_reftable() calls per amend operation. Both are used for
+ * calculating the parameters for the status callback.
+ *
+ * @allocated is set to true if a new cluster has been allocated.
+ */
+static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
+ uint64_t *new_reftable_index,
+ uint64_t *new_reftable_size,
+ void *new_refblock, int new_refblock_size,
+ int new_refcount_bits,
+ RefblockFinishOp *operation, bool *allocated,
+ Qcow2SetRefcountFunc *new_set_refcount,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, int index, int total,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t reftable_index;
+ bool new_refblock_empty = true;
+ int refblock_index;
+ int new_refblock_index = 0;
+ int ret;
+
+ for (reftable_index = 0; reftable_index < s->refcount_table_size;
+ reftable_index++)
+ {
+ uint64_t refblock_offset = s->refcount_table[reftable_index]
+ & REFT_OFFSET_MASK;
+
+ status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
+ (uint64_t)total * s->refcount_table_size, cb_opaque);
+
+ if (refblock_offset) {
+ void *refblock;
+
+ if (offset_into_cluster(s, refblock_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
+ PRIx64 " unaligned (reftable index: %#"
+ PRIx64 ")", refblock_offset,
+ reftable_index);
+ error_setg(errp,
+ "Image is corrupt (unaligned refblock offset)");
+ return -EIO;
+ }
+
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
+ &refblock);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to retrieve refblock");
+ return ret;
+ }
+
+ for (refblock_index = 0; refblock_index < s->refcount_block_size;
+ refblock_index++)
+ {
+ uint64_t refcount;
+
+ if (new_refblock_index >= new_refblock_size) {
+ /* new_refblock is now complete */
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock,
+ new_refblock_empty, allocated, errp);
+ if (ret < 0) {
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ new_refblock_index = 0;
+ new_refblock_empty = true;
+ }
+
+ refcount = s->get_refcount(refblock, refblock_index);
+ if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
+ uint64_t offset;
+
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+
+ offset = ((reftable_index << s->refcount_block_bits)
+ + refblock_index) << s->cluster_bits;
+
+ error_setg(errp, "Cannot decrease refcount entry width to "
+ "%i bits: Cluster at offset %#" PRIx64 " has a "
+ "refcount of %" PRIu64, new_refcount_bits,
+ offset, refcount);
+ return -EINVAL;
+ }
+
+ if (new_set_refcount) {
+ new_set_refcount(new_refblock, new_refblock_index++,
+ refcount);
+ } else {
+ new_refblock_index++;
+ }
+ new_refblock_empty = new_refblock_empty && refcount == 0;
+ }
+
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ } else {
+ /* No refblock means every refcount is 0 */
+ for (refblock_index = 0; refblock_index < s->refcount_block_size;
+ refblock_index++)
+ {
+ if (new_refblock_index >= new_refblock_size) {
+ /* new_refblock is now complete */
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock,
+ new_refblock_empty, allocated, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ new_refblock_index = 0;
+ new_refblock_empty = true;
+ }
+
+ if (new_set_refcount) {
+ new_set_refcount(new_refblock, new_refblock_index++, 0);
+ } else {
+ new_refblock_index++;
+ }
+ }
+ }
+ }
+
+ if (new_refblock_index > 0) {
+ /* Complete the potentially existing partially filled final refblock */
+ if (new_set_refcount) {
+ for (; new_refblock_index < new_refblock_size;
+ new_refblock_index++)
+ {
+ new_set_refcount(new_refblock, new_refblock_index, 0);
+ }
+ }
+
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock, new_refblock_empty,
+ allocated, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ }
+
+ status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
+ (uint64_t)total * s->refcount_table_size, cb_opaque);
+
+ return 0;
+}
+
+int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ Qcow2GetRefcountFunc *new_get_refcount;
+ Qcow2SetRefcountFunc *new_set_refcount;
+ void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
+ uint64_t *new_reftable = NULL, new_reftable_size = 0;
+ uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
+ uint64_t new_reftable_index = 0;
+ uint64_t i;
+ int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
+ int new_refblock_size, new_refcount_bits = 1 << refcount_order;
+ int old_refcount_order;
+ int walk_index = 0;
+ int ret;
+ bool new_allocation;
+
+ assert(s->qcow_version >= 3);
+ assert(refcount_order >= 0 && refcount_order <= 6);
+
+ /* see qcow2_open() */
+ new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));
+
+ new_get_refcount = get_refcount_funcs[refcount_order];
+ new_set_refcount = set_refcount_funcs[refcount_order];
+
+
+ do {
+ int total_walks;
+
+ new_allocation = false;
+
+ /* At least we have to do this walk and the one which writes the
+ * refblocks; also, at least we have to do this loop here at least
+ * twice (normally), first to do the allocations, and second to
+ * determine that everything is correctly allocated, this then makes
+ * three walks in total */
+ total_walks = MAX(walk_index + 2, 3);
+
+ /* First, allocate the structures so they are present in the refcount
+ * structures */
+ ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
+ &new_reftable_size, NULL, new_refblock_size,
+ new_refcount_bits, &alloc_refblock,
+ &new_allocation, NULL, status_cb, cb_opaque,
+ walk_index++, total_walks, errp);
+ if (ret < 0) {
+ goto done;
+ }
+
+ new_reftable_index = 0;
+
+ if (new_allocation) {
+ if (new_reftable_offset) {
+ qcow2_free_clusters(bs, new_reftable_offset,
+ allocated_reftable_size * sizeof(uint64_t),
+ QCOW2_DISCARD_NEVER);
+ }
+
+ new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
+ sizeof(uint64_t));
+ if (new_reftable_offset < 0) {
+ error_setg_errno(errp, -new_reftable_offset,
+ "Failed to allocate the new reftable");
+ ret = new_reftable_offset;
+ goto done;
+ }
+ allocated_reftable_size = new_reftable_size;
+ }
+ } while (new_allocation);
+
+ /* Second, write the new refblocks */
+ ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
+ &new_reftable_size, new_refblock,
+ new_refblock_size, new_refcount_bits,
+ &flush_refblock, &new_allocation, new_set_refcount,
+ status_cb, cb_opaque, walk_index, walk_index + 1,
+ errp);
+ if (ret < 0) {
+ goto done;
+ }
+ assert(!new_allocation);
+
+
+ /* Write the new reftable */
+ ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
+ new_reftable_size * sizeof(uint64_t));
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Overlap check failed");
+ goto done;
+ }
+
+ for (i = 0; i < new_reftable_size; i++) {
+ cpu_to_be64s(&new_reftable[i]);
+ }
+
+ ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable,
+ new_reftable_size * sizeof(uint64_t));
+
+ for (i = 0; i < new_reftable_size; i++) {
+ be64_to_cpus(&new_reftable[i]);
+ }
+
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to write the new reftable");
+ goto done;
+ }
+
+
+ /* Empty the refcount cache */
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
+ goto done;
+ }
+
+ /* Update the image header to point to the new reftable; this only updates
+ * the fields which are relevant to qcow2_update_header(); other fields
+ * such as s->refcount_table or s->refcount_bits stay stale for now
+ * (because we have to restore everything if qcow2_update_header() fails) */
+ old_refcount_order = s->refcount_order;
+ old_reftable_size = s->refcount_table_size;
+ old_reftable_offset = s->refcount_table_offset;
+
+ s->refcount_order = refcount_order;
+ s->refcount_table_size = new_reftable_size;
+ s->refcount_table_offset = new_reftable_offset;
+
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->refcount_order = old_refcount_order;
+ s->refcount_table_size = old_reftable_size;
+ s->refcount_table_offset = old_reftable_offset;
+ error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
+ goto done;
+ }
+
+ /* Now update the rest of the in-memory information */
+ old_reftable = s->refcount_table;
+ s->refcount_table = new_reftable;
+
+ s->refcount_bits = 1 << refcount_order;
+ s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
+ s->refcount_max += s->refcount_max - 1;
+
+ s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
+ s->refcount_block_size = 1 << s->refcount_block_bits;
+
+ s->get_refcount = new_get_refcount;
+ s->set_refcount = new_set_refcount;
+
+ /* For cleaning up all old refblocks and the old reftable below the "done"
+ * label */
+ new_reftable = old_reftable;
+ new_reftable_size = old_reftable_size;
+ new_reftable_offset = old_reftable_offset;
+
+done:
+ if (new_reftable) {
+ /* On success, new_reftable actually points to the old reftable (and
+ * new_reftable_size is the old reftable's size); but that is just
+ * fine */
+ for (i = 0; i < new_reftable_size; i++) {
+ uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
+ if (offset) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+ g_free(new_reftable);
+
+ if (new_reftable_offset > 0) {
+ qcow2_free_clusters(bs, new_reftable_offset,
+ new_reftable_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+
+ qemu_vfree(new_refblock);
+ return ret;
+}
diff --git a/qemu/block/qcow2-snapshot.c b/qemu/block/qcow2-snapshot.c
index b6f58c13e..5f4a17e47 100644
--- a/qemu/block/qcow2-snapshot.c
+++ b/qemu/block/qcow2-snapshot.c
@@ -22,14 +22,16 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/error-report.h"
+#include "qemu/cutils.h"
void qcow2_free_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
for(i = 0; i < s->nb_snapshots; i++) {
@@ -43,7 +45,7 @@ void qcow2_free_snapshots(BlockDriverState *bs)
int qcow2_read_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
QCowSnapshot *sn;
@@ -64,7 +66,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
- ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+ ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
@@ -83,7 +85,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
name_size = be16_to_cpu(h.name_size);
/* Read extra data */
- ret = bdrv_pread(bs->file, offset, &extra,
+ ret = bdrv_pread(bs->file->bs, offset, &extra,
MIN(sizeof(extra), extra_data_size));
if (ret < 0) {
goto fail;
@@ -102,7 +104,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+ ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
@@ -111,7 +113,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+ ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -136,7 +138,7 @@ fail:
/* add at the end of the file a new list of snapshots */
static int qcow2_write_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
@@ -214,25 +216,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
- ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+ ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
- ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+ ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
if (ret < 0) {
goto fail;
}
offset += sizeof(extra);
- ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+ ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
- ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+ ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -254,7 +256,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
&header_data, sizeof(header_data));
if (ret < 0) {
goto fail;
@@ -278,7 +280,7 @@ fail:
static void find_new_snapshot_id(BlockDriverState *bs,
char *id_str, int id_str_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
int i;
unsigned long id, id_max = 0;
@@ -296,7 +298,7 @@ static int find_snapshot_by_id_and_name(BlockDriverState *bs,
const char *id,
const char *name)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
if (id && name) {
@@ -338,7 +340,7 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs,
/* if no id is provided, a new one is constructed */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *new_snapshot_list = NULL;
QCowSnapshot *old_snapshot_list = NULL;
QCowSnapshot sn1, *sn = &sn1;
@@ -396,7 +398,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
}
- ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+ ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -461,7 +463,7 @@ fail:
/* copy the snapshot 'snapshot_name' into the current disk image */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
int i, snapshot_index;
int cur_l1_bytes, sn_l1_bytes;
@@ -509,7 +511,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
- ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+ sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
}
@@ -526,7 +529,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+ ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
goto fail;
@@ -587,7 +590,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
const char *name,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot sn;
int snapshot_index, ret;
@@ -650,7 +653,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QEMUSnapshotInfo *sn_tab, *sn_info;
QCowSnapshot *sn;
int i;
@@ -683,7 +686,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
Error **errp)
{
int i, snapshot_index;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
uint64_t *new_l1_table;
int new_l1_bytes;
@@ -706,13 +709,14 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
- new_l1_table = qemu_try_blockalign(bs->file,
+ new_l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(new_l1_bytes, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+ ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+ new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
qemu_vfree(new_l1_table);
diff --git a/qemu/block/qcow2.c b/qemu/block/qcow2.c
index 76c331b38..470734be9 100644
--- a/qemu/block/qcow2.c
+++ b/qemu/block/qcow2.c
@@ -21,8 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "block/qcow2.h"
@@ -34,6 +35,7 @@
#include "qapi-event.h"
#include "trace.h"
#include "qemu/option_int.h"
+#include "qemu/cutils.h"
/*
Differences with QCOW:
@@ -85,7 +87,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uint64_t end_offset, void **p_feature_table,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowExtension ext;
uint64_t offset;
int ret;
@@ -104,7 +106,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
printf("attempting to read extended header in offset %lu\n", offset);
#endif
- ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
+ ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
if (ret < 0) {
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
"pread fail from offset %" PRIu64, offset);
@@ -132,7 +134,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
sizeof(bs->backing_format));
return 2;
}
- ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
+ ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
"Could not read format name");
@@ -148,7 +150,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
if (p_feature_table != NULL) {
void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+ ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
"Could not read table");
@@ -169,7 +171,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uext->len = ext.len;
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
- ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+ ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
"Could not read data");
@@ -187,7 +189,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
static void cleanup_unknown_header_ext(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2UnknownHeaderExtension *uext, *next;
QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
@@ -196,22 +198,8 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs)
}
}
-static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
- Error **errp, const char *fmt, ...)
-{
- char msg[64];
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(msg, sizeof(msg), fmt, ap);
- va_end(ap);
-
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
- Error **errp, Qcow2Feature *table, uint64_t mask)
+static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
+ uint64_t mask)
{
char *features = g_strdup("");
char *old;
@@ -236,7 +224,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
g_free(old);
}
- report_unsupported(bs, errp, "%s", features);
+ error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
g_free(features);
}
@@ -249,7 +237,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
*/
int qcow2_mark_dirty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t val;
int ret;
@@ -260,12 +248,12 @@ int qcow2_mark_dirty(BlockDriverState *bs)
}
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
- ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+ ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
&val, sizeof(val));
if (ret < 0) {
return ret;
}
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
return ret;
}
@@ -282,7 +270,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
*/
static int qcow2_mark_clean(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
int ret;
@@ -304,7 +292,7 @@ static int qcow2_mark_clean(BlockDriverState *bs)
*/
int qcow2_mark_corrupt(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
return qcow2_update_header(bs);
@@ -316,7 +304,7 @@ int qcow2_mark_corrupt(BlockDriverState *bs)
*/
int qcow2_mark_consistent(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
int ret = bdrv_flush(bs);
@@ -351,7 +339,7 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
uint64_t entries, size_t entry_len)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t size;
/* Use signed INT64_MAX as the maximum even for uint64_t header fields,
@@ -467,6 +455,11 @@ static QemuOptsList qcow2_runtime_opts = {
.type = QEMU_OPT_SIZE,
.help = "Maximum refcount block cache size",
},
+ {
+ .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Clean unused cache entries after this time (in seconds)",
+ },
{ /* end of list */ }
},
};
@@ -482,11 +475,54 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
[QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
};
+static void cache_clean_timer_cb(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ BDRVQcow2State *s = bs->opaque;
+ qcow2_cache_clean_unused(bs, s->l2_table_cache);
+ qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+ timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ (int64_t) s->cache_clean_interval * 1000);
+}
+
+static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
+{
+ BDRVQcow2State *s = bs->opaque;
+ if (s->cache_clean_interval > 0) {
+ s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
+ SCALE_MS, cache_clean_timer_cb,
+ bs);
+ timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ (int64_t) s->cache_clean_interval * 1000);
+ }
+}
+
+static void cache_clean_timer_del(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ if (s->cache_clean_timer) {
+ timer_del(s->cache_clean_timer);
+ timer_free(s->cache_clean_timer);
+ s->cache_clean_timer = NULL;
+ }
+}
+
+static void qcow2_detach_aio_context(BlockDriverState *bs)
+{
+ cache_clean_timer_del(bs);
+}
+
+static void qcow2_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ cache_clean_timer_init(bs, new_context);
+}
+
static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
uint64_t *l2_cache_size,
uint64_t *refcount_cache_size, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t combined_cache_size;
bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
@@ -541,22 +577,246 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
}
}
+typedef struct Qcow2ReopenState {
+ Qcow2Cache *l2_table_cache;
+ Qcow2Cache *refcount_block_cache;
+ bool use_lazy_refcounts;
+ int overlap_check;
+ bool discard_passthrough[QCOW2_DISCARD_MAX];
+ uint64_t cache_clean_interval;
+} Qcow2ReopenState;
+
+static int qcow2_update_options_prepare(BlockDriverState *bs,
+ Qcow2ReopenState *r,
+ QDict *options, int flags,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ QemuOpts *opts = NULL;
+ const char *opt_overlap_check, *opt_overlap_check_template;
+ int overlap_check_template = 0;
+ uint64_t l2_cache_size, refcount_cache_size;
+ int i;
+ Error *local_err = NULL;
+ int ret;
+
+ opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* get L2 table/refcount block cache size from command line options */
+ read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ l2_cache_size /= s->cluster_size;
+ if (l2_cache_size < MIN_L2_CACHE_SIZE) {
+ l2_cache_size = MIN_L2_CACHE_SIZE;
+ }
+ if (l2_cache_size > INT_MAX) {
+ error_setg(errp, "L2 cache size too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ refcount_cache_size /= s->cluster_size;
+ if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
+ refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
+ }
+ if (refcount_cache_size > INT_MAX) {
+ error_setg(errp, "Refcount cache size too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* alloc new L2 table/refcount block cache, flush old one */
+ if (s->l2_table_cache) {
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
+ goto fail;
+ }
+ }
+
+ if (s->refcount_block_cache) {
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "Failed to flush the refcount block cache");
+ goto fail;
+ }
+ }
+
+ r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
+ r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
+ if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
+ error_setg(errp, "Could not allocate metadata caches");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* New interval for cache cleanup timer */
+ r->cache_clean_interval =
+ qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ s->cache_clean_interval);
+ if (r->cache_clean_interval > UINT_MAX) {
+ error_setg(errp, "Cache clean interval too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* lazy-refcounts; flush if going from enabled to disabled */
+ r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
+ (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+ if (r->use_lazy_refcounts && s->qcow_version < 3) {
+ error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
+ "qemu 1.1 compatibility level");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
+ goto fail;
+ }
+ }
+
+ /* Overlap check options */
+ opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
+ opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
+ if (opt_overlap_check_template && opt_overlap_check &&
+ strcmp(opt_overlap_check_template, opt_overlap_check))
+ {
+ error_setg(errp, "Conflicting values for qcow2 options '"
+ QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
+ "' ('%s')", opt_overlap_check, opt_overlap_check_template);
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (!opt_overlap_check) {
+ opt_overlap_check = opt_overlap_check_template ?: "cached";
+ }
+
+ if (!strcmp(opt_overlap_check, "none")) {
+ overlap_check_template = 0;
+ } else if (!strcmp(opt_overlap_check, "constant")) {
+ overlap_check_template = QCOW2_OL_CONSTANT;
+ } else if (!strcmp(opt_overlap_check, "cached")) {
+ overlap_check_template = QCOW2_OL_CACHED;
+ } else if (!strcmp(opt_overlap_check, "all")) {
+ overlap_check_template = QCOW2_OL_ALL;
+ } else {
+ error_setg(errp, "Unsupported value '%s' for qcow2 option "
+ "'overlap-check'. Allowed are any of the following: "
+ "none, constant, cached, all", opt_overlap_check);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ r->overlap_check = 0;
+ for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
+ /* overlap-check defines a template bitmask, but every flag may be
+ * overwritten through the associated boolean option */
+ r->overlap_check |=
+ qemu_opt_get_bool(opts, overlap_bool_option_names[i],
+ overlap_check_template & (1 << i)) << i;
+ }
+
+ r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
+ r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
+ r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
+ flags & BDRV_O_UNMAP);
+ r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
+ r->discard_passthrough[QCOW2_DISCARD_OTHER] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
+
+ ret = 0;
+fail:
+ qemu_opts_del(opts);
+ opts = NULL;
+ return ret;
+}
+
+static void qcow2_update_options_commit(BlockDriverState *bs,
+ Qcow2ReopenState *r)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int i;
+
+ if (s->l2_table_cache) {
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ }
+ if (s->refcount_block_cache) {
+ qcow2_cache_destroy(bs, s->refcount_block_cache);
+ }
+ s->l2_table_cache = r->l2_table_cache;
+ s->refcount_block_cache = r->refcount_block_cache;
+
+ s->overlap_check = r->overlap_check;
+ s->use_lazy_refcounts = r->use_lazy_refcounts;
+
+ for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
+ s->discard_passthrough[i] = r->discard_passthrough[i];
+ }
+
+ if (s->cache_clean_interval != r->cache_clean_interval) {
+ cache_clean_timer_del(bs);
+ s->cache_clean_interval = r->cache_clean_interval;
+ cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
+ }
+}
+
+static void qcow2_update_options_abort(BlockDriverState *bs,
+ Qcow2ReopenState *r)
+{
+ if (r->l2_table_cache) {
+ qcow2_cache_destroy(bs, r->l2_table_cache);
+ }
+ if (r->refcount_block_cache) {
+ qcow2_cache_destroy(bs, r->refcount_block_cache);
+ }
+}
+
+static int qcow2_update_options(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
+{
+ Qcow2ReopenState r = {};
+ int ret;
+
+ ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
+ if (ret >= 0) {
+ qcow2_update_options_commit(bs, &r);
+ } else {
+ qcow2_update_options_abort(bs, &r);
+ }
+
+ return ret;
+}
+
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int len, i;
int ret = 0;
QCowHeader header;
- QemuOpts *opts = NULL;
Error *local_err = NULL;
uint64_t ext_end;
uint64_t l1_vm_state_index;
- const char *opt_overlap_check, *opt_overlap_check_template;
- int overlap_check_template = 0;
- uint64_t l2_cache_size, refcount_cache_size;
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read qcow2 header");
goto fail;
@@ -581,7 +841,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version);
+ error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -631,7 +891,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (header.header_length > sizeof(header)) {
s->unknown_header_fields_size = header.header_length - sizeof(header);
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
- ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+ ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
s->unknown_header_fields_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -661,7 +921,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
void *feature_table = NULL;
qcow2_read_extensions(bs, header.header_length, ext_end,
&feature_table, NULL);
- report_unsupported_feature(bs, errp, feature_table,
+ report_unsupported_feature(errp, feature_table,
s->incompatible_features &
~QCOW2_INCOMPAT_MASK);
ret = -ENOTSUP;
@@ -705,6 +965,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
+ if (bdrv_uses_whitelist() &&
+ s->crypt_method_header == QCOW_CRYPT_AES) {
+ error_report("qcow2 built-in AES encryption is deprecated");
+ error_printf("Support for it will be removed in a future release.\n"
+ "You can use 'qemu-img convert' to switch to an\n"
+ "unencrypted qcow2 image, or a LUKS raw image.\n");
+ }
+
bs->encrypted = 1;
}
@@ -784,14 +1052,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->l1_size > 0) {
- s->l1_table = qemu_try_blockalign(bs->file,
+ s->l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(s->l1_size * sizeof(uint64_t), 512));
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate L1 table");
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -802,55 +1070,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- /* get L2 table/refcount block cache size from command line options */
- opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- l2_cache_size /= s->cluster_size;
- if (l2_cache_size < MIN_L2_CACHE_SIZE) {
- l2_cache_size = MIN_L2_CACHE_SIZE;
- }
- if (l2_cache_size > INT_MAX) {
- error_setg(errp, "L2 cache size too big");
- ret = -EINVAL;
- goto fail;
- }
-
- refcount_cache_size /= s->cluster_size;
- if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
- refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
- }
- if (refcount_cache_size > INT_MAX) {
- error_setg(errp, "Refcount cache size too big");
- ret = -EINVAL;
- goto fail;
- }
-
- /* alloc L2 table/refcount block cache */
- s->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
- s->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
- if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
- error_setg(errp, "Could not allocate metadata caches");
- ret = -ENOMEM;
+ /* Parse driver-specific options */
+ ret = qcow2_update_options(bs, options, flags, errp);
+ if (ret < 0) {
goto fail;
}
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
- s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size + 512);
if (s->cluster_data == NULL) {
error_setg(errp, "Could not allocate temporary cluster buffer");
@@ -887,7 +1115,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- ret = bdrv_pread(bs->file, header.backing_file_offset,
+ ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -908,7 +1136,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Clear unknown autoclear feature bits */
- if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) {
+ if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) {
s->autoclear_features = 0;
ret = qcow2_update_header(bs);
if (ret < 0) {
@@ -921,7 +1149,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
qemu_co_mutex_init(&s->lock);
/* Repair image if dirty */
- if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only &&
+ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
BdrvCheckResult result = {0};
@@ -932,70 +1160,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- /* Enable lazy_refcounts according to image and command line options */
- s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-
- s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
- s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
- s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
- flags & BDRV_O_UNMAP);
- s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
- s->discard_passthrough[QCOW2_DISCARD_OTHER] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
- opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
- opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
- if (opt_overlap_check_template && opt_overlap_check &&
- strcmp(opt_overlap_check_template, opt_overlap_check))
- {
- error_setg(errp, "Conflicting values for qcow2 options '"
- QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
- "' ('%s')", opt_overlap_check, opt_overlap_check_template);
- ret = -EINVAL;
- goto fail;
- }
- if (!opt_overlap_check) {
- opt_overlap_check = opt_overlap_check_template ?: "cached";
- }
-
- if (!strcmp(opt_overlap_check, "none")) {
- overlap_check_template = 0;
- } else if (!strcmp(opt_overlap_check, "constant")) {
- overlap_check_template = QCOW2_OL_CONSTANT;
- } else if (!strcmp(opt_overlap_check, "cached")) {
- overlap_check_template = QCOW2_OL_CACHED;
- } else if (!strcmp(opt_overlap_check, "all")) {
- overlap_check_template = QCOW2_OL_ALL;
- } else {
- error_setg(errp, "Unsupported value '%s' for qcow2 option "
- "'overlap-check'. Allowed are either of the following: "
- "none, constant, cached, all", opt_overlap_check);
- ret = -EINVAL;
- goto fail;
- }
-
- s->overlap_check = 0;
- for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
- /* overlap-check defines a template bitmask, but every flag may be
- * overwritten through the associated boolean option */
- s->overlap_check |=
- qemu_opt_get_bool(opts, overlap_bool_option_names[i],
- overlap_check_template & (1 << i)) << i;
- }
-
- qemu_opts_del(opts);
- opts = NULL;
-
- if (s->use_lazy_refcounts && s->qcow_version < 3) {
- error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
- "qemu 1.1 compatibility level");
- ret = -EINVAL;
- goto fail;
- }
-
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -1005,7 +1169,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
fail:
- qemu_opts_del(opts);
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
@@ -1013,6 +1176,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
+ cache_clean_timer_del(bs);
if (s->l2_table_cache) {
qcow2_cache_destroy(bs, s->l2_table_cache);
}
@@ -1026,14 +1190,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->cluster_sectors;
}
static int qcow2_set_key(BlockDriverState *bs, const char *key)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint8_t keybuf[16];
int len, i;
Error *err = NULL;
@@ -1066,32 +1230,104 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
return 0;
}
-/* We have no actual commit/abort logic for qcow2, but we need to write out any
- * unwritten data if we reopen read-only. */
static int qcow2_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
+ Qcow2ReopenState *r;
int ret;
+ r = g_new0(Qcow2ReopenState, 1);
+ state->opaque = r;
+
+ ret = qcow2_update_options_prepare(state->bs, r, state->options,
+ state->flags, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* We need to write out any unwritten data if we reopen read-only. */
if ((state->flags & BDRV_O_RDWR) == 0) {
ret = bdrv_flush(state->bs);
if (ret < 0) {
- return ret;
+ goto fail;
}
ret = qcow2_mark_clean(state->bs);
if (ret < 0) {
- return ret;
+ goto fail;
}
}
return 0;
+
+fail:
+ qcow2_update_options_abort(state->bs, r);
+ g_free(r);
+ return ret;
+}
+
+static void qcow2_reopen_commit(BDRVReopenState *state)
+{
+ qcow2_update_options_commit(state->bs, state->opaque);
+ g_free(state->opaque);
+}
+
+static void qcow2_reopen_abort(BDRVReopenState *state)
+{
+ qcow2_update_options_abort(state->bs, state->opaque);
+ g_free(state->opaque);
+}
+
+static void qcow2_join_options(QDict *options, QDict *old_options)
+{
+ bool has_new_overlap_template =
+ qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
+ qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
+ bool has_new_total_cache_size =
+ qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
+ bool has_all_cache_options;
+
+ /* New overlap template overrides all old overlap options */
+ if (has_new_overlap_template) {
+ qdict_del(old_options, QCOW2_OPT_OVERLAP);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
+ }
+
+ /* New total cache size overrides all old options */
+ if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
+ qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
+ qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
+ }
+
+ qdict_join(options, old_options, false);
+
+ /*
+ * If after merging all cache size options are set, an old total size is
+ * overwritten. Do keep all options, however, if all three are new. The
+ * resulting error message is what we want to happen.
+ */
+ has_all_cache_options =
+ qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
+ qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
+ qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
+
+ if (has_all_cache_options && !has_new_total_cache_size) {
+ qdict_del(options, QCOW2_OPT_CACHE_SIZE);
+ }
}
static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
int64_t status = 0;
@@ -1108,6 +1344,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
!s->cipher) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *file = bs->file->bs;
status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
if (ret == QCOW2_CLUSTER_ZERO) {
@@ -1138,7 +1375,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int index_in_cluster, n1;
int ret;
int cur_nr_sectors; /* number of sectors in current iteration */
@@ -1175,9 +1412,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (bs->backing_hd) {
+ if (bs->backing) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+ n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
sector_num, cur_nr_sectors);
if (n1 > 0) {
QEMUIOVector local_qiov;
@@ -1188,7 +1425,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ ret = bdrv_co_readv(bs->backing->bs, sector_num,
n1, &local_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1235,8 +1472,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
*/
if (!cluster_data) {
cluster_data =
- qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
+ qemu_try_blockalign(bs->file->bs,
+ QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
goto fail;
@@ -1252,7 +1490,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
+ ret = bdrv_co_readv(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
cur_nr_sectors, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1300,7 +1538,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int remaining_sectors,
QEMUIOVector *qiov)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int index_in_cluster;
int ret;
int cur_nr_sectors; /* number of sectors in current iteration */
@@ -1349,7 +1587,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
Error *err = NULL;
assert(s->cipher);
if (!cluster_data) {
- cluster_data = qemu_try_blockalign(bs->file,
+ cluster_data = qemu_try_blockalign(bs->file->bs,
QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size);
if (cluster_data == NULL) {
@@ -1386,7 +1624,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
(cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file,
+ ret = bdrv_co_writev(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
cur_nr_sectors, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1444,33 +1682,44 @@ fail:
return ret;
}
+static int qcow2_inactivate(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret, result = 0;
+
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the L2 table cache: %s",
+ strerror(-ret));
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the refcount block cache: %s",
+ strerror(-ret));
+ }
+
+ if (result == 0) {
+ qcow2_mark_clean(bs);
+ }
+
+ return result;
+}
+
static void qcow2_close(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
- if (!(bs->open_flags & BDRV_O_INCOMING)) {
- int ret1, ret2;
-
- ret1 = qcow2_cache_flush(bs, s->l2_table_cache);
- ret2 = qcow2_cache_flush(bs, s->refcount_block_cache);
-
- if (ret1) {
- error_report("Failed to flush the L2 table cache: %s",
- strerror(-ret1));
- }
- if (ret2) {
- error_report("Failed to flush the refcount block cache: %s",
- strerror(-ret2));
- }
-
- if (!ret1 && !ret2) {
- qcow2_mark_clean(bs);
- }
+ if (!(s->flags & BDRV_O_INACTIVE)) {
+ qcow2_inactivate(bs);
}
+ cache_clean_timer_del(bs);
qcow2_cache_destroy(bs, s->l2_table_cache);
qcow2_cache_destroy(bs, s->refcount_block_cache);
@@ -1491,7 +1740,7 @@ static void qcow2_close(BlockDriverState *bs)
static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int flags = s->flags;
QCryptoCipher *cipher = NULL;
QDict *options;
@@ -1508,24 +1757,27 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
qcow2_close(bs);
- bdrv_invalidate_cache(bs->file, &local_err);
+ bdrv_invalidate_cache(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
+ bs->drv = NULL;
return;
}
- memset(s, 0, sizeof(BDRVQcowState));
+ memset(s, 0, sizeof(BDRVQcow2State));
options = qdict_clone_shallow(bs->options);
+ flags &= ~BDRV_O_INACTIVE;
ret = qcow2_open(bs, options, flags, &local_err);
QDECREF(options);
if (local_err) {
- error_setg(errp, "Could not reopen qcow2 layer: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_propagate(errp, local_err);
+ error_prepend(errp, "Could not reopen qcow2 layer: ");
+ bs->drv = NULL;
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
+ bs->drv = NULL;
return;
}
@@ -1561,7 +1813,7 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
*/
int qcow2_update_header(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowHeader *header;
char *buf;
size_t buflen = s->cluster_size;
@@ -1653,31 +1905,33 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Feature table */
- Qcow2Feature features[] = {
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
- .name = "dirty bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
- .name = "corrupt bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_COMPATIBLE,
- .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
- .name = "lazy refcounts",
- },
- };
+ if (s->qcow_version >= 3) {
+ Qcow2Feature features[] = {
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
+ .name = "dirty bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
+ .name = "corrupt bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+ .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+ .name = "lazy refcounts",
+ },
+ };
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
- features, sizeof(features), buflen);
- if (ret < 0) {
- goto fail;
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
}
- buf += ret;
- buflen -= ret;
/* Keep unknown header extensions */
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
@@ -1716,7 +1970,7 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Write the new header */
- ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+ ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -1730,7 +1984,11 @@ fail:
static int qcow2_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
+
+ if (backing_file && strlen(backing_file) > 1023) {
+ return -EINVAL;
+ }
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
@@ -1796,7 +2054,8 @@ static int preallocate(BlockDriverState *bs)
if (host_offset != 0) {
uint8_t buf[BDRV_SECTOR_SIZE];
memset(buf, 0, BDRV_SECTOR_SIZE);
- ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
+ ret = bdrv_write(bs->file->bs,
+ (host_offset >> BDRV_SECTOR_BITS) + num - 1,
buf, 1);
if (ret < 0) {
return ret;
@@ -1812,8 +2071,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
QemuOpts *opts, int version, int refcount_order,
Error **errp)
{
- /* Calculate cluster_bits */
int cluster_bits;
+ QDict *options;
+
+ /* Calculate cluster_bits */
cluster_bits = ctz32(cluster_size);
if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
(1 << cluster_bits) != cluster_size)
@@ -1835,7 +2096,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
* size for any qcow2 image.
*/
- BlockDriverState* bs;
+ BlockBackend *blk;
QCowHeader *header;
uint64_t* refcount_table;
Error *local_err = NULL;
@@ -1910,14 +2171,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Write the header */
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
header = g_malloc0(cluster_size);
@@ -1945,7 +2207,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
- ret = bdrv_pwrite(bs, 0, header, cluster_size);
+ ret = blk_pwrite(blk, 0, header, cluster_size);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -1955,7 +2217,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
+ ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@@ -1963,23 +2225,25 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/*
* And now open the image and make it consistent first (i.e. increase the
* refcount of the cluster that is occupied by the header and the refcount
* table)
*/
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- &bdrv_qcow2, &local_err);
- if (ret < 0) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow2"));
+ blk = blk_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
- ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
+ ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
@@ -1990,8 +2254,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
abort();
}
+ /* Create a full header (including things like feature table) */
+ ret = qcow2_update_header(blk_bs(blk));
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not update qcow2 header");
+ goto out;
+ }
+
/* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not resize image");
goto out;
@@ -1999,7 +2270,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Want a backing file? There you go.*/
if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
"with format '%s'", backing_file, backing_format);
@@ -2009,9 +2280,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* And if we're supposed to preallocate metadata, do that now */
if (prealloc != PREALLOC_MODE_OFF) {
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = blk_bs(blk)->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
+ ret = preallocate(blk_bs(blk));
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -2019,22 +2290,24 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
- &bdrv_qcow2, &local_err);
- if (local_err) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow2"));
+ blk = blk_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
ret = 0;
out:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
return ret;
}
@@ -2066,7 +2339,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
DEFAULT_CLUSTER_SIZE);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
- PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+ PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -2142,7 +2415,7 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
/* Emulate misaligned zero writes */
if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
@@ -2162,7 +2435,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
int ret;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
@@ -2173,7 +2446,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t new_l1_size;
int ret;
@@ -2202,7 +2475,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
/* write updated header.size */
offset = cpu_to_be64(offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
&offset, sizeof(uint64_t));
if (ret < 0) {
return ret;
@@ -2217,7 +2490,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
@@ -2226,8 +2499,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (nb_sectors == 0) {
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
- cluster_offset = bdrv_getlength(bs->file);
- return bdrv_truncate(bs->file, cluster_offset);
+ cluster_offset = bdrv_getlength(bs->file->bs);
+ return bdrv_truncate(bs->file->bs, cluster_offset);
}
if (nb_sectors != s->cluster_sectors) {
@@ -2294,7 +2567,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
if (ret < 0) {
goto fail;
}
@@ -2308,7 +2581,7 @@ fail:
static int make_completely_empty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret, l1_clusters;
int64_t offset;
uint64_t *new_reftable = NULL;
@@ -2343,7 +2616,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* After this call, neither the in-memory nor the on-disk refcount
* information accurately describe the actual references */
- ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
l1_clusters * s->cluster_sectors, 0);
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2357,7 +2630,7 @@ static int make_completely_empty(BlockDriverState *bs)
* overwrite parts of the existing refcount and L1 table, which is not
* an issue because the dirty flag is set, complete data loss is in fact
* desired and partial data loss is consequently fine as well */
- ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
+ ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
(2 + l1_clusters) * s->cluster_size /
BDRV_SECTOR_SIZE, 0);
/* This call (even if it failed overall) may have overwritten on-disk
@@ -2377,7 +2650,7 @@ static int make_completely_empty(BlockDriverState *bs)
cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2408,7 +2681,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* Enter the first refblock into the reftable */
rt_entry = cpu_to_be64(2 * s->cluster_size);
- ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
+ ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
&rt_entry, sizeof(rt_entry));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2433,7 +2706,7 @@ static int make_completely_empty(BlockDriverState *bs)
goto fail;
}
- ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+ ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -2456,7 +2729,7 @@ fail:
static int qcow2_make_empty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start_sector;
int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
int l1_clusters, ret = 0;
@@ -2497,7 +2770,7 @@ static int qcow2_make_empty(BlockDriverState *bs)
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
qemu_co_mutex_lock(&s->lock);
@@ -2521,7 +2794,7 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bdi->unallocated_blocks_are_zero = true;
bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3);
bdi->cluster_size = s->cluster_size;
@@ -2531,22 +2804,20 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
*spec_info = (ImageInfoSpecific){
- .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
- {
- .qcow2 = g_new(ImageInfoSpecificQCow2, 1),
- },
+ .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
+ .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
};
if (s->qcow_version == 2) {
- *spec_info->qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
.compat = g_strdup("0.10"),
.refcount_bits = s->refcount_bits,
};
} else if (s->qcow_version == 3) {
- *spec_info->qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
.compat = g_strdup("1.1"),
.lazy_refcounts = s->compatible_features &
QCOW2_COMPAT_LAZY_REFCOUNTS,
@@ -2556,6 +2827,10 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
.has_corrupt = true,
.refcount_bits = s->refcount_bits,
};
+ } else {
+ /* if this assertion fails, this probably means a new version was
+ * added without having it covered here */
+ assert(false);
}
return spec_info;
@@ -2564,11 +2839,11 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
#if 0
static void dump_refcounts(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t nb_clusters, k, k1, size;
int refcount;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
nb_clusters = size_to_clusters(s, size);
for(k = 0; k < nb_clusters;) {
k1 = k;
@@ -2585,7 +2860,7 @@ static void dump_refcounts(BlockDriverState *bs)
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t total_sectors = bs->total_sectors;
bool zero_beyond_eof = bs->zero_beyond_eof;
int ret;
@@ -2606,7 +2881,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bool zero_beyond_eof = bs->zero_beyond_eof;
int ret;
@@ -2623,9 +2898,9 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
* have to be removed.
*/
static int qcow2_downgrade(BlockDriverState *bs, int target_version,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int current_version = s->qcow_version;
int ret;
@@ -2638,13 +2913,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
}
if (s->refcount_order != 4) {
- /* we would have to convert the image to a refcount_order == 4 image
- * here; however, since qemu (at the time of writing this) does not
- * support anything different than 4 anyway, there is no point in doing
- * so right now; however, we should error out (if qemu supports this in
- * the future and this code has not been adapted) */
- error_report("qcow2_downgrade: Image refcount orders other than 4 are "
- "currently not supported.");
+ error_report("compat=0.10 requires refcount_bits=16");
return -ENOTSUP;
}
@@ -2672,7 +2941,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
/* clearing autoclear features is trivial */
s->autoclear_features = 0;
- ret = qcow2_expand_zero_clusters(bs, status_cb);
+ ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
if (ret < 0) {
return ret;
}
@@ -2686,10 +2955,81 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
return 0;
}
+typedef enum Qcow2AmendOperation {
+ /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
+ * statically initialized to so that the helper CB can discern the first
+ * invocation from an operation change */
+ QCOW2_NO_OPERATION = 0,
+
+ QCOW2_CHANGING_REFCOUNT_ORDER,
+ QCOW2_DOWNGRADING,
+} Qcow2AmendOperation;
+
+typedef struct Qcow2AmendHelperCBInfo {
+ /* The code coordinating the amend operations should only modify
+ * these four fields; the rest will be managed by the CB */
+ BlockDriverAmendStatusCB *original_status_cb;
+ void *original_cb_opaque;
+
+ Qcow2AmendOperation current_operation;
+
+ /* Total number of operations to perform (only set once) */
+ int total_operations;
+
+ /* The following fields are managed by the CB */
+
+ /* Number of operations completed */
+ int operations_completed;
+
+ /* Cumulative offset of all completed operations */
+ int64_t offset_completed;
+
+ Qcow2AmendOperation last_operation;
+ int64_t last_work_size;
+} Qcow2AmendHelperCBInfo;
+
+static void qcow2_amend_helper_cb(BlockDriverState *bs,
+ int64_t operation_offset,
+ int64_t operation_work_size, void *opaque)
+{
+ Qcow2AmendHelperCBInfo *info = opaque;
+ int64_t current_work_size;
+ int64_t projected_work_size;
+
+ if (info->current_operation != info->last_operation) {
+ if (info->last_operation != QCOW2_NO_OPERATION) {
+ info->offset_completed += info->last_work_size;
+ info->operations_completed++;
+ }
+
+ info->last_operation = info->current_operation;
+ }
+
+ assert(info->total_operations > 0);
+ assert(info->operations_completed < info->total_operations);
+
+ info->last_work_size = operation_work_size;
+
+ current_work_size = info->offset_completed + operation_work_size;
+
+ /* current_work_size is the total work size for (operations_completed + 1)
+ * operations (which includes this one), so multiply it by the number of
+ * operations not covered and divide it by the number of operations
+ * covered to get a projection for the operations not covered */
+ projected_work_size = current_work_size * (info->total_operations -
+ info->operations_completed - 1)
+ / (info->operations_completed + 1);
+
+ info->original_status_cb(bs, info->offset_completed + operation_offset,
+ current_work_size + projected_work_size,
+ info->original_cb_opaque);
+}
+
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
uint64_t new_size = 0;
const char *backing_file = NULL, *backing_format = NULL;
@@ -2697,8 +3037,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
const char *compat = NULL;
uint64_t cluster_size = s->cluster_size;
bool encrypt;
+ int refcount_bits = s->refcount_bits;
int ret;
QemuOptDesc *desc = opts->list->desc;
+ Qcow2AmendHelperCBInfo helper_cb_info;
while (desc && desc->name) {
if (!qemu_opt_find(opts, desc->name)) {
@@ -2716,11 +3058,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
} else if (!strcmp(compat, "1.1")) {
new_version = 3;
} else {
- fprintf(stderr, "Unknown compatibility level %s.\n", compat);
+ error_report("Unknown compatibility level %s", compat);
return -EINVAL;
}
} else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
- fprintf(stderr, "Cannot change preallocation mode.\n");
+ error_report("Cannot change preallocation mode");
return -ENOTSUP;
} else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
@@ -2733,47 +3075,74 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
!!s->cipher);
if (encrypt != !!s->cipher) {
- fprintf(stderr, "Changing the encryption flag is not "
- "supported.\n");
+ error_report("Changing the encryption flag is not supported");
return -ENOTSUP;
}
} else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
cluster_size);
if (cluster_size != s->cluster_size) {
- fprintf(stderr, "Changing the cluster size is not "
- "supported.\n");
+ error_report("Changing the cluster size is not supported");
return -ENOTSUP;
}
} else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
lazy_refcounts);
} else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
- error_report("Cannot change refcount entry width");
- return -ENOTSUP;
+ refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
+ refcount_bits);
+
+ if (refcount_bits <= 0 || refcount_bits > 64 ||
+ !is_power_of_2(refcount_bits))
+ {
+ error_report("Refcount width must be a power of two and may "
+ "not exceed 64 bits");
+ return -EINVAL;
+ }
} else {
- /* if this assertion fails, this probably means a new option was
+ /* if this point is reached, this probably means a new option was
* added without having it covered here */
- assert(false);
+ abort();
}
desc++;
}
- if (new_version != old_version) {
- if (new_version > old_version) {
- /* Upgrade */
- s->qcow_version = new_version;
- ret = qcow2_update_header(bs);
- if (ret < 0) {
- s->qcow_version = old_version;
- return ret;
- }
- } else {
- ret = qcow2_downgrade(bs, new_version, status_cb);
- if (ret < 0) {
- return ret;
- }
+ helper_cb_info = (Qcow2AmendHelperCBInfo){
+ .original_status_cb = status_cb,
+ .original_cb_opaque = cb_opaque,
+ .total_operations = (new_version < old_version)
+ + (s->refcount_bits != refcount_bits)
+ };
+
+ /* Upgrade first (some features may require compat=1.1) */
+ if (new_version > old_version) {
+ s->qcow_version = new_version;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->qcow_version = old_version;
+ return ret;
+ }
+ }
+
+ if (s->refcount_bits != refcount_bits) {
+ int refcount_order = ctz32(refcount_bits);
+ Error *local_error = NULL;
+
+ if (new_version < 3 && refcount_bits != 16) {
+ error_report("Different refcount widths than 16 bits require "
+ "compatibility level 1.1 or above (use compat=1.1 or "
+ "greater)");
+ return -EINVAL;
+ }
+
+ helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
+ ret = qcow2_change_refcount_order(bs, refcount_order,
+ &qcow2_amend_helper_cb,
+ &helper_cb_info, &local_error);
+ if (ret < 0) {
+ error_report_err(local_error);
+ return ret;
}
}
@@ -2788,9 +3157,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
if (s->use_lazy_refcounts != lazy_refcounts) {
if (lazy_refcounts) {
- if (s->qcow_version < 3) {
- fprintf(stderr, "Lazy refcounts only supported with compatibility "
- "level 1.1 and above (use compat=1.1 or greater)\n");
+ if (new_version < 3) {
+ error_report("Lazy refcounts only supported with compatibility "
+ "level 1.1 and above (use compat=1.1 or greater)");
return -EINVAL;
}
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
@@ -2824,6 +3193,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
}
+ /* Downgrade last (so unsupported features can be removed before) */
+ if (new_version < old_version) {
+ helper_cb_info.current_operation = QCOW2_DOWNGRADING;
+ ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
+ &helper_cb_info);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
return 0;
}
@@ -2836,7 +3215,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
int64_t size, const char *message_format, ...)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
const char *node_name;
char *message;
va_list ap;
@@ -2937,11 +3316,14 @@ static QemuOptsList qcow2_create_opts = {
BlockDriver bdrv_qcow2 = {
.format_name = "qcow2",
- .instance_size = sizeof(BDRVQcowState),
+ .instance_size = sizeof(BDRVQcow2State),
.bdrv_probe = qcow2_probe,
.bdrv_open = qcow2_open,
.bdrv_close = qcow2_close,
.bdrv_reopen_prepare = qcow2_reopen_prepare,
+ .bdrv_reopen_commit = qcow2_reopen_commit,
+ .bdrv_reopen_abort = qcow2_reopen_abort,
+ .bdrv_join_options = qcow2_join_options,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
@@ -2973,10 +3355,14 @@ BlockDriver bdrv_qcow2 = {
.bdrv_refresh_limits = qcow2_refresh_limits,
.bdrv_invalidate_cache = qcow2_invalidate_cache,
+ .bdrv_inactivate = qcow2_inactivate,
.create_opts = &qcow2_create_opts,
.bdrv_check = qcow2_check,
.bdrv_amend_options = qcow2_amend_options,
+
+ .bdrv_detach_aio_context = qcow2_detach_aio_context,
+ .bdrv_attach_aio_context = qcow2_attach_aio_context,
};
static void bdrv_qcow2_init(void)
diff --git a/qemu/block/qcow2.h b/qemu/block/qcow2.h
index 72e132838..a063a3c1a 100644
--- a/qemu/block/qcow2.h
+++ b/qemu/block/qcow2.h
@@ -26,7 +26,7 @@
#define BLOCK_QCOW2_H
#include "crypto/cipher.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2
@@ -96,6 +96,7 @@
#define QCOW2_OPT_CACHE_SIZE "cache-size"
#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
+#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
typedef struct QCowHeader {
uint32_t magic;
@@ -221,7 +222,7 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
typedef void Qcow2SetRefcountFunc(void *refcount_array,
uint64_t index, uint64_t value);
-typedef struct BDRVQcowState {
+typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
int cluster_sectors;
@@ -239,6 +240,8 @@ typedef struct BDRVQcowState {
Qcow2Cache* l2_table_cache;
Qcow2Cache* refcount_block_cache;
+ QEMUTimer *cache_clean_timer;
+ unsigned cache_clean_interval;
uint8_t *cluster_cache;
uint8_t *cluster_data;
@@ -290,9 +293,7 @@ typedef struct BDRVQcowState {
* override) */
char *image_backing_file;
char *image_backing_format;
-} BDRVQcowState;
-
-struct QCowAIOCB;
+} BDRVQcow2State;
typedef struct Qcow2COWRegion {
/**
@@ -402,28 +403,28 @@ typedef enum QCow2MetadataOverlap {
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
-static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
+static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset)
{
return offset & ~(s->cluster_size - 1);
}
-static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
+static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset)
{
return offset & (s->cluster_size - 1);
}
-static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
+static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size)
{
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}
-static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
+static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
{
int shift = s->cluster_bits + s->l2_bits;
return (size + (1ULL << shift) - 1) >> shift;
}
-static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
+static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
{
return (offset >> s->cluster_bits) & (s->l2_size - 1);
}
@@ -434,12 +435,12 @@ static inline int64_t align_offset(int64_t offset, int n)
return offset;
}
-static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
+static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
}
-static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
+static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
{
return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
}
@@ -458,7 +459,7 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
}
/* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s)
{
return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
}
@@ -506,8 +507,8 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
enum qcow2_discard_type type);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters);
+int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size,
@@ -528,13 +529,17 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
+int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp);
+
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-int qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc, Error **errp);
@@ -552,7 +557,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb);
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
@@ -581,6 +587,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
diff --git a/qemu/block/qed-check.c b/qemu/block/qed-check.c
index 36ecd290d..622f30897 100644
--- a/qemu/block/qed-check.c
+++ b/qemu/block/qed-check.c
@@ -11,6 +11,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
typedef struct {
diff --git a/qemu/block/qed-cluster.c b/qemu/block/qed-cluster.c
index f64b2af8f..c24e75616 100644
--- a/qemu/block/qed-cluster.c
+++ b/qemu/block/qed-cluster.c
@@ -12,6 +12,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
/**
diff --git a/qemu/block/qed-gencb.c b/qemu/block/qed-gencb.c
index b817a8bf5..faf8ecc84 100644
--- a/qemu/block/qed-gencb.c
+++ b/qemu/block/qed-gencb.c
@@ -11,6 +11,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
diff --git a/qemu/block/qed-l2-cache.c b/qemu/block/qed-l2-cache.c
index e9b2aae44..5cba79465 100644
--- a/qemu/block/qed-l2-cache.c
+++ b/qemu/block/qed-l2-cache.c
@@ -50,6 +50,7 @@
* table will be deleted in favor of the existing cache entry.
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "qed.h"
diff --git a/qemu/block/qed-table.c b/qemu/block/qed-table.c
index 513aa872c..802945f5e 100644
--- a/qemu/block/qed-table.c
+++ b/qemu/block/qed-table.c
@@ -12,6 +12,7 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "qed.h"
@@ -63,7 +64,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
- bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+ bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
}
@@ -152,7 +153,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
/* Adjust for offset into table */
offset += start * sizeof(uint64_t);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);
diff --git a/qemu/block/qed.c b/qemu/block/qed.c
index 954ed007c..0af52741d 100644
--- a/qemu/block/qed.c
+++ b/qemu/block/qed.c
@@ -12,11 +12,14 @@
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/timer.h"
#include "trace.h"
#include "qed.h"
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"
+#include "sysemu/block-backend.h"
static const AIOCBInfo qed_aiocb_info = {
.aiocb_size = sizeof(QEDAIOCB),
@@ -82,7 +85,7 @@ int qed_write_header_sync(BDRVQEDState *s)
int ret;
qed_header_cpu_to_le(&s->header, &le);
- ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+ ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
if (ret != sizeof(le)) {
return ret;
}
@@ -119,7 +122,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
/* Update header */
qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
- bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+ bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
write_header_cb->nsectors, qed_write_header_cb,
write_header_cb);
}
@@ -152,7 +155,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
write_header_cb->iov.iov_len = len;
qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
- bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+ bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
qed_write_header_read_cb, write_header_cb);
}
@@ -344,7 +347,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
* migration.
*/
timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+ NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT);
}
/* It's okay to call this multiple times or when no timer is started */
@@ -354,12 +357,6 @@ static void qed_cancel_need_check_timer(BDRVQEDState *s)
timer_del(s->need_check_timer);
}
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- s->bs = bs;
-}
-
static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -392,7 +389,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
s->bs = bs;
QSIMPLEQ_INIT(&s->allocating_write_reqs);
- ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+ ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
if (ret < 0) {
return ret;
}
@@ -404,11 +401,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
- char buf[64];
- snprintf(buf, sizeof(buf), "%" PRIx64,
- s->header.features & ~QED_FEATURE_MASK);
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "QED", buf);
+ error_setg(errp, "Unsupported QED features: %" PRIx64,
+ s->header.features & ~QED_FEATURE_MASK);
return -ENOTSUP;
}
if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -416,7 +410,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Round down file size to the last cluster */
- file_size = bdrv_getlength(bs->file);
+ file_size = bdrv_getlength(bs->file->bs);
if (file_size < 0) {
return file_size;
}
@@ -452,7 +446,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+ ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
s->header.backing_filename_size, bs->backing_file,
sizeof(bs->backing_file));
if (ret < 0) {
@@ -471,7 +465,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+ !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@@ -480,7 +474,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
/* From here on only known autoclear feature bits are valid */
- bdrv_flush(bs->file);
+ bdrv_flush(bs->file->bs);
}
s->l1_table = qed_alloc_table(s);
@@ -498,8 +492,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
* potentially inconsistent images to be opened read-only. This can
* aid data recovery from an otherwise inconsistent image.
*/
- if (!bdrv_is_read_only(bs->file) &&
- !(flags & BDRV_O_INCOMING)) {
+ if (!bdrv_is_read_only(bs->file->bs) &&
+ !(flags & BDRV_O_INACTIVE)) {
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
@@ -541,7 +535,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
bdrv_qed_detach_aio_context(bs);
/* Ensure writes reach stable storage */
- bdrv_flush(bs->file);
+ bdrv_flush(bs->file->bs);
/* Clean shutdown, no check required on next open */
if (s->header.features & QED_F_NEED_CHECK) {
@@ -573,7 +567,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
- BlockDriverState *bs;
+ BlockBackend *blk;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -581,17 +575,17 @@ static int qed_create(const char *filename, uint32_t cluster_size,
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
+ ret = blk_truncate(blk, 0);
if (ret < 0) {
goto out;
}
@@ -607,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+ ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
+ ret = blk_pwrite(blk, sizeof(le_header), backing_file,
+ header.backing_filename_size);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+ ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
if (ret < 0) {
goto out;
}
@@ -626,7 +620,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
- bdrv_unref(bs);
+ blk_unref(blk);
return ret;
}
@@ -686,6 +680,7 @@ typedef struct {
uint64_t pos;
int64_t status;
int *pnum;
+ BlockDriverState **file;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
@@ -697,6 +692,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
case QED_CLUSTER_FOUND:
offset |= qed_offset_into_cluster(s, cb->pos);
cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+ *cb->file = cb->bs->file->bs;
break;
case QED_CLUSTER_ZERO:
cb->status = BDRV_BLOCK_ZERO;
@@ -718,7 +714,8 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
BDRVQEDState *s = bs->opaque;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
@@ -727,6 +724,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
.status = BDRV_BLOCK_OFFSET_MASK,
.pnum = pnum,
+ .file = file,
};
QEDRequest request = { .l2_table = NULL };
@@ -772,8 +770,8 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
/* If there is a backing file, get its length. Treat the absence of a
* backing file like a zero length backing file.
*/
- if (s->bs->backing_hd) {
- int64_t l = bdrv_getlength(s->bs->backing_hd);
+ if (s->bs->backing) {
+ int64_t l = bdrv_getlength(s->bs->backing->bs);
if (l < 0) {
cb(opaque, l);
return;
@@ -802,7 +800,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
@@ -839,7 +837,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
&copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_copy_from_backing_file_cb, copy_cb);
}
@@ -1055,7 +1053,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+ if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
qed_aio_complete(acb, -EIO);
}
}
@@ -1081,7 +1079,7 @@ static void qed_aio_write_main(void *opaque, int ret)
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
next_fn = qed_aio_next_io;
} else {
- if (s->bs->backing_hd) {
+ if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
} else {
next_fn = qed_aio_write_l2_update_cb;
@@ -1089,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
next_fn, acb);
}
@@ -1139,7 +1137,7 @@ static void qed_aio_write_prefill(void *opaque, int ret)
static bool qed_should_set_need_check(BDRVQEDState *s)
{
/* The flush before L2 update path ensures consistency */
- if (s->bs->backing_hd) {
+ if (s->bs->backing) {
return false;
}
@@ -1321,7 +1319,7 @@ static void qed_aio_read_data(void *opaque, int ret,
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
return;
@@ -1443,7 +1441,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
struct iovec iov;
/* Refuse if there are untouched backing file sectors */
- if (bs->backing_hd) {
+ if (bs->backing) {
if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
return -ENOTSUP;
}
@@ -1580,7 +1578,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
}
/* Write new header */
- ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+ ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
g_free(buffer);
if (ret == 0) {
memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1596,7 +1594,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
- bdrv_invalidate_cache(bs->file, &local_err);
+ bdrv_invalidate_cache(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -1605,9 +1603,8 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
- error_setg(errp, "Could not reopen qed layer: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_propagate(errp, local_err);
+ error_prepend(errp, "Could not reopen qed layer: ");
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qed layer");
@@ -1664,7 +1661,6 @@ static BlockDriver bdrv_qed = {
.supports_backing = true,
.bdrv_probe = bdrv_qed_probe,
- .bdrv_rebind = bdrv_qed_rebind,
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
diff --git a/qemu/block/qed.h b/qemu/block/qed.h
index 615e676fc..22b319875 100644
--- a/qemu/block/qed.h
+++ b/qemu/block/qed.h
@@ -16,6 +16,7 @@
#define BLOCK_QED_H
#include "block/block_int.h"
+#include "qemu/cutils.h"
/* The layout of a QED file is as follows:
*
diff --git a/qemu/block/quorum.c b/qemu/block/quorum.c
index 2f6c45f76..da15465a9 100644
--- a/qemu/block/quorum.c
+++ b/qemu/block/quorum.c
@@ -13,6 +13,7 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
@@ -64,7 +65,7 @@ typedef struct QuorumVotes {
/* the following structure holds the state of one quorum instance */
typedef struct BDRVQuorumState {
- BlockDriverState **bs; /* children BlockDriverStates */
+ BdrvChild **children; /* children BlockDriverStates */
int num_children; /* children count */
int threshold; /* if less than threshold children reads gave the
* same result a quorum error occurs.
@@ -214,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+ int nb_sectors, char *node_name, int ret)
{
const char *msg = NULL;
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
- acb->sector_num, acb->nb_sectors, &error_abort);
+
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+ sector_num, nb_sectors, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
@@ -283,9 +286,19 @@ static void quorum_aio_cb(void *opaque, int ret)
BDRVQuorumState *s = acb->common.bs->opaque;
bool rewrite = false;
+ if (ret == 0) {
+ acb->success_count++;
+ } else {
+ QuorumOpType type;
+ type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
+ quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+ sacb->aiocb->bs->node_name, ret);
+ }
+
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
/* We try to read next child in FIFO order if we fail to read */
- if (ret < 0 && ++acb->child_iter < s->num_children) {
+ if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
+ acb->child_iter++;
read_fifo_child(acb);
return;
}
@@ -300,11 +313,6 @@ static void quorum_aio_cb(void *opaque, int ret)
sacb->ret = ret;
acb->count++;
- if (ret == 0) {
- acb->success_count++;
- } else {
- quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
- }
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
if (acb->count < s->num_children) {
@@ -336,7 +344,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+ acb->nb_sectors,
+ s->children[item->index]->bs->node_name, 0);
}
}
}
@@ -369,8 +379,9 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
- acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+ acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+ acb);
}
}
@@ -639,14 +650,15 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+ acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
}
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
- acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+ acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+ &acb->qcrs[i].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
@@ -656,14 +668,15 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
- acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
- acb->qiov->size);
+ acb->qcrs[acb->child_iter].buf =
+ qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
- bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
- &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+ acb->qcrs[acb->child_iter].aiocb =
+ bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+ &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[acb->child_iter]);
return &acb->common;
}
@@ -702,8 +715,8 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
- nb_sectors, &quorum_aio_cb,
+ acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
+ qiov, nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}
@@ -717,12 +730,12 @@ static int64_t quorum_getlength(BlockDriverState *bs)
int i;
/* check that all file have the same length */
- result = bdrv_getlength(s->bs[0]);
+ result = bdrv_getlength(s->children[0]->bs);
if (result < 0) {
return result;
}
for (i = 1; i < s->num_children; i++) {
- int64_t value = bdrv_getlength(s->bs[i]);
+ int64_t value = bdrv_getlength(s->children[i]->bs);
if (value < 0) {
return value;
}
@@ -741,7 +754,7 @@ static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_invalidate_cache(s->bs[i], &local_err);
+ bdrv_invalidate_cache(s->children[i]->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -757,19 +770,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
QuorumVoteValue result_value;
int i;
int result = 0;
+ int success_count = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
- result = bdrv_co_flush(s->bs[i]);
- result_value.l = result;
- quorum_count_vote(&error_votes, &result_value, i);
+ result = bdrv_co_flush(s->children[i]->bs);
+ if (result) {
+ quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+ bdrv_nb_sectors(s->children[i]->bs),
+ s->children[i]->bs->node_name, result);
+ result_value.l = result;
+ quorum_count_vote(&error_votes, &result_value, i);
+ } else {
+ success_count++;
+ }
}
- winner = quorum_get_vote_winner(&error_votes);
- result = winner->value.l;
-
+ if (success_count >= s->threshold) {
+ result = 0;
+ } else {
+ winner = quorum_get_vote_winner(&error_votes);
+ result = winner->value.l;
+ }
quorum_free_vote_list(&error_votes);
return result;
@@ -782,7 +806,7 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
+ bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
candidate);
if (perm) {
return true;
@@ -846,7 +870,7 @@ static int parse_read_pattern(const char *opt)
return QUORUM_READ_PATTERN_QUORUM;
}
- for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
+ for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) {
if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
return i;
}
@@ -889,6 +913,12 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
+ /* and validate it against s->num_children */
+ ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+ if (ret < 0) {
+ goto exit;
+ }
+
ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
if (ret < 0) {
error_setg(&local_err, "Please set read-pattern as fifo or quorum");
@@ -897,12 +927,6 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
s->read_pattern = ret;
if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
- /* and validate it against s->num_children */
- ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
- if (ret < 0) {
- goto exit;
- }
-
/* is the driver in blkverify mode */
if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
s->num_children == 2 && s->threshold == 2) {
@@ -922,8 +946,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- /* allocate the children BlockDriverState array */
- s->bs = g_new0(BlockDriverState *, s->num_children);
+ /* allocate the children array */
+ s->children = g_new0(BdrvChild *, s->num_children);
opened = g_new0(bool, s->num_children);
for (i = 0; i < s->num_children; i++) {
@@ -931,9 +955,10 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
ret = snprintf(indexstr, 32, "children.%d", i);
assert(ret < 32);
- ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs,
- &child_format, false, &local_err);
- if (ret < 0) {
+ s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
+ &child_format, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
goto close_exit;
}
@@ -949,9 +974,9 @@ close_exit:
if (!opened[i]) {
continue;
}
- bdrv_unref(s->bs[i]);
+ bdrv_unref_child(bs, s->children[i]);
}
- g_free(s->bs);
+ g_free(s->children);
g_free(opened);
exit:
qemu_opts_del(opts);
@@ -968,10 +993,10 @@ static void quorum_close(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_unref(s->bs[i]);
+ bdrv_unref_child(bs, s->children[i]);
}
- g_free(s->bs);
+ g_free(s->children);
}
static void quorum_detach_aio_context(BlockDriverState *bs)
@@ -980,7 +1005,7 @@ static void quorum_detach_aio_context(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_detach_aio_context(s->bs[i]);
+ bdrv_detach_aio_context(s->children[i]->bs);
}
}
@@ -991,11 +1016,11 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_attach_aio_context(s->bs[i], new_context);
+ bdrv_attach_aio_context(s->children[i]->bs, new_context);
}
}
-static void quorum_refresh_filename(BlockDriverState *bs)
+static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVQuorumState *s = bs->opaque;
QDict *opts;
@@ -1003,16 +1028,17 @@ static void quorum_refresh_filename(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_refresh_filename(s->bs[i]);
- if (!s->bs[i]->full_open_options) {
+ bdrv_refresh_filename(s->children[i]->bs);
+ if (!s->children[i]->bs->full_open_options) {
return;
}
}
children = qlist_new();
for (i = 0; i < s->num_children; i++) {
- QINCREF(s->bs[i]->full_open_options);
- qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options));
+ QINCREF(s->children[i]->bs->full_open_options);
+ qlist_append_obj(children,
+ QOBJECT(s->children[i]->bs->full_open_options));
}
opts = qdict_new();
diff --git a/qemu/block/raw-aio.h b/qemu/block/raw-aio.h
index 31d791fe6..811e37501 100644
--- a/qemu/block/raw-aio.h
+++ b/qemu/block/raw-aio.h
@@ -15,6 +15,8 @@
#ifndef QEMU_RAW_AIO_H
#define QEMU_RAW_AIO_H
+#include "qemu/iov.h"
+
/* AIO request types */
#define QEMU_AIO_READ 0x0001
#define QEMU_AIO_WRITE 0x0002
diff --git a/qemu/block/raw-posix.c b/qemu/block/raw-posix.c
index 855febed5..906d5c941 100644
--- a/qemu/block/raw-posix.c
+++ b/qemu/block/raw-posix.c
@@ -21,7 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/log.h"
@@ -43,6 +45,7 @@
#include <IOKit/storage/IOMedia.h>
#include <IOKit/storage/IOCDMedia.h>
//#include <IOKit/storage/IOCDTypes.h>
+#include <IOKit/storage/IODVDMedia.h>
#include <CoreFoundation/CoreFoundation.h>
#endif
@@ -51,8 +54,6 @@
#include <sys/dkio.h>
#endif
#ifdef __linux__
-#include <sys/types.h>
-#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <linux/cdrom.h>
@@ -127,11 +128,6 @@ do { \
#define FTYPE_FILE 0
#define FTYPE_CD 1
-#define FTYPE_FD 2
-
-/* if the FD is not accessed during that time (in ns), we try to
- reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (1000000000)
#define MAX_BLOCKSIZE 4096
@@ -141,13 +137,6 @@ typedef struct BDRVRawState {
int open_flags;
size_t buf_align;
-#if defined(__linux__)
- /* linux floppy specific */
- int64_t fd_open_time;
- int64_t fd_error_time;
- int fd_got_error;
- int fd_media_changed;
-#endif
#ifdef CONFIG_LINUX_AIO
int use_aio;
void *aio_ctx;
@@ -512,14 +501,19 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
goto fail;
}
if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
- error_printf("WARNING: aio=native was specified for '%s', but "
- "it requires cache.direct=on, which was not "
- "specified. Falling back to aio=threads.\n"
- " This will become an error condition in "
- "future QEMU versions.\n",
- bs->filename);
+ error_setg(errp, "aio=native was specified, but it requires "
+ "cache.direct=on, which was not specified.");
+ ret = -EINVAL;
+ goto fail;
}
-#endif
+#else
+ if (bdrv_flags & BDRV_O_NATIVE_AIO) {
+ error_setg(errp, "aio=native was specified, but is not supported "
+ "in this build.");
+ ret = -EINVAL;
+ goto fail;
+ }
+#endif /* !defined(CONFIG_LINUX_AIO) */
s->has_discard = true;
s->has_write_zeroes = true;
@@ -626,7 +620,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
}
#endif
- if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+ if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -670,11 +664,17 @@ static int raw_reopen_prepare(BDRVReopenState *state,
/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
if (raw_s->fd == -1) {
- assert(!(raw_s->open_flags & O_CREAT));
- raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
- if (raw_s->fd == -1) {
- error_setg_errno(errp, errno, "Could not reopen file");
- ret = -1;
+ const char *normalized_filename = state->bs->filename;
+ ret = raw_normalize_devicepath(&normalized_filename);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not normalize device path");
+ } else {
+ assert(!(raw_s->open_flags & O_CREAT));
+ raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags);
+ if (raw_s->fd == -1) {
+ error_setg_errno(errp, errno, "Could not reopen file");
+ ret = -1;
+ }
}
}
@@ -780,7 +780,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
BDRVRawState *s = bs->opaque;
struct hd_geometry ioctl_geo = {0};
- uint32_t blksize;
/* If DASD, get its geometry */
if (check_for_dasd(s->fd) < 0) {
@@ -800,12 +799,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
}
geo->heads = ioctl_geo.heads;
geo->sectors = ioctl_geo.sectors;
- if (!probe_physical_blocksize(s->fd, &blksize)) {
- /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */
- geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE)
- / (geo->heads * geo->sectors);
- return 0;
- }
geo->cylinders = ioctl_geo.cylinders;
return 0;
@@ -1253,7 +1246,7 @@ static int aio_worker(void *arg)
break;
}
- g_slice_free(RawPosixAIOData, aiocb);
+ g_free(aiocb);
return ret;
}
@@ -1261,7 +1254,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int type)
{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
acb->bs = bs;
@@ -1286,7 +1279,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type)
{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
acb->bs = bs;
@@ -1633,7 +1626,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
- PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+ PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(buf);
if (local_err) {
@@ -1642,7 +1635,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
goto out;
}
- fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+ fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (fd < 0) {
result = -errno;
@@ -1827,7 +1820,8 @@ static int find_allocation(BlockDriverState *bs, off_t start,
*/
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
off_t start, data = 0, hole = 0;
int64_t total_size;
@@ -1869,6 +1863,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
ret = BDRV_BLOCK_ZERO;
}
+ *file = bs;
return ret | BDRV_BLOCK_OFFSET_VALID | start;
}
@@ -1972,36 +1967,51 @@ BlockDriver bdrv_file = {
/* host device */
#if defined(__APPLE__) && defined(__MACH__)
-static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
-static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
-
-kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
+ CFIndex maxPathSize, int flags);
+static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
{
- kern_return_t kernResult;
+ kern_return_t kernResult = KERN_FAILURE;
mach_port_t masterPort;
CFMutableDictionaryRef classesToMatch;
+ const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
+ char *mediaType = NULL;
kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
if ( KERN_SUCCESS != kernResult ) {
printf( "IOMasterPort returned %d\n", kernResult );
}
- classesToMatch = IOServiceMatching( kIOCDMediaClass );
- if ( classesToMatch == NULL ) {
- printf( "IOServiceMatching returned a NULL dictionary.\n" );
- } else {
- CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
- }
- kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
- if ( KERN_SUCCESS != kernResult )
- {
- printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
- }
+ int index;
+ for (index = 0; index < ARRAY_SIZE(matching_array); index++) {
+ classesToMatch = IOServiceMatching(matching_array[index]);
+ if (classesToMatch == NULL) {
+ error_report("IOServiceMatching returned NULL for %s",
+ matching_array[index]);
+ continue;
+ }
+ CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
+ kCFBooleanTrue);
+ kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
+ mediaIterator);
+ if (kernResult != KERN_SUCCESS) {
+ error_report("Note: IOServiceGetMatchingServices returned %d",
+ kernResult);
+ continue;
+ }
- return kernResult;
+ /* If a match was found, leave the loop */
+ if (*mediaIterator != 0) {
+ DPRINTF("Matching using %s\n", matching_array[index]);
+ mediaType = g_strdup(matching_array[index]);
+ break;
+ }
+ }
+ return mediaType;
}
-kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
+kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
+ CFIndex maxPathSize, int flags)
{
io_object_t nextMedia;
kern_return_t kernResult = KERN_FAILURE;
@@ -2014,7 +2024,9 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma
if ( bsdPathAsCFString ) {
size_t devPathLength;
strcpy( bsdPath, _PATH_DEV );
- strcat( bsdPath, "r" );
+ if (flags & BDRV_O_NOCACHE) {
+ strcat(bsdPath, "r");
+ }
devPathLength = strlen( bsdPath );
if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
kernResult = KERN_SUCCESS;
@@ -2027,7 +2039,46 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma
return kernResult;
}
-#endif
+/* Sets up a real cdrom for use in QEMU */
+static bool setup_cdrom(char *bsd_path, Error **errp)
+{
+ int index, num_of_test_partitions = 2, fd;
+ char test_partition[MAXPATHLEN];
+ bool partition_found = false;
+
+ /* look for a working partition */
+ for (index = 0; index < num_of_test_partitions; index++) {
+ snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path,
+ index);
+ fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd >= 0) {
+ partition_found = true;
+ qemu_close(fd);
+ break;
+ }
+ }
+
+ /* if a working partition on the device was not found */
+ if (partition_found == false) {
+ error_setg(errp, "Failed to find a working partition on disc");
+ } else {
+ DPRINTF("Using %s as optical disc\n", test_partition);
+ pstrcpy(bsd_path, MAXPATHLEN, test_partition);
+ }
+ return partition_found;
+}
+
+/* Prints directions on mounting and unmounting a device */
+static void print_unmounting_directions(const char *file_name)
+{
+ error_report("If device %s is mounted on the desktop, unmount"
+ " it first before using it in QEMU", file_name);
+ error_report("Command to unmount device: diskutil unmountDisk %s",
+ file_name);
+ error_report("Command to mount device: diskutil mountDisk %s", file_name);
+}
+
+#endif /* defined(__APPLE__) && defined(__MACH__) */
static int hdev_probe_device(const char *filename)
{
@@ -2118,33 +2169,57 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
#if defined(__APPLE__) && defined(__MACH__)
const char *filename = qdict_get_str(options, "filename");
+ char bsd_path[MAXPATHLEN] = "";
+ bool error_occurred = false;
+
+ /* If using a real cdrom */
+ if (strcmp(filename, "/dev/cdrom") == 0) {
+ char *mediaType = NULL;
+ kern_return_t ret_val;
+ io_iterator_t mediaIterator = 0;
+
+ mediaType = FindEjectableOpticalMedia(&mediaIterator);
+ if (mediaType == NULL) {
+ error_setg(errp, "Please make sure your CD/DVD is in the optical"
+ " drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
- if (strstart(filename, "/dev/cdrom", NULL)) {
- kern_return_t kernResult;
- io_iterator_t mediaIterator;
- char bsdPath[ MAXPATHLEN ];
- int fd;
-
- kernResult = FindEjectableCDMedia( &mediaIterator );
- kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
-
- if ( bsdPath[ 0 ] != '\0' ) {
- strcat(bsdPath,"s0");
- /* some CDs don't have a partition 0 */
- fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
- if (fd < 0) {
- bsdPath[strlen(bsdPath)-1] = '1';
- } else {
- qemu_close(fd);
- }
- filename = bsdPath;
- qdict_put(options, "filename", qstring_from_str(filename));
+ ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags);
+ if (ret_val != KERN_SUCCESS) {
+ error_setg(errp, "Could not get BSD path for optical drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ /* If a real optical drive was not found */
+ if (bsd_path[0] == '\0') {
+ error_setg(errp, "Failed to obtain bsd path for optical drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ /* If using a cdrom disc and finding a partition on the disc failed */
+ if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 &&
+ setup_cdrom(bsd_path, errp) == false) {
+ print_unmounting_directions(bsd_path);
+ error_occurred = true;
+ goto hdev_open_Mac_error;
}
- if ( mediaIterator )
- IOObjectRelease( mediaIterator );
+ qdict_put(options, "filename", qstring_from_str(bsd_path));
+
+hdev_open_Mac_error:
+ g_free(mediaType);
+ if (mediaIterator) {
+ IOObjectRelease(mediaIterator);
+ }
+ if (error_occurred) {
+ return -ENOENT;
+ }
}
-#endif
+#endif /* defined(__APPLE__) && defined(__MACH__) */
s->type = FTYPE_FILE;
@@ -2153,6 +2228,15 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
if (local_err) {
error_propagate(errp, local_err);
}
+#if defined(__APPLE__) && defined(__MACH__)
+ if (*bsd_path) {
+ filename = bsd_path;
+ }
+ /* if a physical device experienced an error while being opened */
+ if (strncmp(filename, "/dev/", 5) == 0) {
+ print_unmounting_directions(filename);
+ }
+#endif /* defined(__APPLE__) && defined(__MACH__) */
return ret;
}
@@ -2172,53 +2256,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
#if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
- present. The current method is to try to open the floppy at every
- I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
- BDRVRawState *s = bs->opaque;
- int last_media_present;
-
- if (s->type != FTYPE_FD)
- return 0;
- last_media_present = (s->fd >= 0);
- if (s->fd >= 0 &&
- (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
- qemu_close(s->fd);
- s->fd = -1;
- DPRINTF("Floppy closed\n");
- }
- if (s->fd < 0) {
- if (s->fd_got_error &&
- (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
- DPRINTF("No floppy (open delayed)\n");
- return -EIO;
- }
- s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
- if (s->fd < 0) {
- s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- s->fd_got_error = 1;
- if (last_media_present)
- s->fd_media_changed = 1;
- DPRINTF("No floppy\n");
- return -EIO;
- }
- DPRINTF("Floppy opened\n");
- }
- if (!last_media_present)
- s->fd_media_changed = 1;
- s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- s->fd_got_error = 0;
- return 0;
-}
-
-static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- BDRVRawState *s = bs->opaque;
-
- return ioctl(s->fd, req, buf);
-}
static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
@@ -2231,7 +2268,7 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
if (fd_open(bs) < 0)
return NULL;
- acb = g_slice_new(RawPosixAIOData);
+ acb = g_new(RawPosixAIOData, 1);
acb->bs = bs;
acb->aio_type = QEMU_AIO_IOCTL;
acb->aio_fildes = s->fd;
@@ -2241,8 +2278,8 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
+#endif /* linux */
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -2252,14 +2289,6 @@ static int fd_open(BlockDriverState *bs)
return 0;
return -EIO;
}
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
- return 0;
-}
-
-#endif /* !linux && !FreeBSD */
static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
@@ -2303,17 +2332,22 @@ static int hdev_create(const char *filename, QemuOpts *opts,
int64_t total_size = 0;
bool has_prefix;
- /* This function is used by all three protocol block drivers and therefore
- * any of these three prefixes may be given.
+ /* This function is used by both protocol block drivers and therefore either
+ * of these prefixes may be given.
* The return value has to be stored somewhere, otherwise this is an error
* due to -Werror=unused-value. */
has_prefix =
strstart(filename, "host_device:", &filename) ||
- strstart(filename, "host_cdrom:" , &filename) ||
- strstart(filename, "host_floppy:", &filename);
+ strstart(filename, "host_cdrom:" , &filename);
(void)has_prefix;
+ ret = raw_normalize_devicepath(&filename);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not normalize device path");
+ return ret;
+ }
+
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
@@ -2379,160 +2413,10 @@ static BlockDriver bdrv_host_device = {
/* generic scsi device */
#ifdef __linux__
- .bdrv_ioctl = hdev_ioctl,
.bdrv_aio_ioctl = hdev_aio_ioctl,
#endif
};
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
- Error **errp)
-{
- /* The prefix is optional, just as for "file". */
- strstart(filename, "host_floppy:", &filename);
-
- qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
-{
- BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
-
- s->type = FTYPE_FD;
-
- /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
- ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
- if (ret) {
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
- }
-
- /* close fd so that we can reopen it as needed */
- qemu_close(s->fd);
- s->fd = -1;
- s->fd_media_changed = 1;
-
- error_report("Host floppy pass-through is deprecated");
- error_printf("Support for it will be removed in a future release.\n");
- return 0;
-}
-
-static int floppy_probe_device(const char *filename)
-{
- int fd, ret;
- int prio = 0;
- struct floppy_struct fdparam;
- struct stat st;
-
- if (strstart(filename, "/dev/fd", NULL) &&
- !strstart(filename, "/dev/fdset/", NULL) &&
- !strstart(filename, "/dev/fd/", NULL)) {
- prio = 50;
- }
-
- fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- goto out;
- }
- ret = fstat(fd, &st);
- if (ret == -1 || !S_ISBLK(st.st_mode)) {
- goto outc;
- }
-
- /* Attempt to detect via a floppy specific ioctl */
- ret = ioctl(fd, FDGETPRM, &fdparam);
- if (ret >= 0)
- prio = 100;
-
-outc:
- qemu_close(fd);
-out:
- return prio;
-}
-
-
-static int floppy_is_inserted(BlockDriverState *bs)
-{
- return fd_open(bs) >= 0;
-}
-
-static int floppy_media_changed(BlockDriverState *bs)
-{
- BDRVRawState *s = bs->opaque;
- int ret;
-
- /*
- * XXX: we do not have a true media changed indication.
- * It does not work if the floppy is changed without trying to read it.
- */
- fd_open(bs);
- ret = s->fd_media_changed;
- s->fd_media_changed = 0;
- DPRINTF("Floppy changed=%d\n", ret);
- return ret;
-}
-
-static void floppy_eject(BlockDriverState *bs, bool eject_flag)
-{
- BDRVRawState *s = bs->opaque;
- int fd;
-
- if (s->fd >= 0) {
- qemu_close(s->fd);
- s->fd = -1;
- }
- fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
- if (fd >= 0) {
- if (ioctl(fd, FDEJECT, 0) < 0)
- perror("FDEJECT");
- qemu_close(fd);
- }
-}
-
-static BlockDriver bdrv_host_floppy = {
- .format_name = "host_floppy",
- .protocol_name = "host_floppy",
- .instance_size = sizeof(BDRVRawState),
- .bdrv_needs_filename = true,
- .bdrv_probe_device = floppy_probe_device,
- .bdrv_parse_filename = floppy_parse_filename,
- .bdrv_file_open = floppy_open,
- .bdrv_close = raw_close,
- .bdrv_reopen_prepare = raw_reopen_prepare,
- .bdrv_reopen_commit = raw_reopen_commit,
- .bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
- .create_opts = &raw_create_opts,
-
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
- .bdrv_aio_flush = raw_aio_flush,
- .bdrv_refresh_limits = raw_refresh_limits,
- .bdrv_io_plug = raw_aio_plug,
- .bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
- .bdrv_truncate = raw_truncate,
- .bdrv_getlength = raw_getlength,
- .has_variable_length = true,
- .bdrv_get_allocated_file_size
- = raw_get_allocated_file_size,
-
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
- /* removable device support */
- .bdrv_is_inserted = floppy_is_inserted,
- .bdrv_media_changed = floppy_media_changed,
- .bdrv_eject = floppy_eject,
-};
-#endif
-
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static void cdrom_parse_filename(const char *filename, QDict *options,
Error **errp)
@@ -2588,15 +2472,13 @@ out:
return prio;
}
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int ret;
ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
- if (ret == CDS_DISC_OK)
- return 1;
- return 0;
+ return ret == CDS_DISC_OK;
}
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2663,7 +2545,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_lock_medium = cdrom_lock_medium,
/* generic scsi device */
- .bdrv_ioctl = hdev_ioctl,
.bdrv_aio_ioctl = hdev_aio_ioctl,
};
#endif /* __linux__ */
@@ -2722,7 +2603,7 @@ static int cdrom_reopen(BlockDriverState *bs)
return 0;
}
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
{
return raw_getlength(bs) > 0;
}
@@ -2810,7 +2691,6 @@ static void bdrv_file_init(void)
bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
- bdrv_register(&bdrv_host_floppy);
bdrv_register(&bdrv_host_cdrom);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/qemu/block/raw-win32.c b/qemu/block/raw-win32.c
index 68f2338ac..fd2389153 100644
--- a/qemu/block/raw-win32.c
+++ b/qemu/block/raw-win32.c
@@ -21,7 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -119,9 +121,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_WRITE:
count = handle_aiocb_rw(aiocb);
if (count == aiocb->aio_nbytes) {
- count = 0;
+ ret = 0;
} else {
- count = -EINVAL;
+ ret = -EINVAL;
}
break;
case QEMU_AIO_FLUSH:
@@ -135,7 +137,7 @@ static int aio_worker(void *arg)
break;
}
- g_slice_free(RawWin32AIOData, aiocb);
+ g_free(aiocb);
return ret;
}
@@ -143,7 +145,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type)
{
- RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+ RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
ThreadPool *pool;
acb->bs = bs;
diff --git a/qemu/block/raw_bsd.c b/qemu/block/raw_bsd.c
index e3d2d0468..a6cc7e991 100644
--- a/qemu/block/raw_bsd.c
+++ b/qemu/block/raw_bsd.c
@@ -26,7 +26,9 @@
* IN THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/option.h"
static QemuOptsList raw_create_opts = {
@@ -52,11 +54,12 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
+ return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
}
-static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn
+raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int flags)
{
void *buf = NULL;
BlockDriver *drv;
@@ -75,7 +78,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
return 0;
}
- buf = qemu_try_blockalign(bs->file, 512);
+ buf = qemu_try_blockalign(bs->file->bs, 512);
if (!buf) {
ret = -ENOMEM;
goto fail;
@@ -102,7 +105,8 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
fail:
if (qiov == &local_qiov) {
@@ -112,11 +116,20 @@ fail:
return ret;
}
+static int coroutine_fn
+raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
+}
+
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
*pnum = nb_sectors;
+ *file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
@@ -125,58 +138,48 @@ static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BdrvRequestFlags flags)
{
- return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
+ return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
- return bdrv_co_discard(bs->file, sector_num, nb_sectors);
+ return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file);
+ return bdrv_getlength(bs->file->bs);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
- return bdrv_get_info(bs->file, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
}
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
- bs->bl = bs->file->bl;
+ bs->bl = bs->file->bs->bl;
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
- return bdrv_truncate(bs->file, offset);
-}
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
- return bdrv_is_inserted(bs->file);
+ return bdrv_truncate(bs->file->bs, offset);
}
static int raw_media_changed(BlockDriverState *bs)
{
- return bdrv_media_changed(bs->file);
+ return bdrv_media_changed(bs->file->bs);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
- bdrv_eject(bs->file, eject_flag);
+ bdrv_eject(bs->file->bs, eject_flag);
}
static void raw_lock_medium(BlockDriverState *bs, bool locked)
{
- bdrv_lock_medium(bs->file, locked);
-}
-
-static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- return bdrv_ioctl(bs->file, req, buf);
+ bdrv_lock_medium(bs->file->bs, locked);
}
static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
@@ -184,12 +187,12 @@ static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
- return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
+ return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
}
static int raw_has_zero_init(BlockDriverState *bs)
{
- return bdrv_has_zero_init(bs->file);
+ return bdrv_has_zero_init(bs->file->bs);
}
static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
@@ -207,7 +210,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
- bs->sg = bs->file->sg;
+ bs->sg = bs->file->bs->sg;
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
@@ -217,7 +220,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
"raw images, write operations on block 0 will be restricted.\n"
" Specify the 'raw' format explicitly to remove the "
"restrictions.\n",
- bs->file->filename);
+ bs->file->bs->filename);
}
return 0;
@@ -237,12 +240,12 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
- return bdrv_probe_blocksizes(bs->file, bsz);
+ return bdrv_probe_blocksizes(bs->file->bs, bsz);
}
static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
- return bdrv_probe_geometry(bs->file, geo);
+ return bdrv_probe_geometry(bs->file->bs, geo);
}
BlockDriver bdrv_raw = {
@@ -254,6 +257,8 @@ BlockDriver bdrv_raw = {
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev = &raw_co_writev,
+ .bdrv_co_writev_flags = &raw_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
@@ -264,11 +269,9 @@ BlockDriver bdrv_raw = {
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_probe_blocksizes = &raw_probe_blocksizes,
.bdrv_probe_geometry = &raw_probe_geometry,
- .bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
- .bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
.create_opts = &raw_create_opts,
.bdrv_has_zero_init = &raw_has_zero_init
diff --git a/qemu/block/rbd.c b/qemu/block/rbd.c
index a60a19d58..5bc5b3253 100644
--- a/qemu/block/rbd.c
+++ b/qemu/block/rbd.c
@@ -11,11 +11,13 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include <inttypes.h>
+#include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
+#include "crypto/secret.h"
+#include "qemu/cutils.h"
#include <rbd/librbd.h>
@@ -228,6 +230,27 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
return NULL;
}
+
+static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
+ Error **errp)
+{
+ if (secretid == 0) {
+ return 0;
+ }
+
+ gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
+ errp);
+ if (!secret) {
+ return -1;
+ }
+
+ rados_conf_set(cluster, "key", secret);
+ g_free(secret);
+
+ return 0;
+}
+
+
static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
bool only_read_conf_file,
Error **errp)
@@ -299,10 +322,13 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
+ const char *secretid;
rados_t cluster;
rados_ioctx_t io_ctx;
int ret;
+ secretid = qemu_opt_get(opts, "password-secret");
+
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
name, sizeof(name),
@@ -350,6 +376,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
return -EIO;
}
+ if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
+ rados_shutdown(cluster);
+ return -EIO;
+ }
+
if (rados_connect(cluster) < 0) {
error_setg(errp, "error connecting");
rados_shutdown(cluster);
@@ -423,6 +454,11 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
+ {
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret providing the password",
+ },
{ /* end of list */ }
},
};
@@ -436,6 +472,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
+ const char *secretid;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -450,6 +487,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
filename = qemu_opt_get(opts, "filename");
+ secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
@@ -488,6 +526,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
+ r = -EIO;
+ goto failed_shutdown;
+ }
+
/*
* Fallback to more conservative semantics if setting cache
* options fails. Ignore errors from setting rbd_cache because the
@@ -919,6 +962,11 @@ static QemuOptsList qemu_rbd_create_opts = {
.type = QEMU_OPT_SIZE,
.help = "RBD object size"
},
+ {
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret providing the password",
+ },
{ /* end of list */ }
}
};
diff --git a/qemu/block/sheepdog.c b/qemu/block/sheepdog.c
index 9585beb73..33e0a3382 100644
--- a/qemu/block/sheepdog.c
+++ b/qemu/block/sheepdog.c
@@ -12,12 +12,15 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/bitops.h"
+#include "qemu/cutils.h"
#define SD_PROTO_VER 0x01
@@ -28,7 +31,6 @@
#define SD_OP_READ_OBJ 0x02
#define SD_OP_WRITE_OBJ 0x03
/* 0x04 is used internally by Sheepdog */
-#define SD_OP_DISCARD_OBJ 0x05
#define SD_OP_NEW_VDI 0x11
#define SD_OP_LOCK_VDI 0x12
@@ -284,6 +286,12 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
return !!inode->snap_ctime;
}
+static inline size_t count_data_objs(const struct SheepdogInode *inode)
+{
+ return DIV_ROUND_UP(inode->vdi_size,
+ (1UL << inode->block_size_shift));
+}
+
#undef DPRINTF
#ifdef DEBUG_SDOG
#define DPRINTF(fmt, args...) \
@@ -318,7 +326,7 @@ enum AIOCBState {
AIOCB_DISCARD_OBJ,
};
-#define AIOCBOverwrapping(x, y) \
+#define AIOCBOverlapping(x, y) \
(!(x->max_affect_data_idx < y->min_affect_data_idx \
|| y->max_affect_data_idx < x->min_affect_data_idx))
@@ -342,6 +350,15 @@ struct SheepdogAIOCB {
uint32_t min_affect_data_idx;
uint32_t max_affect_data_idx;
+ /*
+ * The difference between affect_data_idx and dirty_data_idx:
+ * affect_data_idx represents range of index of all request types.
+ * dirty_data_idx represents range of index updated by COW requests.
+ * dirty_data_idx is used for updating an inode object.
+ */
+ uint32_t min_dirty_data_idx;
+ uint32_t max_dirty_data_idx;
+
QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};
@@ -351,9 +368,6 @@ typedef struct BDRVSheepdogState {
SheepdogInode inode;
- uint32_t min_dirty_data_idx;
- uint32_t max_dirty_data_idx;
-
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
uint32_t cache_flags;
@@ -373,10 +387,15 @@ typedef struct BDRVSheepdogState {
QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
- CoQueue overwrapping_queue;
+ CoQueue overlapping_queue;
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
} BDRVSheepdogState;
+typedef struct BDRVSheepdogReopenState {
+ int fd;
+ int cache_flags;
+} BDRVSheepdogReopenState;
+
static const char * sd_strerror(int err)
{
int i;
@@ -556,6 +575,9 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
+
return acb;
}
@@ -595,14 +617,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
- ret = -socket_error();
- return ret;
+ return -errno;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret != *wlen) {
- ret = -socket_error();
error_report("failed to send a req, %s", strerror(errno));
+ return -errno;
}
return ret;
@@ -638,14 +659,16 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -670,7 +693,8 @@ static coroutine_fn void do_co_req(void *opaque)
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
- aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
@@ -722,7 +746,8 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
- aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+ NULL, NULL);
close(s->fd);
s->fd = -1;
@@ -819,8 +844,8 @@ static void coroutine_fn aio_read_response(void *opaque)
*/
if (rsp.result == SD_RES_SUCCESS) {
s->inode.data_vdi_id[idx] = s->inode.vdi_id;
- s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
- s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+ acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
+ acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
}
}
break;
@@ -847,10 +872,6 @@ static void coroutine_fn aio_read_response(void *opaque)
rsp.result = SD_RES_SUCCESS;
s->discard_supported = false;
break;
- case SD_RES_SUCCESS:
- idx = data_oid_to_idx(aio_req->oid);
- s->inode.data_vdi_id[idx] = 0;
- break;
default:
break;
}
@@ -929,7 +950,8 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
return fd;
}
- aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ co_read_response, NULL, s);
return fd;
}
@@ -1165,7 +1187,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
break;
case AIOCB_DISCARD_OBJ:
- hdr.opcode = SD_OP_DISCARD_OBJ;
+ hdr.opcode = SD_OP_WRITE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE | flags;
+ s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
+ offset = offsetof(SheepdogInode,
+ data_vdi_id[data_oid_to_idx(oid)]);
+ oid = vid_to_vdi_oid(s->inode.vdi_id);
+ wlen = datalen = sizeof(uint32_t);
break;
}
@@ -1184,7 +1212,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
- aio_set_fd_handler(s->aio_context, s->fd,
+ aio_set_fd_handler(s->aio_context, s->fd, false,
co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
@@ -1203,7 +1231,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
out:
socket_set_cork(s->fd, 0);
- aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, s->fd, false,
+ co_read_response, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
@@ -1353,7 +1382,8 @@ static void sd_detach_aio_context(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
- aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+ NULL, NULL);
}
static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1362,7 +1392,8 @@ static void sd_attach_aio_context(BlockDriverState *bs,
BDRVSheepdogState *s = bs->opaque;
s->aio_context = new_context;
- aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(new_context, s->fd, false,
+ co_read_response, NULL, s);
}
/* TODO Convert to fine grained options */
@@ -1466,18 +1497,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
}
memcpy(&s->inode, buf, sizeof(s->inode));
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
- qemu_co_queue_init(&s->overwrapping_queue);
+ qemu_co_queue_init(&s->overlapping_queue);
qemu_opts_del(opts);
g_free(buf);
return 0;
out:
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+ false, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1486,6 +1516,70 @@ out:
return ret;
}
+static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
+ Error **errp)
+{
+ BDRVSheepdogState *s = state->bs->opaque;
+ BDRVSheepdogReopenState *re_s;
+ int ret = 0;
+
+ re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);
+
+ re_s->cache_flags = SD_FLAG_CMD_CACHE;
+ if (state->flags & BDRV_O_NOCACHE) {
+ re_s->cache_flags = SD_FLAG_CMD_DIRECT;
+ }
+
+ re_s->fd = get_sheep_fd(s, errp);
+ if (re_s->fd < 0) {
+ ret = re_s->fd;
+ return ret;
+ }
+
+ return ret;
+}
+
+static void sd_reopen_commit(BDRVReopenState *state)
+{
+ BDRVSheepdogReopenState *re_s = state->opaque;
+ BDRVSheepdogState *s = state->bs->opaque;
+
+ if (s->fd) {
+ aio_set_fd_handler(s->aio_context, s->fd, false,
+ NULL, NULL, NULL);
+ closesocket(s->fd);
+ }
+
+ s->fd = re_s->fd;
+ s->cache_flags = re_s->cache_flags;
+
+ g_free(state->opaque);
+ state->opaque = NULL;
+
+ return;
+}
+
+static void sd_reopen_abort(BDRVReopenState *state)
+{
+ BDRVSheepdogReopenState *re_s = state->opaque;
+ BDRVSheepdogState *s = state->bs->opaque;
+
+ if (re_s == NULL) {
+ return;
+ }
+
+ if (re_s->fd) {
+ aio_set_fd_handler(s->aio_context, re_s->fd, false,
+ NULL, NULL, NULL);
+ closesocket(re_s->fd);
+ }
+
+ g_free(state->opaque);
+ state->opaque = NULL;
+
+ return;
+}
+
static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
Error **errp)
{
@@ -1544,7 +1638,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
static int sd_prealloc(const char *filename, Error **errp)
{
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
BDRVSheepdogState *base = NULL;
unsigned long buf_size;
uint32_t idx, max_idx;
@@ -1553,19 +1647,22 @@ static int sd_prealloc(const char *filename, Error **errp)
void *buf = NULL;
int ret;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, errp);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out_with_err_set;
}
- vdi_size = bdrv_getlength(bs);
+ blk_set_allow_write_beyond_eof(blk, true);
+
+ vdi_size = blk_getlength(blk);
if (vdi_size < 0) {
ret = vdi_size;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
object_size = (UINT32_C(1) << base->inode.block_size_shift);
buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
buf = g_malloc0(buf_size);
@@ -1577,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp)
* The created image can be a cloned image, so we need to read
* a data from the source image.
*/
- ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+ ret = blk_pread(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+ ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
}
+ ret = 0;
out:
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't pre-allocate");
}
out_with_err_set:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(buf);
@@ -1733,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
}
if (backing_file) {
- BlockDriverState *bs;
+ BlockBackend *blk;
BDRVSheepdogState *base;
BlockDriver *drv;
@@ -1745,23 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
goto out;
}
- bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
- errp);
- if (ret < 0) {
+ blk = blk_new_open(backing_file, NULL, NULL,
+ BDRV_O_PROTOCOL, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
if (!is_snapshot(&base->inode)) {
error_setg(errp, "cannot clone from a non snapshot vdi");
- bdrv_unref(bs);
+ blk_unref(blk);
ret = -EINVAL;
goto out;
}
s->inode.vdi_id = base->inode.vdi_id;
- bdrv_unref(bs);
+ blk_unref(blk);
}
s->aio_context = qemu_get_aio_context();
@@ -1776,8 +1874,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
+ error_report_err(local_err);
ret = -EIO;
goto out;
}
@@ -1861,7 +1958,8 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+ false, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}
@@ -1923,16 +2021,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
AIOReq *aio_req;
uint32_t offset, data_len, mn, mx;
- mn = s->min_dirty_data_idx;
- mx = s->max_dirty_data_idx;
+ mn = acb->min_dirty_data_idx;
+ mx = acb->max_dirty_data_idx;
if (mn <= mx) {
/* we need to update the vdi object. */
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
mn * sizeof(s->inode.data_vdi_id[0]);
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
@@ -2141,7 +2239,9 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
- old_oid, done);
+ old_oid,
+ acb->aiocb_type == AIOCB_DISCARD_OBJ ?
+ 0 : done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
@@ -2158,12 +2258,12 @@ out:
return 1;
}
-static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
{
SheepdogAIOCB *cb;
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
- if (AIOCBOverwrapping(aiocb, cb)) {
+ if (AIOCBOverlapping(aiocb, cb)) {
return true;
}
}
@@ -2192,15 +2292,15 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
acb->aiocb_type = AIOCB_WRITE_UDATA;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2208,7 +2308,7 @@ retry:
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2225,15 +2325,15 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
acb->aio_done_func = sd_finish_aiocb;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2241,7 +2341,7 @@ retry:
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2318,9 +2418,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
ret = do_sd_create(s, &new_vid, 1, &local_err);
if (ret < 0) {
- error_report("failed to create inode for snapshot: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_reportf_err(local_err,
+ "failed to create inode for snapshot: ");
goto cleanup;
}
@@ -2391,13 +2490,131 @@ out:
return ret;
}
+#define NR_BATCHED_DISCARD 128
+
+static bool remove_objects(BDRVSheepdogState *s)
+{
+ int fd, i = 0, nr_objs = 0;
+ Error *local_err = NULL;
+ int ret = 0;
+ bool result = true;
+ SheepdogInode *inode = &s->inode;
+
+ fd = connect_to_sdog(s, &local_err);
+ if (fd < 0) {
+ error_report_err(local_err);
+ return false;
+ }
+
+ nr_objs = count_data_objs(inode);
+ while (i < nr_objs) {
+ int start_idx, nr_filled_idx;
+
+ while (i < nr_objs && !inode->data_vdi_id[i]) {
+ i++;
+ }
+ start_idx = i;
+
+ nr_filled_idx = 0;
+ while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
+ if (inode->data_vdi_id[i]) {
+ inode->data_vdi_id[i] = 0;
+ nr_filled_idx++;
+ }
+
+ i++;
+ }
+
+ ret = write_object(fd, s->aio_context,
+ (char *)&inode->data_vdi_id[start_idx],
+ vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
+ (i - start_idx) * sizeof(uint32_t),
+ offsetof(struct SheepdogInode,
+ data_vdi_id[start_idx]),
+ false, s->cache_flags);
+ if (ret < 0) {
+ error_report("failed to discard snapshot inode.");
+ result = false;
+ goto out;
+ }
+ }
+
+out:
+ closesocket(fd);
+ return result;
+}
+
static int sd_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
{
- /* FIXME: Delete specified snapshot id. */
- return 0;
+ unsigned long snap_id = 0;
+ char snap_tag[SD_MAX_VDI_TAG_LEN];
+ Error *local_err = NULL;
+ int fd, ret;
+ char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
+ BDRVSheepdogState *s = bs->opaque;
+ unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
+ uint32_t vid;
+ SheepdogVdiReq hdr = {
+ .opcode = SD_OP_DEL_VDI,
+ .data_length = wlen,
+ .flags = SD_FLAG_CMD_WRITE,
+ };
+ SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
+
+ if (!remove_objects(s)) {
+ return -1;
+ }
+
+ memset(buf, 0, sizeof(buf));
+ memset(snap_tag, 0, sizeof(snap_tag));
+ pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
+ ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
+ if (ret || snap_id > UINT32_MAX) {
+ error_setg(errp, "Invalid snapshot ID: %s",
+ snapshot_id ? snapshot_id : "<null>");
+ return -EINVAL;
+ }
+
+ if (snap_id) {
+ hdr.snapid = (uint32_t) snap_id;
+ } else {
+ pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
+ pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
+ }
+
+ ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
+ &local_err);
+ if (ret) {
+ return ret;
+ }
+
+ fd = connect_to_sdog(s, &local_err);
+ if (fd < 0) {
+ error_report_err(local_err);
+ return -1;
+ }
+
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ buf, &wlen, &rlen);
+ closesocket(fd);
+ if (ret) {
+ return ret;
+ }
+
+ switch (rsp->result) {
+ case SD_RES_NO_VDI:
+ error_report("%s was already deleted", s->name);
+ case SD_RES_SUCCESS:
+ break;
+ default:
+ error_report("%s, %s", sd_strerror(rsp->result), s->name);
+ return -1;
+ }
+
+ return ret;
}
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
@@ -2577,28 +2794,36 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
SheepdogAIOCB *acb;
- QEMUIOVector dummy;
BDRVSheepdogState *s = bs->opaque;
int ret;
+ QEMUIOVector discard_iov;
+ struct iovec iov;
+ uint32_t zero = 0;
if (!s->discard_supported) {
return 0;
}
- acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+ memset(&discard_iov, 0, sizeof(discard_iov));
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &zero;
+ iov.iov_len = sizeof(zero);
+ discard_iov.iov = &iov;
+ discard_iov.niov = 1;
+ acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2606,14 +2831,14 @@ retry:
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
static coroutine_fn int64_t
sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
+ int *pnum, BlockDriverState **file)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
@@ -2644,6 +2869,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ *file = bs;
+ }
return ret;
}
@@ -2703,6 +2931,9 @@ static BlockDriver bdrv_sheepdog = {
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2736,6 +2967,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2769,6 +3003,9 @@ static BlockDriver bdrv_sheepdog_unix = {
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
diff --git a/qemu/block/snapshot.c b/qemu/block/snapshot.c
index 49e143e99..e9d721df6 100644
--- a/qemu/block/snapshot.c
+++ b/qemu/block/snapshot.c
@@ -22,8 +22,10 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/snapshot.h"
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
QemuOptsList internal_snapshot_opts = {
@@ -149,7 +151,7 @@ int bdrv_can_snapshot(BlockDriverState *bs)
if (!drv->bdrv_snapshot_create) {
if (bs->file != NULL) {
- return bdrv_can_snapshot(bs->file);
+ return bdrv_can_snapshot(bs->file->bs);
}
return 0;
}
@@ -168,7 +170,7 @@ int bdrv_snapshot_create(BlockDriverState *bs,
return drv->bdrv_snapshot_create(bs, sn_info);
}
if (bs->file) {
- return bdrv_snapshot_create(bs->file, sn_info);
+ return bdrv_snapshot_create(bs->file->bs, sn_info);
}
return -ENOTSUP;
}
@@ -188,10 +190,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
if (bs->file) {
drv->bdrv_close(bs);
- ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+ ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
if (open_ret < 0) {
- bdrv_unref(bs->file);
+ bdrv_unref(bs->file->bs);
bs->drv = NULL;
return open_ret;
}
@@ -229,6 +231,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
Error **errp)
{
BlockDriver *drv = bs->drv;
+ int ret;
+
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@@ -239,23 +243,26 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
}
/* drain all pending i/o before deleting snapshot */
- bdrv_drain(bs);
+ bdrv_drained_begin(bs);
if (drv->bdrv_snapshot_delete) {
- return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
- }
- if (bs->file) {
- return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
+ ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
+ } else if (bs->file) {
+ ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
+ } else {
+ error_setg(errp, "Block format '%s' used by device '%s' "
+ "does not support internal snapshot deletion",
+ drv->format_name, bdrv_get_device_name(bs));
+ ret = -ENOTSUP;
}
- error_setg(errp, "Block format '%s' used by device '%s' "
- "does not support internal snapshot deletion",
- drv->format_name, bdrv_get_device_name(bs));
- return -ENOTSUP;
+
+ bdrv_drained_end(bs);
+ return ret;
}
-void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
- const char *id_or_name,
- Error **errp)
+int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+ const char *id_or_name,
+ Error **errp)
{
int ret;
Error *local_err = NULL;
@@ -270,6 +277,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
if (ret < 0) {
error_propagate(errp, local_err);
}
+ return ret;
}
int bdrv_snapshot_list(BlockDriverState *bs,
@@ -283,7 +291,7 @@ int bdrv_snapshot_list(BlockDriverState *bs,
return drv->bdrv_snapshot_list(bs, psn_info);
}
if (bs->file) {
- return bdrv_snapshot_list(bs->file, psn_info);
+ return bdrv_snapshot_list(bs->file->bs, psn_info);
}
return -ENOTSUP;
}
@@ -356,3 +364,130 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
return ret;
}
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplane (take aio_context_acquire
+ * when appropriate for appropriate block drivers) */
+
+bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
+{
+ bool ok = true;
+ BlockDriverState *bs = NULL;
+
+ while (ok && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) {
+ ok = bdrv_can_snapshot(bs);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return ok;
+}
+
+int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
+ Error **err)
+{
+ int ret = 0;
+ BlockDriverState *bs = NULL;
+ QEMUSnapshotInfo sn1, *snapshot = &sn1;
+
+ while (ret == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_can_snapshot(bs) &&
+ bdrv_snapshot_find(bs, snapshot, name) >= 0) {
+ ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return ret;
+}
+
+
+int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
+{
+ int err = 0;
+ BlockDriverState *bs = NULL;
+
+ while (err == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_can_snapshot(bs)) {
+ err = bdrv_snapshot_goto(bs, name);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return err;
+}
+
+int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
+{
+ QEMUSnapshotInfo sn;
+ int err = 0;
+ BlockDriverState *bs = NULL;
+
+ while (err == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_can_snapshot(bs)) {
+ err = bdrv_snapshot_find(bs, &sn, name);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return err;
+}
+
+int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
+ BlockDriverState *vm_state_bs,
+ uint64_t vm_state_size,
+ BlockDriverState **first_bad_bs)
+{
+ int err = 0;
+ BlockDriverState *bs = NULL;
+
+ while (err == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bs == vm_state_bs) {
+ sn->vm_state_size = vm_state_size;
+ err = bdrv_snapshot_create(bs, sn);
+ } else if (bdrv_can_snapshot(bs)) {
+ sn->vm_state_size = 0;
+ err = bdrv_snapshot_create(bs, sn);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return err;
+}
+
+BlockDriverState *bdrv_all_find_vmstate_bs(void)
+{
+ bool not_found = true;
+ BlockDriverState *bs = NULL;
+
+ while (not_found && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ not_found = !bdrv_can_snapshot(bs);
+ aio_context_release(ctx);
+ }
+ return bs;
+}
diff --git a/qemu/block/ssh.c b/qemu/block/ssh.c
index 8d0673903..06928ed93 100644
--- a/qemu/block/ssh.c
+++ b/qemu/block/ssh.c
@@ -22,14 +22,13 @@
* THE SOFTWARE.
*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
+#include "qemu/osdep.h"
#include <libssh2.h>
#include <libssh2_sftp.h>
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "qemu/uri.h"
@@ -193,7 +192,7 @@ sftp_error_report(BDRVSSHState *s, const char *fs, ...)
static int parse_uri(const char *filename, QDict *options, Error **errp)
{
URI *uri = NULL;
- QueryParams *qp = NULL;
+ QueryParams *qp;
int i;
uri = uri_parse(filename);
@@ -249,9 +248,6 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
return 0;
err:
- if (qp) {
- query_params_free(qp);
- }
if (uri) {
uri_free(uri);
}
@@ -803,14 +799,15 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
rd_handler, wr_handler);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
- rd_handler, wr_handler, co);
+ false, rd_handler, wr_handler, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+ false, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
diff --git a/qemu/block/stream.c b/qemu/block/stream.c
index ab0bd057f..332b9a183 100644
--- a/qemu/block/stream.c
+++ b/qemu/block/stream.c
@@ -11,11 +11,14 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
enum {
/*
@@ -52,34 +55,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
}
-static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
- const char *base_id)
-{
- BlockDriverState *intermediate;
- intermediate = top->backing_hd;
-
- /* Must assign before bdrv_delete() to prevent traversing dangling pointer
- * while we delete backing image instances.
- */
- bdrv_set_backing_hd(top, base);
-
- while (intermediate) {
- BlockDriverState *unused;
-
- /* reached base */
- if (intermediate == base) {
- break;
- }
-
- unused = intermediate;
- intermediate = intermediate->backing_hd;
- bdrv_set_backing_hd(unused, NULL);
- bdrv_unref(unused);
- }
-
- bdrv_refresh_limits(top, NULL);
-}
-
typedef struct {
int ret;
bool reached_end;
@@ -101,7 +76,7 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
- close_unused_images(job->bs, base, base_id);
+ bdrv_set_backing_hd(job->bs, base);
}
g_free(s->backing_file_str);
@@ -115,21 +90,21 @@ static void coroutine_fn stream_run(void *opaque)
StreamCompleteData *data;
BlockDriverState *bs = s->common.bs;
BlockDriverState *base = s->base;
- int64_t sector_num, end;
+ int64_t sector_num = 0;
+ int64_t end = -1;
int error = 0;
int ret = 0;
int n = 0;
void *buf;
- if (!bs->backing_hd) {
- block_job_completed(&s->common, 0);
- return;
+ if (!bs->backing) {
+ goto out;
}
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
- block_job_completed(&s->common, s->common.len);
- return;
+ ret = s->common.len;
+ goto out;
}
end = s->common.len >> BDRV_SECTOR_BITS;
@@ -166,7 +141,7 @@ wait:
} else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
- ret = bdrv_is_allocated_above(bs->backing_hd, base,
+ ret = bdrv_is_allocated_above(backing_bs(bs), base,
sector_num, n, &n);
/* Finish early if end of backing file has been reached */
@@ -216,6 +191,7 @@ wait:
qemu_vfree(buf);
+out:
/* Modify backing chain and close BDSes in main loop */
data = g_malloc(sizeof(*data));
data->ret = ret;
@@ -250,7 +226,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
return;
}
diff --git a/qemu/block/throttle-groups.c b/qemu/block/throttle-groups.c
index 1abc6fcae..4920e0949 100644
--- a/qemu/block/throttle-groups.c
+++ b/qemu/block/throttle-groups.c
@@ -22,6 +22,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "qemu/osdep.h"
#include "block/throttle-groups.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
@@ -33,8 +34,7 @@
* its own locking.
*
* This locking is however handled internally in this file, so it's
- * mostly transparent to outside users (but see the documentation in
- * throttle_groups_lock()).
+ * transparent to outside users.
*
* The whole ThrottleGroup structure is private and invisible to
* outside users, that only use it through its ThrottleState.
@@ -76,9 +76,9 @@ static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
* created.
*
* @name: the name of the ThrottleGroup
- * @ret: the ThrottleGroup
+ * @ret: the ThrottleState member of the ThrottleGroup
*/
-static ThrottleGroup *throttle_group_incref(const char *name)
+ThrottleState *throttle_group_incref(const char *name)
{
ThrottleGroup *tg = NULL;
ThrottleGroup *iter;
@@ -108,7 +108,7 @@ static ThrottleGroup *throttle_group_incref(const char *name)
qemu_mutex_unlock(&throttle_groups_lock);
- return tg;
+ return &tg->ts;
}
/* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +116,12 @@ static ThrottleGroup *throttle_group_incref(const char *name)
* When the reference count reaches zero the ThrottleGroup is
* destroyed.
*
- * @tg: The ThrottleGroup to unref
+ * @ts: The ThrottleGroup to unref, given by its ThrottleState member
*/
-static void throttle_group_unref(ThrottleGroup *tg)
+void throttle_group_unref(ThrottleState *ts)
{
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+
qemu_mutex_lock(&throttle_groups_lock);
if (--tg->refcount == 0) {
QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +403,8 @@ static void write_timer_cb(void *opaque)
void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
{
int i;
- ThrottleGroup *tg = throttle_group_incref(groupname);
+ ThrottleState *ts = throttle_group_incref(groupname);
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
int clock_type = QEMU_CLOCK_REALTIME;
if (qtest_enabled()) {
@@ -409,7 +412,7 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
clock_type = QEMU_CLOCK_VIRTUAL;
}
- bs->throttle_state = &tg->ts;
+ bs->throttle_state = ts;
qemu_mutex_lock(&tg->lock);
/* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -435,6 +438,9 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
* list, destroying the timers and setting the throttle_state pointer
* to NULL.
*
+ * The BlockDriverState must not have pending throttled requests, so
+ * the caller has to drain them first.
+ *
* The group will be destroyed if it's empty after this operation.
*
* @bs: the BlockDriverState to remove
@@ -444,6 +450,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
int i;
+ assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0);
+ assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
+ assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
+
qemu_mutex_lock(&tg->lock);
for (i = 0; i < 2; i++) {
if (tg->tokens[i] == bs) {
@@ -461,38 +471,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
throttle_timers_destroy(&bs->throttle_timers);
qemu_mutex_unlock(&tg->lock);
- throttle_group_unref(tg);
+ throttle_group_unref(&tg->ts);
bs->throttle_state = NULL;
}
-/* Acquire the lock of this throttling group.
- *
- * You won't normally need to use this. None of the functions from the
- * ThrottleGroup API require you to acquire the lock since all of them
- * deal with it internally.
- *
- * This should only be used in exceptional cases when you want to
- * access the protected fields of a BlockDriverState directly
- * (e.g. bdrv_swap()).
- *
- * @bs: a BlockDriverState that is member of the group
- */
-void throttle_group_lock(BlockDriverState *bs)
-{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
- qemu_mutex_lock(&tg->lock);
-}
-
-/* Release the lock of this throttling group.
- *
- * See the comments in throttle_group_lock().
- */
-void throttle_group_unlock(BlockDriverState *bs)
-{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
- qemu_mutex_unlock(&tg->lock);
-}
-
static void throttle_groups_init(void)
{
qemu_mutex_init(&throttle_groups_lock);
diff --git a/qemu/block/vdi.c b/qemu/block/vdi.c
index 7642ef359..75d4819ed 100644
--- a/qemu/block/vdi.c
+++ b/qemu/block/vdi.c
@@ -49,11 +49,14 @@
* so this seems to be reasonable.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -399,7 +402,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
logout("\n");
- ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+ ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
if (ret < 0) {
goto fail;
}
@@ -490,13 +493,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
bmap_size = header.blocks_in_image * sizeof(uint32_t);
bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
- s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+ s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
if (s->bmap == NULL) {
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+ ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+ bmap_size);
if (ret < 0) {
goto fail_free_bmap;
}
@@ -525,7 +529,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
}
static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
@@ -549,6 +553,7 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
sector_in_block * SECTOR_SIZE;
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
}
@@ -585,7 +590,7 @@ static int vdi_co_read(BlockDriverState *bs,
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
- ret = bdrv_read(bs->file, offset, buf, n_sectors);
+ ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
}
logout("%u sectors read\n", n_sectors);
@@ -653,7 +658,7 @@ static int vdi_co_write(BlockDriverState *bs,
* acquire the lock and thus the padded cluster is written before
* the other coroutines can write to the affected area. */
qemu_co_mutex_lock(&s->write_lock);
- ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+ ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
qemu_co_mutex_unlock(&s->write_lock);
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
@@ -669,7 +674,7 @@ static int vdi_co_write(BlockDriverState *bs,
* that that write operation has returned (there may be other writes
* in flight, but they do not concern this very operation). */
qemu_co_mutex_unlock(&s->write_lock);
- ret = bdrv_write(bs->file, offset, buf, n_sectors);
+ ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
}
nb_sectors -= n_sectors;
@@ -694,7 +699,7 @@ static int vdi_co_write(BlockDriverState *bs,
assert(VDI_IS_ALLOCATED(bmap_first));
*header = s->header;
vdi_header_to_le(header);
- ret = bdrv_write(bs->file, 0, block, 1);
+ ret = bdrv_write(bs->file->bs, 0, block, 1);
g_free(block);
block = NULL;
@@ -712,7 +717,7 @@ static int vdi_co_write(BlockDriverState *bs,
base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
- ret = bdrv_write(bs->file, offset, base, n_sectors);
+ ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
}
return ret;
@@ -730,7 +735,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
size_t bmap_size;
int64_t offset = 0;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
uint32_t *bmap = NULL;
logout("\n");
@@ -763,13 +768,17 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
error_propagate(errp, local_err);
goto exit;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* We need enough blocks to store the given disk size,
so always round up. */
blocks = DIV_ROUND_UP(bytes, block_size);
@@ -799,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+ ret = blk_pwrite(blk, offset, &header, sizeof(header));
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -820,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+ ret = blk_pwrite(blk, offset, bmap, bmap_size);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -829,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
if (image_type == VDI_TYPE_STATIC) {
- ret = bdrv_truncate(bs, offset + blocks * block_size);
+ ret = blk_truncate(blk, offset + blocks * block_size);
if (ret < 0) {
error_setg(errp, "Failed to statically allocate %s", filename);
goto exit;
@@ -837,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
exit:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(bmap);
return ret;
}
diff --git a/qemu/block/vhdx-endian.c b/qemu/block/vhdx-endian.c
index 0640d3f4a..da33cd38e 100644
--- a/qemu/block/vhdx-endian.c
+++ b/qemu/block/vhdx-endian.c
@@ -15,6 +15,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/vhdx.h"
diff --git a/qemu/block/vhdx-log.c b/qemu/block/vhdx-log.c
index 47fec63c6..7ea7187fc 100644
--- a/qemu/block/vhdx-log.c
+++ b/qemu/block/vhdx-log.c
@@ -17,6 +17,8 @@
* See the COPYING.LIB file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/error-report.h"
@@ -81,7 +83,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
offset = log->offset + read;
- ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+ ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
if (ret < 0) {
goto exit;
}
@@ -141,7 +143,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
}
offset = log->offset + read;
- ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+ ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -191,7 +193,8 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
/* full */
break;
}
- ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+ ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
+ VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -353,7 +356,7 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- desc_entries = qemu_try_blockalign(bs->file,
+ desc_entries = qemu_try_blockalign(bs->file->bs,
desc_sectors * VHDX_LOG_SECTOR_SIZE);
if (desc_entries == NULL) {
ret = -ENOMEM;
@@ -462,7 +465,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
/* count is only > 1 if we are writing zeroes */
for (i = 0; i < count; i++) {
- ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
+ ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -509,7 +512,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
/* if the log shows a FlushedFileOffset larger than our current file
* size, then that means the file has been truncated / corrupted, and
* we must refused to open it / use it */
- if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+ if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
ret = -EINVAL;
goto exit;
}
@@ -539,12 +542,12 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
goto exit;
}
}
- if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+ if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
new_file_size = desc_entries->hdr.last_file_offset;
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
- bdrv_truncate(bs->file, new_file_size);
+ bdrv_truncate(bs->file->bs, new_file_size);
}
}
qemu_vfree(desc_entries);
@@ -783,12 +786,13 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
if (logs.valid) {
if (bs->read_only) {
ret = -EPERM;
- error_setg_errno(errp, EPERM,
- "VHDX image file '%s' opened read-only, but "
- "contains a log that needs to be replayed. To "
- "replay the log, execute:\n qemu-img check -r "
- "all '%s'",
- bs->filename, bs->filename);
+ error_setg(errp,
+ "VHDX image file '%s' opened read-only, but "
+ "contains a log that needs to be replayed",
+ bs->filename);
+ error_append_hint(errp, "To replay the log, run:\n"
+ "qemu-img check -r all '%s'\n",
+ bs->filename);
goto exit;
}
/* now flush the log */
@@ -908,8 +912,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
.sequence_number = s->log.sequence,
.descriptor_count = sectors,
.reserved = 0,
- .flushed_file_offset = bdrv_getlength(bs->file),
- .last_file_offset = bdrv_getlength(bs->file),
+ .flushed_file_offset = bdrv_getlength(bs->file->bs),
+ .last_file_offset = bdrv_getlength(bs->file->bs),
};
new_hdr.log_guid = header->log_guid;
@@ -940,7 +944,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
if (i == 0 && leading_length) {
/* partial sector at the front of the buffer */
- ret = bdrv_pread(bs->file, file_offset, merged_sector,
+ ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -950,7 +954,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
sector_write = merged_sector;
} else if (i == sectors - 1 && trailing_length) {
/* partial sector at the end of the buffer */
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
file_offset,
merged_sector + trailing_length,
VHDX_LOG_SECTOR_SIZE - trailing_length);
diff --git a/qemu/block/vhdx.c b/qemu/block/vhdx.c
index 0776de717..2b7b33240 100644
--- a/qemu/block/vhdx.c
+++ b/qemu/block/vhdx.c
@@ -15,8 +15,11 @@
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
#include "block/vhdx.h"
@@ -263,10 +266,10 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s)
static void vhdx_set_shift_bits(BDRVVHDXState *s)
{
- s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
- s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
- s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
- s->block_size_bits = 31 - clz32(s->block_size);
+ s->logical_sector_size_bits = ctz32(s->logical_sector_size);
+ s->sectors_per_block_bits = ctz32(s->sectors_per_block);
+ s->chunk_ratio_bits = ctz64(s->chunk_ratio);
+ s->block_size_bits = ctz32(s->block_size);
}
/*
@@ -375,7 +378,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
inactive_header->log_guid = *log_guid;
}
- ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
+ ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
if (ret < 0) {
goto exit;
}
@@ -427,7 +430,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
- ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
+ VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -443,7 +447,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
}
- ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
+ VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -516,7 +521,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
* whole block */
buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
- ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
+ ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
goto fail;
@@ -629,7 +634,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
- ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
+ ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
VHDX_METADATA_TABLE_MAX_SIZE);
if (ret < 0) {
goto exit;
@@ -732,7 +737,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
goto exit;
}
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.file_parameters_entry.offset
+ s->metadata_rt.file_offset,
&s->params,
@@ -767,7 +772,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
/* determine virtual disk size, logical sector size,
* and phys sector size */
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.virtual_disk_size_entry.offset
+ s->metadata_rt.file_offset,
&s->virtual_disk_size,
@@ -775,7 +780,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.logical_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->logical_sector_size,
@@ -783,7 +788,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.phys_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->physical_sector_size,
@@ -854,14 +859,8 @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s)
{
uint32_t data_blocks_cnt, bitmap_blocks_cnt;
- data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
- if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
- data_blocks_cnt++;
- }
- bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
- if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
- bitmap_blocks_cnt++;
- }
+ data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size);
+ bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio);
if (s->parent_entries) {
s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
@@ -906,7 +905,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->regions);
/* validate the file signature */
- ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
+ ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
@@ -959,13 +958,13 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
}
/* s->bat is freed in vhdx_close() */
- s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+ s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length);
if (s->bat == NULL) {
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
+ ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
goto fail;
}
@@ -1118,7 +1117,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
case PAYLOAD_BLOCK_FULLY_PRESENT:
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
+ ret = bdrv_co_readv(bs->file->bs,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sinfo.sectors_avail, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1156,12 +1155,12 @@ exit:
static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
uint64_t *new_offset)
{
- *new_offset = bdrv_getlength(bs->file);
+ *new_offset = bdrv_getlength(bs->file->bs);
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
- return bdrv_truncate(bs->file, *new_offset + s->block_size);
+ return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
}
/*
@@ -1260,7 +1259,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
/* Queue another write of zero buffers if the underlying file
* does not zero-fill on file extension */
- if (bdrv_has_zero_init(bs->file) == 0) {
+ if (bdrv_has_zero_init(bs->file->bs) == 0) {
use_zero_buffers = true;
/* zero fill the front, if any */
@@ -1327,7 +1326,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
}
/* block exists, so we can just overwrite it */
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
+ ret = bdrv_co_writev(bs->file->bs,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sectors_to_write, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1454,7 +1453,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
uint32_t offset = 0;
void *buffer = NULL;
void *entry_buffer;
- VHDXMetadataTableHeader *md_table;;
+ VHDXMetadataTableHeader *md_table;
VHDXMetadataTableEntry *md_table_entry;
/* Metadata entries */
@@ -1775,7 +1774,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
gunichar2 *creator = NULL;
glong creator_items;
- BlockDriverState *bs;
+ BlockBackend *blk;
char *type = NULL;
VHDXImageType image_type;
Error *local_err = NULL;
@@ -1840,14 +1839,16 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Create (A) */
/* The creator field is optional, but may be useful for
@@ -1855,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
- creator, creator_items * sizeof(gunichar2));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+ creator, creator_items * sizeof(gunichar2));
if (ret < 0) {
goto delete_and_exit;
}
@@ -1869,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
/* Creates (B),(C) */
- ret = vhdx_create_new_headers(bs, image_size, log_size);
+ ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
- ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1883,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* Creates (H) */
- ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;
@@ -1891,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
delete_and_exit:
- bdrv_unref(bs);
+ blk_unref(blk);
exit:
g_free(type);
g_free(creator);
diff --git a/qemu/block/vmdk.c b/qemu/block/vmdk.c
index fbaab67c8..45f9d3c5b 100644
--- a/qemu/block/vmdk.c
+++ b/qemu/block/vmdk.c
@@ -23,12 +23,15 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "migration/migration.h"
+#include "qemu/cutils.h"
#include <zlib.h>
#include <glib.h>
@@ -87,7 +90,7 @@ typedef struct {
#define L2_CACHE_SIZE 16
typedef struct VmdkExtent {
- BlockDriverState *file;
+ BdrvChild *file;
bool flat;
bool compressed;
bool has_marker;
@@ -222,7 +225,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
g_free(e->l1_backup_table);
g_free(e->type);
if (e->file != bs->file) {
- bdrv_unref(e->file);
+ bdrv_unref_child(bs, e->file);
}
}
g_free(s->extents);
@@ -241,15 +244,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
- char desc[DESC_SIZE];
+ char *desc;
uint32_t cid = 0xffffffff;
const char *p_name, *cid_str;
size_t cid_str_size;
BDRVVmdkState *s = bs->opaque;
int ret;
- ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+ desc = g_malloc0(DESC_SIZE);
+ ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
+ g_free(desc);
return 0;
}
@@ -268,50 +273,55 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
sscanf(p_name, "%" SCNx32, &cid);
}
+ g_free(desc);
return cid;
}
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
- char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
+ char *desc, *tmp_desc;
char *p_name, *tmp_str;
BDRVVmdkState *s = bs->opaque;
- int ret;
+ int ret = 0;
- ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+ desc = g_malloc0(DESC_SIZE);
+ tmp_desc = g_malloc0(DESC_SIZE);
+ ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
desc[DESC_SIZE - 1] = '\0';
tmp_str = strstr(desc, "parentCID");
if (tmp_str == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
- pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+ pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
p_name = strstr(desc, "CID");
if (p_name != NULL) {
p_name += sizeof("CID");
- snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
- pstrcat(desc, sizeof(desc), tmp_desc);
+ snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+ pstrcat(desc, DESC_SIZE, tmp_desc);
}
- ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
- if (ret < 0) {
- return ret;
- }
+ ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
- return 0;
+out:
+ g_free(desc);
+ g_free(tmp_desc);
+ return ret;
}
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
- BlockDriverState *p_bs = bs->backing_hd;
uint32_t cur_pcid;
- if (!s->cid_checked && p_bs) {
+ if (!s->cid_checked && bs->backing) {
+ BlockDriverState *p_bs = bs->backing->bs;
+
cur_pcid = vmdk_read_cid(p_bs, 0);
if (s->parent_cid != cur_pcid) {
/* CID not valid */
@@ -335,15 +345,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state,
static int vmdk_parent_open(BlockDriverState *bs)
{
char *p_name;
- char desc[DESC_SIZE + 1];
+ char *desc;
BDRVVmdkState *s = bs->opaque;
int ret;
- desc[DESC_SIZE] = '\0';
- ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+ desc = g_malloc0(DESC_SIZE + 1);
+ ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
+ ret = 0;
p_name = strstr(desc, "parentFileNameHint");
if (p_name != NULL) {
@@ -352,22 +363,26 @@ static int vmdk_parent_open(BlockDriverState *bs)
p_name += sizeof("parentFileNameHint") + 1;
end_name = strchr(p_name, '\"');
if (end_name == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
}
- return 0;
+out:
+ g_free(desc);
+ return ret;
}
/* Create and append extent to the extent array. Return the added VmdkExtent
* address. return NULL if allocation failed. */
static int vmdk_add_extent(BlockDriverState *bs,
- BlockDriverState *file, bool flat, int64_t sectors,
+ BdrvChild *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
int l2_size, uint64_t cluster_sectors,
@@ -392,7 +407,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
return -EFBIG;
}
- nb_sectors = bdrv_nb_sectors(file);
+ nb_sectors = bdrv_nb_sectors(file->bs);
if (nb_sectors < 0) {
return nb_sectors;
}
@@ -439,14 +454,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
return -ENOMEM;
}
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
extent->l1_table_offset,
extent->l1_table,
l1_size);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read l1 table from extent '%s'",
- extent->file->filename);
+ extent->file->bs->filename);
goto fail_l1;
}
for (i = 0; i < extent->l1_size; i++) {
@@ -459,14 +474,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
ret = -ENOMEM;
goto fail_l1;
}
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
extent->l1_backup_table_offset,
extent->l1_backup_table,
l1_size);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read l1 backup table from extent '%s'",
- extent->file->filename);
+ extent->file->bs->filename);
goto fail_l1b;
}
for (i = 0; i < extent->l1_size; i++) {
@@ -485,7 +500,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
- BlockDriverState *file,
+ BdrvChild *file,
int flags, Error **errp)
{
int ret;
@@ -493,11 +508,11 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
VMDK3Header header;
VmdkExtent *extent;
- ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+ ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
- file->filename);
+ file->bs->filename);
return ret;
}
ret = vmdk_add_extent(bs, file, false,
@@ -559,7 +574,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
}
static int vmdk_open_vmdk4(BlockDriverState *bs,
- BlockDriverState *file,
+ BdrvChild *file,
int flags, QDict *options, Error **errp)
{
int ret;
@@ -569,18 +584,19 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
int64_t l1_backup_offset = 0;
+ bool compressed;
- ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+ ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
- file->filename);
+ file->bs->filename);
return -EINVAL;
}
if (header.capacity == 0) {
uint64_t desc_offset = le64_to_cpu(header.desc_offset);
if (desc_offset) {
- char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
+ char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
if (!buf) {
return -EINVAL;
}
@@ -620,8 +636,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
} QEMU_PACKED eos_marker;
} QEMU_PACKED footer;
- ret = bdrv_pread(file,
- bs->file->total_sectors * 512 - 1536,
+ ret = bdrv_pread(file->bs,
+ bs->file->bs->total_sectors * 512 - 1536,
&footer, sizeof(footer));
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to read footer");
@@ -643,14 +659,14 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
header = footer.header;
}
+ compressed =
+ le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
if (le32_to_cpu(header.version) > 3) {
- char buf[64];
- snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
- le32_to_cpu(header.version));
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "vmdk", buf);
+ error_setg(errp, "Unsupported VMDK version %" PRIu32,
+ le32_to_cpu(header.version));
return -ENOTSUP;
- } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
+ } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
+ !compressed) {
/* VMware KB 2064959 explains that version 3 added support for
* persistent changed block tracking (CBT), and backup software can
* read it as version=1 if it doesn't care about the changed area
@@ -675,7 +691,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
+ if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@@ -739,8 +755,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
}
/* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs,
- BlockDriverState *file, int flags,
+static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
char *buf, QDict *options, Error **errp)
{
uint32_t magic;
@@ -760,6 +775,17 @@ static int vmdk_open_sparse(BlockDriverState *bs,
}
}
+static const char *next_line(const char *s)
+{
+ while (*s) {
+ if (*s == '\n') {
+ return s + 1;
+ }
+ s++;
+ }
+ return s;
+}
+
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
const char *desc_file_path, QDict *options,
Error **errp)
@@ -769,16 +795,17 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
char access[11];
char type[11];
char fname[512];
- const char *p = desc;
+ const char *p, *np;
int64_t sectors = 0;
int64_t flat_offset;
char *extent_path;
- BlockDriverState *extent_file;
+ BdrvChild *extent_file;
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent;
char extent_opt_prefix[32];
+ Error *local_err = NULL;
- while (*p) {
+ for (p = desc; *p; p = next_line(p)) {
/* parse extent line in one of below formats:
*
* RW [size in sectors] FLAT "file-name.vmdk" OFFSET
@@ -790,51 +817,48 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
access, &sectors, type, fname, &flat_offset);
if (matches < 4 || strcmp(access, "RW")) {
- goto next_line;
+ continue;
} else if (!strcmp(type, "FLAT")) {
if (matches != 5 || flat_offset < 0) {
- error_setg(errp, "Invalid extent lines: \n%s", p);
- return -EINVAL;
+ goto invalid;
}
} else if (!strcmp(type, "VMFS")) {
if (matches == 4) {
flat_offset = 0;
} else {
- error_setg(errp, "Invalid extent lines:\n%s", p);
- return -EINVAL;
+ goto invalid;
}
} else if (matches != 4) {
- error_setg(errp, "Invalid extent lines:\n%s", p);
- return -EINVAL;
+ goto invalid;
}
if (sectors <= 0 ||
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
(strcmp(access, "RW"))) {
- goto next_line;
+ continue;
}
if (!path_is_absolute(fname) && !path_has_protocol(fname) &&
!desc_file_path[0])
{
error_setg(errp, "Cannot use relative extent paths with VMDK "
- "descriptor file '%s'", bs->file->filename);
+ "descriptor file '%s'", bs->file->bs->filename);
return -EINVAL;
}
extent_path = g_malloc0(PATH_MAX);
path_combine(extent_path, PATH_MAX, desc_file_path, fname);
- extent_file = NULL;
ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
assert(ret < 32);
- ret = bdrv_open_image(&extent_file, extent_path, options,
- extent_opt_prefix, bs, &child_file, false, errp);
+ extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
+ bs, &child_file, false, &local_err);
g_free(extent_path);
- if (ret) {
- return ret;
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
}
/* save to extents array */
@@ -844,13 +868,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return ret;
}
extent->flat_start_offset = flat_offset << 9;
} else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
/* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
- char *buf = vmdk_read_desc(extent_file, 0, errp);
+ char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
if (!buf) {
ret = -EINVAL;
} else {
@@ -859,27 +883,27 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
}
g_free(buf);
if (ret) {
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return ret;
}
extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return -ENOTSUP;
}
extent->type = g_strdup(type);
-next_line:
- /* move to next line */
- while (*p) {
- if (*p == '\n') {
- p++;
- break;
- }
- p++;
- }
}
return 0;
+
+invalid:
+ np = next_line(p);
+ assert(np != p);
+ if (np[-1] == '\n') {
+ np--;
+ }
+ error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
+ return -EINVAL;
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
@@ -905,7 +929,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
}
s->create_type = g_strdup(ct);
s->desc_offset = 0;
- ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp);
+ ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options,
+ errp);
exit:
return ret;
}
@@ -918,7 +943,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
- buf = vmdk_read_desc(bs->file, 0, errp);
+ buf = vmdk_read_desc(bs->file->bs, 0, errp);
if (!buf) {
return -EINVAL;
}
@@ -927,7 +952,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
switch (magic) {
case VMDK3_MAGIC:
case VMDK4_MAGIC:
- ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp);
+ ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
+ errp);
s->desc_offset = 0x200;
break;
default:
@@ -1004,7 +1030,7 @@ static int get_whole_cluster(BlockDriverState *bs,
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
whole_grain = qemu_blockalign(bs, cluster_bytes);
- if (!bs->backing_hd) {
+ if (!bs->backing) {
memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
@@ -1013,22 +1039,22 @@ static int get_whole_cluster(BlockDriverState *bs,
assert(skip_end_sector <= extent->cluster_sectors);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
- if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+ if (bs->backing && !vmdk_is_cid_valid(bs)) {
ret = VMDK_ERROR;
goto exit;
}
/* Read backing data before skip range */
if (skip_start_sector > 0) {
- if (bs->backing_hd) {
- ret = bdrv_read(bs->backing_hd, sector_num,
+ if (bs->backing) {
+ ret = bdrv_read(bs->backing->bs, sector_num,
whole_grain, skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+ ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
@@ -1037,8 +1063,8 @@ static int get_whole_cluster(BlockDriverState *bs,
}
/* Read backing data after skip range */
if (skip_end_sector < extent->cluster_sectors) {
- if (bs->backing_hd) {
- ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+ if (bs->backing) {
+ ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
@@ -1046,7 +1072,7 @@ static int get_whole_cluster(BlockDriverState *bs,
goto exit;
}
}
- ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+ ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
@@ -1066,7 +1092,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
- extent->file,
+ extent->file->bs,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1076,7 +1102,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
if (bdrv_pwrite_sync(
- extent->file,
+ extent->file->bs,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1166,7 +1192,7 @@ static int get_cluster_offset(BlockDriverState *bs,
}
l2_table = extent->l2_cache + (min_index * extent->l2_size);
if (bdrv_pread(
- extent->file,
+ extent->file->bs,
(int64_t)l2_offset * 512,
l2_table,
extent->l2_size * sizeof(uint32_t)
@@ -1245,7 +1271,7 @@ static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
}
static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
@@ -1262,6 +1288,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
0, 0);
qemu_co_mutex_unlock(&s->lock);
+ index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
switch (ret) {
case VMDK_ERROR:
ret = -EIO;
@@ -1274,14 +1301,15 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
break;
case VMDK_OK:
ret = BDRV_BLOCK_DATA;
- if (extent->file == bs->file && !extent->compressed) {
- ret |= BDRV_BLOCK_OFFSET_VALID | offset;
+ if (!extent->compressed) {
+ ret |= BDRV_BLOCK_OFFSET_VALID;
+ ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS))
+ & BDRV_BLOCK_OFFSET_MASK;
}
-
+ *file = extent->file->bs;
break;
}
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
n = nb_sectors;
@@ -1320,12 +1348,16 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
write_len = buf_len + sizeof(VmdkGrainMarker);
}
write_offset = cluster_offset + offset_in_cluster,
- ret = bdrv_pwrite(extent->file, write_offset, write_buf, write_len);
+ ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
- extent->next_cluster_sector = MAX(extent->next_cluster_sector,
- write_end_sector);
+ if (extent->compressed) {
+ extent->next_cluster_sector = write_end_sector;
+ } else {
+ extent->next_cluster_sector = MAX(extent->next_cluster_sector,
+ write_end_sector);
+ }
if (ret != write_len) {
ret = ret < 0 ? ret : -EIO;
@@ -1351,7 +1383,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
if (!extent->compressed) {
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
cluster_offset + offset_in_cluster,
buf, nb_sectors * 512);
if (ret == nb_sectors * 512) {
@@ -1365,7 +1397,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
buf_bytes = cluster_bytes * 2;
cluster_buf = g_malloc(buf_bytes);
uncomp_buf = g_malloc(cluster_bytes);
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
cluster_offset,
cluster_buf, buf_bytes);
if (ret < 0) {
@@ -1427,11 +1459,11 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
}
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
- if (bs->backing_hd && ret != VMDK_ZEROED) {
+ if (bs->backing && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
return -EINVAL;
}
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+ ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
if (ret < 0) {
return ret;
}
@@ -1487,8 +1519,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (sector_num > bs->total_sectors) {
error_report("Wrong offset: sector_num=0x%" PRIx64
- " total_sectors=0x%" PRIx64 "\n",
- sector_num, bs->total_sectors);
+ " total_sectors=0x%" PRIx64,
+ sector_num, bs->total_sectors);
return -EIO;
}
@@ -1617,7 +1649,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
QemuOpts *opts, Error **errp)
{
int ret, i;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
VMDK4Header header;
Error *local_err = NULL;
uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1630,16 +1662,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
goto exit;
}
- assert(bs == NULL);
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
if (flat) {
- ret = bdrv_truncate(bs, filesize);
+ ret = blk_truncate(blk, filesize);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
@@ -1647,7 +1681,13 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
magic = cpu_to_be32(VMDK4_MAGIC);
memset(&header, 0, sizeof(header));
- header.version = zeroed_grain ? 2 : 1;
+ if (compress) {
+ header.version = 3;
+ } else if (zeroed_grain) {
+ header.version = 2;
+ } else {
+ header.version = 1;
+ }
header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
@@ -1688,18 +1728,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.check_bytes[3] = 0xa;
/* write all the data */
- ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+ ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+ ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
+ ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
goto exit;
@@ -1712,8 +1752,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1724,8 +1764,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1733,8 +1773,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
ret = 0;
exit:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(gd_buf);
return ret;
@@ -1783,7 +1823,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
{
int idx = 0;
- BlockDriverState *new_bs = NULL;
+ BlockBackend *new_blk = NULL;
Error *local_err = NULL;
char *desc = NULL;
int64_t total_size = 0, filesize;
@@ -1894,7 +1934,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
if (backing_file) {
- BlockDriverState *bs = NULL;
+ BlockBackend *blk;
char *full_backing = g_new0(char, PATH_MAX);
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
full_backing, PATH_MAX,
@@ -1905,19 +1945,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
ret = -ENOENT;
goto exit;
}
- ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL,
- errp);
+
+ blk = blk_new_open(full_backing, NULL, NULL,
+ BDRV_O_NO_BACKING, errp);
g_free(full_backing);
- if (ret != 0) {
+ if (blk == NULL) {
+ ret = -EIO;
goto exit;
}
- if (strcmp(bs->drv->format_name, "vmdk")) {
- bdrv_unref(bs);
+ if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
+ blk_unref(blk);
ret = -EINVAL;
goto exit;
}
- parent_cid = vmdk_read_cid(bs, 0);
- bdrv_unref(bs);
+ parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+ blk_unref(blk);
snprintf(parent_desc_line, BUF_SIZE,
"parentFileNameHint=\"%s\"", backing_file);
}
@@ -1975,14 +2017,18 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
}
- assert(new_bs == NULL);
- ret = bdrv_open(&new_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
- if (ret < 0) {
+
+ new_blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (new_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
- ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+
+ blk_set_allow_write_beyond_eof(new_blk, true);
+
+ ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -1990,14 +2036,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
* for description file */
if (desc_offset == 0) {
- ret = bdrv_truncate(new_bs, desc_len);
+ ret = blk_truncate(new_blk, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
}
exit:
- if (new_bs) {
- bdrv_unref(new_bs);
+ if (new_blk) {
+ blk_unref(new_blk);
}
g_free(adapter_type);
g_free(backing_file);
@@ -2032,7 +2078,7 @@ static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
int ret = 0;
for (i = 0; i < s->num_extents; i++) {
- err = bdrv_co_flush(s->extents[i].file);
+ err = bdrv_co_flush(s->extents[i].file->bs);
if (err < 0) {
ret = err;
}
@@ -2047,7 +2093,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
int64_t r;
BDRVVmdkState *s = bs->opaque;
- ret = bdrv_get_allocated_file_size(bs->file);
+ ret = bdrv_get_allocated_file_size(bs->file->bs);
if (ret < 0) {
return ret;
}
@@ -2055,7 +2101,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
if (s->extents[i].file == bs->file) {
continue;
}
- r = bdrv_get_allocated_file_size(s->extents[i].file);
+ r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
if (r < 0) {
return r;
}
@@ -2073,7 +2119,7 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
* return 0. */
for (i = 0; i < s->num_extents; i++) {
if (s->extents[i].flat) {
- if (!bdrv_has_zero_init(s->extents[i].file)) {
+ if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
return 0;
}
}
@@ -2086,7 +2132,7 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
ImageInfo *info = g_new0(ImageInfo, 1);
*info = (ImageInfo){
- .filename = g_strdup(extent->file->filename),
+ .filename = g_strdup(extent->file->bs->filename),
.format = g_strdup(extent->type),
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
.compressed = extent->compressed,
@@ -2132,7 +2178,9 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
PRId64 "\n", sector_num);
break;
}
- if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
+ if (ret == VMDK_OK &&
+ cluster_offset >= bdrv_getlength(extent->file->bs))
+ {
fprintf(stderr,
"ERROR: cluster offset for sector %"
PRId64 " points after EOF\n", sector_num);
@@ -2153,19 +2201,19 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
ImageInfoList **next;
*spec_info = (ImageInfoSpecific){
- .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK,
- {
- .vmdk = g_new0(ImageInfoSpecificVmdk, 1),
+ .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
+ .u = {
+ .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
},
};
- *spec_info->vmdk = (ImageInfoSpecificVmdk) {
+ *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
.create_type = g_strdup(s->create_type),
.cid = s->cid,
.parent_cid = s->parent_cid,
};
- next = &spec_info->vmdk->extents;
+ next = &spec_info->u.vmdk.data->extents;
for (i = 0; i < s->num_extents; i++) {
*next = g_new0(ImageInfoList, 1);
(*next)->value = vmdk_get_extent_info(&s->extents[i]);
@@ -2208,7 +2256,7 @@ static void vmdk_detach_aio_context(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_extents; i++) {
- bdrv_detach_aio_context(s->extents[i].file);
+ bdrv_detach_aio_context(s->extents[i].file->bs);
}
}
@@ -2219,7 +2267,7 @@ static void vmdk_attach_aio_context(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_extents; i++) {
- bdrv_attach_aio_context(s->extents[i].file, new_context);
+ bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
}
}
diff --git a/qemu/block/vpc.c b/qemu/block/vpc.c
index 3e385d9fb..3e2ea698d 100644
--- a/qemu/block/vpc.c
+++ b/qemu/block/vpc.c
@@ -22,8 +22,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#if defined(CONFIG_UUID)
@@ -42,28 +45,34 @@ enum vhd_type {
VHD_DIFFERENCING = 4,
};
-// Seconds since Jan 1, 2000 0:00:00 (UTC)
+/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
#define VHD_TIMESTAMP_BASE 946684800
-#define VHD_MAX_SECTORS (65535LL * 255 * 255)
-#define VHD_MAX_GEOMETRY (65535LL * 16 * 255)
+#define VHD_CHS_MAX_C 65535LL
+#define VHD_CHS_MAX_H 16
+#define VHD_CHS_MAX_S 255
-// always big-endian
+#define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */
+#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
+
+#define VPC_OPT_FORCE_SIZE "force_size"
+
+/* always big-endian */
typedef struct vhd_footer {
- char creator[8]; // "conectix"
+ char creator[8]; /* "conectix" */
uint32_t features;
uint32_t version;
- // Offset of next header structure, 0xFFFFFFFF if none
+ /* Offset of next header structure, 0xFFFFFFFF if none */
uint64_t data_offset;
- // Seconds since Jan 1, 2000 0:00:00 (UTC)
+ /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
uint32_t timestamp;
- char creator_app[4]; // "vpc "
+ char creator_app[4]; /* e.g., "vpc " */
uint16_t major;
uint16_t minor;
- char creator_os[4]; // "Wi2k"
+ char creator_os[4]; /* "Wi2k" */
uint64_t orig_size;
uint64_t current_size;
@@ -74,29 +83,29 @@ typedef struct vhd_footer {
uint32_t type;
- // Checksum of the Hard Disk Footer ("one's complement of the sum of all
- // the bytes in the footer without the checksum field")
+ /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
+ the bytes in the footer without the checksum field") */
uint32_t checksum;
- // UUID used to identify a parent hard disk (backing file)
+ /* UUID used to identify a parent hard disk (backing file) */
uint8_t uuid[16];
uint8_t in_saved_state;
} QEMU_PACKED VHDFooter;
typedef struct vhd_dyndisk_header {
- char magic[8]; // "cxsparse"
+ char magic[8]; /* "cxsparse" */
- // Offset of next header structure, 0xFFFFFFFF if none
+ /* Offset of next header structure, 0xFFFFFFFF if none */
uint64_t data_offset;
- // Offset of the Block Allocation Table (BAT)
+ /* Offset of the Block Allocation Table (BAT) */
uint64_t table_offset;
uint32_t version;
- uint32_t max_table_entries; // 32bit/entry
+ uint32_t max_table_entries; /* 32bit/entry */
- // 2 MB by default, must be a power of two
+ /* 2 MB by default, must be a power of two */
uint32_t block_size;
uint32_t checksum;
@@ -104,7 +113,7 @@ typedef struct vhd_dyndisk_header {
uint32_t parent_timestamp;
uint32_t reserved;
- // Backing file name (in UTF-16)
+ /* Backing file name (in UTF-16) */
uint8_t parent_name[512];
struct {
@@ -127,6 +136,8 @@ typedef struct BDRVVPCState {
uint32_t block_size;
uint32_t bitmap_size;
+ bool force_use_chs;
+ bool force_use_sz;
#ifdef CACHE
uint8_t *pageentry_u8;
@@ -139,6 +150,22 @@ typedef struct BDRVVPCState {
Error *migration_blocker;
} BDRVVPCState;
+#define VPC_OPT_SIZE_CALC "force_size_calc"
+static QemuOptsList vpc_runtime_opts = {
+ .name = "vpc-runtime-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
+ .desc = {
+ {
+ .name = VPC_OPT_SIZE_CALC,
+ .type = QEMU_OPT_STRING,
+ .help = "Force disk size calculation to use either CHS geometry, "
+ "or use the disk current_size specified in the VHD footer. "
+ "{chs, current_size}"
+ },
+ { /* end of list */ }
+ }
+};
+
static uint32_t vpc_checksum(uint8_t* buf, size_t size)
{
uint32_t res = 0;
@@ -158,6 +185,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
+static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
+ Error **errp)
+{
+ BDRVVPCState *s = bs->opaque;
+ const char *size_calc;
+
+ size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
+
+ if (!size_calc) {
+ /* no override, use autodetect only */
+ } else if (!strcmp(size_calc, "current_size")) {
+ s->force_use_sz = true;
+ } else if (!strcmp(size_calc, "chs")) {
+ s->force_use_chs = true;
+ } else {
+ error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
+ }
+}
+
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -165,6 +211,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ bool use_chs;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
@@ -172,24 +221,42 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
- ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
+ opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ vpc_parse_options(bs, opts, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
+ error_setg(errp, "Unable to read VHD header");
goto fail;
}
footer = (VHDFooter *) s->footer_buf;
if (strncmp(footer->creator, "conectix", 8)) {
- int64_t offset = bdrv_getlength(bs->file);
+ int64_t offset = bdrv_getlength(bs->file->bs);
if (offset < 0) {
ret = offset;
+ error_setg(errp, "Invalid file size");
goto fail;
} else if (offset < HEADER_SIZE) {
ret = -EINVAL;
+ error_setg(errp, "File too small for a VHD header");
goto fail;
}
/* If a fixed disk, the footer is found only at the end of the file */
- ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
+ ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -211,36 +278,66 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
/* Write 'checksum' back to footer, or else will leave it with zero. */
footer->checksum = cpu_to_be32(checksum);
- // The visible size of a image in Virtual PC depends on the geometry
- // rather than on the size stored in the footer (the size in the footer
- // is too large usually)
+ /* The visible size of a image in Virtual PC depends on the geometry
+ rather than on the size stored in the footer (the size in the footer
+ is too large usually) */
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
- /* Images that have exactly the maximum geometry are probably bigger and
- * would be truncated if we adhered to the geometry for them. Rely on
- * footer->current_size for them. */
- if (bs->total_sectors == VHD_MAX_GEOMETRY) {
+ /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
+ * VHD image sizes differently. VPC will rely on CHS geometry,
+ * while Hyper-V and disk2vhd use the size specified in the footer.
+ *
+ * We use a couple of approaches to try and determine the correct method:
+ * look at the Creator App field, and look for images that have CHS
+ * geometry that is the maximum value.
+ *
+ * If the CHS geometry is the maximum CHS geometry, then we assume that
+ * the size is the footer->current_size to avoid truncation. Otherwise,
+ * we follow the table based on footer->creator_app:
+ *
+ * Known creator apps:
+ * 'vpc ' : CHS Virtual PC (uses disk geometry)
+ * 'qemu' : CHS QEMU (uses disk geometry)
+ * 'qem2' : current_size QEMU (uses current_size)
+ * 'win ' : current_size Hyper-V
+ * 'd2v ' : current_size Disk2vhd
+ * 'tap\0' : current_size XenServer
+ * 'CTXS' : current_size XenConverter
+ *
+ * The user can override the table values via drive options, however
+ * even with an override we will still use current_size for images
+ * that have CHS geometry of the maximum size.
+ */
+ use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
+ !!strncmp(footer->creator_app, "qem2", 4) &&
+ !!strncmp(footer->creator_app, "d2v ", 4) &&
+ !!strncmp(footer->creator_app, "CTXS", 4) &&
+ !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
+
+ if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
bs->total_sectors = be64_to_cpu(footer->current_size) /
- BDRV_SECTOR_SIZE;
+ BDRV_SECTOR_SIZE;
}
- /* Allow a maximum disk size of approximately 2 TB */
- if (bs->total_sectors >= VHD_MAX_SECTORS) {
+ /* Allow a maximum disk size of 2040 GiB */
+ if (bs->total_sectors > VHD_MAX_SECTORS) {
ret = -EFBIG;
goto fail;
}
if (disk_type == VHD_DYNAMIC) {
- ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
+ ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
HEADER_SIZE);
if (ret < 0) {
+ error_setg(errp, "Error reading dynamic VHD header");
goto fail;
}
dyndisk_header = (VHDDynDiskHeader *) buf;
if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
+ error_setg(errp, "Invalid header magic");
ret = -EINVAL;
goto fail;
}
@@ -256,16 +353,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
- ret = -EINVAL;
- goto fail;
- }
- if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
+ error_setg(errp, "Too many blocks");
ret = -EINVAL;
goto fail;
}
computed_size = (uint64_t) s->max_table_entries * s->block_size;
if (computed_size < bs->total_sectors * 512) {
+ error_setg(errp, "Page table too small");
ret = -EINVAL;
goto fail;
}
@@ -280,16 +375,19 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
pagetable_size = (uint64_t) s->max_table_entries * 4;
- s->pagetable = qemu_try_blockalign(bs->file, pagetable_size);
+ s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
if (s->pagetable == NULL) {
+ error_setg(errp, "Unable to allocate memory for page table");
ret = -ENOMEM;
goto fail;
}
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
- ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable, pagetable_size);
+ ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
+ pagetable_size);
if (ret < 0) {
+ error_setg(errp, "Error reading pagetable");
goto fail;
}
@@ -308,7 +406,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
+ if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
error_setg(errp, "block-vpc: free_data_block_offset points after "
"the end of file. The image has been truncated.");
ret = -EINVAL;
@@ -368,22 +466,22 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
pageentry_index = (offset % s->block_size) / 512;
if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
- return -1; // not allocated
+ return -1; /* not allocated */
bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
- // We must ensure that we don't write to any sectors which are marked as
- // unused in the bitmap. We get away with setting all bits in the block
- // bitmap each time we write to a new block. This might cause Virtual PC to
- // miss sparse read optimization, but it's not a problem in terms of
- // correctness.
+ /* We must ensure that we don't write to any sectors which are marked as
+ unused in the bitmap. We get away with setting all bits in the block
+ bitmap each time we write to a new block. This might cause Virtual PC to
+ miss sparse read optimization, but it's not a problem in terms of
+ correctness. */
if (write && (s->last_bitmap_offset != bitmap_offset)) {
uint8_t bitmap[s->bitmap_size];
s->last_bitmap_offset = bitmap_offset;
memset(bitmap, 0xff, s->bitmap_size);
- bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+ bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
}
return block_offset;
@@ -401,7 +499,7 @@ static int rewrite_footer(BlockDriverState* bs)
BDRVVPCState *s = bs->opaque;
int64_t offset = s->free_data_block_offset;
- ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
+ ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
if (ret < 0)
return ret;
@@ -423,35 +521,35 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
int ret;
uint8_t bitmap[s->bitmap_size];
- // Check if sector_num is valid
+ /* Check if sector_num is valid */
if ((sector_num < 0) || (sector_num > bs->total_sectors))
return -1;
- // Write entry into in-memory BAT
+ /* Write entry into in-memory BAT */
index = (sector_num * 512) / s->block_size;
if (s->pagetable[index] != 0xFFFFFFFF)
return -1;
s->pagetable[index] = s->free_data_block_offset / 512;
- // Initialize the block's bitmap
+ /* Initialize the block's bitmap */
memset(bitmap, 0xff, s->bitmap_size);
- ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
+ ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
s->bitmap_size);
if (ret < 0) {
return ret;
}
- // Write new footer (the old one will be overwritten)
+ /* Write new footer (the old one will be overwritten) */
s->free_data_block_offset += s->block_size + s->bitmap_size;
ret = rewrite_footer(bs);
if (ret < 0)
goto fail;
- // Write BAT entry to disk
+ /* Write BAT entry to disk */
bat_offset = s->bat_offset + (4 * index);
bat_value = cpu_to_be32(s->pagetable[index]);
- ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
+ ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
if (ret < 0)
goto fail;
@@ -485,7 +583,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_read(bs->file, sector_num, buf, nb_sectors);
+ return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
}
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 0);
@@ -499,7 +597,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
if (offset == -1) {
memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
} else {
- ret = bdrv_pread(bs->file, offset, buf,
+ ret = bdrv_pread(bs->file->bs, offset, buf,
sectors * BDRV_SECTOR_SIZE);
if (ret != sectors * BDRV_SECTOR_SIZE) {
return -1;
@@ -534,7 +632,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+ return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
}
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 1);
@@ -551,7 +649,8 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
return -1;
}
- ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
+ ret = bdrv_pwrite(bs->file->bs, offset, buf,
+ sectors * BDRV_SECTOR_SIZE);
if (ret != sectors * BDRV_SECTOR_SIZE) {
return -1;
}
@@ -576,7 +675,7 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
}
static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVVPCState *s = bs->opaque;
VHDFooter *footer = (VHDFooter*) s->footer_buf;
@@ -586,6 +685,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
if (be32_to_cpu(footer->type) == VHD_FIXED) {
*pnum = nb_sectors;
+ *file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
@@ -607,6 +707,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
/* *pnum can't be greater than one block for allocated
* sectors since there is always a bitmap in between. */
if (allocated) {
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
}
if (nb_sectors == 0) {
@@ -626,7 +727,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
* Note that the geometry doesn't always exactly match total_sectors but
* may round it down.
*
- * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
* the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
* and instead allow up to 255 heads.
*/
@@ -668,7 +769,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
@@ -678,34 +779,34 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
int ret;
int64_t offset = 0;
- // Write the footer (twice: at the beginning and at the end)
+ /* Write the footer (twice: at the beginning and at the end) */
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
- if (ret) {
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+ if (ret < 0) {
goto fail;
}
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
}
- // Write the initial BAT
+ /* Write the initial BAT */
offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+ ret = blk_pwrite(blk, offset, buf, 512);
if (ret < 0) {
goto fail;
}
offset += 512;
}
- // Prepare the Dynamic Disk Header
+ /* Prepare the Dynamic Disk Header */
memset(buf, 0, 1024);
memcpy(dyndisk_header->magic, "cxsparse", 8);
@@ -722,10 +823,10 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
- // Write the header
+ /* Write the header */
offset = 512;
- ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+ ret = blk_pwrite(blk, offset, buf, 1024);
if (ret < 0) {
goto fail;
}
@@ -734,7 +835,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
return ret;
}
-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_size)
{
int ret;
@@ -742,12 +843,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
/* Add footer to total size */
total_size += HEADER_SIZE;
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
return ret;
}
- ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
if (ret < 0) {
return ret;
}
@@ -768,8 +869,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
+ bool force_size;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -781,6 +883,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
} else if (!strcmp(disk_type_param, "fixed")) {
disk_type = VHD_FIXED;
} else {
+ error_setg(errp, "Invalid disk type, %s", disk_type_param);
ret = -EINVAL;
goto out;
}
@@ -788,36 +891,50 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
disk_type = VHD_DYNAMIC;
}
+ force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
+
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/*
* Calculate matching total_size and geometry. Increase the number of
* sectors requested until we get enough (or fail). This ensures that
* qemu-img convert doesn't truncate images, but rather rounds up.
*
- * If the image size can't be represented by a spec conform CHS geometry,
+ * If the image size can't be represented by a spec conformant CHS geometry,
* we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
* the image size from the VHD footer to calculate total_sectors.
*/
- total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
- for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
- calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+ if (force_size) {
+ /* This will force the use of total_size for sector count, below */
+ cyls = VHD_CHS_MAX_C;
+ heads = VHD_CHS_MAX_H;
+ secs_per_cyl = VHD_CHS_MAX_S;
+ } else {
+ total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+ for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+ calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+ }
}
if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
total_sectors = total_size / BDRV_SECTOR_SIZE;
- /* Allow a maximum disk size of approximately 2 TB */
+ /* Allow a maximum disk size of 2040 GiB */
if (total_sectors > VHD_MAX_SECTORS) {
+ error_setg(errp, "Disk size is too large, max size is 2040 GiB");
ret = -EFBIG;
goto out;
}
@@ -830,8 +947,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
memset(buf, 0, 1024);
memcpy(footer->creator, "conectix", 8);
- /* TODO Check if "qemu" creator_app is ok for VPC */
- memcpy(footer->creator_app, "qemu", 4);
+ if (force_size) {
+ memcpy(footer->creator_app, "qem2", 4);
+ } else {
+ memcpy(footer->creator_app, "qemu", 4);
+ }
memcpy(footer->creator_os, "Wi2k", 4);
footer->features = cpu_to_be32(0x02);
@@ -861,13 +981,16 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
- ret = create_dynamic_disk(bs, buf, total_sectors);
+ ret = create_dynamic_disk(blk, buf, total_sectors);
} else {
- ret = create_fixed_disk(bs, buf, total_size);
+ ret = create_fixed_disk(blk, buf, total_size);
+ }
+ if (ret < 0) {
+ error_setg(errp, "Unable to create or write VHD header");
}
out:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(disk_type_param);
return ret;
}
@@ -878,7 +1001,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_has_zero_init(bs->file);
+ return bdrv_has_zero_init(bs->file->bs);
} else {
return 1;
}
@@ -912,6 +1035,13 @@ static QemuOptsList vpc_create_opts = {
"Type of virtual hard disk format. Supported formats are "
"{dynamic (default) | fixed} "
},
+ {
+ .name = VPC_OPT_FORCE_SIZE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Force disk size calculation to use the actual size "
+ "specified, rather than using the nearest CHS-based "
+ "calculation"
+ },
{ /* end of list */ }
}
};
diff --git a/qemu/block/vvfat.c b/qemu/block/vvfat.c
index 206869712..183fc4f04 100644
--- a/qemu/block/vvfat.c
+++ b/qemu/block/vvfat.c
@@ -22,15 +22,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include <sys/stat.h>
+#include "qemu/osdep.h"
#include <dirent.h>
-#include "qemu-common.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
#ifndef S_IWGRP
#define S_IWGRP 0
@@ -985,12 +986,6 @@ static BDRVVVFATState *vvv = NULL;
static int enable_write_target(BDRVVVFATState *s, Error **errp);
static int is_consistent(BDRVVVFATState *s);
-static void vvfat_rebind(BlockDriverState *bs)
-{
- BDRVVVFATState *s = bs->opaque;
- s->bs = bs;
-}
-
static QemuOptsList runtime_opts = {
.name = "vvfat",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -1114,6 +1109,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
memcpy(s->volume_label, label, label_length);
+ } else {
+ memcpy(s->volume_label, "QEMU VVFAT", 10);
}
if (floppy) {
@@ -2288,12 +2285,17 @@ DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapp
factor * (old_cluster_count - new_cluster_count));
for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
+ direntry_t *first_direntry;
void* direntry = array_get(&(s->directory), current_dir_index);
int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
s->sectors_per_cluster);
if (ret)
return ret;
- assert(!strncmp(s->directory.pointer, "QEMU", 4));
+
+ /* The first directory entry on the filesystem is the volume name */
+ first_direntry = (direntry_t*) s->directory.pointer;
+ assert(!memcmp(first_direntry->name, s->volume_label, 11));
+
current_dir_index += factor;
}
@@ -2890,7 +2892,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
}
static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int* n)
+ int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
{
BDRVVVFATState* s = bs->opaque;
*n = s->sector_count - sector_num;
@@ -2923,9 +2925,12 @@ static BlockDriver vvfat_write_target = {
static int enable_write_target(BDRVVVFATState *s, Error **errp)
{
BlockDriver *bdrv_qcow = NULL;
+ BlockDriverState *backing;
QemuOpts *opts = NULL;
int ret;
int size = sector2cluster(s, s->sector_count);
+ QDict *options;
+
s->used_clusters = calloc(size, 1);
array_init(&(s->commits), sizeof(commit_t));
@@ -2956,9 +2961,10 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
}
s->qcow = NULL;
- ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- bdrv_qcow, errp);
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow"));
+ ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
if (ret < 0) {
goto err;
}
@@ -2967,10 +2973,13 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
unlink(s->qcow_filename);
#endif
- bdrv_set_backing_hd(s->bs, bdrv_new());
- s->bs->backing_hd->drv = &vvfat_write_target;
- s->bs->backing_hd->opaque = g_new(void *, 1);
- *(void**)s->bs->backing_hd->opaque = s;
+ backing = bdrv_new();
+ bdrv_set_backing_hd(s->bs, backing);
+ bdrv_unref(backing);
+
+ s->bs->backing->bs->drv = &vvfat_write_target;
+ s->bs->backing->bs->opaque = g_new(void *, 1);
+ *(void**)s->bs->backing->bs->opaque = s;
return 0;
@@ -3004,7 +3013,6 @@ static BlockDriver bdrv_vvfat = {
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
.bdrv_close = vvfat_close,
- .bdrv_rebind = vvfat_rebind,
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
diff --git a/qemu/block/win32-aio.c b/qemu/block/win32-aio.c
index 64e86827b..2d509a9a7 100644
--- a/qemu/block/win32-aio.c
+++ b/qemu/block/win32-aio.c
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/timer.h"
#include "block/block_int.h"
@@ -174,7 +175,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
- aio_set_event_notifier(old_context, &aio->e, NULL);
+ aio_set_event_notifier(old_context, &aio->e, false, NULL);
aio->is_aio_context_attached = false;
}
@@ -182,7 +183,8 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
aio->is_aio_context_attached = true;
- aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+ aio_set_event_notifier(new_context, &aio->e, false,
+ win32_aio_completion_cb);
}
QEMUWin32AIOState *win32_aio_init(void)
diff --git a/qemu/block/write-threshold.c b/qemu/block/write-threshold.c
index a53c1f5e6..cc2ca7183 100644
--- a/qemu/block/write-threshold.c
+++ b/qemu/block/write-threshold.c
@@ -10,8 +10,9 @@
* See the COPYING.LIB file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
#include "block/write-threshold.h"
#include "qemu/notify.h"
#include "qapi-event.h"