summaryrefslogtreecommitdiffstats
path: root/qemu/include/block
diff options
context:
space:
mode:
Diffstat (limited to 'qemu/include/block')
-rw-r--r--qemu/include/block/accounting.h31
-rw-r--r--qemu/include/block/aio.h70
-rw-r--r--qemu/include/block/block.h239
-rw-r--r--qemu/include/block/block_int.h135
-rw-r--r--qemu/include/block/blockjob.h98
-rw-r--r--qemu/include/block/coroutine.h219
-rw-r--r--qemu/include/block/coroutine_int.h54
-rw-r--r--qemu/include/block/dirty-bitmap.h44
-rw-r--r--qemu/include/block/nbd.h30
-rw-r--r--qemu/include/block/qapi.h3
-rw-r--r--qemu/include/block/scsi.h2
-rw-r--r--qemu/include/block/snapshot.h25
-rw-r--r--qemu/include/block/throttle-groups.h6
-rw-r--r--qemu/include/block/write-threshold.h3
14 files changed, 465 insertions, 494 deletions
diff --git a/qemu/include/block/accounting.h b/qemu/include/block/accounting.h
index 4c406cff7..20891639d 100644
--- a/qemu/include/block/accounting.h
+++ b/qemu/include/block/accounting.h
@@ -2,6 +2,7 @@
* QEMU System Emulator block accounting
*
* Copyright (c) 2011 Christoph Hellwig
+ * Copyright (c) 2015 Igalia, S.L.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -24,9 +25,9 @@
#ifndef BLOCK_ACCOUNTING_H
#define BLOCK_ACCOUNTING_H
-#include <stdint.h>
+#include "qemu/timed-average.h"
-#include "qemu/typedefs.h"
+typedef struct BlockAcctTimedStats BlockAcctTimedStats;
enum BlockAcctType {
BLOCK_ACCT_READ,
@@ -35,12 +36,23 @@ enum BlockAcctType {
BLOCK_MAX_IOTYPE,
};
+struct BlockAcctTimedStats {
+ TimedAverage latency[BLOCK_MAX_IOTYPE];
+ unsigned interval_length; /* in seconds */
+ QSLIST_ENTRY(BlockAcctTimedStats) entries;
+};
+
typedef struct BlockAcctStats {
uint64_t nr_bytes[BLOCK_MAX_IOTYPE];
uint64_t nr_ops[BLOCK_MAX_IOTYPE];
+ uint64_t invalid_ops[BLOCK_MAX_IOTYPE];
+ uint64_t failed_ops[BLOCK_MAX_IOTYPE];
uint64_t total_time_ns[BLOCK_MAX_IOTYPE];
uint64_t merged[BLOCK_MAX_IOTYPE];
- uint64_t wr_highest_sector;
+ int64_t last_access_time_ns;
+ QSLIST_HEAD(, BlockAcctTimedStats) intervals;
+ bool account_invalid;
+ bool account_failed;
} BlockAcctStats;
typedef struct BlockAcctCookie {
@@ -49,12 +61,21 @@ typedef struct BlockAcctCookie {
enum BlockAcctType type;
} BlockAcctCookie;
+void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+ bool account_failed);
+void block_acct_cleanup(BlockAcctStats *stats);
+void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length);
+BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
+ BlockAcctTimedStats *s);
void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
int64_t bytes, enum BlockAcctType type);
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie);
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
- unsigned int nb_sectors);
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie);
+void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type);
void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
int num_requests);
+int64_t block_acct_idle_time_ns(BlockAcctStats *stats);
+double block_acct_queue_depth(BlockAcctTimedStats *stats,
+ enum BlockAcctType type);
#endif
diff --git a/qemu/include/block/aio.h b/qemu/include/block/aio.h
index 400b1b002..88a64eeb3 100644
--- a/qemu/include/block/aio.h
+++ b/qemu/include/block/aio.h
@@ -14,7 +14,6 @@
#ifndef QEMU_AIO_H
#define QEMU_AIO_H
-#include "qemu/typedefs.h"
#include "qemu-common.h"
#include "qemu/queue.h"
#include "qemu/event_notifier.h"
@@ -122,6 +121,13 @@ struct AioContext {
/* TimerLists for calling timers - one per clock type */
QEMUTimerListGroup tlg;
+
+ int external_disable_cnt;
+
+ /* epoll(7) state used when built with CONFIG_EPOLL */
+ int epollfd;
+ bool epoll_enabled;
+ bool epoll_available;
};
/**
@@ -207,6 +213,11 @@ void aio_notify(AioContext *ctx);
void aio_notify_accept(AioContext *ctx);
/**
+ * aio_bh_call: Executes callback function of the specified BH.
+ */
+void aio_bh_call(QEMUBH *bh);
+
+/**
* aio_bh_poll: Poll bottom halves for an AioContext.
*
* These are internal functions used by the QEMU main loop.
@@ -299,6 +310,7 @@ bool aio_poll(AioContext *ctx, bool blocking);
*/
void aio_set_fd_handler(AioContext *ctx,
int fd,
+ bool is_external,
IOHandler *io_read,
IOHandler *io_write,
void *opaque);
@@ -312,6 +324,7 @@ void aio_set_fd_handler(AioContext *ctx,
*/
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *notifier,
+ bool is_external,
EventNotifierHandler *io_read);
/* Return a GSource that lets the main loop poll the file descriptors attached
@@ -373,4 +386,59 @@ static inline void aio_timer_init(AioContext *ctx,
*/
int64_t aio_compute_timeout(AioContext *ctx);
+/**
+ * aio_disable_external:
+ * @ctx: the aio context
+ *
+ * Disable the further processing of external clients.
+ */
+static inline void aio_disable_external(AioContext *ctx)
+{
+ atomic_inc(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_enable_external:
+ * @ctx: the aio context
+ *
+ * Enable the processing of external clients.
+ */
+static inline void aio_enable_external(AioContext *ctx)
+{
+ assert(ctx->external_disable_cnt > 0);
+ atomic_dec(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_external_disabled:
+ * @ctx: the aio context
+ *
+ * Return true if the external clients are disabled.
+ */
+static inline bool aio_external_disabled(AioContext *ctx)
+{
+ return atomic_read(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_node_check:
+ * @ctx: the aio context
+ * @is_external: Whether or not the checked node is an external event source.
+ *
+ * Check if the node's is_external flag is okay to be polled by the ctx at this
+ * moment. True means green light.
+ */
+static inline bool aio_node_check(AioContext *ctx, bool is_external)
+{
+ return !is_external || !atomic_read(&ctx->external_disable_cnt);
+}
+
+/**
+ * aio_context_setup:
+ * @ctx: the aio context
+ *
+ * Initialize the aio context.
+ */
+void aio_context_setup(AioContext *ctx, Error **errp);
+
#endif
diff --git a/qemu/include/block/block.h b/qemu/include/block/block.h
index 37916f720..3a731377d 100644
--- a/qemu/include/block/block.h
+++ b/qemu/include/block/block.h
@@ -2,18 +2,21 @@
#define BLOCK_H
#include "block/aio.h"
-#include "qemu-common.h"
+#include "qemu/iov.h"
#include "qemu/option.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
#include "block/accounting.h"
+#include "block/dirty-bitmap.h"
#include "qapi/qmp/qobject.h"
#include "qapi-types.h"
+#include "qemu/hbitmap.h"
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BlockJob BlockJob;
typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildRole BdrvChildRole;
+typedef struct BlockJobTxn BlockJobTxn;
typedef struct BlockDriverInfo {
/* in bytes, 0 if irrelevant */
@@ -23,7 +26,7 @@ typedef struct BlockDriverInfo {
bool is_dirty;
/*
* True if unallocated blocks read back as zeroes. This is equivalent
- * to the the LBPRZ flag in the SCSI logical block provisioning page.
+ * to the LBPRZ flag in the SCSI logical block provisioning page.
*/
bool unallocated_blocks_are_zero;
/*
@@ -51,15 +54,17 @@ typedef struct BlockFragInfo {
} BlockFragInfo;
typedef enum {
- BDRV_REQ_COPY_ON_READ = 0x1,
- BDRV_REQ_ZERO_WRITE = 0x2,
+ BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
/* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
* is allowed to optimize a write zeroes request by unmapping (discarding)
* blocks if it is guaranteed that the result will read back as
* zeroes. The flag is only passed to the driver if the block device is
* opened with BDRV_O_UNMAP.
*/
- BDRV_REQ_MAY_UNMAP = 0x4,
+ BDRV_REQ_MAY_UNMAP = 0x4,
+ BDRV_REQ_NO_SERIALISING = 0x8,
+ BDRV_REQ_FUA = 0x10,
} BdrvRequestFlags;
typedef struct BlockSizes {
@@ -77,20 +82,20 @@ typedef struct HDGeometry {
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
-#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
-#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */
+#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
select an appropriate protocol driver,
ignoring the format layer */
+#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
-#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
/* Option names of options parsed by the block layer */
@@ -109,9 +114,10 @@ typedef struct HDGeometry {
/*
* Allocation status flags
- * BDRV_BLOCK_DATA: data is read from bs->file or another file
+ * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_get_block_status.
* BDRV_BLOCK_ZERO: sectors read as zero
- * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data
+ * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by
+ * bdrv_get_block_status.
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
* layer (as opposed to the backing file)
* BDRV_BLOCK_RAW: used internally to indicate that the request
@@ -147,6 +153,8 @@ typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
typedef struct BDRVReopenState {
BlockDriverState *bs;
int flags;
+ QDict *options;
+ QDict *explicit_options;
void *opaque;
} BDRVReopenState;
@@ -165,18 +173,14 @@ typedef enum BlockOpType {
BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
- BLOCK_OP_TYPE_MIRROR,
+ BLOCK_OP_TYPE_MIRROR_SOURCE,
+ BLOCK_OP_TYPE_MIRROR_TARGET,
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
BLOCK_OP_TYPE_REPLACE,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
-void bdrv_iostatus_enable(BlockDriverState *bs);
-void bdrv_iostatus_reset(BlockDriverState *bs);
-void bdrv_iostatus_disable(BlockDriverState *bs);
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
void bdrv_info_print(Monitor *mon, const QObject *data);
void bdrv_info(Monitor *mon, QObject **ret_data);
void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -189,51 +193,43 @@ void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group);
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
+bool bdrv_uses_whitelist(void);
BlockDriver *bdrv_find_protocol(const char *filename,
bool allow_protocol_prefix,
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
- bool readonly);
int bdrv_create(BlockDriver *drv, const char* filename,
QemuOpts *opts, Error **errp);
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new_root(void);
BlockDriverState *bdrv_new(void);
-void bdrv_make_anon(BlockDriverState *bs);
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old);
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top);
-int bdrv_parse_cache_flags(const char *mode, int *flags);
+void bdrv_replace_in_backing_chain(BlockDriverState *old,
+ BlockDriverState *new);
+
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
int bdrv_parse_discard_flags(const char *mode, int *flags);
-int bdrv_open_image(BlockDriverState **pbs, const char *filename,
- QDict *options, const char *bdref_key,
- BlockDriverState* parent, const BdrvChildRole *child_role,
- bool allow_none, Error **errp);
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp);
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd);
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp);
-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp);
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+ const char *bdref_key, Error **errp);
int bdrv_open(BlockDriverState **pbs, const char *filename,
- const char *reference, QDict *options, int flags,
- BlockDriver *drv, Error **errp);
+ const char *reference, QDict *options, int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, int flags);
+ BlockDriverState *bs,
+ QDict *options, int flags);
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp);
void bdrv_reopen_commit(BDRVReopenState *reopen_state);
void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-void bdrv_close(BlockDriverState *bs);
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify);
int bdrv_read(BlockDriverState *bs, int64_t sector_num,
uint8_t *buf, int nb_sectors);
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
int bdrv_write(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
@@ -253,6 +249,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
/*
@@ -274,7 +272,6 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp);
int bdrv_commit(BlockDriverState *bs);
-int bdrv_commit_all(void);
int bdrv_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt);
void bdrv_register(BlockDriver *bdrv);
@@ -307,9 +304,9 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix);
* block driver; total_work_size may change during the course of the amendment
* operation */
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
- int64_t total_work_size);
+ int64_t total_work_size, void *opaque);
int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb);
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque);
/* external snapshots */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -317,11 +314,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
bool bdrv_is_first_non_filter(BlockDriverState *candidate);
/* check if a named node can be replaced when doing drive-mirror */
-BlockDriverState *check_to_replace_node(const char *node_name, Error **errp);
+BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+ const char *node_name, Error **errp);
/* async block I/O */
-typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector,
- int sector_num);
BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque);
@@ -338,10 +334,18 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb);
typedef struct BlockRequest {
/* Fields to be filled by multiwrite caller */
- int64_t sector;
- int nb_sectors;
- int flags;
- QEMUIOVector *qiov;
+ union {
+ struct {
+ int64_t sector;
+ int nb_sectors;
+ int flags;
+ QEMUIOVector *qiov;
+ };
+ struct {
+ int req;
+ void *buf;
+ };
+ };
BlockCompletionFunc *cb;
void *opaque;
@@ -361,13 +365,14 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
/* Invalidate any cached metadata used by image formats */
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp);
void bdrv_invalidate_cache_all(Error **errp);
+int bdrv_inactivate_all(void);
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
-int bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain(BlockDriverState *bs);
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
void bdrv_drain_all(void);
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors);
@@ -377,27 +382,21 @@ int bdrv_has_zero_init(BlockDriverState *bs);
bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum);
+ int nb_sectors, int *pnum,
+ BlockDriverState **file);
int64_t bdrv_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
- int nb_sectors, int *pnum);
+ int nb_sectors, int *pnum,
+ BlockDriverState **file);
int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
int *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
int64_t sector_num, int nb_sectors, int *pnum);
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error);
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
- bool is_read, int error);
int bdrv_is_read_only(BlockDriverState *bs);
int bdrv_is_sg(BlockDriverState *bs);
-int bdrv_enable_write_cache(BlockDriverState *bs);
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce);
-int bdrv_is_inserted(BlockDriverState *bs);
+bool bdrv_is_inserted(BlockDriverState *bs);
int bdrv_media_changed(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
void bdrv_eject(BlockDriverState *bs, bool eject_flag);
@@ -410,6 +409,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
BlockDriverState *bdrv_next(BlockDriverState *bs);
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
int bdrv_is_encrypted(BlockDriverState *bs);
int bdrv_key_required(BlockDriverState *bs);
int bdrv_set_key(BlockDriverState *bs, const char *key);
@@ -464,50 +464,12 @@ void bdrv_img_create(const char *filename, const char *fmt,
size_t bdrv_min_mem_align(BlockDriverState *bs);
/* Returns optimal alignment in bytes for bounce buffer */
size_t bdrv_opt_mem_align(BlockDriverState *bs);
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
void *qemu_blockalign0(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
-struct HBitmapIter;
-typedef struct BdrvDirtyBitmap BdrvDirtyBitmap;
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
- uint32_t granularity,
- const char *name,
- Error **errp);
-int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp);
-BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp);
-BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap,
- Error **errp);
-BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
- const char *name);
-void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
-void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
-uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
-uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
-bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
-DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector);
-void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors);
-void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors);
-void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap);
-void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
-void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
-int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
-
void bdrv_enable_copy_on_read(BlockDriverState *bs);
void bdrv_disable_copy_on_read(BlockDriverState *bs);
@@ -522,68 +484,14 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
-typedef enum {
- BLKDBG_L1_UPDATE,
-
- BLKDBG_L1_GROW_ALLOC_TABLE,
- BLKDBG_L1_GROW_WRITE_TABLE,
- BLKDBG_L1_GROW_ACTIVATE_TABLE,
-
- BLKDBG_L2_LOAD,
- BLKDBG_L2_UPDATE,
- BLKDBG_L2_UPDATE_COMPRESSED,
- BLKDBG_L2_ALLOC_COW_READ,
- BLKDBG_L2_ALLOC_WRITE,
-
- BLKDBG_READ_AIO,
- BLKDBG_READ_BACKING_AIO,
- BLKDBG_READ_COMPRESSED,
-
- BLKDBG_WRITE_AIO,
- BLKDBG_WRITE_COMPRESSED,
-
- BLKDBG_VMSTATE_LOAD,
- BLKDBG_VMSTATE_SAVE,
-
- BLKDBG_COW_READ,
- BLKDBG_COW_WRITE,
-
- BLKDBG_REFTABLE_LOAD,
- BLKDBG_REFTABLE_GROW,
- BLKDBG_REFTABLE_UPDATE,
-
- BLKDBG_REFBLOCK_LOAD,
- BLKDBG_REFBLOCK_UPDATE,
- BLKDBG_REFBLOCK_UPDATE_PART,
- BLKDBG_REFBLOCK_ALLOC,
- BLKDBG_REFBLOCK_ALLOC_HOOKUP,
- BLKDBG_REFBLOCK_ALLOC_WRITE,
- BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS,
- BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE,
- BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE,
+#define BLKDBG_EVENT(child, evt) \
+ do { \
+ if (child) { \
+ bdrv_debug_event(child->bs, evt); \
+ } \
+ } while (0)
- BLKDBG_CLUSTER_ALLOC,
- BLKDBG_CLUSTER_ALLOC_BYTES,
- BLKDBG_CLUSTER_FREE,
-
- BLKDBG_FLUSH_TO_OS,
- BLKDBG_FLUSH_TO_DISK,
-
- BLKDBG_PWRITEV_RMW_HEAD,
- BLKDBG_PWRITEV_RMW_AFTER_HEAD,
- BLKDBG_PWRITEV_RMW_TAIL,
- BLKDBG_PWRITEV_RMW_AFTER_TAIL,
- BLKDBG_PWRITEV,
- BLKDBG_PWRITEV_ZERO,
- BLKDBG_PWRITEV_DONE,
-
- BLKDBG_EMPTY_IMAGE_PREPARE,
-
- BLKDBG_EVENT_MAX,
-} BlkDebugEvent;
-
-#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt)
-void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event);
+void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
const char *tag);
@@ -614,6 +522,23 @@ void bdrv_io_plug(BlockDriverState *bs);
void bdrv_io_unplug(BlockDriverState *bs);
void bdrv_flush_io_queue(BlockDriverState *bs);
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs);
+/**
+ * bdrv_drained_begin:
+ *
+ * Begin a quiesced section for exclusive access to the BDS, by disabling
+ * external request sources including NBD server and device model. Note that
+ * this doesn't block timers or coroutines from submitting more requests, which
+ * means block_job_pause is still necessary.
+ *
+ * This function can be recursive.
+ */
+void bdrv_drained_begin(BlockDriverState *bs);
+
+/**
+ * bdrv_drained_end:
+ *
+ * End a quiescent section started by bdrv_drained_begin().
+ */
+void bdrv_drained_end(BlockDriverState *bs);
#endif
diff --git a/qemu/include/block/block_int.h b/qemu/include/block/block_int.h
index 14ad4c334..10d87595b 100644
--- a/qemu/include/block/block_int.h
+++ b/qemu/include/block/block_int.h
@@ -26,9 +26,10 @@
#include "block/accounting.h"
#include "block/block.h"
+#include "block/throttle-groups.h"
#include "qemu/option.h"
#include "qemu/queue.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
#include "qemu/timer.h"
#include "qapi-types.h"
#include "qemu/hbitmap.h"
@@ -59,11 +60,19 @@
#define BLOCK_PROBE_BUF_SIZE 512
+enum BdrvTrackedRequestType {
+ BDRV_TRACKED_READ,
+ BDRV_TRACKED_WRITE,
+ BDRV_TRACKED_FLUSH,
+ BDRV_TRACKED_IOCTL,
+ BDRV_TRACKED_DISCARD,
+};
+
typedef struct BdrvTrackedRequest {
BlockDriverState *bs;
int64_t offset;
unsigned int bytes;
- bool is_write;
+ enum BdrvTrackedRequestType type;
bool serialising;
int64_t overlap_offset;
@@ -112,6 +121,7 @@ struct BlockDriver {
BlockReopenQueue *queue, Error **errp);
void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+ void (*bdrv_join_options)(QDict *options, QDict *old_options);
int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
Error **errp);
@@ -122,12 +132,11 @@ struct BlockDriver {
int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors);
void (*bdrv_close)(BlockDriverState *bs);
- void (*bdrv_rebind)(BlockDriverState *bs);
int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
int (*bdrv_make_empty)(BlockDriverState *bs);
- void (*bdrv_refresh_filename)(BlockDriverState *bs);
+ void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
/* aio */
BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
@@ -146,6 +155,11 @@ struct BlockDriver {
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
+
+ int supported_write_flags;
+
/*
* Efficiently zero a region of the disk image. Typically an image format
* would use a compact metadata representation to implement this. This
@@ -157,12 +171,21 @@ struct BlockDriver {
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum);
+ int64_t sector_num, int nb_sectors, int *pnum,
+ BlockDriverState **file);
/*
* Invalidate any cached meta-data.
*/
void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp);
+ int (*bdrv_inactivate)(BlockDriverState *bs);
+
+ /*
+ * Flushes all data for all layers by calling bdrv_co_flush for underlying
+ * layers, if needed. This function is needed for deterministic
+ * synchronization of the flush finishing callback.
+ */
+ int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
/*
* Flushes all data that was already written to the OS all the way down to
@@ -213,13 +236,12 @@ struct BlockDriver {
const char *backing_file, const char *backing_fmt);
/* removable device specific */
- int (*bdrv_is_inserted)(BlockDriverState *bs);
+ bool (*bdrv_is_inserted)(BlockDriverState *bs);
int (*bdrv_media_changed)(BlockDriverState *bs);
void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
/* to control generic scsi devices */
- int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf);
BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);
@@ -235,9 +257,10 @@ struct BlockDriver {
BdrvCheckMode fix);
int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb);
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque);
- void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event);
+ void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
/* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
@@ -288,6 +311,12 @@ struct BlockDriver {
*/
int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);
+ /**
+ * Drain and stop any internal sources of requests in the driver, and
+ * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
+ */
+ void (*bdrv_drain)(BlockDriverState *bs);
+
QLIST_ENTRY(BlockDriver) list;
};
@@ -315,6 +344,9 @@ typedef struct BlockLimits {
/* memory alignment for bounce buffer */
size_t opt_mem_alignment;
+
+ /* maximum number of iovec elements */
+ int max_iov;
} BlockLimits;
typedef struct BdrvOpBlocker BdrvOpBlocker;
@@ -329,7 +361,8 @@ typedef struct BdrvAioNotifier {
} BdrvAioNotifier;
struct BdrvChildRole {
- int (*inherit_flags)(int parent_flags);
+ void (*inherit_options)(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options);
};
extern const BdrvChildRole child_file;
@@ -337,8 +370,10 @@ extern const BdrvChildRole child_format;
struct BdrvChild {
BlockDriverState *bs;
+ char *name;
const BdrvChildRole *role;
QLIST_ENTRY(BdrvChild) next;
+ QLIST_ENTRY(BdrvChild) next_parent;
};
/*
@@ -378,11 +413,8 @@ struct BlockDriverState {
QDict *full_open_options;
char exact_filename[PATH_MAX];
- BlockDriverState *backing_hd;
- BdrvChild *backing_child;
- BlockDriverState *file;
-
- NotifierList close_notifiers;
+ BdrvChild *backing;
+ BdrvChild *file;
/* Callback before write request is processed */
NotifierWithReturnList before_write_notifiers;
@@ -390,7 +422,10 @@ struct BlockDriverState {
/* number of in-flight serialising requests */
unsigned int serialising_in_flight;
- /* I/O throttling */
+ /* I/O throttling.
+ * throttle_state tells us if this BDS has I/O limits configured.
+ * io_limits_enabled tells us if they are currently being
+ * enforced, but it can be temporarily set to false */
CoQueue throttled_reqs[2];
bool io_limits_enabled;
/* The following fields are protected by the ThrottleGroup lock.
@@ -400,8 +435,8 @@ struct BlockDriverState {
unsigned pending_reqs[2];
QLIST_ENTRY(BlockDriverState) round_robin;
- /* I/O stats (display with "info blockstats"). */
- BlockAcctStats stats;
+ /* Offset after the highest byte written to */
+ uint64_t wr_highest_offset;
/* I/O Limits */
BlockLimits bl;
@@ -412,24 +447,14 @@ struct BlockDriverState {
/* Alignment requirement for offset/length of I/O requests */
unsigned int request_alignment;
- /* the block size for which the guest device expects atomicity */
- int guest_block_size;
-
- /* do we need to tell the quest if we have a volatile write cache? */
- int enable_write_cache;
-
- /* NOTE: the following infos are only hints for real hardware
- drivers. They are not used by the block driver */
- BlockdevOnError on_read_error, on_write_error;
- bool iostatus_enabled;
- BlockDeviceIoStatus iostatus;
-
/* the following member gives a name to every node on the bs graph. */
char node_name[32];
/* element of the list of named nodes building the graph */
QTAILQ_ENTRY(BlockDriverState) node_list;
- /* element of the list of "drives" the guest sees */
- QTAILQ_ENTRY(BlockDriverState) device_list;
+ /* element of the list of all BlockDriverStates (all_bdrv_states) */
+ QTAILQ_ENTRY(BlockDriverState) bs_list;
+ /* element of the list of monitor-owned BDS */
+ QTAILQ_ENTRY(BlockDriverState) monitor_list;
QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
int refcnt;
@@ -446,8 +471,10 @@ struct BlockDriverState {
* parent node of this node. */
BlockDriverState *inherits_from;
QLIST_HEAD(, BdrvChild) children;
+ QLIST_HEAD(, BdrvChild) parents;
QDict *options;
+ QDict *explicit_options;
BlockdevDetectZeroesOptions detect_zeroes;
/* The error object in use for blocking operations on backing_hd */
@@ -456,8 +483,24 @@ struct BlockDriverState {
/* threshold limit for writes, in bytes. "High water mark". */
uint64_t write_threshold_offset;
NotifierWithReturn write_threshold_notifier;
+
+ int quiesce_counter;
+};
+
+struct BlockBackendRootState {
+ int open_flags;
+ bool read_only;
+ BlockdevDetectZeroesOptions detect_zeroes;
+
+ char *throttle_group;
+ ThrottleState *throttle_state;
};
+static inline BlockDriverState *backing_bs(BlockDriverState *bs)
+{
+ return bs->backing ? bs->backing->bs : NULL;
+}
+
/* Essential block drivers which must always be statically linked into qemu, and
* which therefore can be accessed without using bdrv_find_format() */
@@ -474,6 +517,13 @@ extern BlockDriver bdrv_qcow2;
*/
void bdrv_setup_io_funcs(BlockDriver *bdrv);
+int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
+
int get_tmp_filename(char *filename, int size);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
const char *filename);
@@ -496,7 +546,7 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
*
* May be called from .bdrv_detach_aio_context() to detach children from the
* current #AioContext. This is only needed by block drivers that manage their
- * own children. Both ->file and ->backing_hd are automatically handled and
+ * own children. Both ->file and ->backing are automatically handled and
* block drivers should not call this function on them explicitly.
*/
void bdrv_detach_aio_context(BlockDriverState *bs);
@@ -506,7 +556,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs);
*
* May be called from .bdrv_attach_aio_context() to attach children to the new
* #AioContext. This is only needed by block drivers that manage their own
- * children. Both ->file and ->backing_hd are automatically handled and block
+ * children. Both ->file and ->backing are automatically handled and block
* drivers should not call this function on them explicitly.
*/
void bdrv_attach_aio_context(BlockDriverState *bs,
@@ -643,6 +693,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
* @on_target_error: The action to take upon error writing to the target.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
+ * @txn: Transaction that this job is part of (may be NULL).
*
* Start a backup operation on @bs. Clusters in @bs are written to @target
* until the job is cancelled or manually completed.
@@ -653,15 +704,29 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
- Error **errp);
+ BlockJobTxn *txn, Error **errp);
+
+void hmp_drive_add_node(Monitor *mon, const char *optstr);
+
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildRole *child_role);
+void bdrv_root_unref_child(BdrvChild *child);
void blk_dev_change_media_cb(BlockBackend *blk, bool load);
bool blk_dev_has_removable_media(BlockBackend *blk);
+bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);
void blk_dev_resize_cb(BlockBackend *blk);
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+bool bdrv_requests_pending(BlockDriverState *bs);
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);
+
+void blockdev_close_all_bdrv_states(void);
#endif /* BLOCK_INT_H */
diff --git a/qemu/include/block/blockjob.h b/qemu/include/block/blockjob.h
index dd9d5e6aa..8bedc4936 100644
--- a/qemu/include/block/blockjob.h
+++ b/qemu/include/block/blockjob.h
@@ -50,6 +50,26 @@ typedef struct BlockJobDriver {
* manually.
*/
void (*complete)(BlockJob *job, Error **errp);
+
+ /**
+ * If the callback is not NULL, it will be invoked when all the jobs
+ * belonging to the same transaction complete; or upon this job's
+ * completion if it is not in a transaction. Skipped if NULL.
+ *
+ * All jobs will complete with a call to either .commit() or .abort() but
+ * never both.
+ */
+ void (*commit)(BlockJob *job);
+
+ /**
+ * If the callback is not NULL, it will be invoked when any job in the
+ * same transaction fails; or upon this job's failure (due to error or
+ * cancellation) if it is not in a transaction. Skipped if NULL.
+ *
+ * All jobs will complete with a call to either .commit() or .abort() but
+ * never both.
+ */
+ void (*abort)(BlockJob *job);
} BlockJobDriver;
/**
@@ -65,6 +85,14 @@ struct BlockJob {
BlockDriverState *bs;
/**
+ * The ID of the block job. Currently the BlockBackend name of the BDS
+ * owning the job at the time when the job is started.
+ *
+ * TODO Decouple block job IDs from BlockBackend names
+ */
+ char *id;
+
+ /**
* The coroutine that executes the job. If not NULL, it is
* reentered when busy is false and the job is cancelled.
*/
@@ -102,6 +130,11 @@ struct BlockJob {
*/
bool ready;
+ /**
+ * Set to true when the job has deferred work to the main loop.
+ */
+ bool deferred_to_main_loop;
+
/** Status that is published by the query-block-jobs QMP API */
BlockDeviceIoStatus iostatus;
@@ -122,6 +155,21 @@ struct BlockJob {
/** The opaque value that is passed to the completion function. */
void *opaque;
+
+ /** Reference count of the block job */
+ int refcnt;
+
+ /* True if this job has reported completion by calling block_job_completed.
+ */
+ bool completed;
+
+ /* ret code passed to block_job_completed.
+ */
+ int ret;
+
+ /** Non-NULL if this job is part of a transaction */
+ BlockJobTxn *txn;
+ QLIST_ENTRY(BlockJob) txn_list;
};
/**
@@ -166,12 +214,21 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns);
void block_job_yield(BlockJob *job);
/**
- * block_job_release:
+ * block_job_ref:
* @bs: The block device.
*
- * Release job resources when an error occurred or job completed.
+ * Grab a reference to the block job. Should be paired with block_job_unref.
*/
-void block_job_release(BlockDriverState *bs);
+void block_job_ref(BlockJob *job);
+
+/**
+ * block_job_unref:
+ * @bs: The block device.
+ *
+ * Release reference to the block job and release resources if it is the last
+ * reference.
+ */
+void block_job_unref(BlockJob *job);
/**
* block_job_completed:
@@ -356,4 +413,39 @@ void block_job_defer_to_main_loop(BlockJob *job,
BlockJobDeferToMainLoopFn *fn,
void *opaque);
+/**
+ * block_job_txn_new:
+ *
+ * Allocate and return a new block job transaction. Jobs can be added to the
+ * transaction using block_job_txn_add_job().
+ *
+ * The transaction is automatically freed when the last job completes or is
+ * cancelled.
+ *
+ * All jobs in the transaction either complete successfully or fail/cancel as a
+ * group. Jobs wait for each other before completing. Cancelling one job
+ * cancels all jobs in the transaction.
+ */
+BlockJobTxn *block_job_txn_new(void);
+
+/**
+ * block_job_txn_unref:
+ *
+ * Release a reference that was previously acquired with block_job_txn_add_job
+ * or block_job_txn_new. If it's the last reference to the object, it will be
+ * freed.
+ */
+void block_job_txn_unref(BlockJobTxn *txn);
+
+/**
+ * block_job_txn_add_job:
+ * @txn: The transaction (may be NULL)
+ * @job: Job to add to the transaction
+ *
+ * Add @job to the transaction. The @job must not already be in a transaction.
+ * The caller must call either block_job_txn_unref() or block_job_completed()
+ * to release the reference that is automatically grabbed here.
+ */
+void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job);
+
#endif
diff --git a/qemu/include/block/coroutine.h b/qemu/include/block/coroutine.h
deleted file mode 100644
index 20c027a7f..000000000
--- a/qemu/include/block/coroutine.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * QEMU coroutine implementation
- *
- * Copyright IBM, Corp. 2011
- *
- * Authors:
- * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- * Kevin Wolf <kwolf@redhat.com>
- *
- * This work is licensed under the terms of the GNU LGPL, version 2 or later.
- * See the COPYING.LIB file in the top-level directory.
- *
- */
-
-#ifndef QEMU_COROUTINE_H
-#define QEMU_COROUTINE_H
-
-#include <stdbool.h>
-#include "qemu/typedefs.h"
-#include "qemu/queue.h"
-#include "qemu/timer.h"
-
-/**
- * Coroutines are a mechanism for stack switching and can be used for
- * cooperative userspace threading. These functions provide a simple but
- * useful flavor of coroutines that is suitable for writing sequential code,
- * rather than callbacks, for operations that need to give up control while
- * waiting for events to complete.
- *
- * These functions are re-entrant and may be used outside the global mutex.
- */
-
-/**
- * Mark a function that executes in coroutine context
- *
- * Functions that execute in coroutine context cannot be called directly from
- * normal functions. In the future it would be nice to enable compiler or
- * static checker support for catching such errors. This annotation might make
- * it possible and in the meantime it serves as documentation.
- *
- * For example:
- *
- * static void coroutine_fn foo(void) {
- * ....
- * }
- */
-#define coroutine_fn
-
-typedef struct Coroutine Coroutine;
-
-/**
- * Coroutine entry point
- *
- * When the coroutine is entered for the first time, opaque is passed in as an
- * argument.
- *
- * When this function returns, the coroutine is destroyed automatically and
- * execution continues in the caller who last entered the coroutine.
- */
-typedef void coroutine_fn CoroutineEntry(void *opaque);
-
-/**
- * Create a new coroutine
- *
- * Use qemu_coroutine_enter() to actually transfer control to the coroutine.
- */
-Coroutine *qemu_coroutine_create(CoroutineEntry *entry);
-
-/**
- * Transfer control to a coroutine
- *
- * The opaque argument is passed as the argument to the entry point when
- * entering the coroutine for the first time. It is subsequently ignored.
- */
-void qemu_coroutine_enter(Coroutine *coroutine, void *opaque);
-
-/**
- * Transfer control back to a coroutine's caller
- *
- * This function does not return until the coroutine is re-entered using
- * qemu_coroutine_enter().
- */
-void coroutine_fn qemu_coroutine_yield(void);
-
-/**
- * Get the currently executing coroutine
- */
-Coroutine *coroutine_fn qemu_coroutine_self(void);
-
-/**
- * Return whether or not currently inside a coroutine
- *
- * This can be used to write functions that work both when in coroutine context
- * and when not in coroutine context. Note that such functions cannot use the
- * coroutine_fn annotation since they work outside coroutine context.
- */
-bool qemu_in_coroutine(void);
-
-
-
-/**
- * CoQueues are a mechanism to queue coroutines in order to continue executing
- * them later. They provide the fundamental primitives on which coroutine locks
- * are built.
- */
-typedef struct CoQueue {
- QTAILQ_HEAD(, Coroutine) entries;
-} CoQueue;
-
-/**
- * Initialise a CoQueue. This must be called before any other operation is used
- * on the CoQueue.
- */
-void qemu_co_queue_init(CoQueue *queue);
-
-/**
- * Adds the current coroutine to the CoQueue and transfers control to the
- * caller of the coroutine.
- */
-void coroutine_fn qemu_co_queue_wait(CoQueue *queue);
-
-/**
- * Restarts the next coroutine in the CoQueue and removes it from the queue.
- *
- * Returns true if a coroutine was restarted, false if the queue is empty.
- */
-bool coroutine_fn qemu_co_queue_next(CoQueue *queue);
-
-/**
- * Restarts all coroutines in the CoQueue and leaves the queue empty.
- */
-void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue);
-
-/**
- * Enter the next coroutine in the queue
- */
-bool qemu_co_enter_next(CoQueue *queue);
-
-/**
- * Checks if the CoQueue is empty.
- */
-bool qemu_co_queue_empty(CoQueue *queue);
-
-
-/**
- * Provides a mutex that can be used to synchronise coroutines
- */
-typedef struct CoMutex {
- bool locked;
- CoQueue queue;
-} CoMutex;
-
-/**
- * Initialises a CoMutex. This must be called before any other operation is used
- * on the CoMutex.
- */
-void qemu_co_mutex_init(CoMutex *mutex);
-
-/**
- * Locks the mutex. If the lock cannot be taken immediately, control is
- * transferred to the caller of the current coroutine.
- */
-void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex);
-
-/**
- * Unlocks the mutex and schedules the next coroutine that was waiting for this
- * lock to be run.
- */
-void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex);
-
-typedef struct CoRwlock {
- bool writer;
- int reader;
- CoQueue queue;
-} CoRwlock;
-
-/**
- * Initialises a CoRwlock. This must be called before any other operation
- * is used on the CoRwlock
- */
-void qemu_co_rwlock_init(CoRwlock *lock);
-
-/**
- * Read locks the CoRwlock. If the lock cannot be taken immediately because
- * of a parallel writer, control is transferred to the caller of the current
- * coroutine.
- */
-void qemu_co_rwlock_rdlock(CoRwlock *lock);
-
-/**
- * Write Locks the mutex. If the lock cannot be taken immediately because
- * of a parallel reader, control is transferred to the caller of the current
- * coroutine.
- */
-void qemu_co_rwlock_wrlock(CoRwlock *lock);
-
-/**
- * Unlocks the read/write lock and schedules the next coroutine that was
- * waiting for this lock to be run.
- */
-void qemu_co_rwlock_unlock(CoRwlock *lock);
-
-/**
- * Yield the coroutine for a given duration
- *
- * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
- * resumed when using aio_poll().
- */
-void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
- int64_t ns);
-
-/**
- * Yield until a file descriptor becomes readable
- *
- * Note that this function clobbers the handlers for the file descriptor.
- */
-void coroutine_fn yield_until_fd_readable(int fd);
-
-#endif /* QEMU_COROUTINE_H */
diff --git a/qemu/include/block/coroutine_int.h b/qemu/include/block/coroutine_int.h
deleted file mode 100644
index 9aa1aae5d..000000000
--- a/qemu/include/block/coroutine_int.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Coroutine internals
- *
- * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#ifndef QEMU_COROUTINE_INT_H
-#define QEMU_COROUTINE_INT_H
-
-#include "qemu/queue.h"
-#include "block/coroutine.h"
-
-typedef enum {
- COROUTINE_YIELD = 1,
- COROUTINE_TERMINATE = 2,
- COROUTINE_ENTER = 3,
-} CoroutineAction;
-
-struct Coroutine {
- CoroutineEntry *entry;
- void *entry_arg;
- Coroutine *caller;
- QSLIST_ENTRY(Coroutine) pool_next;
-
- /* Coroutines that should be woken up when we yield or terminate */
- QTAILQ_HEAD(, Coroutine) co_queue_wakeup;
- QTAILQ_ENTRY(Coroutine) co_queue_next;
-};
-
-Coroutine *qemu_coroutine_new(void);
-void qemu_coroutine_delete(Coroutine *co);
-CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to,
- CoroutineAction action);
-void coroutine_fn qemu_co_queue_run_restart(Coroutine *co);
-
-#endif
diff --git a/qemu/include/block/dirty-bitmap.h b/qemu/include/block/dirty-bitmap.h
new file mode 100644
index 000000000..80afe603f
--- /dev/null
+++ b/qemu/include/block/dirty-bitmap.h
@@ -0,0 +1,44 @@
+#ifndef BLOCK_DIRTY_BITMAP_H
+#define BLOCK_DIRTY_BITMAP_H
+
+#include "qemu-common.h"
+#include "qemu/hbitmap.h"
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ uint32_t granularity,
+ const char *name,
+ Error **errp);
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp);
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
+ const char *name);
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap);
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs);
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap);
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs);
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs);
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector);
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors);
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors);
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi);
+void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
+
+#endif
diff --git a/qemu/include/block/nbd.h b/qemu/include/block/nbd.h
index 65f409d80..b86a97698 100644
--- a/qemu/include/block/nbd.h
+++ b/qemu/include/block/nbd.h
@@ -19,10 +19,11 @@
#ifndef NBD_H
#define NBD_H
-#include <sys/types.h>
#include "qemu-common.h"
#include "qemu/option.h"
+#include "io/channel-socket.h"
+#include "crypto/tlscreds.h"
struct nbd_request {
uint32_t magic;
@@ -55,7 +56,10 @@ struct nbd_reply {
#define NBD_REP_ACK (1) /* Data sending finished. */
#define NBD_REP_SERVER (2) /* Export description. */
#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */
+#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */
#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */
+#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */
+
#define NBD_CMD_MASK_COMMAND 0x0000ffff
#define NBD_CMD_FLAG_FUA (1 << 16)
@@ -73,12 +77,19 @@ enum {
/* Maximum size of a single READ/WRITE data buffer */
#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024)
-ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read);
-int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
+ssize_t nbd_wr_syncv(QIOChannel *ioc,
+ struct iovec *iov,
+ size_t niov,
+ size_t offset,
+ size_t length,
+ bool do_read);
+int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags,
+ QCryptoTLSCreds *tlscreds, const char *hostname,
+ QIOChannel **outioc,
off_t *size, Error **errp);
-int nbd_init(int fd, int csock, uint32_t flags, off_t size);
-ssize_t nbd_send_request(int csock, struct nbd_request *request);
-ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply);
+int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size);
+ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request);
+ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply);
int nbd_client(int fd);
int nbd_disconnect(int fd);
@@ -98,8 +109,11 @@ NBDExport *nbd_export_find(const char *name);
void nbd_export_set_name(NBDExport *exp, const char *name);
void nbd_export_close_all(void);
-NBDClient *nbd_client_new(NBDExport *exp, int csock,
- void (*close)(NBDClient *));
+void nbd_client_new(NBDExport *exp,
+ QIOChannelSocket *sioc,
+ QCryptoTLSCreds *tlscreds,
+ const char *tlsaclname,
+ void (*close)(NBDClient *));
void nbd_client_get(NBDClient *client);
void nbd_client_put(NBDClient *client);
diff --git a/qemu/include/block/qapi.h b/qemu/include/block/qapi.h
index 327549d91..82ba4b63a 100644
--- a/qemu/include/block/qapi.h
+++ b/qemu/include/block/qapi.h
@@ -29,7 +29,8 @@
#include "block/block.h"
#include "block/snapshot.h"
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp);
+BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
+ BlockDriverState *bs, Error **errp);
int bdrv_query_snapshot_info_list(BlockDriverState *bs,
SnapshotInfoList **p_list,
Error **errp);
diff --git a/qemu/include/block/scsi.h b/qemu/include/block/scsi.h
index edde960d1..a311341e6 100644
--- a/qemu/include/block/scsi.h
+++ b/qemu/include/block/scsi.h
@@ -229,7 +229,7 @@ const char *scsi_command_name(uint8_t cmd);
#define MODE_PAGE_TO_PROTECT 0x1d
#define MODE_PAGE_CAPABILITIES 0x2a
#define MODE_PAGE_ALLS 0x3f
-/* Not in Mt. Fuji, but in ATAPI 2.6 -- depricated now in favor
+/* Not in Mt. Fuji, but in ATAPI 2.6 -- deprecated now in favor
* of MODE_PAGE_SENSE_POWER */
#define MODE_PAGE_CDROM 0x0d
diff --git a/qemu/include/block/snapshot.h b/qemu/include/block/snapshot.h
index 770d9bbc8..e5c055311 100644
--- a/qemu/include/block/snapshot.h
+++ b/qemu/include/block/snapshot.h
@@ -26,7 +26,6 @@
#define SNAPSHOT_H
#include "qemu-common.h"
-#include "qapi/error.h"
#include "qemu/option.h"
@@ -63,9 +62,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp);
-void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
- const char *id_or_name,
- Error **errp);
+int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+ const char *id_or_name,
+ Error **errp);
int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info);
int bdrv_snapshot_load_tmp(BlockDriverState *bs,
@@ -75,4 +74,22 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
const char *id_or_name,
Error **errp);
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplane (take aio_context_acquire
+ * when appropriate for appropriate block drivers */
+
+bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs);
+int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs,
+ Error **err);
+int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs);
+int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs);
+int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
+ BlockDriverState *vm_state_bs,
+ uint64_t vm_state_size,
+ BlockDriverState **first_bad_bs);
+
+BlockDriverState *bdrv_all_find_vmstate_bs(void);
+
#endif
diff --git a/qemu/include/block/throttle-groups.h b/qemu/include/block/throttle-groups.h
index fab113f6d..aba28f30b 100644
--- a/qemu/include/block/throttle-groups.h
+++ b/qemu/include/block/throttle-groups.h
@@ -30,6 +30,9 @@
const char *throttle_group_get_name(BlockDriverState *bs);
+ThrottleState *throttle_group_incref(const char *name);
+void throttle_group_unref(ThrottleState *ts);
+
void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg);
void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg);
@@ -40,7 +43,4 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
unsigned int bytes,
bool is_write);
-void throttle_group_lock(BlockDriverState *bs);
-void throttle_group_unlock(BlockDriverState *bs);
-
#endif
diff --git a/qemu/include/block/write-threshold.h b/qemu/include/block/write-threshold.h
index f1b899cd5..234d2193e 100644
--- a/qemu/include/block/write-threshold.h
+++ b/qemu/include/block/write-threshold.h
@@ -12,9 +12,6 @@
#ifndef BLOCK_WRITE_THRESHOLD_H
#define BLOCK_WRITE_THRESHOLD_H
-#include <stdint.h>
-
-#include "qemu/typedefs.h"
#include "qemu-common.h"
/*