From 437fd90c0250dee670290f9b714253671a990160 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Wed, 18 May 2016 13:18:31 +0300 Subject: These changes are the raw update to qemu-2.6. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collisions happened in the following patches: migration: do cleanup operation after completion(738df5b9) Bug fix.(1750c932f86) kvmclock: add a new function to update env->tsc.(b52baab2) The code provided by the patches was already in the upstreamed version. Change-Id: I3cc11841a6a76ae20887b2e245710199e1ea7f9a Signed-off-by: José Pekkarinen --- qemu/include/block/accounting.h | 31 ++++- qemu/include/block/aio.h | 70 +++++++++- qemu/include/block/block.h | 239 ++++++++++++----------------------- qemu/include/block/block_int.h | 135 +++++++++++++++----- qemu/include/block/blockjob.h | 98 +++++++++++++- qemu/include/block/coroutine.h | 219 -------------------------------- qemu/include/block/coroutine_int.h | 54 -------- qemu/include/block/dirty-bitmap.h | 44 +++++++ qemu/include/block/nbd.h | 30 +++-- qemu/include/block/qapi.h | 3 +- qemu/include/block/scsi.h | 2 +- qemu/include/block/snapshot.h | 25 +++- qemu/include/block/throttle-groups.h | 6 +- qemu/include/block/write-threshold.h | 3 - 14 files changed, 465 insertions(+), 494 deletions(-) delete mode 100644 qemu/include/block/coroutine.h delete mode 100644 qemu/include/block/coroutine_int.h create mode 100644 qemu/include/block/dirty-bitmap.h (limited to 'qemu/include/block') diff --git a/qemu/include/block/accounting.h b/qemu/include/block/accounting.h index 4c406cff7..20891639d 100644 --- a/qemu/include/block/accounting.h +++ b/qemu/include/block/accounting.h @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. 
* * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -24,9 +25,9 @@ #ifndef BLOCK_ACCOUNTING_H #define BLOCK_ACCOUNTING_H -#include +#include "qemu/timed-average.h" -#include "qemu/typedefs.h" +typedef struct BlockAcctTimedStats BlockAcctTimedStats; enum BlockAcctType { BLOCK_ACCT_READ, @@ -35,12 +36,23 @@ enum BlockAcctType { BLOCK_MAX_IOTYPE, }; +struct BlockAcctTimedStats { + TimedAverage latency[BLOCK_MAX_IOTYPE]; + unsigned interval_length; /* in seconds */ + QSLIST_ENTRY(BlockAcctTimedStats) entries; +}; + typedef struct BlockAcctStats { uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; uint64_t nr_ops[BLOCK_MAX_IOTYPE]; + uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; + uint64_t failed_ops[BLOCK_MAX_IOTYPE]; uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; uint64_t merged[BLOCK_MAX_IOTYPE]; - uint64_t wr_highest_sector; + int64_t last_access_time_ns; + QSLIST_HEAD(, BlockAcctTimedStats) intervals; + bool account_invalid; + bool account_failed; } BlockAcctStats; typedef struct BlockAcctCookie { @@ -49,12 +61,21 @@ typedef struct BlockAcctCookie { enum BlockAcctType type; } BlockAcctCookie; +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed); +void block_acct_cleanup(BlockAcctStats *stats); +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s); void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type); void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); -void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num, - unsigned int nb_sectors); +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); void block_acct_merge_done(BlockAcctStats *stats, enum 
BlockAcctType type, int num_requests); +int64_t block_acct_idle_time_ns(BlockAcctStats *stats); +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type); #endif diff --git a/qemu/include/block/aio.h b/qemu/include/block/aio.h index 400b1b002..88a64eeb3 100644 --- a/qemu/include/block/aio.h +++ b/qemu/include/block/aio.h @@ -14,7 +14,6 @@ #ifndef QEMU_AIO_H #define QEMU_AIO_H -#include "qemu/typedefs.h" #include "qemu-common.h" #include "qemu/queue.h" #include "qemu/event_notifier.h" @@ -122,6 +121,13 @@ struct AioContext { /* TimerLists for calling timers - one per clock type */ QEMUTimerListGroup tlg; + + int external_disable_cnt; + + /* epoll(7) state used when built with CONFIG_EPOLL */ + int epollfd; + bool epoll_enabled; + bool epoll_available; }; /** @@ -206,6 +212,11 @@ void aio_notify(AioContext *ctx); */ void aio_notify_accept(AioContext *ctx); +/** + * aio_bh_call: Executes callback function of the specified BH. + */ +void aio_bh_call(QEMUBH *bh); + /** * aio_bh_poll: Poll bottom halves for an AioContext. * @@ -299,6 +310,7 @@ bool aio_poll(AioContext *ctx, bool blocking); */ void aio_set_fd_handler(AioContext *ctx, int fd, + bool is_external, IOHandler *io_read, IOHandler *io_write, void *opaque); @@ -312,6 +324,7 @@ void aio_set_fd_handler(AioContext *ctx, */ void aio_set_event_notifier(AioContext *ctx, EventNotifier *notifier, + bool is_external, EventNotifierHandler *io_read); /* Return a GSource that lets the main loop poll the file descriptors attached @@ -373,4 +386,59 @@ static inline void aio_timer_init(AioContext *ctx, */ int64_t aio_compute_timeout(AioContext *ctx); +/** + * aio_disable_external: + * @ctx: the aio context + * + * Disable the further processing of external clients. + */ +static inline void aio_disable_external(AioContext *ctx) +{ + atomic_inc(&ctx->external_disable_cnt); +} + +/** + * aio_enable_external: + * @ctx: the aio context + * + * Enable the processing of external clients. 
+ */ +static inline void aio_enable_external(AioContext *ctx) +{ + assert(ctx->external_disable_cnt > 0); + atomic_dec(&ctx->external_disable_cnt); +} + +/** + * aio_external_disabled: + * @ctx: the aio context + * + * Return true if the external clients are disabled. + */ +static inline bool aio_external_disabled(AioContext *ctx) +{ + return atomic_read(&ctx->external_disable_cnt); +} + +/** + * aio_node_check: + * @ctx: the aio context + * @is_external: Whether or not the checked node is an external event source. + * + * Check if the node's is_external flag is okay to be polled by the ctx at this + * moment. True means green light. + */ +static inline bool aio_node_check(AioContext *ctx, bool is_external) +{ + return !is_external || !atomic_read(&ctx->external_disable_cnt); +} + +/** + * aio_context_setup: + * @ctx: the aio context + * + * Initialize the aio context. + */ +void aio_context_setup(AioContext *ctx, Error **errp); + #endif diff --git a/qemu/include/block/block.h b/qemu/include/block/block.h index 37916f720..3a731377d 100644 --- a/qemu/include/block/block.h +++ b/qemu/include/block/block.h @@ -2,18 +2,21 @@ #define BLOCK_H #include "block/aio.h" -#include "qemu-common.h" +#include "qemu/iov.h" #include "qemu/option.h" -#include "block/coroutine.h" +#include "qemu/coroutine.h" #include "block/accounting.h" +#include "block/dirty-bitmap.h" #include "qapi/qmp/qobject.h" #include "qapi-types.h" +#include "qemu/hbitmap.h" /* block.c */ typedef struct BlockDriver BlockDriver; typedef struct BlockJob BlockJob; typedef struct BdrvChild BdrvChild; typedef struct BdrvChildRole BdrvChildRole; +typedef struct BlockJobTxn BlockJobTxn; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -23,7 +26,7 @@ typedef struct BlockDriverInfo { bool is_dirty; /* * True if unallocated blocks read back as zeroes. This is equivalent - * to the the LBPRZ flag in the SCSI logical block provisioning page. 
+ * to the LBPRZ flag in the SCSI logical block provisioning page. */ bool unallocated_blocks_are_zero; /* @@ -51,15 +54,17 @@ typedef struct BlockFragInfo { } BlockFragInfo; typedef enum { - BDRV_REQ_COPY_ON_READ = 0x1, - BDRV_REQ_ZERO_WRITE = 0x2, + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver * is allowed to optimize a write zeroes request by unmapping (discarding) * blocks if it is guaranteed that the result will read back as * zeroes. The flag is only passed to the driver if the block device is * opened with BDRV_O_UNMAP. */ - BDRV_REQ_MAY_UNMAP = 0x4, + BDRV_REQ_MAY_UNMAP = 0x4, + BDRV_REQ_NO_SERIALISING = 0x8, + BDRV_REQ_FUA = 0x10, } BdrvRequestFlags; typedef struct BlockSizes { @@ -77,20 +82,20 @@ typedef struct HDGeometry { #define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ #define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ #define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ -#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ #define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ #define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ #define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ #define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ -#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */ +#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */ #define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ #define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ #define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ #define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: select an appropriate protocol driver, ignoring the format layer */ +#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ -#define 
BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) /* Option names of options parsed by the block layer */ @@ -109,9 +114,10 @@ typedef struct HDGeometry { /* * Allocation status flags - * BDRV_BLOCK_DATA: data is read from bs->file or another file + * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_get_block_status. * BDRV_BLOCK_ZERO: sectors read as zero - * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data + * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by + * bdrv_get_block_status. * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this * layer (as opposed to the backing file) * BDRV_BLOCK_RAW: used internally to indicate that the request @@ -147,6 +153,8 @@ typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; typedef struct BDRVReopenState { BlockDriverState *bs; int flags; + QDict *options; + QDict *explicit_options; void *opaque; } BDRVReopenState; @@ -165,18 +173,14 @@ typedef enum BlockOpType { BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, - BLOCK_OP_TYPE_MIRROR, + BLOCK_OP_TYPE_MIRROR_SOURCE, + BLOCK_OP_TYPE_MIRROR_TARGET, BLOCK_OP_TYPE_RESIZE, BLOCK_OP_TYPE_STREAM, BLOCK_OP_TYPE_REPLACE, BLOCK_OP_TYPE_MAX, } BlockOpType; -void bdrv_iostatus_enable(BlockDriverState *bs); -void bdrv_iostatus_reset(BlockDriverState *bs); -void bdrv_iostatus_disable(BlockDriverState *bs); -bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); -void bdrv_iostatus_set_err(BlockDriverState *bs, int error); void bdrv_info_print(Monitor *mon, const QObject *data); void bdrv_info(Monitor *mon, QObject **ret_data); void bdrv_stats_print(Monitor *mon, const QObject *data); @@ -189,51 +193,43 @@ void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group); void bdrv_init(void); void bdrv_init_with_whitelist(void); +bool 
bdrv_uses_whitelist(void); BlockDriver *bdrv_find_protocol(const char *filename, bool allow_protocol_prefix, Error **errp); BlockDriver *bdrv_find_format(const char *format_name); -BlockDriver *bdrv_find_whitelisted_format(const char *format_name, - bool readonly); int bdrv_create(BlockDriver *drv, const char* filename, QemuOpts *opts, Error **errp); int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); BlockDriverState *bdrv_new_root(void); BlockDriverState *bdrv_new(void); -void bdrv_make_anon(BlockDriverState *bs); -void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old); void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); -int bdrv_parse_cache_flags(const char *mode, int *flags); +void bdrv_replace_in_backing_chain(BlockDriverState *old, + BlockDriverState *new); + +int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); int bdrv_parse_discard_flags(const char *mode, int *flags); -int bdrv_open_image(BlockDriverState **pbs, const char *filename, - QDict *options, const char *bdref_key, - BlockDriverState* parent, const BdrvChildRole *child_role, - bool allow_none, Error **errp); BdrvChild *bdrv_open_child(const char *filename, QDict *options, const char *bdref_key, BlockDriverState* parent, const BdrvChildRole *child_role, bool allow_none, Error **errp); void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); -int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); -int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); +int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); int bdrv_open(BlockDriverState **pbs, const char *filename, - const char *reference, QDict *options, int flags, - BlockDriver *drv, Error **errp); + const char *reference, QDict *options, int flags, Error **errp); BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, - BlockDriverState 
*bs, int flags); + BlockDriverState *bs, + QDict *options, int flags); int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue, Error **errp); void bdrv_reopen_commit(BDRVReopenState *reopen_state); void bdrv_reopen_abort(BDRVReopenState *reopen_state); -void bdrv_close(BlockDriverState *bs); -void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify); int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); -int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors); int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, @@ -253,6 +249,8 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); /* @@ -274,7 +272,6 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); void bdrv_refresh_limits(BlockDriverState *bs, Error **errp); int bdrv_commit(BlockDriverState *bs); -int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); @@ -307,9 +304,9 @@ int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); * block driver; total_work_size may change during the course of the amendment * operation */ typedef void 
BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, - int64_t total_work_size); + int64_t total_work_size, void *opaque); int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb); + BlockDriverAmendStatusCB *status_cb, void *cb_opaque); /* external snapshots */ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, @@ -317,11 +314,10 @@ bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, bool bdrv_is_first_non_filter(BlockDriverState *candidate); /* check if a named node can be replaced when doing drive-mirror */ -BlockDriverState *check_to_replace_node(const char *node_name, Error **errp); +BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp); /* async block I/O */ -typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, - int sector_num); BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque); @@ -338,10 +334,18 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb); typedef struct BlockRequest { /* Fields to be filled by multiwrite caller */ - int64_t sector; - int nb_sectors; - int flags; - QEMUIOVector *qiov; + union { + struct { + int64_t sector; + int nb_sectors; + int flags; + QEMUIOVector *qiov; + }; + struct { + int req; + void *buf; + }; + }; BlockCompletionFunc *cb; void *opaque; @@ -361,13 +365,14 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, /* Invalidate any cached metadata used by image formats */ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); void bdrv_invalidate_cache_all(Error **errp); +int bdrv_inactivate_all(void); /* Ensure contents are flushed to disk. 
*/ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); -int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain(BlockDriverState *bs); +void coroutine_fn bdrv_co_drain(BlockDriverState *bs); void bdrv_drain_all(void); int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); @@ -377,27 +382,21 @@ int bdrv_has_zero_init(BlockDriverState *bs); bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum); + int nb_sectors, int *pnum, + BlockDriverState **file); int64_t bdrv_get_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t sector_num, - int nb_sectors, int *pnum); + int nb_sectors, int *pnum, + BlockDriverState **file); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, int64_t sector_num, int nb_sectors, int *pnum); -void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, - BlockdevOnError on_write_error); -BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read); -BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error); -void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, - bool is_read, int error); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_sg(BlockDriverState *bs); -int bdrv_enable_write_cache(BlockDriverState *bs); -void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce); -int bdrv_is_inserted(BlockDriverState *bs); +bool bdrv_is_inserted(BlockDriverState *bs); int bdrv_media_changed(BlockDriverState *bs); void bdrv_lock_medium(BlockDriverState *bs, bool locked); void bdrv_eject(BlockDriverState *bs, bool eject_flag); @@ -410,6 +409,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device, 
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); BlockDriverState *bdrv_next_node(BlockDriverState *bs); BlockDriverState *bdrv_next(BlockDriverState *bs); +BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); int bdrv_is_encrypted(BlockDriverState *bs); int bdrv_key_required(BlockDriverState *bs); int bdrv_set_key(BlockDriverState *bs, const char *key); @@ -464,50 +464,12 @@ void bdrv_img_create(const char *filename, const char *fmt, size_t bdrv_min_mem_align(BlockDriverState *bs); /* Returns optimal alignment in bytes for bounce buffer */ size_t bdrv_opt_mem_align(BlockDriverState *bs); -void bdrv_set_guest_block_size(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); void *qemu_blockalign0(BlockDriverState *bs, size_t size); void *qemu_try_blockalign(BlockDriverState *bs, size_t size); void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); -struct HBitmapIter; -typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, - uint32_t granularity, - const char *name, - Error **errp); -int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, - BdrvDirtyBitmap *bitmap, - Error **errp); -BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, - const char *name); -void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); -void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); -void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); -void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); -BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); -uint32_t 
bdrv_get_default_bitmap_granularity(BlockDriverState *bs); -uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); -bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); -bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); -DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); -int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); -void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); -void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, - int64_t cur_sector, int nr_sectors); -void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap); -void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); -void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); -int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); - void bdrv_enable_copy_on_read(BlockDriverState *bs); void bdrv_disable_copy_on_read(BlockDriverState *bs); @@ -522,68 +484,14 @@ void bdrv_op_block_all(BlockDriverState *bs, Error *reason); void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); bool bdrv_op_blocker_is_empty(BlockDriverState *bs); -typedef enum { - BLKDBG_L1_UPDATE, - - BLKDBG_L1_GROW_ALLOC_TABLE, - BLKDBG_L1_GROW_WRITE_TABLE, - BLKDBG_L1_GROW_ACTIVATE_TABLE, - - BLKDBG_L2_LOAD, - BLKDBG_L2_UPDATE, - BLKDBG_L2_UPDATE_COMPRESSED, - BLKDBG_L2_ALLOC_COW_READ, - BLKDBG_L2_ALLOC_WRITE, - - BLKDBG_READ_AIO, - BLKDBG_READ_BACKING_AIO, - BLKDBG_READ_COMPRESSED, - - BLKDBG_WRITE_AIO, - BLKDBG_WRITE_COMPRESSED, - - BLKDBG_VMSTATE_LOAD, - BLKDBG_VMSTATE_SAVE, - - BLKDBG_COW_READ, - BLKDBG_COW_WRITE, - - BLKDBG_REFTABLE_LOAD, - BLKDBG_REFTABLE_GROW, - BLKDBG_REFTABLE_UPDATE, - - BLKDBG_REFBLOCK_LOAD, - BLKDBG_REFBLOCK_UPDATE, - BLKDBG_REFBLOCK_UPDATE_PART, - BLKDBG_REFBLOCK_ALLOC, - BLKDBG_REFBLOCK_ALLOC_HOOKUP, - BLKDBG_REFBLOCK_ALLOC_WRITE, - BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS, - BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE, - BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE, +#define 
BLKDBG_EVENT(child, evt) \ + do { \ + if (child) { \ + bdrv_debug_event(child->bs, evt); \ + } \ + } while (0) - BLKDBG_CLUSTER_ALLOC, - BLKDBG_CLUSTER_ALLOC_BYTES, - BLKDBG_CLUSTER_FREE, - - BLKDBG_FLUSH_TO_OS, - BLKDBG_FLUSH_TO_DISK, - - BLKDBG_PWRITEV_RMW_HEAD, - BLKDBG_PWRITEV_RMW_AFTER_HEAD, - BLKDBG_PWRITEV_RMW_TAIL, - BLKDBG_PWRITEV_RMW_AFTER_TAIL, - BLKDBG_PWRITEV, - BLKDBG_PWRITEV_ZERO, - BLKDBG_PWRITEV_DONE, - - BLKDBG_EMPTY_IMAGE_PREPARE, - - BLKDBG_EVENT_MAX, -} BlkDebugEvent; - -#define BLKDBG_EVENT(bs, evt) bdrv_debug_event(bs, evt) -void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); +void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event); int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, const char *tag); @@ -614,6 +522,23 @@ void bdrv_io_plug(BlockDriverState *bs); void bdrv_io_unplug(BlockDriverState *bs); void bdrv_flush_io_queue(BlockDriverState *bs); -BlockAcctStats *bdrv_get_stats(BlockDriverState *bs); +/** + * bdrv_drained_begin: + * + * Begin a quiesced section for exclusive access to the BDS, by disabling + * external request sources including NBD server and device model. Note that + * this doesn't block timers or coroutines from submitting more requests, which + * means block_job_pause is still necessary. + * + * This function can be recursive. + */ +void bdrv_drained_begin(BlockDriverState *bs); + +/** + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). 
+ */ +void bdrv_drained_end(BlockDriverState *bs); #endif diff --git a/qemu/include/block/block_int.h b/qemu/include/block/block_int.h index 14ad4c334..10d87595b 100644 --- a/qemu/include/block/block_int.h +++ b/qemu/include/block/block_int.h @@ -26,9 +26,10 @@ #include "block/accounting.h" #include "block/block.h" +#include "block/throttle-groups.h" #include "qemu/option.h" #include "qemu/queue.h" -#include "block/coroutine.h" +#include "qemu/coroutine.h" #include "qemu/timer.h" #include "qapi-types.h" #include "qemu/hbitmap.h" @@ -59,11 +60,19 @@ #define BLOCK_PROBE_BUF_SIZE 512 +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_FLUSH, + BDRV_TRACKED_IOCTL, + BDRV_TRACKED_DISCARD, +}; + typedef struct BdrvTrackedRequest { BlockDriverState *bs; int64_t offset; unsigned int bytes; - bool is_write; + enum BdrvTrackedRequestType type; bool serialising; int64_t overlap_offset; @@ -112,6 +121,7 @@ struct BlockDriver { BlockReopenQueue *queue, Error **errp); void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + void (*bdrv_join_options)(QDict *options, QDict *old_options); int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, Error **errp); @@ -122,12 +132,11 @@ struct BlockDriver { int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); void (*bdrv_close)(BlockDriverState *bs); - void (*bdrv_rebind)(BlockDriverState *bs); int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp); int (*bdrv_set_key)(BlockDriverState *bs, const char *key); int (*bdrv_make_empty)(BlockDriverState *bs); - void (*bdrv_refresh_filename)(BlockDriverState *bs); + void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options); /* aio */ BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs, @@ -146,6 +155,11 @@ struct BlockDriver { int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); int coroutine_fn 
(*bdrv_co_writev)(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); + + int supported_write_flags; + /* * Efficiently zero a region of the disk image. Typically an image format * would use a compact metadata representation to implement this. This @@ -157,12 +171,21 @@ struct BlockDriver { int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum); + int64_t sector_num, int nb_sectors, int *pnum, + BlockDriverState **file); /* * Invalidate any cached meta-data. */ void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp); + int (*bdrv_inactivate)(BlockDriverState *bs); + + /* + * Flushes all data for all layers by calling bdrv_co_flush for underlying + * layers, if needed. This function is needed for deterministic + * synchronization of the flush finishing callback. 
+ */ + int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs); /* * Flushes all data that was already written to the OS all the way down to @@ -213,13 +236,12 @@ struct BlockDriver { const char *backing_file, const char *backing_fmt); /* removable device specific */ - int (*bdrv_is_inserted)(BlockDriverState *bs); + bool (*bdrv_is_inserted)(BlockDriverState *bs); int (*bdrv_media_changed)(BlockDriverState *bs); void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); /* to control generic scsi devices */ - int (*bdrv_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf); BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque); @@ -235,9 +257,10 @@ struct BlockDriver { BdrvCheckMode fix); int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb); + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque); - void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); + void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event); /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, @@ -288,6 +311,12 @@ struct BlockDriver { */ int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + /** + * Drain and stop any internal sources of requests in the driver, and + * remain so until next I/O callback (e.g. bdrv_co_writev) is called. 
+ */ + void (*bdrv_drain)(BlockDriverState *bs); + QLIST_ENTRY(BlockDriver) list; }; @@ -315,6 +344,9 @@ typedef struct BlockLimits { /* memory alignment for bounce buffer */ size_t opt_mem_alignment; + + /* maximum number of iovec elements */ + int max_iov; } BlockLimits; typedef struct BdrvOpBlocker BdrvOpBlocker; @@ -329,7 +361,8 @@ typedef struct BdrvAioNotifier { } BdrvAioNotifier; struct BdrvChildRole { - int (*inherit_flags)(int parent_flags); + void (*inherit_options)(int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options); }; extern const BdrvChildRole child_file; @@ -337,8 +370,10 @@ extern const BdrvChildRole child_format; struct BdrvChild { BlockDriverState *bs; + char *name; const BdrvChildRole *role; QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; }; /* @@ -378,11 +413,8 @@ struct BlockDriverState { QDict *full_open_options; char exact_filename[PATH_MAX]; - BlockDriverState *backing_hd; - BdrvChild *backing_child; - BlockDriverState *file; - - NotifierList close_notifiers; + BdrvChild *backing; + BdrvChild *file; /* Callback before write request is processed */ NotifierWithReturnList before_write_notifiers; @@ -390,7 +422,10 @@ struct BlockDriverState { /* number of in-flight serialising requests */ unsigned int serialising_in_flight; - /* I/O throttling */ + /* I/O throttling. + * throttle_state tells us if this BDS has I/O limits configured. + * io_limits_enabled tells us if they are currently being + * enforced, but it can be temporarily set to false */ CoQueue throttled_reqs[2]; bool io_limits_enabled; /* The following fields are protected by the ThrottleGroup lock. @@ -400,8 +435,8 @@ struct BlockDriverState { unsigned pending_reqs[2]; QLIST_ENTRY(BlockDriverState) round_robin; - /* I/O stats (display with "info blockstats"). 
*/ - BlockAcctStats stats; + /* Offset after the highest byte written to */ + uint64_t wr_highest_offset; /* I/O Limits */ BlockLimits bl; @@ -412,24 +447,14 @@ struct BlockDriverState { /* Alignment requirement for offset/length of I/O requests */ unsigned int request_alignment; - /* the block size for which the guest device expects atomicity */ - int guest_block_size; - - /* do we need to tell the quest if we have a volatile write cache? */ - int enable_write_cache; - - /* NOTE: the following infos are only hints for real hardware - drivers. They are not used by the block driver */ - BlockdevOnError on_read_error, on_write_error; - bool iostatus_enabled; - BlockDeviceIoStatus iostatus; - /* the following member gives a name to every node on the bs graph. */ char node_name[32]; /* element of the list of named nodes building the graph */ QTAILQ_ENTRY(BlockDriverState) node_list; - /* element of the list of "drives" the guest sees */ - QTAILQ_ENTRY(BlockDriverState) device_list; + /* element of the list of all BlockDriverStates (all_bdrv_states) */ + QTAILQ_ENTRY(BlockDriverState) bs_list; + /* element of the list of monitor-owned BDS */ + QTAILQ_ENTRY(BlockDriverState) monitor_list; QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; int refcnt; @@ -446,8 +471,10 @@ struct BlockDriverState { * parent node of this node. */ BlockDriverState *inherits_from; QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild) parents; QDict *options; + QDict *explicit_options; BlockdevDetectZeroesOptions detect_zeroes; /* The error object in use for blocking operations on backing_hd */ @@ -456,8 +483,24 @@ struct BlockDriverState { /* threshold limit for writes, in bytes. "High water mark". 
*/ uint64_t write_threshold_offset; NotifierWithReturn write_threshold_notifier; + + int quiesce_counter; +}; + +struct BlockBackendRootState { + int open_flags; + bool read_only; + BlockdevDetectZeroesOptions detect_zeroes; + + char *throttle_group; + ThrottleState *throttle_state; }; +static inline BlockDriverState *backing_bs(BlockDriverState *bs) +{ + return bs->backing ? bs->backing->bs : NULL; +} + /* Essential block drivers which must always be statically linked into qemu, and * which therefore can be accessed without using bdrv_find_format() */ @@ -474,6 +517,13 @@ extern BlockDriver bdrv_qcow2; */ void bdrv_setup_io_funcs(BlockDriver *bdrv); +int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); +int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags); + int get_tmp_filename(char *filename, int size); BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, const char *filename); @@ -496,7 +546,7 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs, * * May be called from .bdrv_detach_aio_context() to detach children from the * current #AioContext. This is only needed by block drivers that manage their - * own children. Both ->file and ->backing_hd are automatically handled and + * own children. Both ->file and ->backing are automatically handled and * block drivers should not call this function on them explicitly. */ void bdrv_detach_aio_context(BlockDriverState *bs); @@ -506,7 +556,7 @@ void bdrv_detach_aio_context(BlockDriverState *bs); * * May be called from .bdrv_attach_aio_context() to attach children to the new * #AioContext. This is only needed by block drivers that manage their own - * children. Both ->file and ->backing_hd are automatically handled and block + * children. 
Both ->file and ->backing are automatically handled and block * drivers should not call this function on them explicitly. */ void bdrv_attach_aio_context(BlockDriverState *bs, @@ -643,6 +693,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, * @on_target_error: The action to take upon error writing to the target. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). * * Start a backup operation on @bs. Clusters in @bs are written to @target * until the job is cancelled or manually completed. @@ -653,15 +704,29 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp); + BlockJobTxn *txn, Error **errp); + +void hmp_drive_add_node(Monitor *mon, const char *optstr); + +BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs, + const char *child_name, + const BdrvChildRole *child_role); +void bdrv_root_unref_child(BdrvChild *child); void blk_dev_change_media_cb(BlockBackend *blk, bool load); bool blk_dev_has_removable_media(BlockBackend *blk); +bool blk_dev_has_tray(BlockBackend *blk); void blk_dev_eject_request(BlockBackend *blk, bool force); bool blk_dev_is_tray_open(BlockBackend *blk); bool blk_dev_is_medium_locked(BlockBackend *blk); void blk_dev_resize_cb(BlockBackend *blk); void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +bool bdrv_requests_pending(BlockDriverState *bs); + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); + +void blockdev_close_all_bdrv_states(void); #endif /* BLOCK_INT_H */ diff --git a/qemu/include/block/blockjob.h b/qemu/include/block/blockjob.h index dd9d5e6aa..8bedc4936 100644 --- a/qemu/include/block/blockjob.h +++ b/qemu/include/block/blockjob.h @@ -50,6 +50,26 @@ 
typedef struct BlockJobDriver { * manually. */ void (*complete)(BlockJob *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(BlockJob *job); } BlockJobDriver; /** @@ -64,6 +84,14 @@ struct BlockJob { /** The block device on which the job is operating. */ BlockDriverState *bs; + /** + * The ID of the block job. Currently the BlockBackend name of the BDS + * owning the job at the time when the job is started. + * + * TODO Decouple block job IDs from BlockBackend names + */ + char *id; + /** * The coroutine that executes the job. If not NULL, it is * reentered when busy is false and the job is cancelled. @@ -102,6 +130,11 @@ struct BlockJob { */ bool ready; + /** + * Set to true when the job has deferred work to the main loop. + */ + bool deferred_to_main_loop; + /** Status that is published by the query-block-jobs QMP API */ BlockDeviceIoStatus iostatus; @@ -122,6 +155,21 @@ struct BlockJob { /** The opaque value that is passed to the completion function. */ void *opaque; + + /** Reference count of the block job */ + int refcnt; + + /* True if this job has reported completion by calling block_job_completed. + */ + bool completed; + + /* ret code passed to block_job_completed. 
+ */ + int ret; + + /** Non-NULL if this job is part of a transaction */ + BlockJobTxn *txn; + QLIST_ENTRY(BlockJob) txn_list; }; /** @@ -166,12 +214,21 @@ void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); void block_job_yield(BlockJob *job); /** - * block_job_release: + * block_job_ref: * @bs: The block device. * - * Release job resources when an error occurred or job completed. + * Grab a reference to the block job. Should be paired with block_job_unref. */ -void block_job_release(BlockDriverState *bs); +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: + * @job: The block job. + * + * Release reference to the block job and release resources if it is the last + * reference. + */ +void block_job_unref(BlockJob *job); /** * block_job_completed: @@ -356,4 +413,39 @@ void block_job_defer_to_main_loop(BlockJob *job, BlockJobDeferToMainLoopFn *fn, void *opaque); +/** + * block_job_txn_new: + * + * Allocate and return a new block job transaction. Jobs can be added to the + * transaction using block_job_txn_add_job(). + * + * The transaction is automatically freed when the last job completes or is + * cancelled. + * + * All jobs in the transaction either complete successfully or fail/cancel as a + * group. Jobs wait for each other before completing. Cancelling one job + * cancels all jobs in the transaction. + */ +BlockJobTxn *block_job_txn_new(void); + +/** + * block_job_txn_unref: + * + * Release a reference that was previously acquired with block_job_txn_add_job + * or block_job_txn_new. If it's the last reference to the object, it will be + * freed. + */ +void block_job_txn_unref(BlockJobTxn *txn); + +/** + * block_job_txn_add_job: + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. 
+ * The caller must call either block_job_txn_unref() or block_job_completed() + * to release the reference that is automatically grabbed here. + */ +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job); + #endif diff --git a/qemu/include/block/coroutine.h b/qemu/include/block/coroutine.h deleted file mode 100644 index 20c027a7f..000000000 --- a/qemu/include/block/coroutine.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * QEMU coroutine implementation - * - * Copyright IBM, Corp. 2011 - * - * Authors: - * Stefan Hajnoczi - * Kevin Wolf - * - * This work is licensed under the terms of the GNU LGPL, version 2 or later. - * See the COPYING.LIB file in the top-level directory. - * - */ - -#ifndef QEMU_COROUTINE_H -#define QEMU_COROUTINE_H - -#include -#include "qemu/typedefs.h" -#include "qemu/queue.h" -#include "qemu/timer.h" - -/** - * Coroutines are a mechanism for stack switching and can be used for - * cooperative userspace threading. These functions provide a simple but - * useful flavor of coroutines that is suitable for writing sequential code, - * rather than callbacks, for operations that need to give up control while - * waiting for events to complete. - * - * These functions are re-entrant and may be used outside the global mutex. - */ - -/** - * Mark a function that executes in coroutine context - * - * Functions that execute in coroutine context cannot be called directly from - * normal functions. In the future it would be nice to enable compiler or - * static checker support for catching such errors. This annotation might make - * it possible and in the meantime it serves as documentation. - * - * For example: - * - * static void coroutine_fn foo(void) { - * .... - * } - */ -#define coroutine_fn - -typedef struct Coroutine Coroutine; - -/** - * Coroutine entry point - * - * When the coroutine is entered for the first time, opaque is passed in as an - * argument. 
- * - * When this function returns, the coroutine is destroyed automatically and - * execution continues in the caller who last entered the coroutine. - */ -typedef void coroutine_fn CoroutineEntry(void *opaque); - -/** - * Create a new coroutine - * - * Use qemu_coroutine_enter() to actually transfer control to the coroutine. - */ -Coroutine *qemu_coroutine_create(CoroutineEntry *entry); - -/** - * Transfer control to a coroutine - * - * The opaque argument is passed as the argument to the entry point when - * entering the coroutine for the first time. It is subsequently ignored. - */ -void qemu_coroutine_enter(Coroutine *coroutine, void *opaque); - -/** - * Transfer control back to a coroutine's caller - * - * This function does not return until the coroutine is re-entered using - * qemu_coroutine_enter(). - */ -void coroutine_fn qemu_coroutine_yield(void); - -/** - * Get the currently executing coroutine - */ -Coroutine *coroutine_fn qemu_coroutine_self(void); - -/** - * Return whether or not currently inside a coroutine - * - * This can be used to write functions that work both when in coroutine context - * and when not in coroutine context. Note that such functions cannot use the - * coroutine_fn annotation since they work outside coroutine context. - */ -bool qemu_in_coroutine(void); - - - -/** - * CoQueues are a mechanism to queue coroutines in order to continue executing - * them later. They provide the fundamental primitives on which coroutine locks - * are built. - */ -typedef struct CoQueue { - QTAILQ_HEAD(, Coroutine) entries; -} CoQueue; - -/** - * Initialise a CoQueue. This must be called before any other operation is used - * on the CoQueue. - */ -void qemu_co_queue_init(CoQueue *queue); - -/** - * Adds the current coroutine to the CoQueue and transfers control to the - * caller of the coroutine. - */ -void coroutine_fn qemu_co_queue_wait(CoQueue *queue); - -/** - * Restarts the next coroutine in the CoQueue and removes it from the queue. 
- * - * Returns true if a coroutine was restarted, false if the queue is empty. - */ -bool coroutine_fn qemu_co_queue_next(CoQueue *queue); - -/** - * Restarts all coroutines in the CoQueue and leaves the queue empty. - */ -void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue); - -/** - * Enter the next coroutine in the queue - */ -bool qemu_co_enter_next(CoQueue *queue); - -/** - * Checks if the CoQueue is empty. - */ -bool qemu_co_queue_empty(CoQueue *queue); - - -/** - * Provides a mutex that can be used to synchronise coroutines - */ -typedef struct CoMutex { - bool locked; - CoQueue queue; -} CoMutex; - -/** - * Initialises a CoMutex. This must be called before any other operation is used - * on the CoMutex. - */ -void qemu_co_mutex_init(CoMutex *mutex); - -/** - * Locks the mutex. If the lock cannot be taken immediately, control is - * transferred to the caller of the current coroutine. - */ -void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); - -/** - * Unlocks the mutex and schedules the next coroutine that was waiting for this - * lock to be run. - */ -void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); - -typedef struct CoRwlock { - bool writer; - int reader; - CoQueue queue; -} CoRwlock; - -/** - * Initialises a CoRwlock. This must be called before any other operation - * is used on the CoRwlock - */ -void qemu_co_rwlock_init(CoRwlock *lock); - -/** - * Read locks the CoRwlock. If the lock cannot be taken immediately because - * of a parallel writer, control is transferred to the caller of the current - * coroutine. - */ -void qemu_co_rwlock_rdlock(CoRwlock *lock); - -/** - * Write Locks the mutex. If the lock cannot be taken immediately because - * of a parallel reader, control is transferred to the caller of the current - * coroutine. - */ -void qemu_co_rwlock_wrlock(CoRwlock *lock); - -/** - * Unlocks the read/write lock and schedules the next coroutine that was - * waiting for this lock to be run. 
- */ -void qemu_co_rwlock_unlock(CoRwlock *lock); - -/** - * Yield the coroutine for a given duration - * - * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be - * resumed when using aio_poll(). - */ -void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, - int64_t ns); - -/** - * Yield until a file descriptor becomes readable - * - * Note that this function clobbers the handlers for the file descriptor. - */ -void coroutine_fn yield_until_fd_readable(int fd); - -#endif /* QEMU_COROUTINE_H */ diff --git a/qemu/include/block/coroutine_int.h b/qemu/include/block/coroutine_int.h deleted file mode 100644 index 9aa1aae5d..000000000 --- a/qemu/include/block/coroutine_int.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Coroutine internals - * - * Copyright (c) 2011 Kevin Wolf - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
- */ - -#ifndef QEMU_COROUTINE_INT_H -#define QEMU_COROUTINE_INT_H - -#include "qemu/queue.h" -#include "block/coroutine.h" - -typedef enum { - COROUTINE_YIELD = 1, - COROUTINE_TERMINATE = 2, - COROUTINE_ENTER = 3, -} CoroutineAction; - -struct Coroutine { - CoroutineEntry *entry; - void *entry_arg; - Coroutine *caller; - QSLIST_ENTRY(Coroutine) pool_next; - - /* Coroutines that should be woken up when we yield or terminate */ - QTAILQ_HEAD(, Coroutine) co_queue_wakeup; - QTAILQ_ENTRY(Coroutine) co_queue_next; -}; - -Coroutine *qemu_coroutine_new(void); -void qemu_coroutine_delete(Coroutine *co); -CoroutineAction qemu_coroutine_switch(Coroutine *from, Coroutine *to, - CoroutineAction action); -void coroutine_fn qemu_co_queue_run_restart(Coroutine *co); - -#endif diff --git a/qemu/include/block/dirty-bitmap.h b/qemu/include/block/dirty-bitmap.h new file mode 100644 index 000000000..80afe603f --- /dev/null +++ b/qemu/include/block/dirty-bitmap.h @@ -0,0 +1,44 @@ +#ifndef BLOCK_DIRTY_BITMAP_H +#define BLOCK_DIRTY_BITMAP_H + +#include "qemu-common.h" +#include "qemu/hbitmap.h" + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp); +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, + const char *name); +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs); +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +BlockDirtyInfoList 
*bdrv_query_dirty_bitmaps(BlockDriverState *bs); +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector); +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs); + +#endif diff --git a/qemu/include/block/nbd.h b/qemu/include/block/nbd.h index 65f409d80..b86a97698 100644 --- a/qemu/include/block/nbd.h +++ b/qemu/include/block/nbd.h @@ -19,10 +19,11 @@ #ifndef NBD_H #define NBD_H -#include #include "qemu-common.h" #include "qemu/option.h" +#include "io/channel-socket.h" +#include "crypto/tlscreds.h" struct nbd_request { uint32_t magic; @@ -55,7 +56,10 @@ struct nbd_reply { #define NBD_REP_ACK (1) /* Data sending finished. */ #define NBD_REP_SERVER (2) /* Export description. */ #define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */ +#define NBD_REP_ERR_POLICY ((UINT32_C(1) << 31) | 2) /* Server denied */ #define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. 
*/ +#define NBD_REP_ERR_TLS_REQD ((UINT32_C(1) << 31) | 5) /* TLS required */ + #define NBD_CMD_MASK_COMMAND 0x0000ffff #define NBD_CMD_FLAG_FUA (1 << 16) @@ -73,12 +77,19 @@ enum { /* Maximum size of a single READ/WRITE data buffer */ #define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024) -ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); -int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, +ssize_t nbd_wr_syncv(QIOChannel *ioc, + struct iovec *iov, + size_t niov, + size_t offset, + size_t length, + bool do_read); +int nbd_receive_negotiate(QIOChannel *ioc, const char *name, uint32_t *flags, + QCryptoTLSCreds *tlscreds, const char *hostname, + QIOChannel **outioc, off_t *size, Error **errp); -int nbd_init(int fd, int csock, uint32_t flags, off_t size); -ssize_t nbd_send_request(int csock, struct nbd_request *request); -ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply); +int nbd_init(int fd, QIOChannelSocket *sioc, uint32_t flags, off_t size); +ssize_t nbd_send_request(QIOChannel *ioc, struct nbd_request *request); +ssize_t nbd_receive_reply(QIOChannel *ioc, struct nbd_reply *reply); int nbd_client(int fd); int nbd_disconnect(int fd); @@ -98,8 +109,11 @@ NBDExport *nbd_export_find(const char *name); void nbd_export_set_name(NBDExport *exp, const char *name); void nbd_export_close_all(void); -NBDClient *nbd_client_new(NBDExport *exp, int csock, - void (*close)(NBDClient *)); +void nbd_client_new(NBDExport *exp, + QIOChannelSocket *sioc, + QCryptoTLSCreds *tlscreds, + const char *tlsaclname, + void (*close)(NBDClient *)); void nbd_client_get(NBDClient *client); void nbd_client_put(NBDClient *client); diff --git a/qemu/include/block/qapi.h b/qemu/include/block/qapi.h index 327549d91..82ba4b63a 100644 --- a/qemu/include/block/qapi.h +++ b/qemu/include/block/qapi.h @@ -29,7 +29,8 @@ #include "block/block.h" #include "block/snapshot.h" -BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp); 
+BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + BlockDriverState *bs, Error **errp); int bdrv_query_snapshot_info_list(BlockDriverState *bs, SnapshotInfoList **p_list, Error **errp); diff --git a/qemu/include/block/scsi.h b/qemu/include/block/scsi.h index edde960d1..a311341e6 100644 --- a/qemu/include/block/scsi.h +++ b/qemu/include/block/scsi.h @@ -229,7 +229,7 @@ const char *scsi_command_name(uint8_t cmd); #define MODE_PAGE_TO_PROTECT 0x1d #define MODE_PAGE_CAPABILITIES 0x2a #define MODE_PAGE_ALLS 0x3f -/* Not in Mt. Fuji, but in ATAPI 2.6 -- depricated now in favor +/* Not in Mt. Fuji, but in ATAPI 2.6 -- deprecated now in favor * of MODE_PAGE_SENSE_POWER */ #define MODE_PAGE_CDROM 0x0d diff --git a/qemu/include/block/snapshot.h b/qemu/include/block/snapshot.h index 770d9bbc8..e5c055311 100644 --- a/qemu/include/block/snapshot.h +++ b/qemu/include/block/snapshot.h @@ -26,7 +26,6 @@ #define SNAPSHOT_H #include "qemu-common.h" -#include "qapi/error.h" #include "qemu/option.h" @@ -63,9 +62,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, const char *name, Error **errp); -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, - const char *id_or_name, - Error **errp); +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp); int bdrv_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_info); int bdrv_snapshot_load_tmp(BlockDriverState *bs, @@ -75,4 +74,22 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs, int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, const char *id_or_name, Error **errp); + + +/* Group operations. All block drivers are involved. 
+ * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers) */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, + Error **err); +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs); +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs); + +BlockDriverState *bdrv_all_find_vmstate_bs(void); + #endif diff --git a/qemu/include/block/throttle-groups.h b/qemu/include/block/throttle-groups.h index fab113f6d..aba28f30b 100644 --- a/qemu/include/block/throttle-groups.h +++ b/qemu/include/block/throttle-groups.h @@ -30,6 +30,9 @@ const char *throttle_group_get_name(BlockDriverState *bs); +ThrottleState *throttle_group_incref(const char *name); +void throttle_group_unref(ThrottleState *ts); + void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg); void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg); @@ -40,7 +43,4 @@ void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, unsigned int bytes, bool is_write); -void throttle_group_lock(BlockDriverState *bs); -void throttle_group_unlock(BlockDriverState *bs); - #endif diff --git a/qemu/include/block/write-threshold.h b/qemu/include/block/write-threshold.h index f1b899cd5..234d2193e 100644 --- a/qemu/include/block/write-threshold.h +++ b/qemu/include/block/write-threshold.h @@ -12,9 +12,6 @@ #ifndef BLOCK_WRITE_THRESHOLD_H #define BLOCK_WRITE_THRESHOLD_H -#include - -#include "qemu/typedefs.h" #include "qemu-common.h" /* -- cgit 1.2.3-korg