diff options
author | SeokYeon Hwang <syeon.hwang@samsung.com> | 2016-06-20 15:25:33 +0900 |
---|---|---|
committer | SeokYeon Hwang <syeon.hwang@samsung.com> | 2016-06-20 15:25:33 +0900 |
commit | b5b5a72b01ac12c4dfb5867deb00c7b39f4fece5 (patch) | |
tree | f28c18fd9706691e01dfbb9dd4d5b91b02bb78dd /block | |
parent | 00b7f2169caa54deb7f2e9d64908e7c6d2ff9d8e (diff) | |
parent | bfc766d38e1fae5767d43845c15c79ac8fa6d6af (diff) | |
download | qemu-b5b5a72b01ac12c4dfb5867deb00c7b39f4fece5.tar.gz qemu-b5b5a72b01ac12c4dfb5867deb00c7b39f4fece5.tar.bz2 qemu-b5b5a72b01ac12c4dfb5867deb00c7b39f4fece5.zip |
Merge tag 'v2.6.0' into develop
v2.6.0 release
Change-Id: I76aaeae2ace35ddf6dbbd4c11436724688b01929
Signed-off-by: SeokYeon Hwang <syeon.hwang@samsung.com>
Diffstat (limited to 'block')
61 files changed, 4539 insertions, 1330 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs index 58ef2ef3f2..44a5416225 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -4,7 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o -block-obj-y += parallels.o blkdebug.o blkverify.o +block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o block-obj-y += block-backend.o snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += raw-posix.o @@ -20,9 +20,11 @@ block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o block-obj-$(CONFIG_LIBSSH2) += ssh.o -block-obj-y += accounting.o +block-obj-y += accounting.o dirty-bitmap.o block-obj-y += write-threshold.o +block-obj-y += crypto.o + common-obj-y += stream.o common-obj-y += commit.o common-obj-y += backup.o diff --git a/block/accounting.c b/block/accounting.c index 185025ec1e..3f457c4e73 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -23,6 +23,7 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "block/accounting.h" #include "block/block_int.h" #include "qemu/timer.h" diff --git a/block/archipelago.c b/block/archipelago.c index 855655c6bd..b9f5e69d4a 100644 --- a/block/archipelago.c +++ b/block/archipelago.c @@ -50,7 +50,8 @@ * */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qemu/cutils.h" #include "block/block_int.h" #include "qemu/error-report.h" #include "qemu/thread.h" @@ -59,7 +60,6 @@ #include "qapi/qmp/qjson.h" #include "qemu/atomic.h" -#include <inttypes.h> #include <xseg/xseg.h> #include <xseg/protocol.h> diff --git a/block/backup.c b/block/backup.c index 705bb77661..491fd14068 100644 --- a/block/backup.c +++ b/block/backup.c @@ -11,22 +11,20 @@ * */ -#include <stdio.h> -#include <errno.h> -#include <unistd.h> +#include "qemu/osdep.h" #include "trace.h" #include "block/block.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" +#include "qemu/cutils.h" #include "sysemu/block-backend.h" +#include "qemu/bitmap.h" -#define BACKUP_CLUSTER_BITS 16 -#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS) -#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE) - +#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16) #define SLICE_TIME 100000000ULL /* ns */ typedef struct CowRequest { @@ -47,10 +45,17 @@ typedef struct BackupBlockJob { BlockdevOnError on_target_error; CoRwlock flush_rwlock; uint64_t sectors_read; - HBitmap *bitmap; + unsigned long *done_bitmap; + int64_t cluster_size; QLIST_HEAD(, CowRequest) inflight_reqs; } BackupBlockJob; +/* Size of a cluster in sectors, instead of bytes. */ +static inline int64_t cluster_size_sectors(BackupBlockJob *job) +{ + return job->cluster_size / BDRV_SECTOR_SIZE; +} + /* See if in-flight requests overlap and wait for them to complete */ static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, int64_t start, @@ -99,13 +104,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, QEMUIOVector bounce_qiov; void *bounce_buffer = NULL; int ret = 0; + int64_t sectors_per_cluster = cluster_size_sectors(job); int64_t start, end; int n; qemu_co_rwlock_rdlock(&job->flush_rwlock); - start = sector_num / BACKUP_SECTORS_PER_CLUSTER; - end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER); + start = sector_num / sectors_per_cluster; + end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster); trace_backup_do_cow_enter(job, start, sector_num, nb_sectors); @@ -113,19 +119,19 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, cow_request_begin(&cow_request, job, start, end); for (; start < end; start++) { - if (hbitmap_get(job->bitmap, start)) { + if (test_bit(start, job->done_bitmap)) { trace_backup_do_cow_skip(job, start); continue; /* already copied */ } trace_backup_do_cow_process(job, start); - n = MIN(BACKUP_SECTORS_PER_CLUSTER, + n = MIN(sectors_per_cluster, job->common.len / BDRV_SECTOR_SIZE - - start * BACKUP_SECTORS_PER_CLUSTER); + start * sectors_per_cluster); if (!bounce_buffer) { - bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE); + bounce_buffer = qemu_blockalign(bs, job->cluster_size); } iov.iov_base = bounce_buffer; iov.iov_len = n * BDRV_SECTOR_SIZE; @@ -133,10 +139,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (is_write_notifier) { ret = bdrv_co_readv_no_serialising(bs, - start * BACKUP_SECTORS_PER_CLUSTER, + start * sectors_per_cluster, n, &bounce_qiov); } else { - ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, + ret = bdrv_co_readv(bs, start * sectors_per_cluster, n, &bounce_qiov); } if (ret < 0) { @@ -149,11 +155,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, if (buffer_is_zero(iov.iov_base, iov.iov_len)) { ret = bdrv_co_write_zeroes(job->target, - start * BACKUP_SECTORS_PER_CLUSTER, + start * sectors_per_cluster, n, BDRV_REQ_MAY_UNMAP); } else { ret = bdrv_co_writev(job->target, - start * BACKUP_SECTORS_PER_CLUSTER, n, + start * sectors_per_cluster, n, &bounce_qiov); } if (ret < 0) { @@ -164,7 +170,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, goto out; } - hbitmap_set(job->bitmap, start, 1); + set_bit(start, job->done_bitmap); /* Publish progress, guest I/O counts as progress too. Note that the * offset field is an opaque progress value, it is not a disk offset. @@ -324,21 +330,22 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) int64_t cluster; int64_t end; int64_t last_cluster = -1; + int64_t sectors_per_cluster = cluster_size_sectors(job); BlockDriverState *bs = job->common.bs; HBitmapIter hbi; granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap); - clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1); + clusters_per_iter = MAX((granularity / job->cluster_size), 1); bdrv_dirty_iter_init(job->sync_bitmap, &hbi); /* Find the next dirty sector(s) */ while ((sector = hbitmap_iter_next(&hbi)) != -1) { - cluster = sector / BACKUP_SECTORS_PER_CLUSTER; + cluster = sector / sectors_per_cluster; /* Fake progress updates for any clusters we skipped */ if (cluster != last_cluster + 1) { job->common.offset += ((cluster - last_cluster - 1) * - BACKUP_CLUSTER_SIZE); + job->cluster_size); } for (end = cluster + clusters_per_iter; cluster < end; cluster++) { @@ -346,8 +353,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) if (yield_and_check(job)) { return ret; } - ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read, + ret = backup_do_cow(bs, cluster * sectors_per_cluster, + sectors_per_cluster, &error_is_read, false); if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == @@ -359,17 +366,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) /* If the bitmap granularity is smaller than the backup granularity, * we need to advance the iterator pointer to the next cluster. */ - if (granularity < BACKUP_CLUSTER_SIZE) { - bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER); + if (granularity < job->cluster_size) { + bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster); } last_cluster = cluster - 1; } /* Play some final catchup with the progress meter */ - end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE); + end = DIV_ROUND_UP(job->common.len, job->cluster_size); if (last_cluster + 1 < end) { - job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE); + job->common.offset += ((end - last_cluster - 1) * job->cluster_size); } return ret; @@ -386,17 +393,17 @@ static void coroutine_fn backup_run(void *opaque) .notify = backup_before_write_notify, }; int64_t start, end; + int64_t sectors_per_cluster = cluster_size_sectors(job); int ret = 0; QLIST_INIT(&job->inflight_reqs); qemu_co_rwlock_init(&job->flush_rwlock); start = 0; - end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE); + end = DIV_ROUND_UP(job->common.len, job->cluster_size); - job->bitmap = hbitmap_alloc(end, 0); + job->done_bitmap = bitmap_new(end); - bdrv_set_enable_write_cache(target, true); if (target->blk) { blk_set_on_error(target->blk, on_target_error, on_target_error); blk_iostatus_enable(target->blk); @@ -429,7 +436,7 @@ static void coroutine_fn backup_run(void *opaque) /* Check to see if these blocks are already in the * backing file. */ - for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) { + for (i = 0; i < sectors_per_cluster;) { /* bdrv_is_allocated() only returns true/false based * on the first set of sectors it comes across that * are are all in the same state. @@ -438,8 +445,8 @@ static void coroutine_fn backup_run(void *opaque) * needed but at some point that is always the case. */ alloced = bdrv_is_allocated(bs, - start * BACKUP_SECTORS_PER_CLUSTER + i, - BACKUP_SECTORS_PER_CLUSTER - i, &n); + start * sectors_per_cluster + i, + sectors_per_cluster - i, &n); i += n; if (alloced == 1 || n == 0) { @@ -454,8 +461,8 @@ static void coroutine_fn backup_run(void *opaque) } } /* FULL sync mode we copy the whole drive. */ - ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false); + ret = backup_do_cow(bs, start * sectors_per_cluster, + sectors_per_cluster, &error_is_read, false); if (ret < 0) { /* Depending on error action, fail now or retry cluster */ BlockErrorAction action = @@ -475,7 +482,7 @@ static void coroutine_fn backup_run(void *opaque) /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); - hbitmap_free(job->bitmap); + g_free(job->done_bitmap); if (target->blk) { blk_iostatus_disable(target->blk); @@ -496,6 +503,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockJobTxn *txn, Error **errp) { int64_t len; + BlockDriverInfo bdi; + int ret; assert(bs); assert(target); @@ -565,14 +574,32 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, goto error; } - bdrv_op_block_all(target, job->common.blocker); - job->on_source_error = on_source_error; job->on_target_error = on_target_error; job->target = target; job->sync_mode = sync_mode; job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ? sync_bitmap : NULL; + + /* If there is no backing file on the target, we cannot rely on COW if our + * backup cluster size is smaller than the target cluster size. Even for + * targets with a backing file, try to avoid COW if possible. */ + ret = bdrv_get_info(job->target, &bdi); + if (ret < 0 && !target->backing) { + error_setg_errno(errp, -ret, + "Couldn't determine the cluster size of the target image, " + "which has no backing file"); + error_append_hint(errp, + "Aborting, since this may create an unusable destination image\n"); + goto error; + } else if (ret < 0 && target->backing) { + /* Not fatal; just trudge on ahead. */ + job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT; + } else { + job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size); + } + + bdrv_op_block_all(target, job->common.blocker); job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); block_job_txn_add_job(txn, &job->common); diff --git a/block/blkdebug.c b/block/blkdebug.c index dee3a0edfc..20d25bda67 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -22,7 +22,9 @@ * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" #include "qemu/config-file.h" #include "block/block_int.h" #include "qemu/module.h" @@ -36,7 +38,7 @@ typedef struct BDRVBlkdebugState { int state; int new_state; - QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; + QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs; } BDRVBlkdebugState; @@ -64,7 +66,7 @@ enum { }; typedef struct BlkdebugRule { - BlkDebugEvent event; + BlkdebugEvent event; int action; int state; union { @@ -143,69 +145,12 @@ static QemuOptsList *config_groups[] = { NULL }; -static const char *event_names[BLKDBG_EVENT_MAX] = { - [BLKDBG_L1_UPDATE] = "l1_update", - [BLKDBG_L1_GROW_ALLOC_TABLE] = "l1_grow.alloc_table", - [BLKDBG_L1_GROW_WRITE_TABLE] = "l1_grow.write_table", - [BLKDBG_L1_GROW_ACTIVATE_TABLE] = "l1_grow.activate_table", - - [BLKDBG_L2_LOAD] = "l2_load", - [BLKDBG_L2_UPDATE] = "l2_update", - [BLKDBG_L2_UPDATE_COMPRESSED] = "l2_update_compressed", - [BLKDBG_L2_ALLOC_COW_READ] = "l2_alloc.cow_read", - [BLKDBG_L2_ALLOC_WRITE] = "l2_alloc.write", - - [BLKDBG_READ_AIO] = "read_aio", - [BLKDBG_READ_BACKING_AIO] = "read_backing_aio", - [BLKDBG_READ_COMPRESSED] = "read_compressed", - - [BLKDBG_WRITE_AIO] = "write_aio", - [BLKDBG_WRITE_COMPRESSED] = "write_compressed", - - [BLKDBG_VMSTATE_LOAD] = "vmstate_load", - [BLKDBG_VMSTATE_SAVE] = "vmstate_save", - - [BLKDBG_COW_READ] = "cow_read", - [BLKDBG_COW_WRITE] = "cow_write", - - [BLKDBG_REFTABLE_LOAD] = "reftable_load", - [BLKDBG_REFTABLE_GROW] = "reftable_grow", - [BLKDBG_REFTABLE_UPDATE] = "reftable_update", - - [BLKDBG_REFBLOCK_LOAD] = "refblock_load", - [BLKDBG_REFBLOCK_UPDATE] = "refblock_update", - [BLKDBG_REFBLOCK_UPDATE_PART] = "refblock_update_part", - [BLKDBG_REFBLOCK_ALLOC] = "refblock_alloc", - [BLKDBG_REFBLOCK_ALLOC_HOOKUP] = "refblock_alloc.hookup", - [BLKDBG_REFBLOCK_ALLOC_WRITE] = "refblock_alloc.write", - [BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS] = "refblock_alloc.write_blocks", - [BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE] = "refblock_alloc.write_table", - [BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE] = "refblock_alloc.switch_table", - - [BLKDBG_CLUSTER_ALLOC] = "cluster_alloc", - [BLKDBG_CLUSTER_ALLOC_BYTES] = "cluster_alloc_bytes", - [BLKDBG_CLUSTER_FREE] = "cluster_free", - - [BLKDBG_FLUSH_TO_OS] = "flush_to_os", - [BLKDBG_FLUSH_TO_DISK] = "flush_to_disk", - - [BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head", - [BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head", - [BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail", - [BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail", - [BLKDBG_PWRITEV] = "pwritev", - [BLKDBG_PWRITEV_ZERO] = "pwritev_zero", - [BLKDBG_PWRITEV_DONE] = "pwritev_done", - - [BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare", -}; - -static int get_event_by_name(const char *name, BlkDebugEvent *event) +static int get_event_by_name(const char *name, BlkdebugEvent *event) { int i; - for (i = 0; i < BLKDBG_EVENT_MAX; i++) { - if (!strcmp(event_names[i], name)) { + for (i = 0; i < BLKDBG__MAX; i++) { + if (!strcmp(BlkdebugEvent_lookup[i], name)) { *event = i; return 0; } @@ -224,7 +169,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp) struct add_rule_data *d = opaque; BDRVBlkdebugState *s = d->s; const char* event_name; - BlkDebugEvent event; + BlkdebugEvent event; struct BlkdebugRule *rule; /* Find the right event for the rule */ @@ -564,7 +509,7 @@ static void blkdebug_close(BlockDriverState *bs) BlkdebugRule *rule, *next; int i; - for (i = 0; i < BLKDBG_EVENT_MAX; i++) { + for (i = 0; i < BLKDBG__MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { remove_rule(rule); } @@ -627,13 +572,13 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, return injected; } -static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) +static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule, *next; bool injected; - assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); + assert((int)event >= 0 && event < BLKDBG__MAX); injected = false; s->new_state = s->state; @@ -648,7 +593,7 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event, { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule; - BlkDebugEvent blkdebug_event; + BlkdebugEvent blkdebug_event; if (get_event_by_name(event, &blkdebug_event) < 0) { return -ENOENT; @@ -690,7 +635,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs, BlkdebugRule *rule, *next; int i, ret = -ENOENT; - for (i = 0; i < BLKDBG_EVENT_MAX; i++) { + for (i = 0; i < BLKDBG__MAX; i++) { QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) { if (rule->action == ACTION_SUSPEND && !strcmp(rule->options.suspend.tag, tag)) { @@ -731,17 +676,15 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset) return bdrv_truncate(bs->file->bs, offset); } -static void blkdebug_refresh_filename(BlockDriverState *bs) +static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options) { QDict *opts; const QDictEntry *e; bool force_json = false; - for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { + for (e = qdict_first(options); e; e = qdict_next(options, e)) { if (strcmp(qdict_entry_key(e), "config") && - strcmp(qdict_entry_key(e), "x-image") && - strcmp(qdict_entry_key(e), "image") && - strncmp(qdict_entry_key(e), "image.", strlen("image."))) + strcmp(qdict_entry_key(e), "x-image")) { force_json = true; break; @@ -757,7 +700,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) if (!force_json && bs->file->bs->exact_filename[0]) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), "blkdebug:%s:%s", - qdict_get_try_str(bs->options, "config") ?: "", + qdict_get_try_str(options, "config") ?: "", bs->file->bs->exact_filename); } @@ -767,11 +710,8 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) QINCREF(bs->file->bs->full_open_options); qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options)); - for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { - if (strcmp(qdict_entry_key(e), "x-image") && - strcmp(qdict_entry_key(e), "image") && - strncmp(qdict_entry_key(e), "image.", strlen("image."))) - { + for (e = qdict_first(options); e; e = qdict_next(options, e)) { + if (strcmp(qdict_entry_key(e), "x-image")) { qobject_incref(qdict_entry_value(e)); qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e)); } @@ -780,6 +720,12 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) bs->full_open_options = opts; } +static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp) +{ + return 0; +} + static BlockDriver bdrv_blkdebug = { .format_name = "blkdebug", .protocol_name = "blkdebug", @@ -788,6 +734,7 @@ static BlockDriver bdrv_blkdebug = { .bdrv_parse_filename = blkdebug_parse_filename, .bdrv_file_open = blkdebug_open, .bdrv_close = blkdebug_close, + .bdrv_reopen_prepare = blkdebug_reopen_prepare, .bdrv_getlength = blkdebug_getlength, .bdrv_truncate = blkdebug_truncate, .bdrv_refresh_filename = blkdebug_refresh_filename, diff --git a/block/blkreplay.c b/block/blkreplay.c new file mode 100755 index 0000000000..42f1813af1 --- /dev/null +++ b/block/blkreplay.c @@ -0,0 +1,160 @@ +/* + * Block protocol for record/replay + * + * Copyright (c) 2010-2016 Institute for System Programming + * of the Russian Academy of Sciences. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "block/block_int.h" +#include "sysemu/replay.h" +#include "qapi/error.h" + +typedef struct Request { + Coroutine *co; + QEMUBH *bh; +} Request; + +/* Next request id. + This counter is global, because requests from different + block devices should not get overlapping ids. */ +static uint64_t request_id; + +static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + Error *local_err = NULL; + int ret; + + /* Open the image file */ + bs->file = bdrv_open_child(NULL, options, "image", + bs, &child_file, false, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + ret = 0; +fail: + if (ret < 0) { + bdrv_unref_child(bs, bs->file); + } + return ret; +} + +static void blkreplay_close(BlockDriverState *bs) +{ +} + +static int64_t blkreplay_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +/* This bh is used for synchronization of return from coroutines. + It continues yielded coroutine which then finishes its execution. + BH is called adjusted to some replay checkpoint, therefore + record and replay will always finish coroutines deterministically. +*/ +static void blkreplay_bh_cb(void *opaque) +{ + Request *req = opaque; + qemu_coroutine_enter(req->co, NULL); + qemu_bh_delete(req->bh); + g_free(req); +} + +static void block_request_create(uint64_t reqid, BlockDriverState *bs, + Coroutine *co) +{ + Request *req = g_new(Request, 1); + *req = (Request) { + .co = co, + .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req), + }; + replay_block_event(req->bh, reqid); +} + +static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + uint64_t reqid = request_id++; + int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + uint64_t reqid = request_id++; + int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) +{ + uint64_t reqid = request_id++; + int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors) +{ + uint64_t reqid = request_id++; + int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs) +{ + uint64_t reqid = request_id++; + int ret = bdrv_co_flush(bs->file->bs); + block_request_create(reqid, bs, qemu_coroutine_self()); + qemu_coroutine_yield(); + + return ret; +} + +static BlockDriver bdrv_blkreplay = { + .format_name = "blkreplay", + .protocol_name = "blkreplay", + .instance_size = 0, + + .bdrv_file_open = blkreplay_open, + .bdrv_close = blkreplay_close, + .bdrv_getlength = blkreplay_getlength, + + .bdrv_co_readv = blkreplay_co_readv, + .bdrv_co_writev = blkreplay_co_writev, + + .bdrv_co_write_zeroes = blkreplay_co_write_zeroes, + .bdrv_co_discard = blkreplay_co_discard, + .bdrv_co_flush = blkreplay_co_flush, +}; + +static void bdrv_blkreplay_init(void) +{ + bdrv_register(&bdrv_blkreplay); +} + +block_init(bdrv_blkreplay_init); diff --git a/block/blkverify.c b/block/blkverify.c index c5f8e8dcba..9414b7a84e 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -7,11 +7,13 @@ * See the COPYING file in the top-level directory. */ -#include <stdarg.h> +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu/sockets.h" /* for EINPROGRESS on Windows */ #include "block/block_int.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" typedef struct { BdrvChild *test_file; @@ -307,7 +309,7 @@ static void blkverify_attach_aio_context(BlockDriverState *bs, bdrv_attach_aio_context(s->test_file->bs, new_context); } -static void blkverify_refresh_filename(BlockDriverState *bs) +static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options) { BDRVBlkverifyState *s = bs->opaque; diff --git a/block/block-backend.c b/block/block-backend.c index 36ccc9e616..16c9d5e0f2 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -10,6 +10,7 @@ * or later. See the COPYING.LIB file in the top-level directory. */ +#include "qemu/osdep.h" #include "sysemu/block-backend.h" #include "block/block_int.h" #include "block/blockjob.h" @@ -17,18 +18,22 @@ #include "sysemu/blockdev.h" #include "sysemu/sysemu.h" #include "qapi-event.h" +#include "qemu/id.h" /* Number of coroutines to reserve per attached device model */ #define COROUTINE_POOL_RESERVATION 64 +#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ + static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); struct BlockBackend { char *name; int refcnt; - BlockDriverState *bs; + BdrvChild *root; DriveInfo *legacy_dinfo; /* null unless created by drive_new() */ - QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */ + QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */ + QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */ void *dev; /* attached device model, if any */ /* TODO change to DeviceState when all users are qdevified */ @@ -42,12 +47,18 @@ struct BlockBackend { * can be used to restore those options in the new BDS on insert) */ BlockBackendRootState root_state; + bool enable_write_cache; + /* I/O stats (display with "info blockstats"). */ BlockAcctStats stats; BlockdevOnError on_read_error, on_write_error; bool iostatus_enabled; BlockDeviceIoStatus iostatus; + + bool allow_write_beyond_eof; + + NotifierList remove_bs_notifiers, insert_bs_notifiers; }; typedef struct BlockBackendAIOCB { @@ -64,41 +75,40 @@ static const AIOCBInfo block_backend_aiocb_info = { static void drive_info_del(DriveInfo *dinfo); -/* All the BlockBackends (except for hidden ones) */ -static QTAILQ_HEAD(, BlockBackend) blk_backends = - QTAILQ_HEAD_INITIALIZER(blk_backends); +/* All BlockBackends */ +static QTAILQ_HEAD(, BlockBackend) block_backends = + QTAILQ_HEAD_INITIALIZER(block_backends); + +/* All BlockBackends referenced by the monitor and which are iterated through by + * blk_next() */ +static QTAILQ_HEAD(, BlockBackend) monitor_block_backends = + QTAILQ_HEAD_INITIALIZER(monitor_block_backends); + +static void blk_root_inherit_options(int *child_flags, QDict *child_options, + int parent_flags, QDict *parent_options) +{ + /* We're not supposed to call this function for root nodes */ + abort(); +} + +static const BdrvChildRole child_root = { + .inherit_options = blk_root_inherit_options, +}; /* - * Create a new BlockBackend with @name, with a reference count of one. - * @name must not be null or empty. - * Fail if a BlockBackend with this name already exists. + * Create a new BlockBackend with a reference count of one. * Store an error through @errp on failure, unless it's null. * Return the new BlockBackend on success, null on failure. */ -BlockBackend *blk_new(const char *name, Error **errp) +BlockBackend *blk_new(Error **errp) { BlockBackend *blk; - assert(name && name[0]); - if (!id_wellformed(name)) { - error_setg(errp, "Invalid device name"); - return NULL; - } - if (blk_by_name(name)) { - error_setg(errp, "Device with id '%s' already exists", name); - return NULL; - } - if (bdrv_find_node(name)) { - error_setg(errp, - "Device name '%s' conflicts with an existing node name", - name); - return NULL; - } - blk = g_new0(BlockBackend, 1); - blk->name = g_strdup(name); blk->refcnt = 1; - QTAILQ_INSERT_TAIL(&blk_backends, blk, link); + notifier_list_init(&blk->remove_bs_notifiers); + notifier_list_init(&blk->insert_bs_notifiers); + QTAILQ_INSERT_TAIL(&block_backends, blk, link); return blk; } @@ -106,18 +116,18 @@ BlockBackend *blk_new(const char *name, Error **errp) * Create a new BlockBackend with a new BlockDriverState attached. * Otherwise just like blk_new(), which see. */ -BlockBackend *blk_new_with_bs(const char *name, Error **errp) +BlockBackend *blk_new_with_bs(Error **errp) { BlockBackend *blk; BlockDriverState *bs; - blk = blk_new(name, errp); + blk = blk_new(errp); if (!blk) { return NULL; } bs = bdrv_new_root(); - blk->bs = bs; + blk->root = bdrv_root_attach_child(bs, "root", &child_root); bs->blk = blk; return blk; } @@ -134,47 +144,44 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp) * though, so callers of this function have to be able to specify @filename and * @flags. */ -BlockBackend *blk_new_open(const char *name, const char *filename, - const char *reference, QDict *options, int flags, - Error **errp) +BlockBackend *blk_new_open(const char *filename, const char *reference, + QDict *options, int flags, Error **errp) { BlockBackend *blk; int ret; - blk = blk_new_with_bs(name, errp); + blk = blk_new_with_bs(errp); if (!blk) { QDECREF(options); return NULL; } - ret = bdrv_open(&blk->bs, filename, reference, options, flags, errp); + ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp); if (ret < 0) { blk_unref(blk); return NULL; } + blk_set_enable_write_cache(blk, true); + return blk; } static void blk_delete(BlockBackend *blk) { assert(!blk->refcnt); + assert(!blk->name); assert(!blk->dev); - if (blk->bs) { - assert(blk->bs->blk == blk); - blk->bs->blk = NULL; - bdrv_unref(blk->bs); - blk->bs = NULL; + if (blk->root) { + blk_remove_bs(blk); } + assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers)); + assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers)); if (blk->root_state.throttle_state) { g_free(blk->root_state.throttle_group); throttle_group_unref(blk->root_state.throttle_state); } - /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */ - if (blk->name[0]) { - QTAILQ_REMOVE(&blk_backends, blk, link); - } - g_free(blk->name); + QTAILQ_REMOVE(&block_backends, blk, link); drive_info_del(blk->legacy_dinfo); block_acct_cleanup(&blk->stats); g_free(blk); @@ -220,7 +227,32 @@ void blk_unref(BlockBackend *blk) } /* - * Return the BlockBackend after @blk. + * Behaves similarly to blk_next() but iterates over all BlockBackends, even the + * ones which are hidden (i.e. are not referenced by the monitor). + */ +static BlockBackend *blk_all_next(BlockBackend *blk) +{ + return blk ? QTAILQ_NEXT(blk, link) + : QTAILQ_FIRST(&block_backends); +} + +void blk_remove_all_bs(void) +{ + BlockBackend *blk = NULL; + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *ctx = blk_get_aio_context(blk); + + aio_context_acquire(ctx); + if (blk->root) { + blk_remove_bs(blk); + } + aio_context_release(ctx); + } +} + +/* + * Return the monitor-owned BlockBackend after @blk. * If @blk is null, return the first one. * Else, return @blk's next sibling, which may be null. * @@ -231,17 +263,91 @@ void blk_unref(BlockBackend *blk) */ BlockBackend *blk_next(BlockBackend *blk) { - return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends); + return blk ? QTAILQ_NEXT(blk, monitor_link) + : QTAILQ_FIRST(&monitor_block_backends); +} + +/* + * Iterates over all BlockDriverStates which are attached to a BlockBackend. + * This function is for use by bdrv_next(). + * + * @bs must be NULL or a BDS that is attached to a BB. + */ +BlockDriverState *blk_next_root_bs(BlockDriverState *bs) +{ + BlockBackend *blk; + + if (bs) { + assert(bs->blk); + blk = bs->blk; + } else { + blk = NULL; + } + + do { + blk = blk_all_next(blk); + } while (blk && !blk->root); + + return blk ? blk->root->bs : NULL; +} + +/* + * Add a BlockBackend into the list of backends referenced by the monitor, with + * the given @name acting as the handle for the monitor. + * Strictly for use by blockdev.c. + * + * @name must not be null or empty. + * + * Returns true on success and false on failure. In the latter case, an Error + * object is returned through @errp. + */ +bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp) +{ + assert(!blk->name); + assert(name && name[0]); + + if (!id_wellformed(name)) { + error_setg(errp, "Invalid device name"); + return false; + } + if (blk_by_name(name)) { + error_setg(errp, "Device with id '%s' already exists", name); + return false; + } + if (bdrv_find_node(name)) { + error_setg(errp, + "Device name '%s' conflicts with an existing node name", + name); + return false; + } + + blk->name = g_strdup(name); + QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link); + return true; +} + +/* + * Remove a BlockBackend from the list of backends referenced by the monitor. + * Strictly for use by blockdev.c. + */ +void monitor_remove_blk(BlockBackend *blk) +{ + if (!blk->name) { + return; + } + + QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link); + g_free(blk->name); + blk->name = NULL; } /* * Return @blk's name, a non-null string. - * Wart: the name is empty iff @blk has been hidden with - * blk_hide_on_behalf_of_hmp_drive_del(). + * Returns an empty string iff @blk is not referenced by the monitor. */ const char *blk_name(BlockBackend *blk) { - return blk->name; + return blk->name ?: ""; } /* @@ -250,10 +356,10 @@ const char *blk_name(BlockBackend *blk) */ BlockBackend *blk_by_name(const char *name) { - BlockBackend *blk; + BlockBackend *blk = NULL; assert(name); - QTAILQ_FOREACH(blk, &blk_backends, link) { + while ((blk = blk_next(blk)) != NULL) { if (!strcmp(name, blk->name)) { return blk; } @@ -266,24 +372,7 @@ BlockBackend *blk_by_name(const char *name) */ BlockDriverState *blk_bs(BlockBackend *blk) { - return blk->bs; -} - -/* - * Changes the BlockDriverState attached to @blk - */ -void blk_set_bs(BlockBackend *blk, BlockDriverState *bs) -{ - bdrv_ref(bs); - - if (blk->bs) { - blk->bs->blk = NULL; - bdrv_unref(blk->bs); - } - assert(bs->blk == NULL); - - blk->bs = bs; - bs->blk = blk; + return blk->root ? blk->root->bs : NULL; } /* @@ -311,9 +400,9 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo) */ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) { - BlockBackend *blk; + BlockBackend *blk = NULL; - QTAILQ_FOREACH(blk, &blk_backends, link) { + while ((blk = blk_next(blk)) != NULL) { if (blk->legacy_dinfo == dinfo) { return blk; } @@ -322,33 +411,19 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo) } /* - * Hide @blk. - * @blk must not have been hidden already. - * Make attached BlockDriverState, if any, anonymous. - * Once hidden, @blk is invisible to all functions that don't receive - * it as argument. For example, blk_by_name() won't return it. - * Strictly for use by do_drive_del(). - * TODO get rid of it! - */ -void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk) -{ - QTAILQ_REMOVE(&blk_backends, blk, link); - blk->name[0] = 0; - if (blk->bs) { - bdrv_make_anon(blk->bs); - } -} - -/* * Disassociates the currently associated BlockDriverState from @blk. */ void blk_remove_bs(BlockBackend *blk) { + assert(blk->root->bs->blk == blk); + + notifier_list_notify(&blk->remove_bs_notifiers, blk); + blk_update_root_state(blk); - blk->bs->blk = NULL; - bdrv_unref(blk->bs); - blk->bs = NULL; + blk->root->bs->blk = NULL; + bdrv_root_unref_child(blk->root); + blk->root = NULL; } /* @@ -356,10 +431,12 @@ void blk_remove_bs(BlockBackend *blk) */ void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) { - assert(!blk->bs && !bs->blk); + assert(!blk->root && !bs->blk); bdrv_ref(bs); - blk->bs = bs; + blk->root = bdrv_root_attach_child(bs, "root", &child_root); bs->blk = blk; + + notifier_list_notify(&blk->insert_bs_notifiers, blk); } /* @@ -458,6 +535,14 @@ bool blk_dev_has_removable_media(BlockBackend *blk) } /* + * Does @blk's attached device model have a tray? + */ +bool blk_dev_has_tray(BlockBackend *blk) +{ + return blk->dev_ops && blk->dev_ops->is_tray_open; +} + +/* * Notify @blk's attached device model of a media eject request. * If @force is true, the medium is about to be yanked out forcefully. */ @@ -473,7 +558,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force) */ bool blk_dev_is_tray_open(BlockBackend *blk) { - if (blk->dev_ops && blk->dev_ops->is_tray_open) { + if (blk_dev_has_tray(blk)) { return blk->dev_ops->is_tray_open(blk->dev_opaque); } return false; @@ -530,9 +615,10 @@ void blk_iostatus_disable(BlockBackend *blk) void blk_iostatus_reset(BlockBackend *blk) { if (blk_iostatus_is_enabled(blk)) { + BlockDriverState *bs = blk_bs(blk); blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; - if (blk->bs && blk->bs->job) { - block_job_iostatus_reset(blk->bs->job); + if (bs && bs->job) { + block_job_iostatus_reset(bs->job); } } } @@ -546,6 +632,11 @@ void blk_iostatus_set_err(BlockBackend *blk, int error) } } +void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow) +{ + blk->allow_write_beyond_eof = allow; +} + static int blk_check_byte_request(BlockBackend *blk, int64_t offset, size_t size) { @@ -559,17 +650,19 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return -ENOMEDIUM; } - len = blk_getlength(blk); - if (len < 0) { - return len; - } - if (offset < 0) { return -EIO; } - if (offset > len || len - offset < size) { - return -EIO; + if (!blk->allow_write_beyond_eof) { + len = blk_getlength(blk); + if (len < 0) { + return len; + } + + if (offset > len || len - offset < size) { + return -EIO; + } } return 0; @@ -590,48 +683,144 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num, nb_sectors * BDRV_SECTOR_SIZE); } -int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors) +static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - int ret = blk_check_request(blk, sector_num, nb_sectors); + int ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } - return bdrv_read(blk->bs, sector_num, buf, nb_sectors); + return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags); } -int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf, - int nb_sectors) +static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset, + unsigned int bytes, QEMUIOVector *qiov, + BdrvRequestFlags flags) { - int ret = blk_check_request(blk, sector_num, nb_sectors); + int ret; + + ret = blk_check_byte_request(blk, offset, bytes); if (ret < 0) { return ret; } - return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors); + if (!blk->enable_write_cache) { + flags |= BDRV_REQ_FUA; + } + + return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags); } -int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, - int nb_sectors) +typedef struct BlkRwCo { + BlockBackend *blk; + int64_t offset; + QEMUIOVector *qiov; + int ret; + BdrvRequestFlags flags; +} BlkRwCo; + +static void blk_read_entry(void *opaque) { - int ret = blk_check_request(blk, sector_num, nb_sectors); - if (ret < 0) { - return ret; + BlkRwCo *rwco = opaque; + + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size, + rwco->qiov, rwco->flags); +} + +static void blk_write_entry(void *opaque) +{ + BlkRwCo *rwco = opaque; + + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size, + rwco->qiov, rwco->flags); +} + +static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf, + int64_t bytes, CoroutineEntry co_entry, + BdrvRequestFlags flags) +{ + AioContext *aio_context; + QEMUIOVector qiov; + struct iovec iov; + Coroutine *co; + BlkRwCo rwco; + + iov = (struct iovec) { + .iov_base = buf, + .iov_len = bytes, + }; + qemu_iovec_init_external(&qiov, &iov, 1); + + rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .qiov = &qiov, + .flags = flags, + .ret = NOT_DONE, + }; + + co = qemu_coroutine_create(co_entry); + qemu_coroutine_enter(co, &rwco); + + aio_context = blk_get_aio_context(blk); + while (rwco.ret == NOT_DONE) { + aio_poll(aio_context, true); } - return bdrv_write(blk->bs, sector_num, buf, nb_sectors); + return rwco.ret; } -int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, - int nb_sectors, BdrvRequestFlags flags) +static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf, + int nb_sectors, CoroutineEntry co_entry, + BdrvRequestFlags flags) { - int ret = blk_check_request(blk, sector_num, nb_sectors); + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return -EINVAL; + } + + return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf, + nb_sectors << BDRV_SECTOR_BITS, co_entry, flags); +} + +int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf, + int nb_sectors) +{ + return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0); +} + +int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf, + int nb_sectors) +{ + BlockDriverState *bs = blk_bs(blk); + bool enabled; + int ret; + + ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { return ret; } - return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags); + enabled = bs->io_limits_enabled; + bs->io_limits_enabled = false; + ret = blk_read(blk, sector_num, buf, nb_sectors); + bs->io_limits_enabled = enabled; + return ret; +} + +int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf, + int nb_sectors) +{ + return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors, + blk_write_entry, 0); +} + +int blk_write_zeroes(BlockBackend *blk, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags) +{ + return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry, + flags | BDRV_REQ_ZERO_WRITE); } static void error_callback_bh(void *opaque) @@ -660,37 +849,119 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, return &acb->common; } +typedef struct BlkAioEmAIOCB { + BlockAIOCB common; + BlkRwCo rwco; + int bytes; + bool has_returned; + QEMUBH* bh; +} BlkAioEmAIOCB; + +static const AIOCBInfo blk_aio_em_aiocb_info = { + .aiocb_size = sizeof(BlkAioEmAIOCB), +}; + +static void blk_aio_complete(BlkAioEmAIOCB *acb) +{ + if (acb->bh) { + assert(acb->has_returned); + qemu_bh_delete(acb->bh); + } + if (acb->has_returned) { + acb->common.cb(acb->common.opaque, acb->rwco.ret); + qemu_aio_unref(acb); + } +} + +static void blk_aio_complete_bh(void *opaque) +{ + blk_aio_complete(opaque); +} + +static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes, + QEMUIOVector *qiov, CoroutineEntry co_entry, + BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque) +{ + BlkAioEmAIOCB *acb; + Coroutine *co; + + acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque); + acb->rwco = (BlkRwCo) { + .blk = blk, + .offset = offset, + .qiov = qiov, + .flags = flags, + .ret = NOT_DONE, + }; + acb->bytes = bytes; + acb->bh = NULL; + acb->has_returned = false; + + co = qemu_coroutine_create(co_entry); + qemu_coroutine_enter(co, acb); + + acb->has_returned = true; + if (acb->rwco.ret != NOT_DONE) { + acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb); + qemu_bh_schedule(acb->bh); + } + + return &acb->common; +} + +static void blk_aio_read_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + assert(rwco->qiov->size == acb->bytes); + rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes, + rwco->qiov, rwco->flags); + blk_aio_complete(acb); +} + +static void blk_aio_write_entry(void *opaque) +{ + BlkAioEmAIOCB *acb = opaque; + BlkRwCo *rwco = &acb->rwco; + + assert(!rwco->qiov || rwco->qiov->size == acb->bytes); + rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes, + rwco->qiov, rwco->flags); + blk_aio_complete(acb); +} + BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags, BlockCompletionFunc *cb, void *opaque) { - int ret = blk_check_request(blk, sector_num, nb_sectors); - if (ret < 0) { - return blk_abort_aio_request(blk, cb, opaque, ret); + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return blk_abort_aio_request(blk, cb, opaque, -EINVAL); } - return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags, - cb, opaque); + return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, NULL, + blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE, + cb, opaque); } int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count) { - int ret = blk_check_byte_request(blk, offset, count); + int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0); if (ret < 0) { return ret; } - - return bdrv_pread(blk->bs, offset, buf, count); + return count; } int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count) { - int ret = blk_check_byte_request(blk, offset, count); + int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0); if (ret < 0) { return ret; } - - return bdrv_pwrite(blk->bs, offset, buf, count); + return count; } int64_t blk_getlength(BlockBackend *blk) @@ -699,15 +970,15 @@ int64_t blk_getlength(BlockBackend *blk) return -ENOMEDIUM; } - return bdrv_getlength(blk->bs); + return bdrv_getlength(blk_bs(blk)); } void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) { - if (!blk->bs) { + if (!blk_bs(blk)) { *nb_sectors_ptr = 0; } else { - bdrv_get_geometry(blk->bs, nb_sectors_ptr); + bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr); } } @@ -717,31 +988,33 @@ int64_t blk_nb_sectors(BlockBackend *blk) return -ENOMEDIUM; } - return bdrv_nb_sectors(blk->bs); + return bdrv_nb_sectors(blk_bs(blk)); } BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque) { - int ret = blk_check_request(blk, sector_num, nb_sectors); - if (ret < 0) { - return blk_abort_aio_request(blk, cb, opaque, ret); + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return blk_abort_aio_request(blk, cb, opaque, -EINVAL); } - return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque); + assert(nb_sectors << BDRV_SECTOR_BITS == iov->size); + return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov, + blk_aio_read_entry, 0, cb, opaque); } BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, QEMUIOVector *iov, int nb_sectors, BlockCompletionFunc *cb, void *opaque) { - int ret = blk_check_request(blk, sector_num, nb_sectors); - if (ret < 0) { - return blk_abort_aio_request(blk, cb, opaque, ret); + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return blk_abort_aio_request(blk, cb, opaque, -EINVAL); } - return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque); + assert(nb_sectors << BDRV_SECTOR_BITS == iov->size); + return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov, + blk_aio_write_entry, 0, cb, opaque); } BlockAIOCB *blk_aio_flush(BlockBackend *blk, @@ -751,7 +1024,7 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk, return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } - return bdrv_aio_flush(blk->bs, cb, opaque); + return bdrv_aio_flush(blk_bs(blk), cb, opaque); } BlockAIOCB *blk_aio_discard(BlockBackend *blk, @@ -763,7 +1036,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk, return blk_abort_aio_request(blk, cb, opaque, ret); } - return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque); + return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque); } void blk_aio_cancel(BlockAIOCB *acb) @@ -787,7 +1060,7 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs) } } - return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs); + return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs); } int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) @@ -796,7 +1069,7 @@ int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) return -ENOMEDIUM; } - return bdrv_ioctl(blk->bs, req, buf); + return bdrv_ioctl(blk_bs(blk), req, buf); } BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, @@ -806,7 +1079,7 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); } - return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque); + return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque); } int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) @@ -816,7 +1089,7 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) return ret; } - return bdrv_co_discard(blk->bs, sector_num, nb_sectors); + return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors); } int blk_co_flush(BlockBackend *blk) @@ -825,7 +1098,7 @@ int blk_co_flush(BlockBackend *blk) return -ENOMEDIUM; } - return bdrv_co_flush(blk->bs); + return bdrv_co_flush(blk_bs(blk)); } int blk_flush(BlockBackend *blk) @@ -834,18 +1107,13 @@ int blk_flush(BlockBackend *blk) return -ENOMEDIUM; } - return bdrv_flush(blk->bs); -} - -int blk_flush_all(void) -{ - return bdrv_flush_all(); + return bdrv_flush(blk_bs(blk)); } void blk_drain(BlockBackend *blk) { - if (blk->bs) { - bdrv_drain(blk->bs); + if (blk_bs(blk)) { + bdrv_drain(blk_bs(blk)); } } @@ -933,8 +1201,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action, int blk_is_read_only(BlockBackend *blk) { - if (blk->bs) { - return bdrv_is_read_only(blk->bs); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + return bdrv_is_read_only(bs); } else { return blk->root_state.read_only; } @@ -942,48 +1212,42 @@ int blk_is_read_only(BlockBackend *blk) int blk_is_sg(BlockBackend *blk) { - if (!blk->bs) { + BlockDriverState *bs = blk_bs(blk); + + if (!bs) { return 0; } - return bdrv_is_sg(blk->bs); + return bdrv_is_sg(bs); } int blk_enable_write_cache(BlockBackend *blk) { - if (blk->bs) { - return bdrv_enable_write_cache(blk->bs); - } else { - return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB); - } + return blk->enable_write_cache; } void blk_set_enable_write_cache(BlockBackend *blk, bool wce) { - if (blk->bs) { - bdrv_set_enable_write_cache(blk->bs, wce); - } else { - if (wce) { - blk->root_state.open_flags |= BDRV_O_CACHE_WB; - } else { - blk->root_state.open_flags &= ~BDRV_O_CACHE_WB; - } - } + blk->enable_write_cache = wce; } void blk_invalidate_cache(BlockBackend *blk, Error **errp) { - if (!blk->bs) { + BlockDriverState *bs = blk_bs(blk); + + if (!bs) { error_setg(errp, "Device '%s' has no medium", blk->name); return; } - bdrv_invalidate_cache(blk->bs, errp); + bdrv_invalidate_cache(bs, errp); } bool blk_is_inserted(BlockBackend *blk) { - return blk->bs && bdrv_is_inserted(blk->bs); + BlockDriverState *bs = blk_bs(blk); + + return bs && bdrv_is_inserted(bs); } bool blk_is_available(BlockBackend *blk) @@ -993,22 +1257,28 @@ bool blk_is_available(BlockBackend *blk) void blk_lock_medium(BlockBackend *blk, bool locked) { - if (blk->bs) { - bdrv_lock_medium(blk->bs, locked); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_lock_medium(bs, locked); } } void blk_eject(BlockBackend *blk, bool eject_flag) { - if (blk->bs) { - bdrv_eject(blk->bs, eject_flag); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_eject(bs, eject_flag); } } int blk_get_flags(BlockBackend *blk) { - if (blk->bs) { - return bdrv_get_flags(blk->bs); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + return bdrv_get_flags(bs); } else { return blk->root_state.open_flags; } @@ -1016,57 +1286,79 @@ int blk_get_flags(BlockBackend *blk) int blk_get_max_transfer_length(BlockBackend *blk) { - if (blk->bs) { - return blk->bs->bl.max_transfer_length; + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + return bs->bl.max_transfer_length; } else { return 0; } } +int blk_get_max_iov(BlockBackend *blk) +{ + return blk->root->bs->bl.max_iov; +} + void blk_set_guest_block_size(BlockBackend *blk, int align) { blk->guest_block_size = align; } +void *blk_try_blockalign(BlockBackend *blk, size_t size) +{ + return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size); +} + void *blk_blockalign(BlockBackend *blk, size_t size) { - return qemu_blockalign(blk ? blk->bs : NULL, size); + return qemu_blockalign(blk ? blk_bs(blk) : NULL, size); } bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) { - if (!blk->bs) { + BlockDriverState *bs = blk_bs(blk); + + if (!bs) { return false; } - return bdrv_op_is_blocked(blk->bs, op, errp); + return bdrv_op_is_blocked(bs, op, errp); } void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) { - if (blk->bs) { - bdrv_op_unblock(blk->bs, op, reason); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_op_unblock(bs, op, reason); } } void blk_op_block_all(BlockBackend *blk, Error *reason) { - if (blk->bs) { - bdrv_op_block_all(blk->bs, reason); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_op_block_all(bs, reason); } } void blk_op_unblock_all(BlockBackend *blk, Error *reason) { - if (blk->bs) { - bdrv_op_unblock_all(blk->bs, reason); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_op_unblock_all(bs, reason); } } AioContext *blk_get_aio_context(BlockBackend *blk) { - if (blk->bs) { - return bdrv_get_aio_context(blk->bs); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + return bdrv_get_aio_context(bs); } else { return qemu_get_aio_context(); } @@ -1080,8 +1372,10 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) { - if (blk->bs) { - bdrv_set_aio_context(blk->bs, new_context); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_set_aio_context(bs, new_context); } } @@ -1089,8 +1383,10 @@ void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) { - if (blk->bs) { - bdrv_add_aio_context_notifier(blk->bs, attached_aio_context, + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_add_aio_context_notifier(bs, attached_aio_context, detach_aio_context, opaque); } } @@ -1101,30 +1397,39 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, void (*detach_aio_context)(void *), void *opaque) { - if (blk->bs) { - bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context, + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_remove_aio_context_notifier(bs, attached_aio_context, detach_aio_context, opaque); } } -void blk_add_close_notifier(BlockBackend *blk, Notifier *notify) +void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify) { - if (blk->bs) { - bdrv_add_close_notifier(blk->bs, notify); - } + notifier_list_add(&blk->remove_bs_notifiers, notify); +} + +void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify) +{ + notifier_list_add(&blk->insert_bs_notifiers, notify); } void blk_io_plug(BlockBackend *blk) { - if (blk->bs) { - bdrv_io_plug(blk->bs); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_io_plug(bs); } } void blk_io_unplug(BlockBackend *blk) { - if (blk->bs) { - bdrv_io_unplug(blk->bs); + BlockDriverState *bs = blk_bs(blk); + + if (bs) { + bdrv_io_unplug(bs); } } @@ -1142,12 +1447,13 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { - int ret = blk_check_request(blk, sector_num, nb_sectors); - if (ret < 0) { - return ret; + if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) { + return -EINVAL; } - return bdrv_co_write_zeroes(blk->bs, sector_num, nb_sectors, flags); + return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS, + nb_sectors << BDRV_SECTOR_BITS, NULL, + flags | BDRV_REQ_ZERO_WRITE); } int blk_write_compressed(BlockBackend *blk, int64_t sector_num, @@ -1158,7 +1464,7 @@ int blk_write_compressed(BlockBackend *blk, int64_t sector_num, return ret; } - return bdrv_write_compressed(blk->bs, sector_num, buf, nb_sectors); + return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors); } int blk_truncate(BlockBackend *blk, int64_t offset) @@ -1167,7 +1473,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset) return -ENOMEDIUM; } - return bdrv_truncate(blk->bs, offset); + return bdrv_truncate(blk_bs(blk), offset); } int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) @@ -1177,17 +1483,28 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) return ret; } - return bdrv_discard(blk->bs, sector_num, nb_sectors); + return bdrv_discard(blk_bs(blk), sector_num, nb_sectors); } int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size) { + int ret; + if (!blk_is_available(blk)) { return -ENOMEDIUM; } - return bdrv_save_vmstate(blk->bs, buf, pos, size); + ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size); + if (ret < 0) { + return ret; + } + + if (ret == size && !blk->enable_write_cache) { + ret = bdrv_flush(blk_bs(blk)); + } + + return ret < 0 ? ret : size; } int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) @@ -1196,7 +1513,7 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) return -ENOMEDIUM; } - return bdrv_load_vmstate(blk->bs, buf, pos, size); + return bdrv_load_vmstate(blk_bs(blk), buf, pos, size); } int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) @@ -1205,7 +1522,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) return -ENOMEDIUM; } - return bdrv_probe_blocksizes(blk->bs, bsz); + return bdrv_probe_blocksizes(blk_bs(blk), bsz); } int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) @@ -1214,7 +1531,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) return -ENOMEDIUM; } - return bdrv_probe_geometry(blk->bs, geo); + return bdrv_probe_geometry(blk_bs(blk), geo); } /* @@ -1223,18 +1540,18 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) */ void blk_update_root_state(BlockBackend *blk) { - assert(blk->bs); + assert(blk->root); - blk->root_state.open_flags = blk->bs->open_flags; - blk->root_state.read_only = blk->bs->read_only; - blk->root_state.detect_zeroes = blk->bs->detect_zeroes; + blk->root_state.open_flags = blk->root->bs->open_flags; + blk->root_state.read_only = blk->root->bs->read_only; + blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes; if (blk->root_state.throttle_group) { g_free(blk->root_state.throttle_group); throttle_group_unref(blk->root_state.throttle_state); } - if (blk->bs->throttle_state) { - const char *name = throttle_group_get_name(blk->bs); + if (blk->root->bs->throttle_state) { + const char *name = throttle_group_get_name(blk->root->bs); blk->root_state.throttle_group = g_strdup(name); blk->root_state.throttle_state = throttle_group_incref(name); } else { @@ -1274,3 +1591,45 @@ BlockBackendRootState *blk_get_root_state(BlockBackend *blk) { return &blk->root_state; } + +int blk_commit_all(void) +{ + BlockBackend *blk = NULL; + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *aio_context = blk_get_aio_context(blk); + + aio_context_acquire(aio_context); + if (blk_is_inserted(blk) && blk->root->bs->backing) { + int ret = bdrv_commit(blk->root->bs); + if (ret < 0) { + aio_context_release(aio_context); + return ret; + } + } + aio_context_release(aio_context); + } + return 0; +} + +int blk_flush_all(void) +{ + BlockBackend *blk = NULL; + int result = 0; + + while ((blk = blk_all_next(blk)) != NULL) { + AioContext *aio_context = blk_get_aio_context(blk); + int ret; + + aio_context_acquire(aio_context); + if (blk_is_inserted(blk)) { + ret = blk_flush(blk); + if (ret < 0 && !result) { + result = ret; + } + } + aio_context_release(aio_context); + } + + return result; +} diff --git a/block/bochs.c b/block/bochs.c index 18949b9d4f..af8b7abdfd 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -22,6 +22,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" #include "qemu/module.h" diff --git a/block/cloop.c b/block/cloop.c index 4190ae06d7..a84f14019c 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -21,6 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" #include "qemu/module.h" diff --git a/block/commit.c b/block/commit.c index a5d02aa560..cba0e8c1e8 100644 --- a/block/commit.c +++ b/block/commit.c @@ -12,9 +12,11 @@ * */ +#include "qemu/osdep.h" #include "trace.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "sysemu/block-backend.h" diff --git a/block/crypto.c b/block/crypto.c new file mode 100644 index 0000000000..1903e84fbd --- /dev/null +++ b/block/crypto.c @@ -0,0 +1,586 @@ +/* + * QEMU block full disk encryption + * + * Copyright (c) 2015-2016 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/osdep.h" + +#include "block/block_int.h" +#include "sysemu/block-backend.h" +#include "crypto/block.h" +#include "qapi/opts-visitor.h" +#include "qapi-visit.h" +#include "qapi/error.h" + +#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret" +#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg" +#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode" +#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg" +#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg" +#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg" + +typedef struct BlockCrypto BlockCrypto; + +struct BlockCrypto { + QCryptoBlock *block; +}; + + +static int block_crypto_probe_generic(QCryptoBlockFormat format, + const uint8_t *buf, + int buf_size, + const char *filename) +{ + if (qcrypto_block_has_format(format, buf, buf_size)) { + return 100; + } else { + return 0; + } +} + + +static ssize_t block_crypto_read_func(QCryptoBlock *block, + size_t offset, + uint8_t *buf, + size_t buflen, + Error **errp, + void *opaque) +{ + BlockDriverState *bs = opaque; + ssize_t ret; + + ret = bdrv_pread(bs->file->bs, offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read encryption header"); + return ret; + } + return ret; +} + + +struct BlockCryptoCreateData { + const char *filename; + QemuOpts *opts; + BlockBackend *blk; + uint64_t size; +}; + + +static ssize_t block_crypto_write_func(QCryptoBlock *block, + size_t offset, + const uint8_t *buf, + size_t buflen, + Error **errp, + void *opaque) +{ + struct BlockCryptoCreateData *data = opaque; + ssize_t ret; + + ret = blk_pwrite(data->blk, offset, buf, buflen); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not write encryption header"); + return ret; + } + return ret; +} + + +static ssize_t block_crypto_init_func(QCryptoBlock *block, + size_t headerlen, + Error **errp, + void *opaque) +{ + struct BlockCryptoCreateData *data = opaque; + int ret; + + /* User provided size should reflect amount of space made + * available to the guest, so we must take account of that + * which will be used by the crypto header + */ + data->size += headerlen; + + qemu_opt_set_number(data->opts, BLOCK_OPT_SIZE, data->size, &error_abort); + ret = bdrv_create_file(data->filename, data->opts, errp); + if (ret < 0) { + return -1; + } + + data->blk = blk_new_open(data->filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + if (!data->blk) { + return -1; + } + + return 0; +} + + +static QemuOptsList block_crypto_runtime_opts_luks = { + .name = "crypto", + .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head), + .desc = { + { + .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of the secret that provides the encryption key", + }, + { /* end of list */ } + }, +}; + + +static QemuOptsList block_crypto_create_opts_luks = { + .name = "crypto", + .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size" + }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of the secret that provides the encryption key", + }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG, + .type = QEMU_OPT_STRING, + .help = "Name of encryption cipher algorithm", + }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE, + .type = QEMU_OPT_STRING, + .help = "Name of encryption cipher mode", + }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG, + .type = QEMU_OPT_STRING, + .help = "Name of IV generator algorithm", + }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG, + .type = QEMU_OPT_STRING, + .help = "Name of IV generator hash algorithm", + }, + { + .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG, + .type = QEMU_OPT_STRING, + .help = "Name of encryption hash algorithm", + }, + { /* end of list */ } + }, +}; + + +static QCryptoBlockOpenOptions * +block_crypto_open_opts_init(QCryptoBlockFormat format, + QemuOpts *opts, + Error **errp) +{ + OptsVisitor *ov; + QCryptoBlockOpenOptions *ret = NULL; + Error *local_err = NULL; + Error *end_err = NULL; + + ret = g_new0(QCryptoBlockOpenOptions, 1); + ret->format = format; + + ov = opts_visitor_new(opts); + + visit_start_struct(opts_get_visitor(ov), + NULL, NULL, 0, &local_err); + if (local_err) { + goto out; + } + + switch (format) { + case Q_CRYPTO_BLOCK_FORMAT_LUKS: + visit_type_QCryptoBlockOptionsLUKS_members( + opts_get_visitor(ov), &ret->u.luks, &local_err); + break; + + default: + error_setg(&local_err, "Unsupported block format %d", format); + break; + } + + visit_end_struct(opts_get_visitor(ov), &end_err); + error_propagate(&local_err, end_err); + + out: + if (local_err) { + error_propagate(errp, local_err); + qapi_free_QCryptoBlockOpenOptions(ret); + ret = NULL; + } + opts_visitor_cleanup(ov); + return ret; +} + + +static QCryptoBlockCreateOptions * +block_crypto_create_opts_init(QCryptoBlockFormat format, + QemuOpts *opts, + Error **errp) +{ + OptsVisitor *ov; + QCryptoBlockCreateOptions *ret = NULL; + Error *local_err = NULL; + Error *end_err = NULL; + + ret = g_new0(QCryptoBlockCreateOptions, 1); + ret->format = format; + + ov = opts_visitor_new(opts); + + visit_start_struct(opts_get_visitor(ov), + NULL, NULL, 0, &local_err); + if (local_err) { + goto out; + } + + switch (format) { + case Q_CRYPTO_BLOCK_FORMAT_LUKS: + visit_type_QCryptoBlockCreateOptionsLUKS_members( + opts_get_visitor(ov), &ret->u.luks, &local_err); + break; + + default: + error_setg(&local_err, "Unsupported block format %d", format); + break; + } + + visit_end_struct(opts_get_visitor(ov), &end_err); + error_propagate(&local_err, end_err); + + out: + if (local_err) { + error_propagate(errp, local_err); + qapi_free_QCryptoBlockCreateOptions(ret); + ret = NULL; + } + opts_visitor_cleanup(ov); + return ret; +} + + +static int block_crypto_open_generic(QCryptoBlockFormat format, + QemuOptsList *opts_spec, + BlockDriverState *bs, + QDict *options, + int flags, + Error **errp) +{ + BlockCrypto *crypto = bs->opaque; + QemuOpts *opts = NULL; + Error *local_err = NULL; + int ret = -EINVAL; + QCryptoBlockOpenOptions *open_opts = NULL; + unsigned int cflags = 0; + + opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto cleanup; + } + + open_opts = block_crypto_open_opts_init(format, opts, errp); + if (!open_opts) { + goto cleanup; + } + + if (flags & BDRV_O_NO_IO) { + cflags |= QCRYPTO_BLOCK_OPEN_NO_IO; + } + crypto->block = qcrypto_block_open(open_opts, + block_crypto_read_func, + bs, + cflags, + errp); + + if (!crypto->block) { + ret = -EIO; + goto cleanup; + } + + bs->encrypted = 1; + bs->valid_key = 1; + + ret = 0; + cleanup: + qapi_free_QCryptoBlockOpenOptions(open_opts); + return ret; +} + + +static int block_crypto_create_generic(QCryptoBlockFormat format, + const char *filename, + QemuOpts *opts, + Error **errp) +{ + int ret = -EINVAL; + QCryptoBlockCreateOptions *create_opts = NULL; + QCryptoBlock *crypto = NULL; + struct BlockCryptoCreateData data = { + .size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE), + .opts = opts, + .filename = filename, + }; + + create_opts = block_crypto_create_opts_init(format, opts, errp); + if (!create_opts) { + return -1; + } + + crypto = qcrypto_block_create(create_opts, + block_crypto_init_func, + block_crypto_write_func, + &data, + errp); + + if (!crypto) { + ret = -EIO; + goto cleanup; + } + + ret = 0; + cleanup: + qcrypto_block_free(crypto); + blk_unref(data.blk); + qapi_free_QCryptoBlockCreateOptions(create_opts); + return ret; +} + +static int block_crypto_truncate(BlockDriverState *bs, int64_t offset) +{ + BlockCrypto *crypto = bs->opaque; + size_t payload_offset = + qcrypto_block_get_payload_offset(crypto->block); + + offset += payload_offset; + + return bdrv_truncate(bs->file->bs, offset); +} + +static void block_crypto_close(BlockDriverState *bs) +{ + BlockCrypto *crypto = bs->opaque; + qcrypto_block_free(crypto->block); +} + + +#define BLOCK_CRYPTO_MAX_SECTORS 32 + +static coroutine_fn int +block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num, + int remaining_sectors, QEMUIOVector *qiov) +{ + BlockCrypto *crypto = bs->opaque; + int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t bytes_done = 0; + uint8_t *cipher_data = NULL; + QEMUIOVector hd_qiov; + int ret = 0; + size_t payload_offset = + qcrypto_block_get_payload_offset(crypto->block) / 512; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + /* Bounce buffer so we have a linear mem region for + * entire sector. XXX optimize so we avoid bounce + * buffer in case that qiov->niov == 1 + */ + cipher_data = + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qiov->size)); + if (cipher_data == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + while (remaining_sectors) { + cur_nr_sectors = remaining_sectors; + + if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { + cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + } + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + + ret = bdrv_co_readv(bs->file->bs, + payload_offset + sector_num, + cur_nr_sectors, &hd_qiov); + if (ret < 0) { + goto cleanup; + } + + if (qcrypto_block_decrypt(crypto->block, + sector_num, + cipher_data, cur_nr_sectors * 512, + NULL) < 0) { + ret = -EIO; + goto cleanup; + } + + qemu_iovec_from_buf(qiov, bytes_done, + cipher_data, cur_nr_sectors * 512); + + remaining_sectors -= cur_nr_sectors; + sector_num += cur_nr_sectors; + bytes_done += cur_nr_sectors * 512; + } + + cleanup: + qemu_iovec_destroy(&hd_qiov); + qemu_vfree(cipher_data); + + return ret; +} + + +static coroutine_fn int +block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num, + int remaining_sectors, QEMUIOVector *qiov) +{ + BlockCrypto *crypto = bs->opaque; + int cur_nr_sectors; /* number of sectors in current iteration */ + uint64_t bytes_done = 0; + uint8_t *cipher_data = NULL; + QEMUIOVector hd_qiov; + int ret = 0; + size_t payload_offset = + qcrypto_block_get_payload_offset(crypto->block) / 512; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + /* Bounce buffer so we have a linear mem region for + * entire sector. XXX optimize so we avoid bounce + * buffer in case that qiov->niov == 1 + */ + cipher_data = + qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512, + qiov->size)); + if (cipher_data == NULL) { + ret = -ENOMEM; + goto cleanup; + } + + while (remaining_sectors) { + cur_nr_sectors = remaining_sectors; + + if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) { + cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS; + } + + qemu_iovec_to_buf(qiov, bytes_done, + cipher_data, cur_nr_sectors * 512); + + if (qcrypto_block_encrypt(crypto->block, + sector_num, + cipher_data, cur_nr_sectors * 512, + NULL) < 0) { + ret = -EIO; + goto cleanup; + } + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512); + + ret = bdrv_co_writev(bs->file->bs, + payload_offset + sector_num, + cur_nr_sectors, &hd_qiov); + if (ret < 0) { + goto cleanup; + } + + remaining_sectors -= cur_nr_sectors; + sector_num += cur_nr_sectors; + bytes_done += cur_nr_sectors * 512; + } + + cleanup: + qemu_iovec_destroy(&hd_qiov); + qemu_vfree(cipher_data); + + return ret; +} + + +static int64_t block_crypto_getlength(BlockDriverState *bs) +{ + BlockCrypto *crypto = bs->opaque; + int64_t len = bdrv_getlength(bs->file->bs); + + ssize_t offset = qcrypto_block_get_payload_offset(crypto->block); + + len -= offset; + + return len; +} + + +static int block_crypto_probe_luks(const uint8_t *buf, + int buf_size, + const char *filename) { + return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, + buf, buf_size, filename); +} + +static int block_crypto_open_luks(BlockDriverState *bs, + QDict *options, + int flags, + Error **errp) +{ + return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, + &block_crypto_runtime_opts_luks, + bs, options, flags, errp); +} + +static int block_crypto_create_luks(const char *filename, + QemuOpts *opts, + Error **errp) +{ + return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS, + filename, opts, errp); +} + +BlockDriver bdrv_crypto_luks = { + .format_name = "luks", + .instance_size = sizeof(BlockCrypto), + .bdrv_probe = block_crypto_probe_luks, + .bdrv_open = block_crypto_open_luks, + .bdrv_close = block_crypto_close, + .bdrv_create = block_crypto_create_luks, + .bdrv_truncate = block_crypto_truncate, + .create_opts = &block_crypto_create_opts_luks, + + .bdrv_co_readv = block_crypto_co_readv, + .bdrv_co_writev = block_crypto_co_writev, + .bdrv_getlength = block_crypto_getlength, +}; + +static void block_crypto_init(void) +{ + bdrv_register(&bdrv_crypto_luks); +} + +block_init(block_crypto_init); diff --git a/block/curl.c b/block/curl.c index 89941826ed..5a8f8b6239 100644 --- a/block/curl.c +++ b/block/curl.c @@ -21,12 +21,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "qemu/error-report.h" #include "block/block_int.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qstring.h" +#include "crypto/secret.h" #include <curl/curl.h> +#include "qemu/cutils.h" // #define DEBUG_CURL // #define DEBUG_VERBOSE @@ -77,6 +81,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle, #define CURL_BLOCK_OPT_SSLVERIFY "sslverify" #define CURL_BLOCK_OPT_TIMEOUT "timeout" #define CURL_BLOCK_OPT_COOKIE "cookie" +#define CURL_BLOCK_OPT_USERNAME "username" +#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret" +#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username" +#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret" struct BDRVCURLState; @@ -119,6 +127,10 @@ typedef struct BDRVCURLState { char *cookie; bool accept_range; AioContext *aio_context; + char *username; + char *password; + char *proxyusername; + char *proxypassword; } BDRVCURLState; static void curl_clean_state(CURLState *s); @@ -418,6 +430,21 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s) curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg); curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1); + if (s->username) { + curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username); + } + if (s->password) { + curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password); + } + if (s->proxyusername) { + curl_easy_setopt(state->curl, + CURLOPT_PROXYUSERNAME, s->proxyusername); + } + if (s->proxypassword) { + curl_easy_setopt(state->curl, + CURLOPT_PROXYPASSWORD, s->proxypassword); + } + /* Restrict supported protocols to avoid security issues in the more * obscure protocols. For example, do not allow POP3/SMTP/IMAP see * CVE-2013-0249. @@ -524,10 +551,31 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "Pass the cookie or list of cookies with each request" }, + { + .name = CURL_BLOCK_OPT_USERNAME, + .type = QEMU_OPT_STRING, + .help = "Username for HTTP auth" + }, + { + .name = CURL_BLOCK_OPT_PASSWORD_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of secret used as password for HTTP auth", + }, + { + .name = CURL_BLOCK_OPT_PROXY_USERNAME, + .type = QEMU_OPT_STRING, + .help = "Username for HTTP proxy auth" + }, + { + .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET, + .type = QEMU_OPT_STRING, + .help = "ID of secret used as password for HTTP proxy auth", + }, { /* end of list */ } }, }; + static int curl_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -538,6 +586,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, const char *file; const char *cookie; double d; + const char *secretid; static int inited = 0; @@ -579,6 +628,26 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags, goto out_noclean; } + s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME)); + secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET); + + if (secretid) { + s->password = qcrypto_secret_lookup_as_utf8(secretid, errp); + if (!s->password) { + goto out_noclean; + } + } + + s->proxyusername = g_strdup( + qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME)); + secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET); + if (secretid) { + s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp); + if (!s->proxypassword) { + goto out_noclean; + } + } + if (!inited) { curl_global_init(CURL_GLOBAL_ALL); inited = 1; diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c new file mode 100644 index 0000000000..4902ca557f --- /dev/null +++ b/block/dirty-bitmap.c @@ -0,0 +1,387 @@ +/* + * Block Dirty Bitmap + * + * Copyright (c) 2016 Red Hat. Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu-common.h" +#include "trace.h" +#include "block/block_int.h" +#include "block/blockjob.h" + +/** + * A BdrvDirtyBitmap can be in three possible states: + * (1) successor is NULL and disabled is false: full r/w mode + * (2) successor is NULL and disabled is true: read only mode ("disabled") + * (3) successor is set: frozen mode. + * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set, + * or enabled. A frozen bitmap can only abdicate() or reclaim(). + */ +struct BdrvDirtyBitmap { + HBitmap *bitmap; /* Dirty sector bitmap implementation */ + BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */ + char *name; /* Optional non-empty unique ID */ + int64_t size; /* Size of the bitmap (Number of sectors) */ + bool disabled; /* Bitmap is read-only */ + QLIST_ENTRY(BdrvDirtyBitmap) list; +}; + +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name) +{ + BdrvDirtyBitmap *bm; + + assert(name); + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + if (bm->name && !strcmp(name, bm->name)) { + return bm; + } + } + return NULL; +} + +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + g_free(bitmap->name); + bitmap->name = NULL; +} + +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp) +{ + int64_t bitmap_size; + BdrvDirtyBitmap *bitmap; + uint32_t sector_granularity; + + assert((granularity & (granularity - 1)) == 0); + + if (name && bdrv_find_dirty_bitmap(bs, name)) { + error_setg(errp, "Bitmap already exists: %s", name); + return NULL; + } + sector_granularity = granularity >> BDRV_SECTOR_BITS; + assert(sector_granularity); + bitmap_size = bdrv_nb_sectors(bs); + if (bitmap_size < 0) { + error_setg_errno(errp, -bitmap_size, "could not get length of device"); + errno = -bitmap_size; + return NULL; + } + bitmap = g_new0(BdrvDirtyBitmap, 1); + bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity)); + bitmap->size = bitmap_size; + bitmap->name = g_strdup(name); + bitmap->disabled = false; + QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); + return bitmap; +} + +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap) +{ + return bitmap->successor; +} + +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap) +{ + return !(bitmap->disabled || bitmap->successor); +} + +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap) +{ + if (bdrv_dirty_bitmap_frozen(bitmap)) { + return DIRTY_BITMAP_STATUS_FROZEN; + } else if (!bdrv_dirty_bitmap_enabled(bitmap)) { + return DIRTY_BITMAP_STATUS_DISABLED; + } else { + return DIRTY_BITMAP_STATUS_ACTIVE; + } +} + +/** + * Create a successor bitmap destined to replace this bitmap after an operation. + * Requires that the bitmap is not frozen and has no successor. + */ +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, Error **errp) +{ + uint64_t granularity; + BdrvDirtyBitmap *child; + + if (bdrv_dirty_bitmap_frozen(bitmap)) { + error_setg(errp, "Cannot create a successor for a bitmap that is " + "currently frozen"); + return -1; + } + assert(!bitmap->successor); + + /* Create an anonymous successor */ + granularity = bdrv_dirty_bitmap_granularity(bitmap); + child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); + if (!child) { + return -1; + } + + /* Successor will be on or off based on our current state. */ + child->disabled = bitmap->disabled; + + /* Install the successor and freeze the parent */ + bitmap->successor = child; + return 0; +} + +/** + * For a bitmap with a successor, yield our name to the successor, + * delete the old bitmap, and return a handle to the new bitmap. + */ +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp) +{ + char *name; + BdrvDirtyBitmap *successor = bitmap->successor; + + if (successor == NULL) { + error_setg(errp, "Cannot relinquish control if " + "there's no successor present"); + return NULL; + } + + name = bitmap->name; + bitmap->name = NULL; + successor->name = name; + bitmap->successor = NULL; + bdrv_release_dirty_bitmap(bs, bitmap); + + return successor; +} + +/** + * In cases of failure where we can no longer safely delete the parent, + * we may wish to re-join the parent and child/successor. + * The merged parent will be un-frozen, but not explicitly re-enabled. + */ +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *parent, + Error **errp) +{ + BdrvDirtyBitmap *successor = parent->successor; + + if (!successor) { + error_setg(errp, "Cannot reclaim a successor when none is present"); + return NULL; + } + + if (!hbitmap_merge(parent->bitmap, successor->bitmap)) { + error_setg(errp, "Merging of parent and successor bitmap failed"); + return NULL; + } + bdrv_release_dirty_bitmap(bs, successor); + parent->successor = NULL; + + return parent; +} + +/** + * Truncates _all_ bitmaps attached to a BDS. + */ +void bdrv_dirty_bitmap_truncate(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bitmap; + uint64_t size = bdrv_nb_sectors(bs); + + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + hbitmap_truncate(bitmap->bitmap, size); + bitmap->size = size; + } +} + +static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + bool only_named) +{ + BdrvDirtyBitmap *bm, *next; + QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) { + if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) { + assert(!bdrv_dirty_bitmap_frozen(bm)); + QLIST_REMOVE(bm, list); + hbitmap_free(bm->bitmap); + g_free(bm->name); + g_free(bm); + + if (bitmap) { + return; + } + } + } +} + +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap) +{ + bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false); +} + +/** + * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()). + * There must not be any frozen bitmaps attached. + */ +void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs) +{ + bdrv_do_release_matching_dirty_bitmap(bs, NULL, true); +} + +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = true; +} + +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap) +{ + assert(!bdrv_dirty_bitmap_frozen(bitmap)); + bitmap->disabled = false; +} + +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs) +{ + BdrvDirtyBitmap *bm; + BlockDirtyInfoList *list = NULL; + BlockDirtyInfoList **plist = &list; + + QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) { + BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1); + BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1); + info->count = bdrv_get_dirty_count(bm); + info->granularity = bdrv_dirty_bitmap_granularity(bm); + info->has_name = !!bm->name; + info->name = g_strdup(bm->name); + info->status = bdrv_dirty_bitmap_status(bm); + entry->value = info; + *plist = entry; + plist = &entry->next; + } + + return list; +} + +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, + int64_t sector) +{ + if (bitmap) { + return hbitmap_get(bitmap->bitmap, sector); + } else { + return 0; + } +} + +/** + * Chooses a default granularity based on the existing cluster size, + * but clamped between [4K, 64K]. Defaults to 64K in the case that there + * is no cluster size information available. + */ +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs) +{ + BlockDriverInfo bdi; + uint32_t granularity; + + if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) { + granularity = MAX(4096, bdi.cluster_size); + granularity = MIN(65536, granularity); + } else { + granularity = 65536; + } + + return granularity; +} + +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap) +{ + return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap); +} + +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi) +{ + hbitmap_iter_init(hbi, bitmap->bitmap, 0); +} + +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors); +} + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out) +{ + assert(bdrv_dirty_bitmap_enabled(bitmap)); + if (!out) { + hbitmap_reset_all(bitmap->bitmap); + } else { + HBitmap *backup = bitmap->bitmap; + bitmap->bitmap = hbitmap_alloc(bitmap->size, + hbitmap_granularity(backup)); + *out = backup; + } +} + +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in) +{ + HBitmap *tmp = bitmap->bitmap; + assert(bdrv_dirty_bitmap_enabled(bitmap)); + bitmap->bitmap = in; + hbitmap_free(tmp); +} + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, + int nr_sectors) +{ + BdrvDirtyBitmap *bitmap; + QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) { + if (!bdrv_dirty_bitmap_enabled(bitmap)) { + continue; + } + hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors); + } +} + +/** + * Advance an HBitmapIter to an arbitrary offset. + */ +void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset) +{ + assert(hbi->hb); + hbitmap_iter_init(hbi, hbi->hb, offset); +} + +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap) +{ + return hbitmap_count(bitmap->bitmap); +} diff --git a/block/dmg.c b/block/dmg.c index 546a6f5330..a496eb7c9b 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -21,6 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" #include "qemu/bswap.h" diff --git a/block/gluster.c b/block/gluster.c index 0857c14645..a8aaacf645 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -7,8 +7,10 @@ * See the COPYING file in the top-level directory. * */ +#include "qemu/osdep.h" #include <glusterfs/api/glfs.h> #include "block/block_int.h" +#include "qapi/error.h" #include "qemu/uri.h" typedef struct GlusterAIOCB { @@ -245,7 +247,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) if (!ret || ret == acb->size) { acb->ret = 0; /* Success */ } else if (ret < 0) { - acb->ret = ret; /* Read/Write failed */ + acb->ret = -errno; /* Read/Write failed */ } else { acb->ret = -EIO; /* Partial read/write - fail it */ } @@ -312,6 +314,23 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options, goto out; } +#ifdef CONFIG_GLUSTERFS_XLATOR_OPT + /* Without this, if fsync fails for a recoverable reason (for instance, + * ENOSPC), gluster will dump its cache, preventing retries. This means + * almost certain data loss. Not all gluster versions support the + * 'resync-failed-syncs-after-fsync' key value, but there is no way to + * discover during runtime if it is supported (this api returns success for + * unknown key/value pairs) */ + ret = glfs_set_xlator_option(s->glfs, "*-write-behind", + "resync-failed-syncs-after-fsync", + "on"); + if (ret < 0) { + error_setg_errno(errp, errno, "Unable to set xlator key/value pair"); + ret = -errno; + goto out; + } +#endif + qemu_gluster_parse_flags(bdrv_flags, &open_flags); s->fd = glfs_open(s->glfs, gconf->image, open_flags); @@ -364,6 +383,16 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state, goto exit; } +#ifdef CONFIG_GLUSTERFS_XLATOR_OPT + ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind", + "resync-failed-syncs-after-fsync", "on"); + if (ret < 0) { + error_setg_errno(errp, errno, "Unable to set xlator key/value pair"); + ret = -errno; + goto exit; + } +#endif + reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags); if (reop_s->fd == NULL) { /* reops->glfs will be cleaned up in _abort */ @@ -587,6 +616,17 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1); } +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_fini(s->glfs); +} + static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) { int ret; @@ -600,11 +640,35 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb); if (ret < 0) { - return -errno; + ret = -errno; + goto error; } qemu_coroutine_yield(); + if (acb.ret < 0) { + ret = acb.ret; + goto error; + } + return acb.ret; + +error: + /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain its cache + * after a fsync failure, so we have no way of allowing the guest to safely + * continue. Gluster versions prior to 3.5.6 don't retain the cache + * either, but will invalidate the fd on error, so this is again our only + * option. + * + * The 'resync-failed-syncs-after-fsync' xlator option for the + * write-behind cache will cause later gluster versions to retain its + * cache after error, so long as the fd remains open. However, we + * currently have no way of knowing if this option is supported. + * + * TODO: Once gluster provides a way for us to determine if the option + * is supported, bypass the closure and setting drv to NULL. */ + qemu_gluster_close(bs); + bs->drv = NULL; + return ret; } #ifdef CONFIG_GLUSTERFS_DISCARD @@ -659,17 +723,6 @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) } } -static void qemu_gluster_close(BlockDriverState *bs) -{ - BDRVGlusterState *s = bs->opaque; - - if (s->fd) { - glfs_close(s->fd); - s->fd = NULL; - } - glfs_fini(s->glfs); -} - static int qemu_gluster_has_zero_init(BlockDriverState *bs) { /* GlusterFS volume could be backed by a block device */ diff --git a/block/io.c b/block/io.c index e00fb5d690..a7dbf85b19 100644 --- a/block/io.c +++ b/block/io.c @@ -22,11 +22,14 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "trace.h" #include "sysemu/block-backend.h" #include "block/blockjob.h" #include "block/block_int.h" #include "block/throttle-groups.h" +#include "qemu/cutils.h" +#include "qapi/error.h" #include "qemu/error-report.h" #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ @@ -43,12 +46,6 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs, static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *iov); -static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); -static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, - int64_t offset, unsigned int bytes, QEMUIOVector *qiov, - BdrvRequestFlags flags); static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, int64_t sector_num, QEMUIOVector *qiov, @@ -166,9 +163,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length; bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment; bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment; + bs->bl.max_iov = bs->file->bs->bl.max_iov; } else { bs->bl.min_mem_alignment = 512; bs->bl.opt_mem_alignment = getpagesize(); + + /* Safe default since most protocols use readv()/writev()/etc */ + bs->bl.max_iov = IOV_MAX; } if (bs->backing) { @@ -189,6 +190,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.min_mem_alignment = MAX(bs->bl.min_mem_alignment, bs->backing->bs->bl.min_mem_alignment); + bs->bl.max_iov = + MIN(bs->bl.max_iov, + bs->backing->bs->bl.max_iov); } /* Then let the driver override it */ @@ -249,6 +253,47 @@ static void bdrv_drain_recurse(BlockDriverState *bs) } } +typedef struct { + Coroutine *co; + BlockDriverState *bs; + QEMUBH *bh; + bool done; +} BdrvCoDrainData; + +static void bdrv_co_drain_bh_cb(void *opaque) +{ + BdrvCoDrainData *data = opaque; + Coroutine *co = data->co; + + qemu_bh_delete(data->bh); + bdrv_drain(data->bs); + data->done = true; + qemu_coroutine_enter(co, NULL); +} + +void coroutine_fn bdrv_co_drain(BlockDriverState *bs) +{ + BdrvCoDrainData data; + + /* Calling bdrv_drain() from a BH ensures the current coroutine yields and + * other coroutines run if they were queued from + * qemu_co_queue_run_restart(). */ + + assert(qemu_in_coroutine()); + data = (BdrvCoDrainData) { + .co = qemu_coroutine_self(), + .bs = bs, + .done = false, + .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data), + }; + qemu_bh_schedule(data.bh); + + qemu_coroutine_yield(); + /* If we are resumed from some other event (such as an aio completion or a + * timer callback), it is a bug in the caller that should be fixed. */ + assert(data.done); +} + /* * Wait for pending requests to complete on a single BlockDriverState subtree, * and suspend block driver's internal I/O until next request arrives. @@ -265,6 +310,10 @@ void bdrv_drain(BlockDriverState *bs) bool busy = true; bdrv_drain_recurse(bs); + if (qemu_in_coroutine()) { + bdrv_co_drain(bs); + return; + } while (busy) { /* Keep iterating */ bdrv_flush_io_queue(bs); @@ -293,6 +342,7 @@ void bdrv_drain_all(void) if (bs->job) { block_job_pause(bs->job); } + bdrv_drain_recurse(bs); aio_context_release(aio_context); if (!g_slist_find(aio_ctxs, aio_context)) { @@ -612,20 +662,6 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num, return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0); } -/* Just like bdrv_read(), but with I/O throttling temporarily disabled */ -int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - bool enabled; - int ret; - - enabled = bs->io_limits_enabled; - bs->io_limits_enabled = false; - ret = bdrv_read(bs, sector_num, buf, nb_sectors); - bs->io_limits_enabled = enabled; - return ret; -} - /* Return < 0 if error. Important errors are: -EIO generic I/O error (may happen for all errors) -ENOMEDIUM No media inserted. @@ -656,6 +692,7 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) { int64_t target_sectors, ret, nb_sectors, sector_num = 0; + BlockDriverState *file; int n; target_sectors = bdrv_nb_sectors(bs); @@ -668,7 +705,7 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) if (nb_sectors <= 0) { return 0; } - ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n); + ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file); if (ret < 0) { error_report("error getting block status at sector %" PRId64 ": %s", sector_num, strerror(-ret)); @@ -755,9 +792,9 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, return ret; } - /* No flush needed for cache modes that already do it */ - if (bs->enable_write_cache) { - bdrv_flush(bs); + ret = bdrv_flush(bs); + if (ret < 0) { + return ret; } return 0; @@ -852,6 +889,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); assert(!qiov || bytes == qiov->size); + assert((bs->open_flags & BDRV_O_NO_IO) == 0); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -929,7 +967,7 @@ out: /* * Handle a read request in coroutine context */ -static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, +int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { @@ -1138,6 +1176,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); assert(!qiov || bytes == qiov->size); + assert((bs->open_flags & BDRV_O_NO_IO) == 0); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); @@ -1160,13 +1199,20 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, } else if (flags & BDRV_REQ_ZERO_WRITE) { bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); + } else if (drv->bdrv_co_writev_flags) { + bdrv_debug_event(bs, BLKDBG_PWRITEV); + ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov, + flags); } else { + assert(drv->supported_write_flags == 0); bdrv_debug_event(bs, BLKDBG_PWRITEV); ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); - if (ret == 0 && !bs->enable_write_cache) { + if (ret == 0 && (flags & BDRV_REQ_FUA) && + !(drv->supported_write_flags & BDRV_REQ_FUA)) + { ret = bdrv_co_flush(bs); } @@ -1274,7 +1320,7 @@ fail: /* * Handle a write request in coroutine context */ -static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, +int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, int64_t offset, unsigned int bytes, QEMUIOVector *qiov, BdrvRequestFlags flags) { @@ -1293,6 +1339,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, if (bs->read_only) { return -EPERM; } + assert(!(bs->open_flags & BDRV_O_INACTIVE)); ret = bdrv_check_byte_request(bs, offset, bytes); if (ret < 0) { @@ -1434,29 +1481,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, BDRV_REQ_ZERO_WRITE | flags); } -int bdrv_flush_all(void) -{ - BlockDriverState *bs = NULL; - int result = 0; - - while ((bs = bdrv_next(bs))) { - AioContext *aio_context = bdrv_get_aio_context(bs); - int ret; - - aio_context_acquire(aio_context); - ret = bdrv_flush(bs); - if (ret < 0 && !result) { - result = ret; - } - aio_context_release(aio_context); - } - - return result; -} - typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; + BlockDriverState **file; int64_t sector_num; int nb_sectors; int *pnum; @@ -1478,10 +1506,14 @@ typedef struct BdrvCoGetBlockStatusData { * * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes * beyond the end of the disk image it will be clamped. + * + * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file' + * points to the BDS which the sector range is allocated in. */ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { int64_t total_sectors; int64_t n; @@ -1511,7 +1543,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, return ret; } - ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum); + *file = NULL; + ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum, + file); if (ret < 0) { *pnum = 0; return ret; @@ -1520,7 +1554,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, - *pnum, pnum); + *pnum, pnum, file); } if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { @@ -1537,13 +1571,14 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } } - if (bs->file && + if (*file && *file != bs && (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) && (ret & BDRV_BLOCK_OFFSET_VALID)) { + BlockDriverState *file2; int file_pnum; - ret2 = bdrv_co_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, - *pnum, &file_pnum); + ret2 = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS, + *pnum, &file_pnum, &file2); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it * is useful but not necessary. @@ -1568,14 +1603,15 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t sector_num, int nb_sectors, - int *pnum) + int *pnum, + BlockDriverState **file) { BlockDriverState *p; int64_t ret = 0; assert(bs != base); for (p = bs; p != base; p = backing_bs(p)) { - ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum); + ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file); if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) { break; } @@ -1594,7 +1630,8 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque) data->ret = bdrv_co_get_block_status_above(data->bs, data->base, data->sector_num, data->nb_sectors, - data->pnum); + data->pnum, + data->file); data->done = true; } @@ -1606,12 +1643,14 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque) int64_t bdrv_get_block_status_above(BlockDriverState *bs, BlockDriverState *base, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { Coroutine *co; BdrvCoGetBlockStatusData data = { .bs = bs, .base = base, + .file = file, .sector_num = sector_num, .nb_sectors = nb_sectors, .pnum = pnum, @@ -1635,16 +1674,19 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs, int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { return bdrv_get_block_status_above(bs, backing_bs(bs), - sector_num, nb_sectors, pnum); + sector_num, nb_sectors, pnum, file); } int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { - int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum); + BlockDriverState *file; + int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum, + &file); if (ret < 0) { return ret; } @@ -1882,7 +1924,8 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs, merge = 1; } - if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) { + if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > + bs->bl.max_iov) { merge = 0; } @@ -2342,6 +2385,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) } tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); + + /* Write back all layers by calling one driver function */ + if (bs->drv->bdrv_co_flush) { + ret = bs->drv->bdrv_co_flush(bs); + goto out; + } + /* Write back cached data to the OS even with cache=unsafe */ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); if (bs->drv->bdrv_co_flush_to_os) { @@ -2453,6 +2503,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, } else if (bs->read_only) { return -EPERM; } + assert(!(bs->open_flags & BDRV_O_INACTIVE)); /* Do nothing if disabled. */ if (!(bs->open_flags & BDRV_O_UNMAP)) { @@ -2614,10 +2665,11 @@ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) bdrv_co_ioctl_entry(&data); } else { Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry); + qemu_coroutine_enter(co, &data); - } - while (data.ret == -EINPROGRESS) { - aio_poll(bdrv_get_aio_context(bs), true); + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } } return data.ret; } diff --git a/block/iscsi.c b/block/iscsi.c index bd1f1bfcd1..302baf84c1 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -23,7 +23,7 @@ * THE SOFTWARE. */ -#include "config-host.h" +#include "qemu/osdep.h" #include <poll.h> #include <math.h> @@ -39,6 +39,7 @@ #include "sysemu/sysemu.h" #include "qmp-commands.h" #include "qapi/qmp/qstring.h" +#include "crypto/secret.h" #include <iscsi/iscsi.h> #include <iscsi/scsi-lowlevel.h> @@ -69,7 +70,6 @@ typedef struct IscsiLun { bool lbprz; bool dpofua; bool has_write_same; - bool force_next_flush; bool request_timed_out; } IscsiLun; @@ -83,7 +83,6 @@ typedef struct IscsiTask { QEMUBH *bh; IscsiLun *iscsilun; QEMUTimer retry_timer; - bool force_next_flush; int err_code; } IscsiTask; @@ -281,8 +280,6 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, } iTask->err_code = iscsi_translate_sense(&task->sense); error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); - } else { - iTask->iscsilun->force_next_flush |= iTask->force_next_flush; } out: @@ -451,15 +448,15 @@ static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num, } } -static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, - QEMUIOVector *iov) +static int coroutine_fn +iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *iov, int flags) { IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; uint64_t lba; uint32_t num_sectors; - int fua; + bool fua; if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) { return -EINVAL; @@ -475,8 +472,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs, num_sectors = sector_qemu2lun(nb_sectors, iscsilun); iscsi_co_init_iscsitask(iscsilun, &iTask); retry: - fua = iscsilun->dpofua && !bs->enable_write_cache; - iTask.force_next_flush = !fua; + fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA); if (iscsilun->use_16_for_rw) { iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba, NULL, num_sectors * iscsilun->block_size, @@ -517,6 +513,13 @@ retry: return 0; } +static int coroutine_fn +iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *iov) +{ + return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0); +} + static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num, int nb_sectors) @@ -532,7 +535,8 @@ static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun, static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { IscsiLun *iscsilun = bs->opaque; struct scsi_get_lba_status *lbas = NULL; @@ -624,6 +628,9 @@ out: if (iTask.task != NULL) { scsi_free_scsi_task(iTask.task); } + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { + *file = bs; + } return ret; } @@ -650,7 +657,8 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs, !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) { int64_t ret; int pnum; - ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum); + BlockDriverState *file; + ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file); if (ret < 0) { return ret; } @@ -709,11 +717,6 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; - if (!iscsilun->force_next_flush) { - return 0; - } - iscsilun->force_next_flush = false; - iscsi_co_init_iscsitask(iscsilun, &iTask); retry: if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0, @@ -1013,7 +1016,6 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, } iscsi_co_init_iscsitask(iscsilun, &iTask); - iTask.force_next_flush = true; retry: if (use_16_for_ws) { iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba, @@ -1075,6 +1077,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target, QemuOpts *opts; const char *user = NULL; const char *password = NULL; + const char *secretid; + char *secret = NULL; list = qemu_find_opts("iscsi"); if (!list) { @@ -1094,8 +1098,20 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target, return; } + secretid = qemu_opt_get(opts, "password-secret"); password = qemu_opt_get(opts, "password"); - if (!password) { + if (secretid && password) { + error_setg(errp, "'password' and 'password-secret' properties are " + "mutually exclusive"); + return; + } + if (secretid) { + secret = qcrypto_secret_lookup_as_utf8(secretid, errp); + if (!secret) { + return; + } + password = secret; + } else if (!password) { error_setg(errp, "CHAP username specified but no password was given"); return; } @@ -1103,6 +1119,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target, if (iscsi_set_initiator_username_pwd(iscsi, user, password)) { error_setg(errp, "Failed to set initiator username and password"); } + + g_free(secret); } static void parse_header_digest(struct iscsi_context *iscsi, const char *target, @@ -1243,8 +1261,13 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) iscsilun->lbprz = !!rc16->lbprz; iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff); } + break; } - break; + if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { + break; + } + /* Fall through and try READ CAPACITY(10) instead. */ case TYPE_ROM: task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0); if (task != NULL && task->status == SCSI_STATUS_GOOD) { @@ -1270,7 +1293,7 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp) && retries-- > 0); if (task == NULL || task->status != SCSI_STATUS_GOOD) { - error_setg(errp, "iSCSI: failed to send readcapacity10 command."); + error_setg(errp, "iSCSI: failed to send readcapacity10/16 command"); } else if (!iscsilun->block_size || iscsilun->block_size % BDRV_SECTOR_SIZE) { error_setg(errp, "iSCSI: the target returned an invalid " @@ -1825,6 +1848,8 @@ static BlockDriver bdrv_iscsi = { .bdrv_co_write_zeroes = iscsi_co_write_zeroes, .bdrv_co_readv = iscsi_co_readv, .bdrv_co_writev = iscsi_co_writev, + .bdrv_co_writev_flags = iscsi_co_writev_flags, + .supported_write_flags = BDRV_REQ_FUA, .bdrv_co_flush_to_disk = iscsi_co_flush, #ifdef __linux__ @@ -1848,6 +1873,11 @@ static QemuOptsList qemu_iscsi_opts = { .type = QEMU_OPT_STRING, .help = "password for CHAP authentication to target", },{ + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of the secret providing password for CHAP " + "authentication to target", + },{ .name = "header-digest", .type = QEMU_OPT_STRING, .help = "HeaderDigest setting. " diff --git a/block/linux-aio.c b/block/linux-aio.c index 88b0520a8b..805757e02e 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c @@ -7,6 +7,7 @@ * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "block/aio.h" #include "qemu/queue.h" diff --git a/block/mirror.c b/block/mirror.c index 0e8f5565a5..039f48125e 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -11,13 +11,16 @@ * */ +#include "qemu/osdep.h" #include "trace.h" #include "block/blockjob.h" #include "block/block_int.h" #include "sysemu/block-backend.h" +#include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "qemu/bitmap.h" +#include "qemu/error-report.h" #define SLICE_TIME 100000000ULL /* ns */ #define MAX_IN_FLIGHT 16 @@ -45,7 +48,6 @@ typedef struct MirrorBlockJob { BlockdevOnError on_source_error, on_target_error; bool synced; bool should_complete; - int64_t sector_num; int64_t granularity; size_t buf_size; int64_t bdev_length; @@ -62,6 +64,8 @@ typedef struct MirrorBlockJob { int ret; bool unmap; bool waiting_for_io; + int target_cluster_sectors; + int max_iov; } MirrorBlockJob; typedef struct MirrorOp { @@ -104,7 +108,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; chunk_num = op->sector_num / sectors_per_chunk; - nb_chunks = op->nb_sectors / sectors_per_chunk; + nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk); bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks); if (ret >= 0) { if (s->cow_bitmap) { @@ -157,112 +161,94 @@ static void mirror_read_complete(void *opaque, int ret) mirror_write_complete, op); } -static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) +static inline void mirror_clip_sectors(MirrorBlockJob *s, + int64_t sector_num, + int *nb_sectors) { - BlockDriverState *source = s->common.bs; - int nb_sectors, sectors_per_chunk, nb_chunks; - int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector; - uint64_t delay_ns = 0; - MirrorOp *op; - int pnum; - int64_t ret; - - s->sector_num = hbitmap_iter_next(&s->hbi); - if (s->sector_num < 0) { - bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); - s->sector_num = hbitmap_iter_next(&s->hbi); - trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); - assert(s->sector_num >= 0); - } - - hbitmap_next_sector = s->sector_num; - sector_num = s->sector_num; - sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; - end = s->bdev_length / BDRV_SECTOR_SIZE; - - /* Extend the QEMUIOVector to include all adjacent blocks that will - * be copied in this operation. - * - * We have to do this if we have no backing file yet in the destination, - * and the cluster size is very large. Then we need to do COW ourselves. - * The first time a cluster is copied, copy it entirely. Note that, - * because both the granularity and the cluster size are powers of two, - * the number of sectors to copy cannot exceed one cluster. - * - * We also want to extend the QEMUIOVector to include more adjacent - * dirty blocks if possible, to limit the number of I/O operations and - * run efficiently even with a small granularity. - */ - nb_chunks = 0; - nb_sectors = 0; - next_sector = sector_num; - next_chunk = sector_num / sectors_per_chunk; + *nb_sectors = MIN(*nb_sectors, + s->bdev_length / BDRV_SECTOR_SIZE - sector_num); +} - /* Wait for I/O to this cluster (from a previous iteration) to be done. */ - while (test_bit(next_chunk, s->in_flight_bitmap)) { - trace_mirror_yield_in_flight(s, sector_num, s->in_flight); - s->waiting_for_io = true; - qemu_coroutine_yield(); - s->waiting_for_io = false; +/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and + * return the offset of the adjusted tail sector against original. */ +static int mirror_cow_align(MirrorBlockJob *s, + int64_t *sector_num, + int *nb_sectors) +{ + bool need_cow; + int ret = 0; + int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS; + int64_t align_sector_num = *sector_num; + int align_nb_sectors = *nb_sectors; + int max_sectors = chunk_sectors * s->max_iov; + + need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap); + need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors, + s->cow_bitmap); + if (need_cow) { + bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors, + &align_sector_num, &align_nb_sectors); + } + + if (align_nb_sectors > max_sectors) { + align_nb_sectors = max_sectors; + if (need_cow) { + align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors, + s->target_cluster_sectors); + } } + /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but + * that doesn't matter because it's already the end of source image. */ + mirror_clip_sectors(s, align_sector_num, &align_nb_sectors); - do { - int added_sectors, added_chunks; - - if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) || - test_bit(next_chunk, s->in_flight_bitmap)) { - assert(nb_sectors > 0); - break; - } + ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors); + *sector_num = align_sector_num; + *nb_sectors = align_nb_sectors; + assert(ret >= 0); + return ret; +} - added_sectors = sectors_per_chunk; - if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) { - bdrv_round_to_clusters(s->target, - next_sector, added_sectors, - &next_sector, &added_sectors); +static inline void mirror_wait_for_io(MirrorBlockJob *s) +{ + assert(!s->waiting_for_io); + s->waiting_for_io = true; + qemu_coroutine_yield(); + s->waiting_for_io = false; +} - /* On the first iteration, the rounding may make us copy - * sectors before the first dirty one. - */ - if (next_sector < sector_num) { - assert(nb_sectors == 0); - sector_num = next_sector; - next_chunk = next_sector / sectors_per_chunk; - } - } +/* Submit async read while handling COW. + * Returns: nb_sectors if no alignment is necessary, or + * (new_end - sector_num) if tail is rounded up or down due to + * alignment or buffer limit. + */ +static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num, + int nb_sectors) +{ + BlockDriverState *source = s->common.bs; + int sectors_per_chunk, nb_chunks; + int ret = nb_sectors; + MirrorOp *op; - added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors)); - added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk; + sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; - /* When doing COW, it may happen that there is not enough space for - * a full cluster. Wait if that is the case. - */ - while (nb_chunks == 0 && s->buf_free_count < added_chunks) { - trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight); - s->waiting_for_io = true; - qemu_coroutine_yield(); - s->waiting_for_io = false; - } - if (s->buf_free_count < nb_chunks + added_chunks) { - trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight); - break; - } - if (IOV_MAX < nb_chunks + added_chunks) { - trace_mirror_break_iov_max(s, nb_chunks, added_chunks); - break; - } + /* We can only handle as much as buf_size at a time. */ + nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors); + assert(nb_sectors); - /* We have enough free space to copy these sectors. */ - bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks); + if (s->cow_bitmap) { + ret += mirror_cow_align(s, §or_num, &nb_sectors); + } + assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size); + /* The sector range must meet granularity because: + * 1) Caller passes in aligned values; + * 2) mirror_cow_align is used only when target cluster is larger. */ + assert(!(sector_num % sectors_per_chunk)); + nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk); - nb_sectors += added_sectors; - nb_chunks += added_chunks; - next_sector += added_sectors; - next_chunk += added_chunks; - if (!s->synced && s->common.speed) { - delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors); - } - } while (delay_ns == 0 && next_sector < end); + while (s->buf_free_count < nb_chunks) { + trace_mirror_yield_in_flight(s, sector_num, s->in_flight); + mirror_wait_for_io(s); + } /* Allocate a MirrorOp that is used as an AIO callback. */ op = g_new(MirrorOp, 1); @@ -274,47 +260,158 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) * from s->buf_free. */ qemu_iovec_init(&op->qiov, nb_chunks); - next_sector = sector_num; while (nb_chunks-- > 0) { MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free); - size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size; + size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size; QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next); s->buf_free_count--; qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining)); - - /* Advance the HBitmapIter in parallel, so that we do not examine - * the same sector twice. - */ - if (next_sector > hbitmap_next_sector - && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) { - hbitmap_next_sector = hbitmap_iter_next(&s->hbi); - } - - next_sector += sectors_per_chunk; } - bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors); - /* Copy the dirty cluster. */ s->in_flight++; s->sectors_in_flight += nb_sectors; trace_mirror_one_iteration(s, sector_num, nb_sectors); - ret = bdrv_get_block_status_above(source, NULL, sector_num, - nb_sectors, &pnum); - if (ret < 0 || pnum < nb_sectors || - (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) { - bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, - mirror_read_complete, op); - } else if (ret & BDRV_BLOCK_ZERO) { + bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors, + mirror_read_complete, op); + return ret; +} + +static void mirror_do_zero_or_discard(MirrorBlockJob *s, + int64_t sector_num, + int nb_sectors, + bool is_discard) +{ + MirrorOp *op; + + /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed + * so the freeing in mirror_iteration_done is nop. */ + op = g_new0(MirrorOp, 1); + op->s = s; + op->sector_num = sector_num; + op->nb_sectors = nb_sectors; + + s->in_flight++; + s->sectors_in_flight += nb_sectors; + if (is_discard) { + bdrv_aio_discard(s->target, sector_num, op->nb_sectors, + mirror_write_complete, op); + } else { bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors, s->unmap ? BDRV_REQ_MAY_UNMAP : 0, mirror_write_complete, op); - } else { - assert(!(ret & BDRV_BLOCK_DATA)); - bdrv_aio_discard(s->target, sector_num, op->nb_sectors, - mirror_write_complete, op); + } +} + +static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) +{ + BlockDriverState *source = s->common.bs; + int64_t sector_num, first_chunk; + uint64_t delay_ns = 0; + /* At least the first dirty chunk is mirrored in one iteration. */ + int nb_chunks = 1; + int64_t end = s->bdev_length / BDRV_SECTOR_SIZE; + int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; + + sector_num = hbitmap_iter_next(&s->hbi); + if (sector_num < 0) { + bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi); + sector_num = hbitmap_iter_next(&s->hbi); + trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap)); + assert(sector_num >= 0); + } + + first_chunk = sector_num / sectors_per_chunk; + while (test_bit(first_chunk, s->in_flight_bitmap)) { + trace_mirror_yield_in_flight(s, first_chunk, s->in_flight); + mirror_wait_for_io(s); + } + + /* Find the number of consective dirty chunks following the first dirty + * one, and wait for in flight requests in them. */ + while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) { + int64_t hbitmap_next; + int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk; + int64_t next_chunk = next_sector / sectors_per_chunk; + if (next_sector >= end || + !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) { + break; + } + if (test_bit(next_chunk, s->in_flight_bitmap)) { + break; + } + + hbitmap_next = hbitmap_iter_next(&s->hbi); + if (hbitmap_next > next_sector || hbitmap_next < 0) { + /* The bitmap iterator's cache is stale, refresh it */ + bdrv_set_dirty_iter(&s->hbi, next_sector); + hbitmap_next = hbitmap_iter_next(&s->hbi); + } + assert(hbitmap_next == next_sector); + nb_chunks++; + } + + /* Clear dirty bits before querying the block status, because + * calling bdrv_get_block_status_above could yield - if some blocks are + * marked dirty in this window, we need to know. + */ + bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, + nb_chunks * sectors_per_chunk); + bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks); + while (nb_chunks > 0 && sector_num < end) { + int ret; + int io_sectors; + BlockDriverState *file; + enum MirrorMethod { + MIRROR_METHOD_COPY, + MIRROR_METHOD_ZERO, + MIRROR_METHOD_DISCARD + } mirror_method = MIRROR_METHOD_COPY; + + assert(!(sector_num % sectors_per_chunk)); + ret = bdrv_get_block_status_above(source, NULL, sector_num, + nb_chunks * sectors_per_chunk, + &io_sectors, &file); + if (ret < 0) { + io_sectors = nb_chunks * sectors_per_chunk; + } + + io_sectors -= io_sectors % sectors_per_chunk; + if (io_sectors < sectors_per_chunk) { + io_sectors = sectors_per_chunk; + } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) { + int64_t target_sector_num; + int target_nb_sectors; + bdrv_round_to_clusters(s->target, sector_num, io_sectors, + &target_sector_num, &target_nb_sectors); + if (target_sector_num == sector_num && + target_nb_sectors == io_sectors) { + mirror_method = ret & BDRV_BLOCK_ZERO ? + MIRROR_METHOD_ZERO : + MIRROR_METHOD_DISCARD; + } + } + + mirror_clip_sectors(s, sector_num, &io_sectors); + switch (mirror_method) { + case MIRROR_METHOD_COPY: + io_sectors = mirror_do_read(s, sector_num, io_sectors); + break; + case MIRROR_METHOD_ZERO: + mirror_do_zero_or_discard(s, sector_num, io_sectors, false); + break; + case MIRROR_METHOD_DISCARD: + mirror_do_zero_or_discard(s, sector_num, io_sectors, true); + break; + default: + abort(); + } + assert(io_sectors); + sector_num += io_sectors; + nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk); + delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors); } return delay_ns; } @@ -339,9 +436,7 @@ static void mirror_free_init(MirrorBlockJob *s) static void mirror_drain(MirrorBlockJob *s) { while (s->in_flight > 0) { - s->waiting_for_io = true; - qemu_coroutine_yield(); - s->waiting_for_io = false; + mirror_wait_for_io(s); } } @@ -370,11 +465,22 @@ static void mirror_exit(BlockJob *job, void *opaque) if (s->to_replace) { to_replace = s->to_replace; } + + /* This was checked in mirror_start_job(), but meanwhile one of the + * nodes could have been newly attached to a BlockBackend. */ + if (to_replace->blk && s->target->blk) { + error_report("block job: Can't create node with two BlockBackends"); + data->ret = -EINVAL; + goto out; + } + if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } bdrv_replace_in_backing_chain(to_replace, s->target); } + +out: if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); error_free(s->replace_blocker); @@ -389,6 +495,9 @@ static void mirror_exit(BlockJob *job, void *opaque) block_job_completed(&s->common, data->ret); g_free(data); bdrv_drained_end(src); + if (qemu_get_aio_context() == bdrv_get_aio_context(src)) { + aio_enable_external(iohandler_get_aio_context()); + } bdrv_unref(src); } @@ -404,6 +513,7 @@ static void coroutine_fn mirror_run(void *opaque) checking for a NULL string */ int ret = 0; int n; + int target_cluster_size = BDRV_SECTOR_SIZE; if (block_job_is_cancelled(&s->common)) { goto immediate_exit; @@ -433,16 +543,16 @@ static void coroutine_fn mirror_run(void *opaque) */ bdrv_get_backing_filename(s->target, backing_filename, sizeof(backing_filename)); - if (backing_filename[0] && !s->target->backing) { - ret = bdrv_get_info(s->target, &bdi); - if (ret < 0) { - goto immediate_exit; - } - if (s->granularity < bdi.cluster_size) { - s->buf_size = MAX(s->buf_size, bdi.cluster_size); - s->cow_bitmap = bitmap_new(length); - } + if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) { + target_cluster_size = bdi.cluster_size; + } + if (backing_filename[0] && !s->target->backing + && s->granularity < target_cluster_size) { + s->buf_size = MAX(s->buf_size, target_cluster_size); + s->cow_bitmap = bitmap_new(length); } + s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS; + s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov); end = s->bdev_length / BDRV_SECTOR_SIZE; s->buf = qemu_try_blockalign(bs, s->buf_size); @@ -517,9 +627,7 @@ static void coroutine_fn mirror_run(void *opaque) if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 || (cnt == 0 && s->in_flight > 0)) { trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt); - s->waiting_for_io = true; - qemu_coroutine_yield(); - s->waiting_for_io = false; + mirror_wait_for_io(s); continue; } else if (cnt != 0) { delay_ns = mirror_iteration(s); @@ -562,7 +670,7 @@ static void coroutine_fn mirror_run(void *opaque) * mirror_populate runs. */ trace_mirror_before_drain(s, cnt); - bdrv_drain(bs); + bdrv_co_drain(bs); cnt = bdrv_get_dirty_count(s->dirty_bitmap); } @@ -611,6 +719,12 @@ immediate_exit: /* Before we switch to target in mirror_exit, make sure data doesn't * change. */ bdrv_drained_begin(s->common.bs); + if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) { + /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the + * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we + * need a block layer API change to achieve this. */ + aio_disable_external(iohandler_get_aio_context()); + } block_job_defer_to_main_loop(&s->common, mirror_exit, data); } @@ -640,7 +754,7 @@ static void mirror_complete(BlockJob *job, Error **errp) Error *local_err = NULL; int ret; - ret = bdrv_open_backing_file(s->target, NULL, &local_err); + ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err); if (ret < 0) { error_propagate(errp, local_err); return; @@ -705,6 +819,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, bool is_none_mode, BlockDriverState *base) { MirrorBlockJob *s; + BlockDriverState *replaced_bs; if (granularity == 0) { granularity = bdrv_get_default_bitmap_granularity(target); @@ -728,6 +843,21 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, buf_size = DEFAULT_MIRROR_BUF_SIZE; } + /* We can't support this case as long as the block layer can't handle + * multiple BlockBackends per BlockDriverState. */ + if (replaces) { + replaced_bs = bdrv_lookup_bs(replaces, replaces, errp); + if (replaced_bs == NULL) { + return; + } + } else { + replaced_bs = bs; + } + if (replaced_bs->blk && target->blk) { + error_setg(errp, "Can't create node with two BlockBackends"); + return; + } + s = block_job_create(driver, bs, speed, cb, opaque, errp); if (!s) { return; @@ -752,7 +882,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, bdrv_op_block_all(s->target, s->common.blocker); - bdrv_set_enable_write_cache(s->target, true); if (s->target->blk) { blk_set_on_error(s->target->blk, on_target_error, on_target_error); blk_iostatus_enable(s->target->blk); diff --git a/block/nbd-client.c b/block/nbd-client.c index b7fd17a115..878e879ace 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -26,8 +26,8 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "nbd-client.h" -#include "qemu/sockets.h" #define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs)) #define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs)) @@ -47,13 +47,21 @@ static void nbd_teardown_connection(BlockDriverState *bs) { NbdClientSession *client = nbd_get_client_session(bs); + if (!client->ioc) { /* Already closed */ + return; + } + /* finish any pending coroutines */ - shutdown(client->sock, 2); + qio_channel_shutdown(client->ioc, + QIO_CHANNEL_SHUTDOWN_BOTH, + NULL); nbd_recv_coroutines_enter_all(client); nbd_client_detach_aio_context(bs); - closesocket(client->sock); - client->sock = -1; + object_unref(OBJECT(client->sioc)); + client->sioc = NULL; + object_unref(OBJECT(client->ioc)); + client->ioc = NULL; } static void nbd_reply_ready(void *opaque) @@ -63,12 +71,16 @@ static void nbd_reply_ready(void *opaque) uint64_t i; int ret; + if (!s->ioc) { /* Already closed */ + return; + } + if (s->reply.handle == 0) { /* No reply already in flight. Fetch a header. It is possible * that another thread has done the same thing in parallel, so * the socket is not readable anymore. */ - ret = nbd_receive_reply(s->sock, &s->reply); + ret = nbd_receive_reply(s->ioc, &s->reply); if (ret == -EAGAIN) { return; } @@ -119,32 +131,35 @@ static int nbd_co_send_request(BlockDriverState *bs, } } + g_assert(qemu_in_coroutine()); assert(i < MAX_NBD_REQUESTS); request->handle = INDEX_TO_HANDLE(s, i); + + if (!s->ioc) { + qemu_co_mutex_unlock(&s->send_mutex); + return -EPIPE; + } + s->send_coroutine = qemu_coroutine_self(); aio_context = bdrv_get_aio_context(bs); - aio_set_fd_handler(aio_context, s->sock, false, + aio_set_fd_handler(aio_context, s->sioc->fd, false, nbd_reply_ready, nbd_restart_write, bs); if (qiov) { - if (!s->is_unix) { - socket_set_cork(s->sock, 1); - } - rc = nbd_send_request(s->sock, request); + qio_channel_set_cork(s->ioc, true); + rc = nbd_send_request(s->ioc, request); if (rc >= 0) { - ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov, - offset, request->len); + ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, + offset, request->len, 0); if (ret != request->len) { rc = -EIO; } } - if (!s->is_unix) { - socket_set_cork(s->sock, 0); - } + qio_channel_set_cork(s->ioc, false); } else { - rc = nbd_send_request(s->sock, request); + rc = nbd_send_request(s->ioc, request); } - aio_set_fd_handler(aio_context, s->sock, false, + aio_set_fd_handler(aio_context, s->sioc->fd, false, nbd_reply_ready, NULL, bs); s->send_coroutine = NULL; qemu_co_mutex_unlock(&s->send_mutex); @@ -161,12 +176,13 @@ static void nbd_co_receive_reply(NbdClientSession *s, * peek at the next reply and avoid yielding if it's ours? */ qemu_coroutine_yield(); *reply = s->reply; - if (reply->handle != request->handle) { + if (reply->handle != request->handle || + !s->ioc) { reply->error = EIO; } else { if (qiov && reply->error == 0) { - ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov, - offset, request->len); + ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, + offset, request->len, 1); if (ret != request->len) { reply->error = EIO; } @@ -227,15 +243,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num, static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, - int offset) + int offset, int *flags) { NbdClientSession *client = nbd_get_client_session(bs); struct nbd_request request = { .type = NBD_CMD_WRITE }; struct nbd_reply reply; ssize_t ret; - if (!bdrv_enable_write_cache(bs) && - (client->nbdflags & NBD_FLAG_SEND_FUA)) { + if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) { + *flags &= ~BDRV_REQ_FUA; request.type |= NBD_CMD_FLAG_FUA; } @@ -275,12 +291,13 @@ int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num, } int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) + int nb_sectors, QEMUIOVector *qiov, int *flags) { int offset = 0; int ret; while (nb_sectors > NBD_MAX_SECTORS) { - ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset); + ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset, + flags); if (ret < 0) { return ret; } @@ -288,7 +305,7 @@ int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, sector_num += NBD_MAX_SECTORS; nb_sectors -= NBD_MAX_SECTORS; } - return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset); + return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags); } int nbd_client_co_flush(BlockDriverState *bs) @@ -302,10 +319,6 @@ int nbd_client_co_flush(BlockDriverState *bs) return 0; } - if (client->nbdflags & NBD_FLAG_SEND_FUA) { - request.type |= NBD_CMD_FLAG_FUA; - } - request.from = 0; request.len = 0; @@ -349,14 +362,14 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, void nbd_client_detach_aio_context(BlockDriverState *bs) { aio_set_fd_handler(bdrv_get_aio_context(bs), - nbd_get_client_session(bs)->sock, + nbd_get_client_session(bs)->sioc->fd, false, NULL, NULL, NULL); } void nbd_client_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { - aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock, + aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd, false, nbd_reply_ready, NULL, bs); } @@ -369,16 +382,20 @@ void nbd_client_close(BlockDriverState *bs) .len = 0 }; - if (client->sock == -1) { + if (client->ioc == NULL) { return; } - nbd_send_request(client->sock, &request); + nbd_send_request(client->ioc, &request); nbd_teardown_connection(bs); } -int nbd_client_init(BlockDriverState *bs, int sock, const char *export, +int nbd_client_init(BlockDriverState *bs, + QIOChannelSocket *sioc, + const char *export, + QCryptoTLSCreds *tlscreds, + const char *hostname, Error **errp) { NbdClientSession *client = nbd_get_client_session(bs); @@ -386,22 +403,32 @@ int nbd_client_init(BlockDriverState *bs, int sock, const char *export, /* NBD handshake */ logout("session init %s\n", export); - qemu_set_block(sock); - ret = nbd_receive_negotiate(sock, export, - &client->nbdflags, &client->size, errp); + qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL); + + ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export, + &client->nbdflags, + tlscreds, hostname, + &client->ioc, + &client->size, errp); if (ret < 0) { logout("Failed to negotiate with the NBD server\n"); - closesocket(sock); return ret; } qemu_co_mutex_init(&client->send_mutex); qemu_co_mutex_init(&client->free_sema); - client->sock = sock; + client->sioc = sioc; + object_ref(OBJECT(client->sioc)); + + if (!client->ioc) { + client->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(client->ioc)); + } /* Now that we're connected, set the socket to be non-blocking and * kick the reply mechanism. */ - qemu_set_nonblock(sock); + qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL); + nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs)); logout("Established connection with NBD server\n"); diff --git a/block/nbd-client.h b/block/nbd-client.h index e8413408b5..bc7aec0795 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -4,6 +4,7 @@ #include "qemu-common.h" #include "block/nbd.h" #include "block/block_int.h" +#include "io/channel-socket.h" /* #define DEBUG_NBD */ @@ -17,7 +18,8 @@ #define MAX_NBD_REQUESTS 16 typedef struct NbdClientSession { - int sock; + QIOChannelSocket *sioc; /* The master data channel */ + QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */ uint32_t nbdflags; off_t size; @@ -34,7 +36,11 @@ typedef struct NbdClientSession { NbdClientSession *nbd_get_client_session(BlockDriverState *bs); -int nbd_client_init(BlockDriverState *bs, int sock, const char *export_name, +int nbd_client_init(BlockDriverState *bs, + QIOChannelSocket *sock, + const char *export_name, + QCryptoTLSCreds *tlscreds, + const char *hostname, Error **errp); void nbd_client_close(BlockDriverState *bs); @@ -42,7 +48,7 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov); + int nb_sectors, QEMUIOVector *qiov, int *flags); int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); diff --git a/block/nbd.c b/block/nbd.c index cd6a587776..f7ea3b3608 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -26,18 +26,17 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "block/nbd-client.h" +#include "qapi/error.h" #include "qemu/uri.h" #include "block/block_int.h" #include "qemu/module.h" -#include "qemu/sockets.h" #include "qapi/qmp/qdict.h" #include "qapi/qmp/qjson.h" #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" - -#include <sys/types.h> -#include <unistd.h> +#include "qemu/cutils.h" #define EN_OPTSTR ":exportname=" @@ -206,18 +205,20 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export, saddr = g_new0(SocketAddress, 1); if (qdict_haskey(options, "path")) { + UnixSocketAddress *q_unix; saddr->type = SOCKET_ADDRESS_KIND_UNIX; - saddr->u.q_unix = g_new0(UnixSocketAddress, 1); - saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path")); + q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1); + q_unix->path = g_strdup(qdict_get_str(options, "path")); qdict_del(options, "path"); } else { + InetSocketAddress *inet; saddr->type = SOCKET_ADDRESS_KIND_INET; - saddr->u.inet = g_new0(InetSocketAddress, 1); - saddr->u.inet->host = g_strdup(qdict_get_str(options, "host")); + inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1); + inet->host = g_strdup(qdict_get_str(options, "host")); if (!qdict_get_try_str(options, "port")) { - saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); + inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); } else { - saddr->u.inet->port = g_strdup(qdict_get_str(options, "port")); + inet->port = g_strdup(qdict_get_str(options, "port")); } qdict_del(options, "host"); qdict_del(options, "port"); @@ -239,55 +240,113 @@ NbdClientSession *nbd_get_client_session(BlockDriverState *bs) return &s->client; } -static int nbd_establish_connection(BlockDriverState *bs, - SocketAddress *saddr, - Error **errp) +static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr, + Error **errp) { - BDRVNBDState *s = bs->opaque; - int sock; + QIOChannelSocket *sioc; + Error *local_err = NULL; - sock = socket_connect(saddr, errp, NULL, NULL); + sioc = qio_channel_socket_new(); - if (sock < 0) { - logout("Failed to establish connection to NBD server\n"); - return -EIO; + qio_channel_socket_connect_sync(sioc, + saddr, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; } - if (!s->client.is_unix) { - socket_set_nodelay(sock); + qio_channel_set_delay(QIO_CHANNEL(sioc), false); + + return sioc; +} + + +static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp) +{ + Object *obj; + QCryptoTLSCreds *creds; + + obj = object_resolve_path_component( + object_get_objects_root(), id); + if (!obj) { + error_setg(errp, "No TLS credentials with id '%s'", + id); + return NULL; + } + creds = (QCryptoTLSCreds *) + object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS); + if (!creds) { + error_setg(errp, "Object with id '%s' is not TLS credentials", + id); + return NULL; } - return sock; + if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + error_setg(errp, + "Expecting TLS credentials with a client endpoint"); + return NULL; + } + object_ref(obj); + return creds; } + static int nbd_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVNBDState *s = bs->opaque; char *export = NULL; - int result, sock; + QIOChannelSocket *sioc = NULL; SocketAddress *saddr; + const char *tlscredsid; + QCryptoTLSCreds *tlscreds = NULL; + const char *hostname = NULL; + int ret = -EINVAL; /* Pop the config into our state object. Exit if invalid. */ saddr = nbd_config(s, options, &export, errp); if (!saddr) { - return -EINVAL; + goto error; + } + + tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds")); + if (tlscredsid) { + qdict_del(options, "tls-creds"); + tlscreds = nbd_get_tls_creds(tlscredsid, errp); + if (!tlscreds) { + goto error; + } + + if (saddr->type != SOCKET_ADDRESS_KIND_INET) { + error_setg(errp, "TLS only supported over IP sockets"); + goto error; + } + hostname = saddr->u.inet.data->host; } /* establish TCP connection, return error if it fails * TODO: Configurable retry-until-timeout behaviour. */ - sock = nbd_establish_connection(bs, saddr, errp); - qapi_free_SocketAddress(saddr); - if (sock < 0) { - g_free(export); - return sock; + sioc = nbd_establish_connection(saddr, errp); + if (!sioc) { + ret = -ECONNREFUSED; + goto error; } /* NBD handshake */ - result = nbd_client_init(bs, sock, export, errp); + ret = nbd_client_init(bs, sioc, export, + tlscreds, hostname, errp); + error: + if (sioc) { + object_unref(OBJECT(sioc)); + } + if (tlscreds) { + object_unref(OBJECT(tlscreds)); + } + qapi_free_SocketAddress(saddr); g_free(export); - return result; + return ret; } static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, @@ -296,10 +355,29 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num, return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov); } +static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov, int flags) +{ + int ret; + + ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags); + if (ret < 0) { + return ret; + } + + /* The flag wasn't sent to the server, so we need to emulate it with an + * explicit flush */ + if (flags & BDRV_REQ_FUA) { + ret = nbd_client_co_flush(bs); + } + + return ret; +} + static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { - return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov); + return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0); } static int nbd_co_flush(BlockDriverState *bs) @@ -342,13 +420,14 @@ static void nbd_attach_aio_context(BlockDriverState *bs, nbd_client_attach_aio_context(bs, new_context); } -static void nbd_refresh_filename(BlockDriverState *bs) +static void nbd_refresh_filename(BlockDriverState *bs, QDict *options) { QDict *opts = qdict_new(); - const char *path = qdict_get_try_str(bs->options, "path"); - const char *host = qdict_get_try_str(bs->options, "host"); - const char *port = qdict_get_try_str(bs->options, "port"); - const char *export = qdict_get_try_str(bs->options, "export"); + const char *path = qdict_get_try_str(options, "path"); + const char *host = qdict_get_try_str(options, "host"); + const char *port = qdict_get_try_str(options, "port"); + const char *export = qdict_get_try_str(options, "export"); + const char *tlscreds = qdict_get_try_str(options, "tls-creds"); qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd"))); @@ -383,6 +462,9 @@ static void nbd_refresh_filename(BlockDriverState *bs) if (export) { qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export))); } + if (tlscreds) { + qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds))); + } bs->full_open_options = opts; } @@ -395,6 +477,8 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_readv = nbd_co_readv, .bdrv_co_writev = nbd_co_writev, + .bdrv_co_writev_flags = nbd_co_writev_flags, + .supported_write_flags = BDRV_REQ_FUA, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, @@ -413,6 +497,8 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_readv = nbd_co_readv, .bdrv_co_writev = nbd_co_writev, + .bdrv_co_writev_flags = nbd_co_writev_flags, + .supported_write_flags = BDRV_REQ_FUA, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, @@ -431,6 +517,8 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_readv = nbd_co_readv, .bdrv_co_writev = nbd_co_writev, + .bdrv_co_writev_flags = nbd_co_writev_flags, + .supported_write_flags = BDRV_REQ_FUA, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_discard = nbd_co_discard, diff --git a/block/nfs.c b/block/nfs.c index fd79f89945..9f51cc3f10 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -22,20 +22,23 @@ * THE SOFTWARE. */ -#include "config-host.h" +#include "qemu/osdep.h" #include <poll.h> #include "qemu-common.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qapi/error.h" #include "block/block_int.h" #include "trace.h" #include "qemu/iov.h" #include "qemu/uri.h" +#include "qemu/cutils.h" #include "sysemu/sysemu.h" #include <nfsc/libnfs.h> #define QEMU_NFS_MAX_READAHEAD_SIZE 1048576 +#define QEMU_NFS_MAX_DEBUG_LEVEL 2 typedef struct NFSClient { struct nfs_context *context; @@ -334,6 +337,17 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, } nfs_set_readahead(client->context, val); #endif +#ifdef LIBNFS_FEATURE_DEBUG + } else if (!strcmp(qp->p[i].name, "debug")) { + /* limit the maximum debug level to avoid potential flooding + * of our log files. */ + if (val > QEMU_NFS_MAX_DEBUG_LEVEL) { + error_report("NFS Warning: Limiting NFS debug level" + " to %d", QEMU_NFS_MAX_DEBUG_LEVEL); + val = QEMU_NFS_MAX_DEBUG_LEVEL; + } + nfs_set_debug(client->context, val); +#endif } else { error_setg(errp, "Unknown NFS parameter name: %s", qp->p[i].name); diff --git a/block/null.c b/block/null.c index 7d083233fb..396500babd 100644 --- a/block/null.c +++ b/block/null.c @@ -10,13 +10,17 @@ * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "block/block_int.h" #define NULL_OPT_LATENCY "latency-ns" +#define NULL_OPT_ZEROES "read-zeroes" typedef struct { int64_t length; int64_t latency_ns; + bool read_zeroes; } BDRVNullState; static QemuOptsList runtime_opts = { @@ -39,6 +43,11 @@ static QemuOptsList runtime_opts = { .help = "nanoseconds (approximated) to wait " "before completing request", }, + { + .name = NULL_OPT_ZEROES, + .type = QEMU_OPT_BOOL, + .help = "return zeroes when read", + }, { /* end of list */ } }, }; @@ -60,6 +69,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags, error_setg(errp, "latency-ns is invalid"); ret = -EINVAL; } + s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false); qemu_opts_del(opts); return ret; } @@ -89,6 +99,12 @@ static coroutine_fn int null_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { + BDRVNullState *s = bs->opaque; + + if (s->read_zeroes) { + qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE); + } + return null_co_common(bs); } @@ -158,6 +174,12 @@ static BlockAIOCB *null_aio_readv(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { + BDRVNullState *s = bs->opaque; + + if (s->read_zeroes) { + qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE); + } + return null_aio_common(bs, cb, opaque); } @@ -183,6 +205,24 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state, return 0; } +static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, + int nb_sectors, int *pnum, + BlockDriverState **file) +{ + BDRVNullState *s = bs->opaque; + off_t start = sector_num * BDRV_SECTOR_SIZE; + + *pnum = nb_sectors; + *file = bs; + + if (s->read_zeroes) { + return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO; + } else { + return BDRV_BLOCK_OFFSET_VALID | start; + } +} + static BlockDriver bdrv_null_co = { .format_name = "null-co", .protocol_name = "null-co", @@ -196,6 +236,8 @@ static BlockDriver bdrv_null_co = { .bdrv_co_writev = null_co_writev, .bdrv_co_flush_to_disk = null_co_flush, .bdrv_reopen_prepare = null_reopen_prepare, + + .bdrv_co_get_block_status = null_co_get_block_status, }; static BlockDriver bdrv_null_aio = { @@ -211,6 +253,8 @@ static BlockDriver bdrv_null_aio = { .bdrv_aio_writev = null_aio_writev, .bdrv_aio_flush = null_aio_flush, .bdrv_reopen_prepare = null_reopen_prepare, + + .bdrv_co_get_block_status = null_co_get_block_status, }; static void bdrv_null_init(void) diff --git a/block/parallels.c b/block/parallels.c index f689fdeaff..324ed43ac4 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -27,8 +27,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "qemu/bitmap.h" #include "qapi/util.h" @@ -61,7 +64,7 @@ typedef struct ParallelsHeader { typedef enum ParallelsPreallocMode { PRL_PREALLOC_MODE_FALLOCATE = 0, PRL_PREALLOC_MODE_TRUNCATE = 1, - PRL_PREALLOC_MODE_MAX = 2, + PRL_PREALLOC_MODE__MAX = 2, } ParallelsPreallocMode; static const char *prealloc_mode_lookup[] = { @@ -260,7 +263,7 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { BDRVParallelsState *s = bs->opaque; int64_t offset; @@ -273,6 +276,7 @@ static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, return 0; } + *file = bs->file->bs; return (offset << BDRV_SECTOR_BITS) | BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } @@ -459,7 +463,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size, cl_size; uint8_t tmp[BDRV_SECTOR_SIZE]; Error *local_err = NULL; - BlockDriverState *file; + BlockBackend *file; uint32_t bat_entries, bat_sectors; ParallelsHeader header; int ret; @@ -475,14 +479,16 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) return ret; } - file = NULL; - ret = bdrv_open(&file, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + file = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (file == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } - ret = bdrv_truncate(file, 0); + + blk_set_allow_write_beyond_eof(file, true); + + ret = blk_truncate(file, 0); if (ret < 0) { goto exit; } @@ -506,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) memset(tmp, 0, sizeof(tmp)); memcpy(tmp, &header, sizeof(header)); - ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); + ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); if (ret < 0) { goto exit; } - ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0); + ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0); if (ret < 0) { goto exit; } ret = 0; done: - bdrv_unref(file); + blk_unref(file); return ret; exit: @@ -660,7 +666,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf, - PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err); + PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err); g_free(buf); if (local_err != NULL) { goto fail_options; diff --git a/block/qapi.c b/block/qapi.c index 267f147fe3..c5f6ba643c 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -22,6 +22,7 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "block/qapi.h" #include "block/block_int.h" #include "block/throttle-groups.h" @@ -31,8 +32,10 @@ #include "qapi/qmp-output-visitor.h" #include "qapi/qmp/types.h" #include "sysemu/block-backend.h" +#include "qemu/cutils.h" -BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) +BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk, + BlockDriverState *bs, Error **errp) { ImageInfo **p_image_info; BlockDriverState *bs0; @@ -46,7 +49,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->cache = g_new(BlockdevCacheInfo, 1); *info->cache = (BlockdevCacheInfo) { - .writeback = bdrv_enable_write_cache(bs), + .writeback = blk ? blk_enable_write_cache(blk) : true, .direct = !!(bs->open_flags & BDRV_O_NOCACHE), .no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH), }; @@ -91,6 +94,26 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max; + info->has_bps_max_length = info->has_bps_max; + info->bps_max_length = + cfg.buckets[THROTTLE_BPS_TOTAL].burst_length; + info->has_bps_rd_max_length = info->has_bps_rd_max; + info->bps_rd_max_length = + cfg.buckets[THROTTLE_BPS_READ].burst_length; + info->has_bps_wr_max_length = info->has_bps_wr_max; + info->bps_wr_max_length = + cfg.buckets[THROTTLE_BPS_WRITE].burst_length; + + info->has_iops_max_length = info->has_iops_max; + info->iops_max_length = + cfg.buckets[THROTTLE_OPS_TOTAL].burst_length; + info->has_iops_rd_max_length = info->has_iops_rd_max; + info->iops_rd_max_length = + cfg.buckets[THROTTLE_OPS_READ].burst_length; + info->has_iops_wr_max_length = info->has_iops_wr_max; + info->iops_wr_max_length = + cfg.buckets[THROTTLE_OPS_WRITE].burst_length; + info->has_iops_size = cfg.op_size; info->iops_size = cfg.op_size; @@ -210,11 +233,13 @@ void bdrv_query_image_info(BlockDriverState *bs, Error *err = NULL; ImageInfo *info; + aio_context_acquire(bdrv_get_aio_context(bs)); + size = bdrv_getlength(bs); if (size < 0) { error_setg_errno(errp, -size, "Can't get size of device '%s'", bdrv_get_device_name(bs)); - return; + goto out; } info = g_new0(ImageInfo, 1); @@ -245,15 +270,18 @@ void bdrv_query_image_info(BlockDriverState *bs, info->has_backing_filename = true; bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err); if (err) { - error_propagate(errp, err); - qapi_free_ImageInfo(info); + /* Can't reconstruct the full backing filename, so we must omit + * this field and apply a Best Effort to this query. */ g_free(backing_filename2); - return; + backing_filename2 = NULL; + error_free(err); + err = NULL; } - if (strcmp(backing_filename, backing_filename2) != 0) { - info->full_backing_filename = - g_strdup(backing_filename2); + /* Always report the full_backing_filename if present, even if it's the + * same as backing_filename. That they are same is useful info. */ + if (backing_filename2) { + info->full_backing_filename = g_strdup(backing_filename2); info->has_full_backing_filename = true; } @@ -279,10 +307,13 @@ void bdrv_query_image_info(BlockDriverState *bs, default: error_propagate(errp, err); qapi_free_ImageInfo(info); - return; + goto out; } *p_info = info; + +out: + aio_context_release(bdrv_get_aio_context(bs)); } /* @p_info will be set only on success. */ @@ -296,7 +327,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, info->locked = blk_dev_is_medium_locked(blk); info->removable = blk_dev_has_removable_media(blk); - if (blk_dev_has_removable_media(blk)) { + if (blk_dev_has_tray(blk)) { info->has_tray_open = true; info->tray_open = blk_dev_is_tray_open(blk); } @@ -313,7 +344,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, if (bs && bs->drv) { info->has_inserted = true; - info->inserted = bdrv_block_device_info(bs, errp); + info->inserted = bdrv_block_device_info(blk, bs, errp); if (info->inserted == NULL) { goto err; } @@ -326,100 +357,115 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, qapi_free_BlockInfo(info); } -static BlockStats *bdrv_query_stats(const BlockDriverState *bs, - bool query_backing) +static BlockStats *bdrv_query_stats(BlockBackend *blk, + const BlockDriverState *bs, + bool query_backing); + +static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk) { - BlockStats *s; + BlockAcctStats *stats = blk_get_stats(blk); + BlockAcctTimedStats *ts = NULL; - s = g_malloc0(sizeof(*s)); + ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; + ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; + ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; + ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; - if (bdrv_get_device_name(bs)[0]) { - s->has_device = true; - s->device = g_strdup(bdrv_get_device_name(bs)); - } + ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; - if (bdrv_get_node_name(bs)[0]) { - s->has_node_name = true; - s->node_name = g_strdup(bdrv_get_node_name(bs)); + ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + ds->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + + ds->rd_merged = stats->merged[BLOCK_ACCT_READ]; + ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; + ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; + ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; + ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; + ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + + ds->has_idle_time_ns = stats->last_access_time_ns > 0; + if (ds->has_idle_time_ns) { + ds->idle_time_ns = block_acct_idle_time_ns(stats); } - s->stats = g_malloc0(sizeof(*s->stats)); - if (bs->blk) { - BlockAcctStats *stats = blk_get_stats(bs->blk); - BlockAcctTimedStats *ts = NULL; - - s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; - s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; - s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; - s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; - - s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; - s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; - s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; - - s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; - s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; - s->stats->invalid_flush_operations = - stats->invalid_ops[BLOCK_ACCT_FLUSH]; - - s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; - s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; - s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; - s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE]; - s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; - s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; - - s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; - if (s->stats->has_idle_time_ns) { - s->stats->idle_time_ns = block_acct_idle_time_ns(stats); - } + ds->account_invalid = stats->account_invalid; + ds->account_failed = stats->account_failed; - s->stats->account_invalid = stats->account_invalid; - s->stats->account_failed = stats->account_failed; + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = ds->timed_stats; + timed_stats->value = dev_stats; + ds->timed_stats = timed_stats; - while ((ts = block_acct_interval_next(stats, ts))) { - BlockDeviceTimedStatsList *timed_stats = - g_malloc0(sizeof(*timed_stats)); - BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); - timed_stats->next = s->stats->timed_stats; - timed_stats->value = dev_stats; - s->stats->timed_stats = timed_stats; + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; - TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; - TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; - TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + dev_stats->interval_length = ts->interval_length; - dev_stats->interval_length = ts->interval_length; + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); - dev_stats->min_rd_latency_ns = timed_average_min(rd); - dev_stats->max_rd_latency_ns = timed_average_max(rd); - dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); - dev_stats->min_wr_latency_ns = timed_average_min(wr); - dev_stats->max_wr_latency_ns = timed_average_max(wr); - dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); - dev_stats->min_flush_latency_ns = timed_average_min(fl); - dev_stats->max_flush_latency_ns = timed_average_max(fl); - dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + } +} - dev_stats->avg_rd_queue_depth = - block_acct_queue_depth(ts, BLOCK_ACCT_READ); - dev_stats->avg_wr_queue_depth = - block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); - } +static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs, + bool query_backing) +{ + if (bdrv_get_node_name(bs)[0]) { + s->has_node_name = true; + s->node_name = g_strdup(bdrv_get_node_name(bs)); } s->stats->wr_highest_offset = bs->wr_highest_offset; if (bs->file) { s->has_parent = true; - s->parent = bdrv_query_stats(bs->file->bs, query_backing); + s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing); } if (query_backing && bs->backing) { s->has_backing = true; - s->backing = bdrv_query_stats(bs->backing->bs, query_backing); + s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing); + } + +} + +static BlockStats *bdrv_query_stats(BlockBackend *blk, + const BlockDriverState *bs, + bool query_backing) +{ + BlockStats *s; + + s = g_malloc0(sizeof(*s)); + s->stats = g_malloc0(sizeof(*s->stats)); + + if (blk) { + s->has_device = true; + s->device = g_strdup(blk_name(blk)); + bdrv_query_blk_stats(s->stats, blk); + } + if (bs) { + bdrv_query_bds_stats(s, bs, query_backing); } return s; @@ -448,22 +494,38 @@ BlockInfoList *qmp_query_block(Error **errp) return head; } +static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs, + bool query_nodes) +{ + if (query_nodes) { + *bs = bdrv_next_node(*bs); + return !!*bs; + } + + *blk = blk_next(*blk); + *bs = *blk ? blk_bs(*blk) : NULL; + + return !!*blk; +} + BlockStatsList *qmp_query_blockstats(bool has_query_nodes, bool query_nodes, Error **errp) { BlockStatsList *head = NULL, **p_next = &head; + BlockBackend *blk = NULL; BlockDriverState *bs = NULL; /* Just to be safe if query_nodes is not always initialized */ query_nodes = has_query_nodes && query_nodes; - while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) { + while (next_query_bds(&blk, &bs, query_nodes)) { BlockStatsList *info = g_malloc0(sizeof(*info)); - AioContext *ctx = bdrv_get_aio_context(bs); + AioContext *ctx = blk ? blk_get_aio_context(blk) + : bdrv_get_aio_context(bs); aio_context_acquire(ctx); - info->value = bdrv_query_stats(bs, !query_nodes); + info->value = bdrv_query_stats(blk, bs, !query_nodes); aio_context_release(ctx); *p_next = info; @@ -588,11 +650,10 @@ static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation, int i = 0; for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) { - qtype_code type = qobject_type(entry->value); + QType type = qobject_type(entry->value); bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST); - const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: "; - - func_fprintf(f, format, indentation * 4, "", i); + func_fprintf(f, "%*s[%i]:%c", indentation * 4, "", i, + composite ? '\n' : ' '); dump_qobject(func_fprintf, f, indentation + 1, entry->value); if (!composite) { func_fprintf(f, "\n"); @@ -606,10 +667,9 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation, const QDictEntry *entry; for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) { - qtype_code type = qobject_type(entry->value); + QType type = qobject_type(entry->value); bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST); - const char *format = composite ? "%*s%s:\n" : "%*s%s: "; - char key[strlen(entry->key) + 1]; + char *key = g_malloc(strlen(entry->key) + 1); int i; /* replace dashes with spaces in key (variable) names */ @@ -617,12 +677,13 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation, key[i] = entry->key[i] == '-' ? ' ' : entry->key[i]; } key[i] = 0; - - func_fprintf(f, format, indentation * 4, "", key); + func_fprintf(f, "%*s%s:%c", indentation * 4, "", key, + composite ? '\n' : ' '); dump_qobject(func_fprintf, f, indentation + 1, entry->value); if (!composite) { func_fprintf(f, "\n"); } + g_free(key); } } @@ -632,7 +693,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f, QmpOutputVisitor *ov = qmp_output_visitor_new(); QObject *obj, *data; - visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL, + visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec, &error_abort); obj = qmp_output_get_qobject(ov); assert(qobject_type(obj) == QTYPE_QDICT); @@ -676,7 +737,10 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f, if (info->has_backing_filename) { func_fprintf(f, "backing file: %s", info->backing_filename); - if (info->has_full_backing_filename) { + if (!info->has_full_backing_filename) { + func_fprintf(f, " (cannot determine actual path)"); + } else if (strcmp(info->backing_filename, + info->full_backing_filename) != 0) { func_fprintf(f, " (actual path: %s)", info->full_backing_filename); } func_fprintf(f, "\n"); diff --git a/block/qcow.c b/block/qcow.c index 635085e27b..60ddb12eca 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -21,8 +21,12 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" +#include "qemu/error-report.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include <zlib.h> #include "qapi/qmp/qerror.h" @@ -119,11 +123,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } if (header.version != QCOW_VERSION) { - char version[64]; - snprintf(version, sizeof(version), "QCOW version %" PRIu32, - header.version); - error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bdrv_get_device_or_node_name(bs), "qcow", version); + error_setg(errp, "Unsupported qcow version %" PRIu32, header.version); ret = -ENOTSUP; goto fail; } @@ -159,6 +159,14 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, } s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) { + if (bdrv_uses_whitelist() && + s->crypt_method_header == QCOW_CRYPT_AES) { + error_report("qcow built-in AES encryption is deprecated"); + error_printf("Support for it will be removed in a future release.\n" + "You can use 'qemu-img convert' to switch to an\n" + "unencrypted qcow image, or a LUKS raw image.\n"); + } + bs->encrypted = 1; } s->cluster_bits = header.cluster_bits; @@ -488,7 +496,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, } static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { BDRVQcowState *s = bs->opaque; int index_in_cluster, n; @@ -509,6 +517,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs, return BDRV_BLOCK_DATA; } cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + *file = bs->file->bs; return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset; } @@ -778,7 +787,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) int flags = 0; Error *local_err = NULL; int ret; - BlockDriverState *qcow_bs; + BlockBackend *qcow_blk; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), @@ -794,15 +803,17 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) goto cleanup; } - qcow_bs = NULL; - ret = bdrv_open(&qcow_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + qcow_blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (qcow_blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto cleanup; } - ret = bdrv_truncate(qcow_bs, 0); + blk_set_allow_write_beyond_eof(qcow_blk, true); + + ret = blk_truncate(qcow_blk, 0); if (ret < 0) { goto exit; } @@ -842,13 +853,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) } /* write all the data */ - ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header)); + ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header)); if (ret != sizeof(header)) { goto exit; } if (backing_file) { - ret = bdrv_pwrite(qcow_bs, sizeof(header), + ret = blk_pwrite(qcow_blk, sizeof(header), backing_file, backing_filename_len); if (ret != backing_filename_len) { goto exit; @@ -858,7 +869,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) tmp = g_malloc0(BDRV_SECTOR_SIZE); for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/ BDRV_SECTOR_SIZE); i++) { - ret = bdrv_pwrite(qcow_bs, header_size + + ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE); if (ret != BDRV_SECTOR_SIZE) { g_free(tmp); @@ -869,7 +880,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) g_free(tmp); ret = 0; exit: - bdrv_unref(qcow_bs); + blk_unref(qcow_blk); cleanup: g_free(backing_file); return ret; diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 86dd7f2bd9..0fe8edae41 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -23,7 +23,7 @@ */ /* Needed for CONFIG_MADVISE */ -#include "config-host.h" +#include "qemu/osdep.h" #if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) #include <sys/mman.h> @@ -31,7 +31,6 @@ #include "block/block_int.h" #include "qemu-common.h" -#include "qemu/osdep.h" #include "qcow2.h" #include "trace.h" diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 24a60e2236..31ecc10304 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -22,8 +22,10 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include <zlib.h> +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" #include "block/qcow2.h" @@ -1641,7 +1643,8 @@ fail: static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, int l1_size, int64_t *visited_l1_entries, int64_t l1_entries, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { BDRVQcow2State *s = bs->opaque; bool is_active_l1 = (l1_table == s->l1_table); @@ -1667,7 +1670,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, /* unallocated */ (*visited_l1_entries)++; if (status_cb) { - status_cb(bs, *visited_l1_entries, l1_entries); + status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); } continue; } @@ -1804,7 +1807,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, (*visited_l1_entries)++; if (status_cb) { - status_cb(bs, *visited_l1_entries, l1_entries); + status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque); } } @@ -1828,7 +1831,8 @@ fail: * qcow2 version which doesn't yet support metadata zero clusters. */ int qcow2_expand_zero_clusters(BlockDriverState *bs, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { BDRVQcow2State *s = bs->opaque; uint64_t *l1_table = NULL; @@ -1845,7 +1849,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size, &visited_l1_entries, l1_entries, - status_cb); + status_cb, cb_opaque); if (ret < 0) { goto fail; } @@ -1881,7 +1885,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size, &visited_l1_entries, l1_entries, - status_cb); + status_cb, cb_opaque); if (ret < 0) { goto fail; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 820f412ab6..ca6094ff5b 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -22,6 +22,8 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" #include "block/qcow2.h" @@ -1345,6 +1347,9 @@ static int inc_refcounts(BlockDriverState *bs, if (refcount == s->refcount_max) { fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64 "\n", cluster_offset); + fprintf(stderr, "Use qemu-img amend to increase the refcount entry " + "width or qemu-img convert to create a clean copy if the " + "image cannot be opened for writing\n"); res->corruptions++; continue; } @@ -2467,3 +2472,450 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, return 0; } + +/* A pointer to a function of this type is given to walk_over_reftable(). That + * function will create refblocks and pass them to a RefblockFinishOp once they + * are completed (@refblock). @refblock_empty is set if the refblock is + * completely empty. + * + * Along with the refblock, a corresponding reftable entry is passed, in the + * reftable @reftable (which may be reallocated) at @reftable_index. + * + * @allocated should be set to true if a new cluster has been allocated. + */ +typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, + bool *allocated, Error **errp); + +/** + * This "operation" for walk_over_reftable() allocates the refblock on disk (if + * it is not empty) and inserts its offset into the new reftable. The size of + * this new reftable is increased as required. + */ +static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, bool *allocated, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + + if (!refblock_empty && reftable_index >= *reftable_size) { + uint64_t *new_reftable; + uint64_t new_reftable_size; + + new_reftable_size = ROUND_UP(reftable_index + 1, + s->cluster_size / sizeof(uint64_t)); + if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) { + error_setg(errp, + "This operation would make the refcount table grow " + "beyond the maximum size supported by QEMU, aborting"); + return -ENOTSUP; + } + + new_reftable = g_try_realloc(*reftable, new_reftable_size * + sizeof(uint64_t)); + if (!new_reftable) { + error_setg(errp, "Failed to increase reftable buffer size"); + return -ENOMEM; + } + + memset(new_reftable + *reftable_size, 0, + (new_reftable_size - *reftable_size) * sizeof(uint64_t)); + + *reftable = new_reftable; + *reftable_size = new_reftable_size; + } + + if (!refblock_empty && !(*reftable)[reftable_index]) { + offset = qcow2_alloc_clusters(bs, s->cluster_size); + if (offset < 0) { + error_setg_errno(errp, -offset, "Failed to allocate refblock"); + return offset; + } + (*reftable)[reftable_index] = offset; + *allocated = true; + } + + return 0; +} + +/** + * This "operation" for walk_over_reftable() writes the refblock to disk at the + * offset specified by the new reftable's entry. It does not modify the new + * reftable or change any refcounts. + */ +static int flush_refblock(BlockDriverState *bs, uint64_t **reftable, + uint64_t reftable_index, uint64_t *reftable_size, + void *refblock, bool refblock_empty, bool *allocated, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + int64_t offset; + int ret; + + if (reftable_index < *reftable_size && (*reftable)[reftable_index]) { + offset = (*reftable)[reftable_index]; + + ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Overlap check failed"); + return ret; + } + + ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write refblock"); + return ret; + } + } else { + assert(refblock_empty); + } + + return 0; +} + +/** + * This function walks over the existing reftable and every referenced refblock; + * if @new_set_refcount is non-NULL, it is called for every refcount entry to + * create an equal new entry in the passed @new_refblock. Once that + * @new_refblock is completely filled, @operation will be called. + * + * @status_cb and @cb_opaque are used for the amend operation's status callback. + * @index is the index of the walk_over_reftable() calls and @total is the total + * number of walk_over_reftable() calls per amend operation. Both are used for + * calculating the parameters for the status callback. + * + * @allocated is set to true if a new cluster has been allocated. + */ +static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable, + uint64_t *new_reftable_index, + uint64_t *new_reftable_size, + void *new_refblock, int new_refblock_size, + int new_refcount_bits, + RefblockFinishOp *operation, bool *allocated, + Qcow2SetRefcountFunc *new_set_refcount, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, int index, int total, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + uint64_t reftable_index; + bool new_refblock_empty = true; + int refblock_index; + int new_refblock_index = 0; + int ret; + + for (reftable_index = 0; reftable_index < s->refcount_table_size; + reftable_index++) + { + uint64_t refblock_offset = s->refcount_table[reftable_index] + & REFT_OFFSET_MASK; + + status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index, + (uint64_t)total * s->refcount_table_size, cb_opaque); + + if (refblock_offset) { + void *refblock; + + if (offset_into_cluster(s, refblock_offset)) { + qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" + PRIx64 " unaligned (reftable index: %#" + PRIx64 ")", refblock_offset, + reftable_index); + error_setg(errp, + "Image is corrupt (unaligned refblock offset)"); + return -EIO; + } + + ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset, + &refblock); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to retrieve refblock"); + return ret; + } + + for (refblock_index = 0; refblock_index < s->refcount_block_size; + refblock_index++) + { + uint64_t refcount; + + if (new_refblock_index >= new_refblock_size) { + /* new_refblock is now complete */ + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, + new_refblock_empty, allocated, errp); + if (ret < 0) { + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + return ret; + } + + (*new_reftable_index)++; + new_refblock_index = 0; + new_refblock_empty = true; + } + + refcount = s->get_refcount(refblock, refblock_index); + if (new_refcount_bits < 64 && refcount >> new_refcount_bits) { + uint64_t offset; + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + + offset = ((reftable_index << s->refcount_block_bits) + + refblock_index) << s->cluster_bits; + + error_setg(errp, "Cannot decrease refcount entry width to " + "%i bits: Cluster at offset %#" PRIx64 " has a " + "refcount of %" PRIu64, new_refcount_bits, + offset, refcount); + return -EINVAL; + } + + if (new_set_refcount) { + new_set_refcount(new_refblock, new_refblock_index++, + refcount); + } else { + new_refblock_index++; + } + new_refblock_empty = new_refblock_empty && refcount == 0; + } + + qcow2_cache_put(bs, s->refcount_block_cache, &refblock); + } else { + /* No refblock means every refcount is 0 */ + for (refblock_index = 0; refblock_index < s->refcount_block_size; + refblock_index++) + { + if (new_refblock_index >= new_refblock_size) { + /* new_refblock is now complete */ + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, + new_refblock_empty, allocated, errp); + if (ret < 0) { + return ret; + } + + (*new_reftable_index)++; + new_refblock_index = 0; + new_refblock_empty = true; + } + + if (new_set_refcount) { + new_set_refcount(new_refblock, new_refblock_index++, 0); + } else { + new_refblock_index++; + } + } + } + } + + if (new_refblock_index > 0) { + /* Complete the potentially existing partially filled final refblock */ + if (new_set_refcount) { + for (; new_refblock_index < new_refblock_size; + new_refblock_index++) + { + new_set_refcount(new_refblock, new_refblock_index, 0); + } + } + + ret = operation(bs, new_reftable, *new_reftable_index, + new_reftable_size, new_refblock, new_refblock_empty, + allocated, errp); + if (ret < 0) { + return ret; + } + + (*new_reftable_index)++; + } + + status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size, + (uint64_t)total * s->refcount_table_size, cb_opaque); + + return 0; +} + +int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + Qcow2GetRefcountFunc *new_get_refcount; + Qcow2SetRefcountFunc *new_set_refcount; + void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size); + uint64_t *new_reftable = NULL, new_reftable_size = 0; + uint64_t *old_reftable, old_reftable_size, old_reftable_offset; + uint64_t new_reftable_index = 0; + uint64_t i; + int64_t new_reftable_offset = 0, allocated_reftable_size = 0; + int new_refblock_size, new_refcount_bits = 1 << refcount_order; + int old_refcount_order; + int walk_index = 0; + int ret; + bool new_allocation; + + assert(s->qcow_version >= 3); + assert(refcount_order >= 0 && refcount_order <= 6); + + /* see qcow2_open() */ + new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3)); + + new_get_refcount = get_refcount_funcs[refcount_order]; + new_set_refcount = set_refcount_funcs[refcount_order]; + + + do { + int total_walks; + + new_allocation = false; + + /* At least we have to do this walk and the one which writes the + * refblocks; also, at least we have to do this loop here at least + * twice (normally), first to do the allocations, and second to + * determine that everything is correctly allocated, this then makes + * three walks in total */ + total_walks = MAX(walk_index + 2, 3); + + /* First, allocate the structures so they are present in the refcount + * structures */ + ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, + &new_reftable_size, NULL, new_refblock_size, + new_refcount_bits, &alloc_refblock, + &new_allocation, NULL, status_cb, cb_opaque, + walk_index++, total_walks, errp); + if (ret < 0) { + goto done; + } + + new_reftable_index = 0; + + if (new_allocation) { + if (new_reftable_offset) { + qcow2_free_clusters(bs, new_reftable_offset, + allocated_reftable_size * sizeof(uint64_t), + QCOW2_DISCARD_NEVER); + } + + new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size * + sizeof(uint64_t)); + if (new_reftable_offset < 0) { + error_setg_errno(errp, -new_reftable_offset, + "Failed to allocate the new reftable"); + ret = new_reftable_offset; + goto done; + } + allocated_reftable_size = new_reftable_size; + } + } while (new_allocation); + + /* Second, write the new refblocks */ + ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index, + &new_reftable_size, new_refblock, + new_refblock_size, new_refcount_bits, + &flush_refblock, &new_allocation, new_set_refcount, + status_cb, cb_opaque, walk_index, walk_index + 1, + errp); + if (ret < 0) { + goto done; + } + assert(!new_allocation); + + + /* Write the new reftable */ + ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset, + new_reftable_size * sizeof(uint64_t)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Overlap check failed"); + goto done; + } + + for (i = 0; i < new_reftable_size; i++) { + cpu_to_be64s(&new_reftable[i]); + } + + ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable, + new_reftable_size * sizeof(uint64_t)); + + for (i = 0; i < new_reftable_size; i++) { + be64_to_cpus(&new_reftable[i]); + } + + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to write the new reftable"); + goto done; + } + + + /* Empty the refcount cache */ + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to flush the refblock cache"); + goto done; + } + + /* Update the image header to point to the new reftable; this only updates + * the fields which are relevant to qcow2_update_header(); other fields + * such as s->refcount_table or s->refcount_bits stay stale for now + * (because we have to restore everything if qcow2_update_header() fails) */ + old_refcount_order = s->refcount_order; + old_reftable_size = s->refcount_table_size; + old_reftable_offset = s->refcount_table_offset; + + s->refcount_order = refcount_order; + s->refcount_table_size = new_reftable_size; + s->refcount_table_offset = new_reftable_offset; + + ret = qcow2_update_header(bs); + if (ret < 0) { + s->refcount_order = old_refcount_order; + s->refcount_table_size = old_reftable_size; + s->refcount_table_offset = old_reftable_offset; + error_setg_errno(errp, -ret, "Failed to update the qcow2 header"); + goto done; + } + + /* Now update the rest of the in-memory information */ + old_reftable = s->refcount_table; + s->refcount_table = new_reftable; + + s->refcount_bits = 1 << refcount_order; + s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1); + s->refcount_max += s->refcount_max - 1; + + s->refcount_block_bits = s->cluster_bits - (refcount_order - 3); + s->refcount_block_size = 1 << s->refcount_block_bits; + + s->get_refcount = new_get_refcount; + s->set_refcount = new_set_refcount; + + /* For cleaning up all old refblocks and the old reftable below the "done" + * label */ + new_reftable = old_reftable; + new_reftable_size = old_reftable_size; + new_reftable_offset = old_reftable_offset; + +done: + if (new_reftable) { + /* On success, new_reftable actually points to the old reftable (and + * new_reftable_size is the old reftable's size); but that is just + * fine */ + for (i = 0; i < new_reftable_size; i++) { + uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK; + if (offset) { + qcow2_free_clusters(bs, offset, s->cluster_size, + QCOW2_DISCARD_OTHER); + } + } + g_free(new_reftable); + + if (new_reftable_offset > 0) { + qcow2_free_clusters(bs, new_reftable_offset, + new_reftable_size * sizeof(uint64_t), + QCOW2_DISCARD_OTHER); + } + } + + qemu_vfree(new_refblock); + return ret; +} diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index def720164d..5f4a17e473 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -22,10 +22,12 @@ * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" #include "block/block_int.h" #include "block/qcow2.h" #include "qemu/error-report.h" +#include "qemu/cutils.h" void qcow2_free_snapshots(BlockDriverState *bs) { diff --git a/block/qcow2.c b/block/qcow2.c index 88f56c8868..470734be9f 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -21,8 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include <zlib.h> #include "block/qcow2.h" @@ -34,6 +35,7 @@ #include "qapi-event.h" #include "trace.h" #include "qemu/option_int.h" +#include "qemu/cutils.h" /* Differences with QCOW: @@ -196,22 +198,8 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs) } } -static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs, - Error **errp, const char *fmt, ...) -{ - char msg[64]; - va_list ap; - - va_start(ap, fmt); - vsnprintf(msg, sizeof(msg), fmt, ap); - va_end(ap); - - error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bdrv_get_device_or_node_name(bs), "qcow2", msg); -} - -static void report_unsupported_feature(BlockDriverState *bs, - Error **errp, Qcow2Feature *table, uint64_t mask) +static void report_unsupported_feature(Error **errp, Qcow2Feature *table, + uint64_t mask) { char *features = g_strdup(""); char *old; @@ -236,7 +224,7 @@ static void report_unsupported_feature(BlockDriverState *bs, g_free(old); } - report_unsupported(bs, errp, "%s", features); + error_setg(errp, "Unsupported qcow2 feature(s): %s", features); g_free(features); } @@ -853,7 +841,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } if (header.version < 2 || header.version > 3) { - report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version); + error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version); ret = -ENOTSUP; goto fail; } @@ -933,7 +921,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, void *feature_table = NULL; qcow2_read_extensions(bs, header.header_length, ext_end, &feature_table, NULL); - report_unsupported_feature(bs, errp, feature_table, + report_unsupported_feature(errp, feature_table, s->incompatible_features & ~QCOW2_INCOMPAT_MASK); ret = -ENOTSUP; @@ -977,6 +965,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } s->crypt_method_header = header.crypt_method; if (s->crypt_method_header) { + if (bdrv_uses_whitelist() && + s->crypt_method_header == QCOW_CRYPT_AES) { + error_report("qcow2 built-in AES encryption is deprecated"); + error_printf("Support for it will be removed in a future release.\n" + "You can use 'qemu-img convert' to switch to an\n" + "unencrypted qcow2 image, or a LUKS raw image.\n"); + } + bs->encrypted = 1; } @@ -1140,7 +1136,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } /* Clear unknown autoclear feature bits */ - if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) { + if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) { s->autoclear_features = 0; ret = qcow2_update_header(bs); if (ret < 0) { @@ -1153,7 +1149,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_co_mutex_init(&s->lock); /* Repair image if dirty */ - if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only && + if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only && (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) { BdrvCheckResult result = {0}; @@ -1282,8 +1278,54 @@ static void qcow2_reopen_abort(BDRVReopenState *state) g_free(state->opaque); } +static void qcow2_join_options(QDict *options, QDict *old_options) +{ + bool has_new_overlap_template = + qdict_haskey(options, QCOW2_OPT_OVERLAP) || + qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE); + bool has_new_total_cache_size = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE); + bool has_all_cache_options; + + /* New overlap template overrides all old overlap options */ + if (has_new_overlap_template) { + qdict_del(old_options, QCOW2_OPT_OVERLAP); + qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK); + qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1); + qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2); + } + + /* New total cache size overrides all old options */ + if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) { + qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE); + qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + } + + qdict_join(options, old_options, false); + + /* + * If after merging all cache size options are set, an old total size is + * overwritten. Do keep all options, however, if all three are new. The + * resulting error message is what we want to happen. + */ + has_all_cache_options = + qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) || + qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE); + + if (has_all_cache_options && !has_new_total_cache_size) { + qdict_del(options, QCOW2_OPT_CACHE_SIZE); + } +} + static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; @@ -1302,6 +1344,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, !s->cipher) { index_in_cluster = sector_num & (s->cluster_sectors - 1); cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS); + *file = bs->file->bs; status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset; } if (ret == QCOW2_CLUSTER_ZERO) { @@ -1639,6 +1682,32 @@ fail: return ret; } +static int qcow2_inactivate(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + int ret, result = 0; + + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret) { + result = ret; + error_report("Failed to flush the L2 table cache: %s", + strerror(-ret)); + } + + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret) { + result = ret; + error_report("Failed to flush the refcount block cache: %s", + strerror(-ret)); + } + + if (result == 0) { + qcow2_mark_clean(bs); + } + + return result; +} + static void qcow2_close(BlockDriverState *bs) { BDRVQcow2State *s = bs->opaque; @@ -1646,24 +1715,8 @@ static void qcow2_close(BlockDriverState *bs) /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; - if (!(bs->open_flags & BDRV_O_INCOMING)) { - int ret1, ret2; - - ret1 = qcow2_cache_flush(bs, s->l2_table_cache); - ret2 = qcow2_cache_flush(bs, s->refcount_block_cache); - - if (ret1) { - error_report("Failed to flush the L2 table cache: %s", - strerror(-ret1)); - } - if (ret2) { - error_report("Failed to flush the refcount block cache: %s", - strerror(-ret2)); - } - - if (!ret1 && !ret2) { - qcow2_mark_clean(bs); - } + if (!(s->flags & BDRV_O_INACTIVE)) { + qcow2_inactivate(bs); } cache_clean_timer_del(bs); @@ -1707,21 +1760,24 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) bdrv_invalidate_cache(bs->file->bs, &local_err); if (local_err) { error_propagate(errp, local_err); + bs->drv = NULL; return; } memset(s, 0, sizeof(BDRVQcow2State)); options = qdict_clone_shallow(bs->options); + flags &= ~BDRV_O_INACTIVE; ret = qcow2_open(bs, options, flags, &local_err); QDECREF(options); if (local_err) { - error_setg(errp, "Could not reopen qcow2 layer: %s", - error_get_pretty(local_err)); - error_free(local_err); + error_propagate(errp, local_err); + error_prepend(errp, "Could not reopen qcow2 layer: "); + bs->drv = NULL; return; } else if (ret < 0) { error_setg_errno(errp, -ret, "Could not reopen qcow2 layer"); + bs->drv = NULL; return; } @@ -1849,31 +1905,33 @@ int qcow2_update_header(BlockDriverState *bs) } /* Feature table */ - Qcow2Feature features[] = { - { - .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, - .bit = QCOW2_INCOMPAT_DIRTY_BITNR, - .name = "dirty bit", - }, - { - .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, - .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, - .name = "corrupt bit", - }, - { - .type = QCOW2_FEAT_TYPE_COMPATIBLE, - .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, - .name = "lazy refcounts", - }, - }; + if (s->qcow_version >= 3) { + Qcow2Feature features[] = { + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_DIRTY_BITNR, + .name = "dirty bit", + }, + { + .type = QCOW2_FEAT_TYPE_INCOMPATIBLE, + .bit = QCOW2_INCOMPAT_CORRUPT_BITNR, + .name = "corrupt bit", + }, + { + .type = QCOW2_FEAT_TYPE_COMPATIBLE, + .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR, + .name = "lazy refcounts", + }, + }; - ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, - features, sizeof(features), buflen); - if (ret < 0) { - goto fail; + ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE, + features, sizeof(features), buflen); + if (ret < 0) { + goto fail; + } + buf += ret; + buflen -= ret; } - buf += ret; - buflen -= ret; /* Keep unknown header extensions */ QLIST_FOREACH(uext, &s->unknown_header_ext, next) { @@ -1928,6 +1986,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs, { BDRVQcow2State *s = bs->opaque; + if (backing_file && strlen(backing_file) > 1023) { + return -EINVAL; + } + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); @@ -2034,7 +2096,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file * size for any qcow2 image. */ - BlockDriverState* bs; + BlockBackend *blk; QCowHeader *header; uint64_t* refcount_table; Error *local_err = NULL; @@ -2109,14 +2171,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* Write the header */ QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header)); header = g_malloc0(cluster_size); @@ -2144,7 +2207,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS); } - ret = bdrv_pwrite(bs, 0, header, cluster_size); + ret = blk_pwrite(blk, 0, header, cluster_size); g_free(header); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write qcow2 header"); @@ -2154,7 +2217,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Write a refcount table with one refcount block */ refcount_table = g_malloc0(2 * cluster_size); refcount_table[0] = cpu_to_be64(2 * cluster_size); - ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size); + ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size); g_free(refcount_table); if (ret < 0) { @@ -2162,8 +2225,8 @@ static int qcow2_create2(const char *filename, int64_t total_size, goto out; } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* * And now open the image and make it consistent first (i.e. increase the @@ -2172,15 +2235,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, */ options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); - ret = bdrv_open(&bs, filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - &local_err); - if (ret < 0) { + blk = blk_new_open(filename, NULL, options, + BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } - ret = qcow2_alloc_clusters(bs, 3 * cluster_size); + ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 " "header and refcount table"); @@ -2191,8 +2254,15 @@ static int qcow2_create2(const char *filename, int64_t total_size, abort(); } + /* Create a full header (including things like feature table) */ + ret = qcow2_update_header(blk_bs(blk)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not update qcow2 header"); + goto out; + } + /* Okay, now that we have a valid image, let's give it the right size */ - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not resize image"); goto out; @@ -2200,7 +2270,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* Want a backing file? There you go.*/ if (backing_file) { - ret = bdrv_change_backing_file(bs, backing_file, backing_format); + ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format); if (ret < 0) { error_setg_errno(errp, -ret, "Could not assign backing file '%s' " "with format '%s'", backing_file, backing_format); @@ -2210,9 +2280,9 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* And if we're supposed to preallocate metadata, do that now */ if (prealloc != PREALLOC_MODE_OFF) { - BDRVQcow2State *s = bs->opaque; + BDRVQcow2State *s = blk_bs(blk)->opaque; qemu_co_mutex_lock(&s->lock); - ret = preallocate(bs); + ret = preallocate(blk_bs(blk)); qemu_co_mutex_unlock(&s->lock); if (ret < 0) { error_setg_errno(errp, -ret, "Could not preallocate metadata"); @@ -2220,24 +2290,24 @@ static int qcow2_create2(const char *filename, int64_t total_size, } } - bdrv_unref(bs); - bs = NULL; + blk_unref(blk); + blk = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow2")); - ret = bdrv_open(&bs, filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, - &local_err); - if (local_err) { + blk = blk_new_open(filename, NULL, options, + BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } ret = 0; out: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } return ret; } @@ -2269,7 +2339,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp) DEFAULT_CLUSTER_SIZE); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(PreallocMode_lookup, buf, - PREALLOC_MODE_MAX, PREALLOC_MODE_OFF, + PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -2739,15 +2809,15 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) *spec_info = (ImageInfoSpecific){ .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, - .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1), + .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1), }; if (s->qcow_version == 2) { - *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ .compat = g_strdup("0.10"), .refcount_bits = s->refcount_bits, }; } else if (s->qcow_version == 3) { - *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){ .compat = g_strdup("1.1"), .lazy_refcounts = s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS, @@ -2757,6 +2827,10 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) .has_corrupt = true, .refcount_bits = s->refcount_bits, }; + } else { + /* if this assertion fails, this probably means a new version was + * added without having it covered here */ + assert(false); } return spec_info; @@ -2824,7 +2898,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, * have to be removed. */ static int qcow2_downgrade(BlockDriverState *bs, int target_version, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, void *cb_opaque) { BDRVQcow2State *s = bs->opaque; int current_version = s->qcow_version; @@ -2839,13 +2913,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, } if (s->refcount_order != 4) { - /* we would have to convert the image to a refcount_order == 4 image - * here; however, since qemu (at the time of writing this) does not - * support anything different than 4 anyway, there is no point in doing - * so right now; however, we should error out (if qemu supports this in - * the future and this code has not been adapted) */ - error_report("qcow2_downgrade: Image refcount orders other than 4 are " - "currently not supported."); + error_report("compat=0.10 requires refcount_bits=16"); return -ENOTSUP; } @@ -2873,7 +2941,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, /* clearing autoclear features is trivial */ s->autoclear_features = 0; - ret = qcow2_expand_zero_clusters(bs, status_cb); + ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque); if (ret < 0) { return ret; } @@ -2887,8 +2955,79 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, return 0; } +typedef enum Qcow2AmendOperation { + /* This is the value Qcow2AmendHelperCBInfo::last_operation will be + * statically initialized to so that the helper CB can discern the first + * invocation from an operation change */ + QCOW2_NO_OPERATION = 0, + + QCOW2_CHANGING_REFCOUNT_ORDER, + QCOW2_DOWNGRADING, +} Qcow2AmendOperation; + +typedef struct Qcow2AmendHelperCBInfo { + /* The code coordinating the amend operations should only modify + * these four fields; the rest will be managed by the CB */ + BlockDriverAmendStatusCB *original_status_cb; + void *original_cb_opaque; + + Qcow2AmendOperation current_operation; + + /* Total number of operations to perform (only set once) */ + int total_operations; + + /* The following fields are managed by the CB */ + + /* Number of operations completed */ + int operations_completed; + + /* Cumulative offset of all completed operations */ + int64_t offset_completed; + + Qcow2AmendOperation last_operation; + int64_t last_work_size; +} Qcow2AmendHelperCBInfo; + +static void qcow2_amend_helper_cb(BlockDriverState *bs, + int64_t operation_offset, + int64_t operation_work_size, void *opaque) +{ + Qcow2AmendHelperCBInfo *info = opaque; + int64_t current_work_size; + int64_t projected_work_size; + + if (info->current_operation != info->last_operation) { + if (info->last_operation != QCOW2_NO_OPERATION) { + info->offset_completed += info->last_work_size; + info->operations_completed++; + } + + info->last_operation = info->current_operation; + } + + assert(info->total_operations > 0); + assert(info->operations_completed < info->total_operations); + + info->last_work_size = operation_work_size; + + current_work_size = info->offset_completed + operation_work_size; + + /* current_work_size is the total work size for (operations_completed + 1) + * operations (which includes this one), so multiply it by the number of + * operations not covered and divide it by the number of operations + * covered to get a projection for the operations not covered */ + projected_work_size = current_work_size * (info->total_operations - + info->operations_completed - 1) + / (info->operations_completed + 1); + + info->original_status_cb(bs, info->offset_completed + operation_offset, + current_work_size + projected_work_size, + info->original_cb_opaque); +} + static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, - BlockDriverAmendStatusCB *status_cb) + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque) { BDRVQcow2State *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; @@ -2898,8 +3037,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, const char *compat = NULL; uint64_t cluster_size = s->cluster_size; bool encrypt; + int refcount_bits = s->refcount_bits; int ret; QemuOptDesc *desc = opts->list->desc; + Qcow2AmendHelperCBInfo helper_cb_info; while (desc && desc->name) { if (!qemu_opt_find(opts, desc->name)) { @@ -2917,11 +3058,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } else if (!strcmp(compat, "1.1")) { new_version = 3; } else { - fprintf(stderr, "Unknown compatibility level %s.\n", compat); + error_report("Unknown compatibility level %s", compat); return -EINVAL; } } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) { - fprintf(stderr, "Cannot change preallocation mode.\n"); + error_report("Cannot change preallocation mode"); return -ENOTSUP; } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) { new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); @@ -2934,47 +3075,74 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, !!s->cipher); if (encrypt != !!s->cipher) { - fprintf(stderr, "Changing the encryption flag is not " - "supported.\n"); + error_report("Changing the encryption flag is not supported"); return -ENOTSUP; } } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) { cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, cluster_size); if (cluster_size != s->cluster_size) { - fprintf(stderr, "Changing the cluster size is not " - "supported.\n"); + error_report("Changing the cluster size is not supported"); return -ENOTSUP; } } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) { lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS, lazy_refcounts); } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) { - error_report("Cannot change refcount entry width"); - return -ENOTSUP; + refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS, + refcount_bits); + + if (refcount_bits <= 0 || refcount_bits > 64 || + !is_power_of_2(refcount_bits)) + { + error_report("Refcount width must be a power of two and may " + "not exceed 64 bits"); + return -EINVAL; + } } else { - /* if this assertion fails, this probably means a new option was + /* if this point is reached, this probably means a new option was * added without having it covered here */ - assert(false); + abort(); } desc++; } - if (new_version != old_version) { - if (new_version > old_version) { - /* Upgrade */ - s->qcow_version = new_version; - ret = qcow2_update_header(bs); - if (ret < 0) { - s->qcow_version = old_version; - return ret; - } - } else { - ret = qcow2_downgrade(bs, new_version, status_cb); - if (ret < 0) { - return ret; - } + helper_cb_info = (Qcow2AmendHelperCBInfo){ + .original_status_cb = status_cb, + .original_cb_opaque = cb_opaque, + .total_operations = (new_version < old_version) + + (s->refcount_bits != refcount_bits) + }; + + /* Upgrade first (some features may require compat=1.1) */ + if (new_version > old_version) { + s->qcow_version = new_version; + ret = qcow2_update_header(bs); + if (ret < 0) { + s->qcow_version = old_version; + return ret; + } + } + + if (s->refcount_bits != refcount_bits) { + int refcount_order = ctz32(refcount_bits); + Error *local_error = NULL; + + if (new_version < 3 && refcount_bits != 16) { + error_report("Different refcount widths than 16 bits require " + "compatibility level 1.1 or above (use compat=1.1 or " + "greater)"); + return -EINVAL; + } + + helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER; + ret = qcow2_change_refcount_order(bs, refcount_order, + &qcow2_amend_helper_cb, + &helper_cb_info, &local_error); + if (ret < 0) { + error_report_err(local_error); + return ret; } } @@ -2989,9 +3157,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, if (s->use_lazy_refcounts != lazy_refcounts) { if (lazy_refcounts) { - if (s->qcow_version < 3) { - fprintf(stderr, "Lazy refcounts only supported with compatibility " - "level 1.1 and above (use compat=1.1 or greater)\n"); + if (new_version < 3) { + error_report("Lazy refcounts only supported with compatibility " + "level 1.1 and above (use compat=1.1 or greater)"); return -EINVAL; } s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS; @@ -3025,6 +3193,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, } } + /* Downgrade last (so unsupported features can be removed before) */ + if (new_version < old_version) { + helper_cb_info.current_operation = QCOW2_DOWNGRADING; + ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb, + &helper_cb_info); + if (ret < 0) { + return ret; + } + } + return 0; } @@ -3145,6 +3323,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_reopen_commit = qcow2_reopen_commit, .bdrv_reopen_abort = qcow2_reopen_abort, + .bdrv_join_options = qcow2_join_options, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, @@ -3176,6 +3355,7 @@ BlockDriver bdrv_qcow2 = { .bdrv_refresh_limits = qcow2_refresh_limits, .bdrv_invalidate_cache = qcow2_invalidate_cache, + .bdrv_inactivate = qcow2_inactivate, .create_opts = &qcow2_create_opts, .bdrv_check = qcow2_check, diff --git a/block/qcow2.h b/block/qcow2.h index b8c500b9dc..a063a3c1a1 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -529,6 +529,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset, int64_t size); +int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order, + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque, Error **errp); + /* qcow2-cluster.c functions */ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size); @@ -553,7 +557,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors); int qcow2_expand_zero_clusters(BlockDriverState *bs, - BlockDriverAmendStatusCB *status_cb); + BlockDriverAmendStatusCB *status_cb, + void *cb_opaque); /* qcow2-snapshot.c functions */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info); diff --git a/block/qed-check.c b/block/qed-check.c index 36ecd290d6..622f308976 100644 --- a/block/qed-check.c +++ b/block/qed-check.c @@ -11,6 +11,7 @@ * */ +#include "qemu/osdep.h" #include "qed.h" typedef struct { diff --git a/block/qed-cluster.c b/block/qed-cluster.c index f64b2af8f7..c24e75616a 100644 --- a/block/qed-cluster.c +++ b/block/qed-cluster.c @@ -12,6 +12,7 @@ * */ +#include "qemu/osdep.h" #include "qed.h" /** diff --git a/block/qed-gencb.c b/block/qed-gencb.c index b817a8bf50..faf8ecc840 100644 --- a/block/qed-gencb.c +++ b/block/qed-gencb.c @@ -11,6 +11,7 @@ * */ +#include "qemu/osdep.h" #include "qed.h" void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque) diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c index e9b2aae44d..5cba794650 100644 --- a/block/qed-l2-cache.c +++ b/block/qed-l2-cache.c @@ -50,6 +50,7 @@ * table will be deleted in favor of the existing cache entry. */ +#include "qemu/osdep.h" #include "trace.h" #include "qed.h" diff --git a/block/qed-table.c b/block/qed-table.c index f4219b8acc..802945f5e5 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -12,6 +12,7 @@ * */ +#include "qemu/osdep.h" #include "trace.h" #include "qemu/sockets.h" /* for EINPROGRESS on Windows */ #include "qed.h" diff --git a/block/qed.c b/block/qed.c index 9b88895038..0af52741df 100644 --- a/block/qed.c +++ b/block/qed.c @@ -12,11 +12,14 @@ * */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu/timer.h" #include "trace.h" #include "qed.h" #include "qapi/qmp/qerror.h" #include "migration/migration.h" +#include "sysemu/block-backend.h" static const AIOCBInfo qed_aiocb_info = { .aiocb_size = sizeof(QEDAIOCB), @@ -344,7 +347,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s) * migration. */ timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + - get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT); + NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT); } /* It's okay to call this multiple times or when no timer is started */ @@ -375,18 +378,6 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } -static void bdrv_qed_drain(BlockDriverState *bs) -{ - BDRVQEDState *s = bs->opaque; - - /* Cancel timer and start doing I/O that were meant to happen as if it - * fired, that way we get bdrv_drain() taking care of the ongoing requests - * correctly. */ - qed_cancel_need_check_timer(s); - qed_plug_allocating_write_reqs(s); - bdrv_aio_flush(s->bs, qed_clear_need_check, s); -} - static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -410,11 +401,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } if (s->header.features & ~QED_FEATURE_MASK) { /* image uses unsupported feature bits */ - char buf[64]; - snprintf(buf, sizeof(buf), "%" PRIx64, - s->header.features & ~QED_FEATURE_MASK); - error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bdrv_get_device_or_node_name(bs), "QED", buf); + error_setg(errp, "Unsupported QED features: %" PRIx64, + s->header.features & ~QED_FEATURE_MASK); return -ENOTSUP; } if (!qed_is_cluster_size_valid(s->header.cluster_size)) { @@ -477,7 +465,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, * feature is no longer valid. */ if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && - !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) { + !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) { s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; ret = qed_write_header_sync(s); @@ -505,7 +493,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, * aid data recovery from an otherwise inconsistent image. */ if (!bdrv_is_read_only(bs->file->bs) && - !(flags & BDRV_O_INCOMING)) { + !(flags & BDRV_O_INACTIVE)) { BdrvCheckResult result = {0}; ret = qed_check(s, &result, true); @@ -579,7 +567,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, size_t l1_size = header.cluster_size * header.table_size; Error *local_err = NULL; int ret = 0; - BlockDriverState *bs; + BlockBackend *blk; ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { @@ -587,17 +575,17 @@ static int qed_create(const char *filename, uint32_t cluster_size, return ret; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); - return ret; + return -EIO; } + blk_set_allow_write_beyond_eof(blk, true); + /* File must start empty and grow, check truncate is supported */ - ret = bdrv_truncate(bs, 0); + ret = blk_truncate(blk, 0); if (ret < 0) { goto out; } @@ -613,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size, } qed_header_cpu_to_le(&header, &le_header); - ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header)); + ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header)); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, sizeof(le_header), backing_file, - header.backing_filename_size); + ret = blk_pwrite(blk, sizeof(le_header), backing_file, + header.backing_filename_size); if (ret < 0) { goto out; } l1_table = g_malloc0(l1_size); - ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size); + ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size); if (ret < 0) { goto out; } @@ -632,7 +620,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, ret = 0; /* success */ out: g_free(l1_table); - bdrv_unref(bs); + blk_unref(blk); return ret; } @@ -692,6 +680,7 @@ typedef struct { uint64_t pos; int64_t status; int *pnum; + BlockDriverState **file; } QEDIsAllocatedCB; static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len) @@ -703,6 +692,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l case QED_CLUSTER_FOUND: offset |= qed_offset_into_cluster(s, cb->pos); cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; + *cb->file = cb->bs->file->bs; break; case QED_CLUSTER_ZERO: cb->status = BDRV_BLOCK_ZERO; @@ -724,7 +714,8 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { BDRVQEDState *s = bs->opaque; size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE; @@ -733,6 +724,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs, .pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE, .status = BDRV_BLOCK_OFFSET_MASK, .pnum = pnum, + .file = file, }; QEDRequest request = { .l2_table = NULL }; @@ -1611,9 +1603,8 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp) memset(s, 0, sizeof(BDRVQEDState)); ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err); if (local_err) { - error_setg(errp, "Could not reopen qed layer: %s", - error_get_pretty(local_err)); - error_free(local_err); + error_propagate(errp, local_err); + error_prepend(errp, "Could not reopen qed layer: "); return; } else if (ret < 0) { error_setg_errno(errp, -ret, "Could not reopen qed layer"); @@ -1688,7 +1679,6 @@ static BlockDriver bdrv_qed = { .bdrv_check = bdrv_qed_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, - .bdrv_drain = bdrv_qed_drain, }; static void bdrv_qed_init(void) diff --git a/block/qed.h b/block/qed.h index 615e676fc8..22b3198751 100644 --- a/block/qed.h +++ b/block/qed.h @@ -16,6 +16,7 @@ #define BLOCK_QED_H #include "block/block_int.h" +#include "qemu/cutils.h" /* The layout of a QED file is as follows: * diff --git a/block/quorum.c b/block/quorum.c index b9ba028d46..da15465a9a 100644 --- a/block/quorum.c +++ b/block/quorum.c @@ -13,6 +13,7 @@ * See the COPYING file in the top-level directory. */ +#include "qemu/osdep.h" #include "block/block_int.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qdict.h" @@ -214,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s, return acb; } -static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret) +static void quorum_report_bad(QuorumOpType type, uint64_t sector_num, + int nb_sectors, char *node_name, int ret) { const char *msg = NULL; if (ret < 0) { msg = strerror(-ret); } - qapi_event_send_quorum_report_bad(!!msg, msg, node_name, - acb->sector_num, acb->nb_sectors, &error_abort); + + qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name, + sector_num, nb_sectors, &error_abort); } static void quorum_report_failure(QuorumAIOCB *acb) @@ -283,9 +286,19 @@ static void quorum_aio_cb(void *opaque, int ret) BDRVQuorumState *s = acb->common.bs->opaque; bool rewrite = false; + if (ret == 0) { + acb->success_count++; + } else { + QuorumOpType type; + type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE; + quorum_report_bad(type, acb->sector_num, acb->nb_sectors, + sacb->aiocb->bs->node_name, ret); + } + if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) { /* We try to read next child in FIFO order if we fail to read */ - if (ret < 0 && ++acb->child_iter < s->num_children) { + if (ret < 0 && (acb->child_iter + 1) < s->num_children) { + acb->child_iter++; read_fifo_child(acb); return; } @@ -300,11 +313,6 @@ static void quorum_aio_cb(void *opaque, int ret) sacb->ret = ret; acb->count++; - if (ret == 0) { - acb->success_count++; - } else { - quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret); - } assert(acb->count <= s->num_children); assert(acb->success_count <= s->num_children); if (acb->count < s->num_children) { @@ -336,7 +344,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, continue; } QLIST_FOREACH(item, &version->items, next) { - quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0); + quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num, + acb->nb_sectors, + s->children[item->index]->bs->node_name, 0); } } } @@ -646,8 +656,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) } for (i = 0; i < s->num_children; i++) { - bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov, - acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]); + acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num, + &acb->qcrs[i].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[i]); } return &acb->common; @@ -662,9 +673,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, acb->qcrs[acb->child_iter].buf); - bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[acb->child_iter].aiocb = + bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, + &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[acb->child_iter]); return &acb->common; } @@ -758,19 +770,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) QuorumVoteValue result_value; int i; int result = 0; + int success_count = 0; QLIST_INIT(&error_votes.vote_list); error_votes.compare = quorum_64bits_compare; for (i = 0; i < s->num_children; i++) { result = bdrv_co_flush(s->children[i]->bs); - result_value.l = result; - quorum_count_vote(&error_votes, &result_value, i); + if (result) { + quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0, + bdrv_nb_sectors(s->children[i]->bs), + s->children[i]->bs->node_name, result); + result_value.l = result; + quorum_count_vote(&error_votes, &result_value, i); + } else { + success_count++; + } } - winner = quorum_get_vote_winner(&error_votes); - result = winner->value.l; - + if (success_count >= s->threshold) { + result = 0; + } else { + winner = quorum_get_vote_winner(&error_votes); + result = winner->value.l; + } quorum_free_vote_list(&error_votes); return result; @@ -847,7 +870,7 @@ static int parse_read_pattern(const char *opt) return QUORUM_READ_PATTERN_QUORUM; } - for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) { + for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) { if (!strcmp(opt, QuorumReadPattern_lookup[i])) { return i; } @@ -997,7 +1020,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs, } } -static void quorum_refresh_filename(BlockDriverState *bs) +static void quorum_refresh_filename(BlockDriverState *bs, QDict *options) { BDRVQuorumState *s = bs->opaque; QDict *opts; diff --git a/block/raw-aio.h b/block/raw-aio.h index 31d791fe67..811e375018 100644 --- a/block/raw-aio.h +++ b/block/raw-aio.h @@ -15,6 +15,8 @@ #ifndef QEMU_RAW_AIO_H #define QEMU_RAW_AIO_H +#include "qemu/iov.h" + /* AIO request types */ #define QEMU_AIO_READ 0x0001 #define QEMU_AIO_WRITE 0x0002 diff --git a/block/raw-posix.c b/block/raw-posix.c index d9162fd306..906d5c9411 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -21,7 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" #include "qemu/error-report.h" #include "qemu/timer.h" #include "qemu/log.h" @@ -43,6 +45,7 @@ #include <IOKit/storage/IOMedia.h> #include <IOKit/storage/IOCDMedia.h> //#include <IOKit/storage/IOCDTypes.h> +#include <IOKit/storage/IODVDMedia.h> #include <CoreFoundation/CoreFoundation.h> #endif @@ -51,8 +54,6 @@ #include <sys/dkio.h> #endif #ifdef __linux__ -#include <sys/types.h> -#include <sys/stat.h> #include <sys/ioctl.h> #include <sys/param.h> #include <linux/cdrom.h> @@ -500,21 +501,17 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) { - error_printf("WARNING: aio=native was specified for '%s', but " - "it requires cache.direct=on, which was not " - "specified. Falling back to aio=threads.\n" - " This will become an error condition in " - "future QEMU versions.\n", - bs->filename); + error_setg(errp, "aio=native was specified, but it requires " + "cache.direct=on, which was not specified."); + ret = -EINVAL; + goto fail; } #else if (bdrv_flags & BDRV_O_NATIVE_AIO) { - error_printf("WARNING: aio=native was specified for '%s', but " - "is not supported in this build. Falling back to " - "aio=threads.\n" - " This will become an error condition in " - "future QEMU versions.\n", - bs->filename); + error_setg(errp, "aio=native was specified, but is not supported " + "in this build."); + ret = -EINVAL; + goto fail; } #endif /* !defined(CONFIG_LINUX_AIO) */ @@ -783,7 +780,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { BDRVRawState *s = bs->opaque; struct hd_geometry ioctl_geo = {0}; - uint32_t blksize; /* If DASD, get its geometry */ if (check_for_dasd(s->fd) < 0) { @@ -803,12 +799,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) } geo->heads = ioctl_geo.heads; geo->sectors = ioctl_geo.sectors; - if (!probe_physical_blocksize(s->fd, &blksize)) { - /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */ - geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE) - / (geo->heads * geo->sectors); - return 0; - } geo->cylinders = ioctl_geo.cylinders; return 0; @@ -1636,7 +1626,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false); buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC); prealloc = qapi_enum_parse(PreallocMode_lookup, buf, - PREALLOC_MODE_MAX, PREALLOC_MODE_OFF, + PREALLOC_MODE__MAX, PREALLOC_MODE_OFF, &local_err); g_free(buf); if (local_err) { @@ -1830,7 +1820,8 @@ static int find_allocation(BlockDriverState *bs, off_t start, */ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { off_t start, data = 0, hole = 0; int64_t total_size; @@ -1872,6 +1863,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE); ret = BDRV_BLOCK_ZERO; } + *file = bs; return ret | BDRV_BLOCK_OFFSET_VALID | start; } @@ -1975,33 +1967,47 @@ BlockDriver bdrv_file = { /* host device */ #if defined(__APPLE__) && defined(__MACH__) -static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ); static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize, int flags); -kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) +static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator) { - kern_return_t kernResult; + kern_return_t kernResult = KERN_FAILURE; mach_port_t masterPort; CFMutableDictionaryRef classesToMatch; + const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass}; + char *mediaType = NULL; kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort ); if ( KERN_SUCCESS != kernResult ) { printf( "IOMasterPort returned %d\n", kernResult ); } - classesToMatch = IOServiceMatching( kIOCDMediaClass ); - if ( classesToMatch == NULL ) { - printf( "IOServiceMatching returned a NULL dictionary.\n" ); - } else { - CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue ); - } - kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator ); - if ( KERN_SUCCESS != kernResult ) - { - printf( "IOServiceGetMatchingServices returned %d\n", kernResult ); - } + int index; + for (index = 0; index < ARRAY_SIZE(matching_array); index++) { + classesToMatch = IOServiceMatching(matching_array[index]); + if (classesToMatch == NULL) { + error_report("IOServiceMatching returned NULL for %s", + matching_array[index]); + continue; + } + CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey), + kCFBooleanTrue); + kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch, + mediaIterator); + if (kernResult != KERN_SUCCESS) { + error_report("Note: IOServiceGetMatchingServices returned %d", + kernResult); + continue; + } - return kernResult; + /* If a match was found, leave the loop */ + if (*mediaIterator != 0) { + DPRINTF("Matching using %s\n", matching_array[index]); + mediaType = g_strdup(matching_array[index]); + break; + } + } + return mediaType; } kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, @@ -2033,7 +2039,46 @@ kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, return kernResult; } -#endif +/* Sets up a real cdrom for use in QEMU */ +static bool setup_cdrom(char *bsd_path, Error **errp) +{ + int index, num_of_test_partitions = 2, fd; + char test_partition[MAXPATHLEN]; + bool partition_found = false; + + /* look for a working partition */ + for (index = 0; index < num_of_test_partitions; index++) { + snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path, + index); + fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE); + if (fd >= 0) { + partition_found = true; + qemu_close(fd); + break; + } + } + + /* if a working partition on the device was not found */ + if (partition_found == false) { + error_setg(errp, "Failed to find a working partition on disc"); + } else { + DPRINTF("Using %s as optical disc\n", test_partition); + pstrcpy(bsd_path, MAXPATHLEN, test_partition); + } + return partition_found; +} + +/* Prints directions on mounting and unmounting a device */ +static void print_unmounting_directions(const char *file_name) +{ + error_report("If device %s is mounted on the desktop, unmount" + " it first before using it in QEMU", file_name); + error_report("Command to unmount device: diskutil unmountDisk %s", + file_name); + error_report("Command to mount device: diskutil mountDisk %s", file_name); +} + +#endif /* defined(__APPLE__) && defined(__MACH__) */ static int hdev_probe_device(const char *filename) { @@ -2124,33 +2169,57 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, #if defined(__APPLE__) && defined(__MACH__) const char *filename = qdict_get_str(options, "filename"); + char bsd_path[MAXPATHLEN] = ""; + bool error_occurred = false; + + /* If using a real cdrom */ + if (strcmp(filename, "/dev/cdrom") == 0) { + char *mediaType = NULL; + kern_return_t ret_val; + io_iterator_t mediaIterator = 0; + + mediaType = FindEjectableOpticalMedia(&mediaIterator); + if (mediaType == NULL) { + error_setg(errp, "Please make sure your CD/DVD is in the optical" + " drive"); + error_occurred = true; + goto hdev_open_Mac_error; + } - if (strstart(filename, "/dev/cdrom", NULL)) { - kern_return_t kernResult; - io_iterator_t mediaIterator; - char bsdPath[ MAXPATHLEN ]; - int fd; - - kernResult = FindEjectableCDMedia( &mediaIterator ); - kernResult = GetBSDPath(mediaIterator, bsdPath, sizeof(bsdPath), - flags); - if ( bsdPath[ 0 ] != '\0' ) { - strcat(bsdPath,"s0"); - /* some CDs don't have a partition 0 */ - fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE); - if (fd < 0) { - bsdPath[strlen(bsdPath)-1] = '1'; - } else { - qemu_close(fd); - } - filename = bsdPath; - qdict_put(options, "filename", qstring_from_str(filename)); + ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags); + if (ret_val != KERN_SUCCESS) { + error_setg(errp, "Could not get BSD path for optical drive"); + error_occurred = true; + goto hdev_open_Mac_error; } - if ( mediaIterator ) - IOObjectRelease( mediaIterator ); + /* If a real optical drive was not found */ + if (bsd_path[0] == '\0') { + error_setg(errp, "Failed to obtain bsd path for optical drive"); + error_occurred = true; + goto hdev_open_Mac_error; + } + + /* If using a cdrom disc and finding a partition on the disc failed */ + if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 && + setup_cdrom(bsd_path, errp) == false) { + print_unmounting_directions(bsd_path); + error_occurred = true; + goto hdev_open_Mac_error; + } + + qdict_put(options, "filename", qstring_from_str(bsd_path)); + +hdev_open_Mac_error: + g_free(mediaType); + if (mediaIterator) { + IOObjectRelease(mediaIterator); + } + if (error_occurred) { + return -ENOENT; + } } -#endif +#endif /* defined(__APPLE__) && defined(__MACH__) */ s->type = FTYPE_FILE; @@ -2159,6 +2228,15 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, if (local_err) { error_propagate(errp, local_err); } +#if defined(__APPLE__) && defined(__MACH__) + if (*bsd_path) { + filename = bsd_path; + } + /* if a physical device experienced an error while being opened */ + if (strncmp(filename, "/dev/", 5) == 0) { + print_unmounting_directions(filename); + } +#endif /* defined(__APPLE__) && defined(__MACH__) */ return ret; } diff --git a/block/raw-win32.c b/block/raw-win32.c index f250503112..949bf6dc3e 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -21,7 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/cutils.h" #include "qemu/timer.h" #include "block/block_int.h" #include "qemu/module.h" diff --git a/block/raw_bsd.c b/block/raw_bsd.c index 915d6fd0e6..a6cc7e9918 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -26,7 +26,9 @@ * IN THE SOFTWARE. */ +#include "qemu/osdep.h" #include "block/block_int.h" +#include "qapi/error.h" #include "qemu/option.h" static QemuOptsList raw_create_opts = { @@ -55,8 +57,9 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num, return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov); } -static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, QEMUIOVector *qiov) +static int coroutine_fn +raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov, int flags) { void *buf = NULL; BlockDriver *drv; @@ -102,7 +105,8 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov); + ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE, qiov, flags); fail: if (qiov == &local_qiov) { @@ -112,11 +116,20 @@ fail: return ret; } +static int coroutine_fn +raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) +{ + return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0); +} + static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, int64_t sector_num, - int nb_sectors, int *pnum) + int nb_sectors, int *pnum, + BlockDriverState **file) { *pnum = nb_sectors; + *file = bs->file->bs; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | (sector_num << BDRV_SECTOR_BITS); } @@ -244,6 +257,8 @@ BlockDriver bdrv_raw = { .bdrv_create = &raw_create, .bdrv_co_readv = &raw_co_readv, .bdrv_co_writev = &raw_co_writev, + .bdrv_co_writev_flags = &raw_co_writev_flags, + .supported_write_flags = BDRV_REQ_FUA, .bdrv_co_write_zeroes = &raw_co_write_zeroes, .bdrv_co_discard = &raw_co_discard, .bdrv_co_get_block_status = &raw_co_get_block_status, diff --git a/block/rbd.c b/block/rbd.c index a60a19d58d..5bc5b32530 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -11,11 +11,13 @@ * GNU GPL, version 2 or (at your option) any later version. */ -#include <inttypes.h> +#include "qemu/osdep.h" -#include "qemu-common.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "block/block_int.h" +#include "crypto/secret.h" +#include "qemu/cutils.h" #include <rbd/librbd.h> @@ -228,6 +230,27 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname) return NULL; } + +static int qemu_rbd_set_auth(rados_t cluster, const char *secretid, + Error **errp) +{ + if (secretid == 0) { + return 0; + } + + gchar *secret = qcrypto_secret_lookup_as_base64(secretid, + errp); + if (!secret) { + return -1; + } + + rados_conf_set(cluster, "key", secret); + g_free(secret); + + return 0; +} + + static int qemu_rbd_set_conf(rados_t cluster, const char *conf, bool only_read_conf_file, Error **errp) @@ -299,10 +322,13 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) char conf[RBD_MAX_CONF_SIZE]; char clientname_buf[RBD_MAX_CONF_SIZE]; char *clientname; + const char *secretid; rados_t cluster; rados_ioctx_t io_ctx; int ret; + secretid = qemu_opt_get(opts, "password-secret"); + if (qemu_rbd_parsename(filename, pool, sizeof(pool), snap_buf, sizeof(snap_buf), name, sizeof(name), @@ -350,6 +376,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp) return -EIO; } + if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) { + rados_shutdown(cluster); + return -EIO; + } + if (rados_connect(cluster) < 0) { error_setg(errp, "error connecting"); rados_shutdown(cluster); @@ -423,6 +454,11 @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_STRING, .help = "Specification of the rbd image", }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", + }, { /* end of list */ } }, }; @@ -436,6 +472,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, char conf[RBD_MAX_CONF_SIZE]; char clientname_buf[RBD_MAX_CONF_SIZE]; char *clientname; + const char *secretid; QemuOpts *opts; Error *local_err = NULL; const char *filename; @@ -450,6 +487,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, } filename = qemu_opt_get(opts, "filename"); + secretid = qemu_opt_get(opts, "password-secret"); if (qemu_rbd_parsename(filename, pool, sizeof(pool), snap_buf, sizeof(snap_buf), @@ -488,6 +526,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags, } } + if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) { + r = -EIO; + goto failed_shutdown; + } + /* * Fallback to more conservative semantics if setting cache * options fails. Ignore errors from setting rbd_cache because the @@ -919,6 +962,11 @@ static QemuOptsList qemu_rbd_create_opts = { .type = QEMU_OPT_SIZE, .help = "RBD object size" }, + { + .name = "password-secret", + .type = QEMU_OPT_STRING, + .help = "ID of secret providing the password", + }, { /* end of list */ } } }; diff --git a/block/sheepdog.c b/block/sheepdog.c index d80e4ed18d..33e0a33824 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -12,12 +12,15 @@ * GNU GPL, version 2 or (at your option) any later version. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu/uri.h" #include "qemu/error-report.h" #include "qemu/sockets.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/bitops.h" +#include "qemu/cutils.h" #define SD_PROTO_VER 0x01 @@ -283,6 +286,12 @@ static inline bool is_snapshot(struct SheepdogInode *inode) return !!inode->snap_ctime; } +static inline size_t count_data_objs(const struct SheepdogInode *inode) +{ + return DIV_ROUND_UP(inode->vdi_size, + (1UL << inode->block_size_shift)); +} + #undef DPRINTF #ifdef DEBUG_SDOG #define DPRINTF(fmt, args...) \ @@ -608,14 +617,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data, ret = qemu_co_send(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { error_report("failed to send a req, %s", strerror(errno)); - ret = -socket_error(); - return ret; + return -errno; } ret = qemu_co_send(sockfd, data, *wlen); if (ret != *wlen) { - ret = -socket_error(); error_report("failed to send a req, %s", strerror(errno)); + return -errno; } return ret; @@ -1630,7 +1638,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, static int sd_prealloc(const char *filename, Error **errp) { - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; BDRVSheepdogState *base = NULL; unsigned long buf_size; uint32_t idx, max_idx; @@ -1639,19 +1647,22 @@ static int sd_prealloc(const char *filename, Error **errp) void *buf = NULL; int ret; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - errp); - if (ret < 0) { + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, errp); + if (blk == NULL) { + ret = -EIO; goto out_with_err_set; } - vdi_size = bdrv_getlength(bs); + blk_set_allow_write_beyond_eof(blk, true); + + vdi_size = blk_getlength(blk); if (vdi_size < 0) { ret = vdi_size; goto out; } - base = bs->opaque; + base = blk_bs(blk)->opaque; object_size = (UINT32_C(1) << base->inode.block_size_shift); buf_size = MIN(object_size, SD_DATA_OBJ_SIZE); buf = g_malloc0(buf_size); @@ -1663,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp) * The created image can be a cloned image, so we need to read * a data from the source image. */ - ret = bdrv_pread(bs, idx * buf_size, buf, buf_size); + ret = blk_pread(blk, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } - ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size); + ret = blk_pwrite(blk, idx * buf_size, buf, buf_size); if (ret < 0) { goto out; } } + ret = 0; out: if (ret < 0) { error_setg_errno(errp, -ret, "Can't pre-allocate"); } out_with_err_set: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } g_free(buf); @@ -1819,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts, } if (backing_file) { - BlockDriverState *bs; + BlockBackend *blk; BDRVSheepdogState *base; BlockDriver *drv; @@ -1831,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts, goto out; } - bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp); - if (ret < 0) { + blk = blk_new_open(backing_file, NULL, NULL, + BDRV_O_PROTOCOL, errp); + if (blk == NULL) { + ret = -EIO; goto out; } - base = bs->opaque; + base = blk_bs(blk)->opaque; if (!is_snapshot(&base->inode)) { error_setg(errp, "cannot clone from a non snapshot vdi"); - bdrv_unref(bs); + blk_unref(blk); ret = -EINVAL; goto out; } s->inode.vdi_id = base->inode.vdi_id; - bdrv_unref(bs); + blk_unref(blk); } s->aio_context = qemu_get_aio_context(); @@ -1861,8 +1874,7 @@ static int sd_create(const char *filename, QemuOpts *opts, fd = connect_to_sdog(s, &local_err); if (fd < 0) { - error_report("%s", error_get_pretty(local_err)); - error_free(local_err); + error_report_err(local_err); ret = -EIO; goto out; } @@ -2406,9 +2418,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) ret = do_sd_create(s, &new_vid, 1, &local_err); if (ret < 0) { - error_report("failed to create inode for snapshot: %s", - error_get_pretty(local_err)); - error_free(local_err); + error_reportf_err(local_err, + "failed to create inode for snapshot: "); goto cleanup; } @@ -2479,13 +2490,131 @@ out: return ret; } +#define NR_BATCHED_DISCARD 128 + +static bool remove_objects(BDRVSheepdogState *s) +{ + int fd, i = 0, nr_objs = 0; + Error *local_err = NULL; + int ret = 0; + bool result = true; + SheepdogInode *inode = &s->inode; + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + return false; + } + + nr_objs = count_data_objs(inode); + while (i < nr_objs) { + int start_idx, nr_filled_idx; + + while (i < nr_objs && !inode->data_vdi_id[i]) { + i++; + } + start_idx = i; + + nr_filled_idx = 0; + while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) { + if (inode->data_vdi_id[i]) { + inode->data_vdi_id[i] = 0; + nr_filled_idx++; + } + + i++; + } + + ret = write_object(fd, s->aio_context, + (char *)&inode->data_vdi_id[start_idx], + vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies, + (i - start_idx) * sizeof(uint32_t), + offsetof(struct SheepdogInode, + data_vdi_id[start_idx]), + false, s->cache_flags); + if (ret < 0) { + error_report("failed to discard snapshot inode."); + result = false; + goto out; + } + } + +out: + closesocket(fd); + return result; +} + static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id, const char *name, Error **errp) { - /* FIXME: Delete specified snapshot id. */ - return 0; + unsigned long snap_id = 0; + char snap_tag[SD_MAX_VDI_TAG_LEN]; + Error *local_err = NULL; + int fd, ret; + char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN]; + BDRVSheepdogState *s = bs->opaque; + unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0; + uint32_t vid; + SheepdogVdiReq hdr = { + .opcode = SD_OP_DEL_VDI, + .data_length = wlen, + .flags = SD_FLAG_CMD_WRITE, + }; + SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr; + + if (!remove_objects(s)) { + return -1; + } + + memset(buf, 0, sizeof(buf)); + memset(snap_tag, 0, sizeof(snap_tag)); + pstrcpy(buf, SD_MAX_VDI_LEN, s->name); + ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id); + if (ret || snap_id > UINT32_MAX) { + error_setg(errp, "Invalid snapshot ID: %s", + snapshot_id ? snapshot_id : "<null>"); + return -EINVAL; + } + + if (snap_id) { + hdr.snapid = (uint32_t) snap_id; + } else { + pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id); + pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag); + } + + ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, + &local_err); + if (ret) { + return ret; + } + + fd = connect_to_sdog(s, &local_err); + if (fd < 0) { + error_report_err(local_err); + return -1; + } + + ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, + buf, &wlen, &rlen); + closesocket(fd); + if (ret) { + return ret; + } + + switch (rsp->result) { + case SD_RES_NO_VDI: + error_report("%s was already deleted", s->name); + case SD_RES_SUCCESS: + break; + default: + error_report("%s, %s", sd_strerror(rsp->result), s->name); + return -1; + } + + return ret; } static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) @@ -2709,7 +2838,7 @@ retry: static coroutine_fn int64_t sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - int *pnum) + int *pnum, BlockDriverState **file) { BDRVSheepdogState *s = bs->opaque; SheepdogInode *inode = &s->inode; @@ -2740,6 +2869,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, if (*pnum > nb_sectors) { *pnum = nb_sectors; } + if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) { + *file = bs; + } return ret; } diff --git a/block/snapshot.c b/block/snapshot.c index 6e9fa8da98..e9d721df68 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -22,8 +22,10 @@ * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "block/snapshot.h" #include "block/block_int.h" +#include "qapi/error.h" #include "qapi/qmp/qerror.h" QemuOptsList internal_snapshot_opts = { @@ -229,6 +231,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; + int ret; + if (!drv) { error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs)); return -ENOMEDIUM; @@ -239,18 +243,21 @@ int bdrv_snapshot_delete(BlockDriverState *bs, } /* drain all pending i/o before deleting snapshot */ - bdrv_drain(bs); + bdrv_drained_begin(bs); if (drv->bdrv_snapshot_delete) { - return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); + ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); + } else if (bs->file) { + ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp); + } else { + error_setg(errp, "Block format '%s' used by device '%s' " + "does not support internal snapshot deletion", + drv->format_name, bdrv_get_device_name(bs)); + ret = -ENOTSUP; } - if (bs->file) { - return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp); - } - error_setg(errp, "Block format '%s' used by device '%s' " - "does not support internal snapshot deletion", - drv->format_name, bdrv_get_device_name(bs)); - return -ENOTSUP; + + bdrv_drained_end(bs); + return ret; } int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, diff --git a/block/ssh.c b/block/ssh.c index af025c08a0..06928ed939 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -22,14 +22,13 @@ * THE SOFTWARE. */ -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> +#include "qemu/osdep.h" #include <libssh2.h> #include <libssh2_sftp.h> #include "block/block_int.h" +#include "qapi/error.h" #include "qemu/error-report.h" #include "qemu/sockets.h" #include "qemu/uri.h" diff --git a/block/stream.c b/block/stream.c index 25af7eff62..332b9a183e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -11,9 +11,11 @@ * */ +#include "qemu/osdep.h" #include "trace.h" #include "block/block_int.h" #include "block/blockjob.h" +#include "qapi/error.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "sysemu/block-backend.h" @@ -88,21 +90,21 @@ static void coroutine_fn stream_run(void *opaque) StreamCompleteData *data; BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; - int64_t sector_num, end; + int64_t sector_num = 0; + int64_t end = -1; int error = 0; int ret = 0; int n = 0; void *buf; if (!bs->backing) { - block_job_completed(&s->common, 0); - return; + goto out; } s->common.len = bdrv_getlength(bs); if (s->common.len < 0) { - block_job_completed(&s->common, s->common.len); - return; + ret = s->common.len; + goto out; } end = s->common.len >> BDRV_SECTOR_BITS; @@ -189,6 +191,7 @@ wait: qemu_vfree(buf); +out: /* Modify backing chain and close BDSes in main loop */ data = g_malloc(sizeof(*data)); data->ret = ret; diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 13b5baa5d7..4920e09495 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -22,6 +22,7 @@ * along with this program; if not, see <http://www.gnu.org/licenses/>. */ +#include "qemu/osdep.h" #include "block/throttle-groups.h" #include "qemu/queue.h" #include "qemu/thread.h" diff --git a/block/vdi.c b/block/vdi.c index 17f435fad6..75d4819edb 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -49,11 +49,14 @@ * so this seems to be reasonable. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "migration/migration.h" #include "qemu/coroutine.h" +#include "qemu/cutils.h" #if defined(CONFIG_UUID) #include <uuid/uuid.h> @@ -526,7 +529,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state, } static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { /* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */ BDRVVdiState *s = (BDRVVdiState *)bs->opaque; @@ -550,6 +553,7 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs, offset = s->header.offset_data + (uint64_t)bmap_entry * s->block_size + sector_in_block * SECTOR_SIZE; + *file = bs->file->bs; return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset; } @@ -731,7 +735,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) size_t bmap_size; int64_t offset = 0; Error *local_err = NULL; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; uint32_t *bmap = NULL; logout("\n"); @@ -764,13 +768,17 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) error_propagate(errp, local_err); goto exit; } - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + /* We need enough blocks to store the given disk size, so always round up. */ blocks = DIV_ROUND_UP(bytes, block_size); @@ -800,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) vdi_header_print(&header); #endif vdi_header_to_le(&header); - ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header)); + ret = blk_pwrite(blk, offset, &header, sizeof(header)); if (ret < 0) { error_setg(errp, "Error writing header to %s", filename); goto exit; @@ -821,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) bmap[i] = VDI_UNALLOCATED; } } - ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size); + ret = blk_pwrite(blk, offset, bmap, bmap_size); if (ret < 0) { error_setg(errp, "Error writing bmap to %s", filename); goto exit; @@ -830,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } if (image_type == VDI_TYPE_STATIC) { - ret = bdrv_truncate(bs, offset + blocks * block_size); + ret = blk_truncate(blk, offset + blocks * block_size); if (ret < 0) { error_setg(errp, "Failed to statically allocate %s", filename); goto exit; @@ -838,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) } exit: - bdrv_unref(bs); + blk_unref(blk); g_free(bmap); return ret; } diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c index 0640d3f4a9..da33cd38ef 100644 --- a/block/vhdx-endian.c +++ b/block/vhdx-endian.c @@ -15,6 +15,7 @@ * */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "block/block_int.h" #include "block/vhdx.h" diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 47ae4b1351..7ea7187fc4 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -17,6 +17,8 @@ * See the COPYING.LIB file in the top-level directory. * */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" #include "qemu/error-report.h" @@ -784,12 +786,13 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed, if (logs.valid) { if (bs->read_only) { ret = -EPERM; - error_setg_errno(errp, EPERM, - "VHDX image file '%s' opened read-only, but " - "contains a log that needs to be replayed. To " - "replay the log, execute:\n qemu-img check -r " - "all '%s'", - bs->filename, bs->filename); + error_setg(errp, + "VHDX image file '%s' opened read-only, but " + "contains a log that needs to be replayed", + bs->filename); + error_append_hint(errp, "To replay the log, run:\n" + "qemu-img check -r all '%s'\n", + bs->filename); goto exit; } /* now flush the log */ diff --git a/block/vhdx.c b/block/vhdx.c index 2fe9a5e0cf..2b7b332404 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -15,8 +15,11 @@ * */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "qemu/crc32c.h" #include "block/vhdx.h" @@ -263,10 +266,10 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s) static void vhdx_set_shift_bits(BDRVVHDXState *s) { - s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size); - s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block); - s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio); - s->block_size_bits = 31 - clz32(s->block_size); + s->logical_sector_size_bits = ctz32(s->logical_sector_size); + s->sectors_per_block_bits = ctz32(s->sectors_per_block); + s->chunk_ratio_bits = ctz64(s->chunk_ratio); + s->block_size_bits = ctz32(s->block_size); } /* @@ -856,14 +859,8 @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s) { uint32_t data_blocks_cnt, bitmap_blocks_cnt; - data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits; - if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) { - data_blocks_cnt++; - } - bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits; - if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) { - bitmap_blocks_cnt++; - } + data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size); + bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio); if (s->parent_entries) { s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1); @@ -1777,7 +1774,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) gunichar2 *creator = NULL; glong creator_items; - BlockDriverState *bs; + BlockBackend *blk; char *type = NULL; VHDXImageType image_type; Error *local_err = NULL; @@ -1842,14 +1839,16 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - bs = NULL; - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + /* Create (A) */ /* The creator field is optional, but may be useful for @@ -1857,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL, &creator_items, NULL); signature = cpu_to_le64(VHDX_FILE_SIGNATURE); - ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature)); if (ret < 0) { goto delete_and_exit; } if (creator) { - ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature), - creator, creator_items * sizeof(gunichar2)); + ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature), + creator, creator_items * sizeof(gunichar2)); if (ret < 0) { goto delete_and_exit; } @@ -1871,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) /* Creates (B),(C) */ - ret = vhdx_create_new_headers(bs, image_size, log_size); + ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size); if (ret < 0) { goto delete_and_exit; } /* Creates (D),(E),(G) explicitly. (F) created as by-product */ - ret = vhdx_create_new_region_table(bs, image_size, block_size, 512, + ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512, log_size, use_zero_blocks, image_type, &metadata_offset); if (ret < 0) { @@ -1885,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) } /* Creates (H) */ - ret = vhdx_create_new_metadata(bs, image_size, block_size, 512, + ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512, metadata_offset, image_type); if (ret < 0) { goto delete_and_exit; @@ -1893,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) delete_and_exit: - bdrv_unref(bs); + blk_unref(blk); exit: g_free(type); g_free(creator); diff --git a/block/vmdk.c b/block/vmdk.c index 6f819e413f..45f9d3c5b9 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -23,12 +23,15 @@ * THE SOFTWARE. */ -#include "qemu-common.h" +#include "qemu/osdep.h" +#include "qapi/error.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qapi/qmp/qerror.h" #include "qemu/error-report.h" #include "qemu/module.h" #include "migration/migration.h" +#include "qemu/cutils.h" #include <zlib.h> #include <glib.h> @@ -241,15 +244,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs) static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) { - char desc[DESC_SIZE]; + char *desc; uint32_t cid = 0xffffffff; const char *p_name, *cid_str; size_t cid_str_size; BDRVVmdkState *s = bs->opaque; int ret; + desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { + g_free(desc); return 0; } @@ -268,41 +273,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) sscanf(p_name, "%" SCNx32, &cid); } + g_free(desc); return cid; } static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) { - char desc[DESC_SIZE], tmp_desc[DESC_SIZE]; + char *desc, *tmp_desc; char *p_name, *tmp_str; BDRVVmdkState *s = bs->opaque; - int ret; + int ret = 0; + desc = g_malloc0(DESC_SIZE); + tmp_desc = g_malloc0(DESC_SIZE); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - return ret; + goto out; } desc[DESC_SIZE - 1] = '\0'; tmp_str = strstr(desc, "parentCID"); if (tmp_str == NULL) { - return -EINVAL; + ret = -EINVAL; + goto out; } - pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str); + pstrcpy(tmp_desc, DESC_SIZE, tmp_str); p_name = strstr(desc, "CID"); if (p_name != NULL) { p_name += sizeof("CID"); - snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid); - pstrcat(desc, sizeof(desc), tmp_desc); + snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid); + pstrcat(desc, DESC_SIZE, tmp_desc); } ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE); - if (ret < 0) { - return ret; - } - return 0; +out: + g_free(desc); + g_free(tmp_desc); + return ret; } static int vmdk_is_cid_valid(BlockDriverState *bs) @@ -336,15 +345,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state, static int vmdk_parent_open(BlockDriverState *bs) { char *p_name; - char desc[DESC_SIZE + 1]; + char *desc; BDRVVmdkState *s = bs->opaque; int ret; - desc[DESC_SIZE] = '\0'; + desc = g_malloc0(DESC_SIZE + 1); ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { - return ret; + goto out; } + ret = 0; p_name = strstr(desc, "parentFileNameHint"); if (p_name != NULL) { @@ -353,16 +363,20 @@ static int vmdk_parent_open(BlockDriverState *bs) p_name += sizeof("parentFileNameHint") + 1; end_name = strchr(p_name, '\"'); if (end_name == NULL) { - return -EINVAL; + ret = -EINVAL; + goto out; } if ((end_name - p_name) > sizeof(bs->backing_file) - 1) { - return -EINVAL; + ret = -EINVAL; + goto out; } pstrcpy(bs->backing_file, end_name - p_name + 1, p_name); } - return 0; +out: + g_free(desc); + return ret; } /* Create and append extent to the extent array. Return the added VmdkExtent @@ -570,6 +584,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, VmdkExtent *extent; BDRVVmdkState *s = bs->opaque; int64_t l1_backup_offset = 0; + bool compressed; ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header)); if (ret < 0) { @@ -644,14 +659,14 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, header = footer.header; } + compressed = + le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; if (le32_to_cpu(header.version) > 3) { - char buf[64]; - snprintf(buf, sizeof(buf), "VMDK version %" PRId32, - le32_to_cpu(header.version)); - error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, - bdrv_get_device_or_node_name(bs), "vmdk", buf); + error_setg(errp, "Unsupported VMDK version %" PRIu32, + le32_to_cpu(header.version)); return -ENOTSUP; - } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) { + } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) && + !compressed) { /* VMware KB 2064959 explains that version 3 added support for * persistent changed block tracking (CBT), and backup software can * read it as version=1 if it doesn't care about the changed area @@ -760,6 +775,17 @@ static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags, } } +static const char *next_line(const char *s) +{ + while (*s) { + if (*s == '\n') { + return s + 1; + } + s++; + } + return s; +} + static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, const char *desc_file_path, QDict *options, Error **errp) @@ -769,7 +795,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, char access[11]; char type[11]; char fname[512]; - const char *p = desc; + const char *p, *np; int64_t sectors = 0; int64_t flat_offset; char *extent_path; @@ -779,7 +805,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, char extent_opt_prefix[32]; Error *local_err = NULL; - while (*p) { + for (p = desc; *p; p = next_line(p)) { /* parse extent line in one of below formats: * * RW [size in sectors] FLAT "file-name.vmdk" OFFSET @@ -791,29 +817,26 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64, access, §ors, type, fname, &flat_offset); if (matches < 4 || strcmp(access, "RW")) { - goto next_line; + continue; } else if (!strcmp(type, "FLAT")) { if (matches != 5 || flat_offset < 0) { - error_setg(errp, "Invalid extent lines: \n%s", p); - return -EINVAL; + goto invalid; } } else if (!strcmp(type, "VMFS")) { if (matches == 4) { flat_offset = 0; } else { - error_setg(errp, "Invalid extent lines:\n%s", p); - return -EINVAL; + goto invalid; } } else if (matches != 4) { - error_setg(errp, "Invalid extent lines:\n%s", p); - return -EINVAL; + goto invalid; } if (sectors <= 0 || (strcmp(type, "FLAT") && strcmp(type, "SPARSE") && strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) || (strcmp(access, "RW"))) { - goto next_line; + continue; } if (!path_is_absolute(fname) && !path_has_protocol(fname) && @@ -870,17 +893,17 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, return -ENOTSUP; } extent->type = g_strdup(type); -next_line: - /* move to next line */ - while (*p) { - if (*p == '\n') { - p++; - break; - } - p++; - } } return 0; + +invalid: + np = next_line(p); + assert(np != p); + if (np[-1] == '\n') { + np--; + } + error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p); + return -EINVAL; } static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, @@ -1248,7 +1271,7 @@ static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent, } static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { BDRVVmdkState *s = bs->opaque; int64_t index_in_cluster, n, ret; @@ -1265,6 +1288,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, 0, 0); qemu_co_mutex_unlock(&s->lock); + index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); switch (ret) { case VMDK_ERROR: ret = -EIO; @@ -1277,14 +1301,15 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs, break; case VMDK_OK: ret = BDRV_BLOCK_DATA; - if (extent->file == bs->file && !extent->compressed) { - ret |= BDRV_BLOCK_OFFSET_VALID | offset; + if (!extent->compressed) { + ret |= BDRV_BLOCK_OFFSET_VALID; + ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS)) + & BDRV_BLOCK_OFFSET_MASK; } - + *file = extent->file->bs; break; } - index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num); n = extent->cluster_sectors - index_in_cluster; if (n > nb_sectors) { n = nb_sectors; @@ -1494,8 +1519,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num, if (sector_num > bs->total_sectors) { error_report("Wrong offset: sector_num=0x%" PRIx64 - " total_sectors=0x%" PRIx64 "\n", - sector_num, bs->total_sectors); + " total_sectors=0x%" PRIx64, + sector_num, bs->total_sectors); return -EIO; } @@ -1624,7 +1649,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, QemuOpts *opts, Error **errp) { int ret, i; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; VMDK4Header header; Error *local_err = NULL; uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count; @@ -1637,16 +1662,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, goto exit; } - assert(bs == NULL); - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } + blk_set_allow_write_beyond_eof(blk, true); + if (flat) { - ret = bdrv_truncate(bs, filesize); + ret = blk_truncate(blk, filesize); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); } @@ -1654,7 +1681,13 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } magic = cpu_to_be32(VMDK4_MAGIC); memset(&header, 0, sizeof(header)); - header.version = zeroed_grain ? 2 : 1; + if (compress) { + header.version = 3; + } else if (zeroed_grain) { + header.version = 2; + } else { + header.version = 1; + } header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0); @@ -1695,18 +1728,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, header.check_bytes[3] = 0xa; /* write all the data */ - ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic)); + ret = blk_pwrite(blk, 0, &magic, sizeof(magic)); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; } - ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header)); + ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header)); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; } - ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9); + ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); goto exit; @@ -1719,8 +1752,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, i < gt_count; i++, tmp += gt_size) { gd_buf[i] = cpu_to_le32(tmp); } - ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, - gd_buf, gd_buf_size); + ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; @@ -1731,8 +1764,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, i < gt_count; i++, tmp += gt_size) { gd_buf[i] = cpu_to_le32(tmp); } - ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, - gd_buf, gd_buf_size); + ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE, + gd_buf, gd_buf_size); if (ret < 0) { error_setg(errp, QERR_IO_ERROR); goto exit; @@ -1740,8 +1773,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, ret = 0; exit: - if (bs) { - bdrv_unref(bs); + if (blk) { + blk_unref(blk); } g_free(gd_buf); return ret; @@ -1790,7 +1823,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix, static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) { int idx = 0; - BlockDriverState *new_bs = NULL; + BlockBackend *new_blk = NULL; Error *local_err = NULL; char *desc = NULL; int64_t total_size = 0, filesize; @@ -1901,7 +1934,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } if (backing_file) { - BlockDriverState *bs = NULL; + BlockBackend *blk; char *full_backing = g_new0(char, PATH_MAX); bdrv_get_full_backing_filename_from_filename(filename, backing_file, full_backing, PATH_MAX, @@ -1912,18 +1945,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) ret = -ENOENT; goto exit; } - ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp); + + blk = blk_new_open(full_backing, NULL, NULL, + BDRV_O_NO_BACKING, errp); g_free(full_backing); - if (ret != 0) { + if (blk == NULL) { + ret = -EIO; goto exit; } - if (strcmp(bs->drv->format_name, "vmdk")) { - bdrv_unref(bs); + if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) { + blk_unref(blk); ret = -EINVAL; goto exit; } - parent_cid = vmdk_read_cid(bs, 0); - bdrv_unref(bs); + parent_cid = vmdk_read_cid(blk_bs(blk), 0); + blk_unref(blk); snprintf(parent_desc_line, BUF_SIZE, "parentFileNameHint=\"%s\"", backing_file); } @@ -1981,14 +2017,18 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } } - assert(new_bs == NULL); - ret = bdrv_open(&new_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); - if (ret < 0) { + + new_blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (new_blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto exit; } - ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len); + + blk_set_allow_write_beyond_eof(new_blk, true); + + ret = blk_pwrite(new_blk, desc_offset, desc, desc_len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not write description"); goto exit; @@ -1996,14 +2036,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) /* bdrv_pwrite write padding zeros to align to sector, we don't need that * for description file */ if (desc_offset == 0) { - ret = bdrv_truncate(new_bs, desc_len); + ret = blk_truncate(new_blk, desc_len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not truncate file"); } } exit: - if (new_bs) { - bdrv_unref(new_bs); + if (new_blk) { + blk_unref(new_blk); } g_free(adapter_type); g_free(backing_file); @@ -2162,18 +2202,18 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) *spec_info = (ImageInfoSpecific){ .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, - { - .vmdk = g_new0(ImageInfoSpecificVmdk, 1), + .u = { + .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1), }, }; - *spec_info->u.vmdk = (ImageInfoSpecificVmdk) { + *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) { .create_type = g_strdup(s->create_type), .cid = s->cid, .parent_cid = s->parent_cid, }; - next = &spec_info->u.vmdk->extents; + next = &spec_info->u.vmdk.data->extents; for (i = 0; i < s->num_extents; i++) { *next = g_new0(ImageInfoList, 1); (*next)->value = vmdk_get_extent_info(&s->extents[i]); diff --git a/block/vpc.c b/block/vpc.c index 299d373092..3e2ea698d9 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -22,8 +22,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" +#include "qapi/error.h" #include "qemu-common.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu/module.h" #include "migration/migration.h" #if defined(CONFIG_UUID) @@ -42,28 +45,34 @@ enum vhd_type { VHD_DIFFERENCING = 4, }; -// Seconds since Jan 1, 2000 0:00:00 (UTC) +/* Seconds since Jan 1, 2000 0:00:00 (UTC) */ #define VHD_TIMESTAMP_BASE 946684800 -#define VHD_MAX_SECTORS (65535LL * 255 * 255) -#define VHD_MAX_GEOMETRY (65535LL * 16 * 255) +#define VHD_CHS_MAX_C 65535LL +#define VHD_CHS_MAX_H 16 +#define VHD_CHS_MAX_S 255 -// always big-endian +#define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */ +#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S) + +#define VPC_OPT_FORCE_SIZE "force_size" + +/* always big-endian */ typedef struct vhd_footer { - char creator[8]; // "conectix" + char creator[8]; /* "conectix" */ uint32_t features; uint32_t version; - // Offset of next header structure, 0xFFFFFFFF if none + /* Offset of next header structure, 0xFFFFFFFF if none */ uint64_t data_offset; - // Seconds since Jan 1, 2000 0:00:00 (UTC) + /* Seconds since Jan 1, 2000 0:00:00 (UTC) */ uint32_t timestamp; - char creator_app[4]; // "vpc " + char creator_app[4]; /* e.g., "vpc " */ uint16_t major; uint16_t minor; - char creator_os[4]; // "Wi2k" + char creator_os[4]; /* "Wi2k" */ uint64_t orig_size; uint64_t current_size; @@ -74,29 +83,29 @@ typedef struct vhd_footer { uint32_t type; - // Checksum of the Hard Disk Footer ("one's complement of the sum of all - // the bytes in the footer without the checksum field") + /* Checksum of the Hard Disk Footer ("one's complement of the sum of all + the bytes in the footer without the checksum field") */ uint32_t checksum; - // UUID used to identify a parent hard disk (backing file) + /* UUID used to identify a parent hard disk (backing file) */ uint8_t uuid[16]; uint8_t in_saved_state; } QEMU_PACKED VHDFooter; typedef struct vhd_dyndisk_header { - char magic[8]; // "cxsparse" + char magic[8]; /* "cxsparse" */ - // Offset of next header structure, 0xFFFFFFFF if none + /* Offset of next header structure, 0xFFFFFFFF if none */ uint64_t data_offset; - // Offset of the Block Allocation Table (BAT) + /* Offset of the Block Allocation Table (BAT) */ uint64_t table_offset; uint32_t version; - uint32_t max_table_entries; // 32bit/entry + uint32_t max_table_entries; /* 32bit/entry */ - // 2 MB by default, must be a power of two + /* 2 MB by default, must be a power of two */ uint32_t block_size; uint32_t checksum; @@ -104,7 +113,7 @@ typedef struct vhd_dyndisk_header { uint32_t parent_timestamp; uint32_t reserved; - // Backing file name (in UTF-16) + /* Backing file name (in UTF-16) */ uint8_t parent_name[512]; struct { @@ -127,6 +136,8 @@ typedef struct BDRVVPCState { uint32_t block_size; uint32_t bitmap_size; + bool force_use_chs; + bool force_use_sz; #ifdef CACHE uint8_t *pageentry_u8; @@ -139,6 +150,22 @@ typedef struct BDRVVPCState { Error *migration_blocker; } BDRVVPCState; +#define VPC_OPT_SIZE_CALC "force_size_calc" +static QemuOptsList vpc_runtime_opts = { + .name = "vpc-runtime-opts", + .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head), + .desc = { + { + .name = VPC_OPT_SIZE_CALC, + .type = QEMU_OPT_STRING, + .help = "Force disk size calculation to use either CHS geometry, " + "or use the disk current_size specified in the VHD footer. " + "{chs, current_size}" + }, + { /* end of list */ } + } +}; + static uint32_t vpc_checksum(uint8_t* buf, size_t size) { uint32_t res = 0; @@ -158,6 +185,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename) return 0; } +static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, + Error **errp) +{ + BDRVVPCState *s = bs->opaque; + const char *size_calc; + + size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); + + if (!size_calc) { + /* no override, use autodetect only */ + } else if (!strcmp(size_calc, "current_size")) { + s->force_use_sz = true; + } else if (!strcmp(size_calc, "chs")) { + s->force_use_chs = true; + } else { + error_setg(errp, "Invalid size calculation mode: '%s'", size_calc); + } +} + static int vpc_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -165,6 +211,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int i; VHDFooter *footer; VHDDynDiskHeader *dyndisk_header; + QemuOpts *opts = NULL; + Error *local_err = NULL; + bool use_chs; uint8_t buf[HEADER_SIZE]; uint32_t checksum; uint64_t computed_size; @@ -172,8 +221,24 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int disk_type = VHD_DYNAMIC; int ret; + opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + vpc_parse_options(bs, opts, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); if (ret < 0) { + error_setg(errp, "Unable to read VHD header"); goto fail; } @@ -182,9 +247,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int64_t offset = bdrv_getlength(bs->file->bs); if (offset < 0) { ret = offset; + error_setg(errp, "Invalid file size"); goto fail; } else if (offset < HEADER_SIZE) { ret = -EINVAL; + error_setg(errp, "File too small for a VHD header"); goto fail; } @@ -211,22 +278,50 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, /* Write 'checksum' back to footer, or else will leave it with zero. */ footer->checksum = cpu_to_be32(checksum); - // The visible size of a image in Virtual PC depends on the geometry - // rather than on the size stored in the footer (the size in the footer - // is too large usually) + /* The visible size of a image in Virtual PC depends on the geometry + rather than on the size stored in the footer (the size in the footer + is too large usually) */ bs->total_sectors = (int64_t) be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl; - /* Images that have exactly the maximum geometry are probably bigger and - * would be truncated if we adhered to the geometry for them. Rely on - * footer->current_size for them. */ - if (bs->total_sectors == VHD_MAX_GEOMETRY) { + /* Microsoft Virtual PC and Microsoft Hyper-V produce and read + * VHD image sizes differently. VPC will rely on CHS geometry, + * while Hyper-V and disk2vhd use the size specified in the footer. + * + * We use a couple of approaches to try and determine the correct method: + * look at the Creator App field, and look for images that have CHS + * geometry that is the maximum value. + * + * If the CHS geometry is the maximum CHS geometry, then we assume that + * the size is the footer->current_size to avoid truncation. Otherwise, + * we follow the table based on footer->creator_app: + * + * Known creator apps: + * 'vpc ' : CHS Virtual PC (uses disk geometry) + * 'qemu' : CHS QEMU (uses disk geometry) + * 'qem2' : current_size QEMU (uses current_size) + * 'win ' : current_size Hyper-V + * 'd2v ' : current_size Disk2vhd + * 'tap\0' : current_size XenServer + * 'CTXS' : current_size XenConverter + * + * The user can override the table values via drive options, however + * even with an override we will still use current_size for images + * that have CHS geometry of the maximum size. + */ + use_chs = (!!strncmp(footer->creator_app, "win ", 4) && + !!strncmp(footer->creator_app, "qem2", 4) && + !!strncmp(footer->creator_app, "d2v ", 4) && + !!strncmp(footer->creator_app, "CTXS", 4) && + !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs; + + if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) { bs->total_sectors = be64_to_cpu(footer->current_size) / - BDRV_SECTOR_SIZE; + BDRV_SECTOR_SIZE; } - /* Allow a maximum disk size of approximately 2 TB */ - if (bs->total_sectors >= VHD_MAX_SECTORS) { + /* Allow a maximum disk size of 2040 GiB */ + if (bs->total_sectors > VHD_MAX_SECTORS) { ret = -EFBIG; goto fail; } @@ -235,12 +330,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE); if (ret < 0) { + error_setg(errp, "Error reading dynamic VHD header"); goto fail; } dyndisk_header = (VHDDynDiskHeader *) buf; if (strncmp(dyndisk_header->magic, "cxsparse", 8)) { + error_setg(errp, "Invalid header magic"); ret = -EINVAL; goto fail; } @@ -256,16 +353,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries); if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) { - ret = -EINVAL; - goto fail; - } - if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) { + error_setg(errp, "Too many blocks"); ret = -EINVAL; goto fail; } computed_size = (uint64_t) s->max_table_entries * s->block_size; if (computed_size < bs->total_sectors * 512) { + error_setg(errp, "Page table too small"); ret = -EINVAL; goto fail; } @@ -282,6 +377,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size); if (s->pagetable == NULL) { + error_setg(errp, "Unable to allocate memory for page table"); ret = -ENOMEM; goto fail; } @@ -291,6 +387,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable, pagetable_size); if (ret < 0) { + error_setg(errp, "Error reading pagetable"); goto fail; } @@ -369,16 +466,16 @@ static inline int64_t get_sector_offset(BlockDriverState *bs, pageentry_index = (offset % s->block_size) / 512; if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff) - return -1; // not allocated + return -1; /* not allocated */ bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index]; block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index); - // We must ensure that we don't write to any sectors which are marked as - // unused in the bitmap. We get away with setting all bits in the block - // bitmap each time we write to a new block. This might cause Virtual PC to - // miss sparse read optimization, but it's not a problem in terms of - // correctness. + /* We must ensure that we don't write to any sectors which are marked as + unused in the bitmap. We get away with setting all bits in the block + bitmap each time we write to a new block. This might cause Virtual PC to + miss sparse read optimization, but it's not a problem in terms of + correctness. */ if (write && (s->last_bitmap_offset != bitmap_offset)) { uint8_t bitmap[s->bitmap_size]; @@ -424,18 +521,18 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) int ret; uint8_t bitmap[s->bitmap_size]; - // Check if sector_num is valid + /* Check if sector_num is valid */ if ((sector_num < 0) || (sector_num > bs->total_sectors)) return -1; - // Write entry into in-memory BAT + /* Write entry into in-memory BAT */ index = (sector_num * 512) / s->block_size; if (s->pagetable[index] != 0xFFFFFFFF) return -1; s->pagetable[index] = s->free_data_block_offset / 512; - // Initialize the block's bitmap + /* Initialize the block's bitmap */ memset(bitmap, 0xff, s->bitmap_size); ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap, s->bitmap_size); @@ -443,13 +540,13 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) return ret; } - // Write new footer (the old one will be overwritten) + /* Write new footer (the old one will be overwritten) */ s->free_data_block_offset += s->block_size + s->bitmap_size; ret = rewrite_footer(bs); if (ret < 0) goto fail; - // Write BAT entry to disk + /* Write BAT entry to disk */ bat_offset = s->bat_offset + (4 * index); bat_value = cpu_to_be32(s->pagetable[index]); ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4); @@ -578,7 +675,7 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num, } static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int *pnum) + int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file) { BDRVVPCState *s = bs->opaque; VHDFooter *footer = (VHDFooter*) s->footer_buf; @@ -588,6 +685,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, if (be32_to_cpu(footer->type) == VHD_FIXED) { *pnum = nb_sectors; + *file = bs->file->bs; return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | (sector_num << BDRV_SECTOR_BITS); } @@ -609,6 +707,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, /* *pnum can't be greater than one block for allocated * sectors since there is always a bitmap in between. */ if (allocated) { + *file = bs->file->bs; return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start; } if (nb_sectors == 0) { @@ -628,7 +727,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs, * Note that the geometry doesn't always exactly match total_sectors but * may round it down. * - * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override + * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB) * and instead allow up to 255 heads. */ @@ -670,7 +769,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls, return 0; } -static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, +static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf, int64_t total_sectors) { VHDDynDiskHeader *dyndisk_header = @@ -680,34 +779,34 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, int ret; int64_t offset = 0; - // Write the footer (twice: at the beginning and at the end) + /* Write the footer (twice: at the beginning and at the end) */ block_size = 0x200000; num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512); - ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); - if (ret) { + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); + if (ret < 0) { goto fail; } offset = 1536 + ((num_bat_entries * 4 + 511) & ~511); - ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE); + ret = blk_pwrite(blk, offset, buf, HEADER_SIZE); if (ret < 0) { goto fail; } - // Write the initial BAT + /* Write the initial BAT */ offset = 3 * 512; memset(buf, 0xFF, 512); for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) { - ret = bdrv_pwrite_sync(bs, offset, buf, 512); + ret = blk_pwrite(blk, offset, buf, 512); if (ret < 0) { goto fail; } offset += 512; } - // Prepare the Dynamic Disk Header + /* Prepare the Dynamic Disk Header */ memset(buf, 0, 1024); memcpy(dyndisk_header->magic, "cxsparse", 8); @@ -724,10 +823,10 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024)); - // Write the header + /* Write the header */ offset = 512; - ret = bdrv_pwrite_sync(bs, offset, buf, 1024); + ret = blk_pwrite(blk, offset, buf, 1024); if (ret < 0) { goto fail; } @@ -736,7 +835,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf, return ret; } -static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, +static int create_fixed_disk(BlockBackend *blk, uint8_t *buf, int64_t total_size) { int ret; @@ -744,12 +843,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf, /* Add footer to total size */ total_size += HEADER_SIZE; - ret = bdrv_truncate(bs, total_size); + ret = blk_truncate(blk, total_size); if (ret < 0) { return ret; } - ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE); + ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE); if (ret < 0) { return ret; } @@ -770,8 +869,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) int64_t total_size; int disk_type; int ret = -EIO; + bool force_size; Error *local_err = NULL; - BlockDriverState *bs = NULL; + BlockBackend *blk = NULL; /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), @@ -783,6 +883,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) } else if (!strcmp(disk_type_param, "fixed")) { disk_type = VHD_FIXED; } else { + error_setg(errp, "Invalid disk type, %s", disk_type_param); ret = -EINVAL; goto out; } @@ -790,36 +891,50 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) disk_type = VHD_DYNAMIC; } + force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false); + ret = bdrv_create_file(filename, opts, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; } - ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - &local_err); - if (ret < 0) { + + blk = blk_new_open(filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); + if (blk == NULL) { error_propagate(errp, local_err); + ret = -EIO; goto out; } + blk_set_allow_write_beyond_eof(blk, true); + /* * Calculate matching total_size and geometry. Increase the number of * sectors requested until we get enough (or fail). This ensures that * qemu-img convert doesn't truncate images, but rather rounds up. * - * If the image size can't be represented by a spec conform CHS geometry, + * If the image size can't be represented by a spec conformant CHS geometry, * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use * the image size from the VHD footer to calculate total_sectors. */ - total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); - for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { - calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + if (force_size) { + /* This will force the use of total_size for sector count, below */ + cyls = VHD_CHS_MAX_C; + heads = VHD_CHS_MAX_H; + secs_per_cyl = VHD_CHS_MAX_S; + } else { + total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE); + for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) { + calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl); + } } if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) { total_sectors = total_size / BDRV_SECTOR_SIZE; - /* Allow a maximum disk size of approximately 2 TB */ + /* Allow a maximum disk size of 2040 GiB */ if (total_sectors > VHD_MAX_SECTORS) { + error_setg(errp, "Disk size is too large, max size is 2040 GiB"); ret = -EFBIG; goto out; } @@ -832,8 +947,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) memset(buf, 0, 1024); memcpy(footer->creator, "conectix", 8); - /* TODO Check if "qemu" creator_app is ok for VPC */ - memcpy(footer->creator_app, "qemu", 4); + if (force_size) { + memcpy(footer->creator_app, "qem2", 4); + } else { + memcpy(footer->creator_app, "qemu", 4); + } memcpy(footer->creator_os, "Wi2k", 4); footer->features = cpu_to_be32(0x02); @@ -863,13 +981,16 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE)); if (disk_type == VHD_DYNAMIC) { - ret = create_dynamic_disk(bs, buf, total_sectors); + ret = create_dynamic_disk(blk, buf, total_sectors); } else { - ret = create_fixed_disk(bs, buf, total_size); + ret = create_fixed_disk(blk, buf, total_size); + } + if (ret < 0) { + error_setg(errp, "Unable to create or write VHD header"); } out: - bdrv_unref(bs); + blk_unref(blk); g_free(disk_type_param); return ret; } @@ -914,6 +1035,13 @@ static QemuOptsList vpc_create_opts = { "Type of virtual hard disk format. Supported formats are " "{dynamic (default) | fixed} " }, + { + .name = VPC_OPT_FORCE_SIZE, + .type = QEMU_OPT_BOOL, + .help = "Force disk size calculation to use the actual size " + "specified, rather than using the nearest CHS-based " + "calculation" + }, { /* end of list */ } } }; diff --git a/block/vvfat.c b/block/vvfat.c index b184eca6fc..183fc4f049 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -22,15 +22,16 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -#include <sys/stat.h> +#include "qemu/osdep.h" #include <dirent.h> -#include "qemu-common.h" +#include "qapi/error.h" #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" #include "qapi/qmp/qint.h" #include "qapi/qmp/qbool.h" #include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" #ifndef S_IWGRP #define S_IWGRP 0 @@ -1108,6 +1109,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } memcpy(s->volume_label, label, label_length); + } else { + memcpy(s->volume_label, "QEMU VVFAT", 10); } if (floppy) { @@ -2282,12 +2285,17 @@ DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapp factor * (old_cluster_count - new_cluster_count)); for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) { + direntry_t *first_direntry; void* direntry = array_get(&(s->directory), current_dir_index); int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry, s->sectors_per_cluster); if (ret) return ret; - assert(!strncmp(s->directory.pointer, "QEMU", 4)); + + /* The first directory entry on the filesystem is the volume name */ + first_direntry = (direntry_t*) s->directory.pointer; + assert(!memcmp(first_direntry->name, s->volume_label, 11)); + current_dir_index += factor; } @@ -2884,7 +2892,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num, } static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs, - int64_t sector_num, int nb_sectors, int* n) + int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file) { BDRVVVFATState* s = bs->opaque; *n = s->sector_count - sector_num; @@ -2956,8 +2964,7 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp) options = qdict_new(); qdict_put(options, "driver", qstring_from_str("qcow")); ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - errp); + BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp); if (ret < 0) { goto err; } diff --git a/block/win32-aio.c b/block/win32-aio.c index bbf2f01c12..2d509a9a7b 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ +#include "qemu/osdep.h" #include "qemu-common.h" #include "qemu/timer.h" #include "block/block_int.h" diff --git a/block/write-threshold.c b/block/write-threshold.c index 0fe38917c5..cc2ca71835 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -10,6 +10,7 @@ * See the COPYING.LIB file in the top-level directory. */ +#include "qemu/osdep.h" #include "block/block_int.h" #include "qemu/coroutine.h" #include "block/write-threshold.h" |