diff options
author | Yonghee Han <onstudy@samsung.com> | 2016-07-27 16:42:54 +0900 |
---|---|---|
committer | Yonghee Han <onstudy@samsung.com> | 2016-07-27 00:56:08 -0700 |
commit | a03c4728275d119af5f66c4a69e8d9d5a1730031 (patch) | |
tree | 2b4ed9542884bf8b947076c55c4ef1814217cb69 /block | |
parent | 3158f4a51894e46ecb593bffbfd12824e1d6534a (diff) | |
download | qemu-a03c4728275d119af5f66c4a69e8d9d5a1730031.tar.gz qemu-a03c4728275d119af5f66c4a69e8d9d5a1730031.tar.bz2 qemu-a03c4728275d119af5f66c4a69e8d9d5a1730031.zip |
Imported Upstream version 2.5.1.1 (upstream/2.5.1.1)
Change-Id: Ie290b0e68882590d8a64fab165a943940b7c98ed
Diffstat (limited to 'block')
46 files changed, 2605 insertions, 1480 deletions
diff --git a/block/accounting.c b/block/accounting.c index 01d594ffd..185025ec1 100644 --- a/block/accounting.c +++ b/block/accounting.c @@ -2,6 +2,7 @@ * QEMU System Emulator block accounting * * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -25,6 +26,54 @@ #include "block/accounting.h" #include "block/block_int.h" #include "qemu/timer.h" +#include "sysemu/qtest.h" + +static QEMUClockType clock_type = QEMU_CLOCK_REALTIME; +static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000; + +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed) +{ + stats->account_invalid = account_invalid; + stats->account_failed = account_failed; + + if (qtest_enabled()) { + clock_type = QEMU_CLOCK_VIRTUAL; + } +} + +void block_acct_cleanup(BlockAcctStats *stats) +{ + BlockAcctTimedStats *s, *next; + QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) { + g_free(s); + } +} + +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length) +{ + BlockAcctTimedStats *s; + unsigned i; + + s = g_new0(BlockAcctTimedStats, 1); + s->interval_length = interval_length; + QSLIST_INSERT_HEAD(&stats->intervals, s, entries); + + for (i = 0; i < BLOCK_MAX_IOTYPE; i++) { + timed_average_init(&s->latency[i], clock_type, + (uint64_t) interval_length * NANOSECONDS_PER_SECOND); + } +} + +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s) +{ + if (s == NULL) { + return QSLIST_FIRST(&stats->intervals); + } else { + return QSLIST_NEXT(s, entries); + } +} void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, int64_t bytes, enum BlockAcctType type) @@ -32,26 +81,69 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, assert(type < BLOCK_MAX_IOTYPE); cookie->bytes = bytes; - 
cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); + cookie->start_time_ns = qemu_clock_get_ns(clock_type); cookie->type = type; } void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie) { + BlockAcctTimedStats *s; + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + + if (qtest_enabled()) { + latency_ns = qtest_latency_ns; + } + assert(cookie->type < BLOCK_MAX_IOTYPE); stats->nr_bytes[cookie->type] += cookie->bytes; stats->nr_ops[cookie->type]++; - stats->total_time_ns[cookie->type] += - qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns; + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } } +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie) +{ + assert(cookie->type < BLOCK_MAX_IOTYPE); + + stats->failed_ops[cookie->type]++; + + if (stats->account_failed) { + BlockAcctTimedStats *s; + int64_t time_ns = qemu_clock_get_ns(clock_type); + int64_t latency_ns = time_ns - cookie->start_time_ns; + + if (qtest_enabled()) { + latency_ns = qtest_latency_ns; + } -void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num, - unsigned int nb_sectors) + stats->total_time_ns[cookie->type] += latency_ns; + stats->last_access_time_ns = time_ns; + + QSLIST_FOREACH(s, &stats->intervals, entries) { + timed_average_account(&s->latency[cookie->type], latency_ns); + } + } +} + +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type) { - if (stats->wr_highest_sector < sector_num + nb_sectors - 1) { - stats->wr_highest_sector = sector_num + nb_sectors - 1; + assert(type < BLOCK_MAX_IOTYPE); + + /* block_acct_done() and block_acct_failed() update + * total_time_ns[], but this one does not. 
The reason is that + * invalid requests are accounted during their submission, + * therefore there's no actual I/O involved. */ + + stats->invalid_ops[type]++; + + if (stats->account_invalid) { + stats->last_access_time_ns = qemu_clock_get_ns(clock_type); } } @@ -61,3 +153,20 @@ void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, assert(type < BLOCK_MAX_IOTYPE); stats->merged[type] += num_requests; } + +int64_t block_acct_idle_time_ns(BlockAcctStats *stats) +{ + return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns; +} + +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type) +{ + uint64_t sum, elapsed; + + assert(type < BLOCK_MAX_IOTYPE); + + sum = timed_average_sum(&stats->latency[type], &elapsed); + + return (double) sum / elapsed; +} diff --git a/block/backup.c b/block/backup.c index 965654d52..705bb7766 100644 --- a/block/backup.c +++ b/block/backup.c @@ -21,6 +21,7 @@ #include "block/blockjob.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" +#include "sysemu/block-backend.h" #define BACKUP_CLUSTER_BITS 16 #define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS) @@ -89,7 +90,8 @@ static void cow_request_end(CowRequest *req) static int coroutine_fn backup_do_cow(BlockDriverState *bs, int64_t sector_num, int nb_sectors, - bool *error_is_read) + bool *error_is_read, + bool is_write_notifier) { BackupBlockJob *job = (BackupBlockJob *)bs->job; CowRequest cow_request; @@ -129,8 +131,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs, iov.iov_len = n * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&bounce_qiov, &iov, 1); - ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, - &bounce_qiov); + if (is_write_notifier) { + ret = bdrv_co_readv_no_serialising(bs, + start * BACKUP_SECTORS_PER_CLUSTER, + n, &bounce_qiov); + } else { + ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, + &bounce_qiov); + } if (ret < 0) { trace_backup_do_cow_read_fail(job, start, ret); 
if (error_is_read) { @@ -190,7 +198,7 @@ static int coroutine_fn backup_before_write_notify( assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0); - return backup_do_cow(req->bs, sector_num, nb_sectors, NULL); + return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true); } static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) @@ -208,7 +216,41 @@ static void backup_iostatus_reset(BlockJob *job) { BackupBlockJob *s = container_of(job, BackupBlockJob, common); - bdrv_iostatus_reset(s->target); + if (s->target->blk) { + blk_iostatus_reset(s->target->blk); + } +} + +static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret) +{ + BdrvDirtyBitmap *bm; + BlockDriverState *bs = job->common.bs; + + if (ret < 0 || block_job_is_cancelled(&job->common)) { + /* Merge the successor back into the parent, delete nothing. */ + bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); + assert(bm); + } else { + /* Everything is fine, delete this bitmap and install the backup. 
*/ + bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL); + assert(bm); + } +} + +static void backup_commit(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + if (s->sync_bitmap) { + backup_cleanup_sync_bitmap(s, 0); + } +} + +static void backup_abort(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + if (s->sync_bitmap) { + backup_cleanup_sync_bitmap(s, -1); + } } static const BlockJobDriver backup_job_driver = { @@ -216,6 +258,8 @@ static const BlockJobDriver backup_job_driver = { .job_type = BLOCK_JOB_TYPE_BACKUP, .set_speed = backup_set_speed, .iostatus_reset = backup_iostatus_reset, + .commit = backup_commit, + .abort = backup_abort, }; static BlockErrorAction backup_error_action(BackupBlockJob *job, @@ -303,7 +347,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job) return ret; } ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + BACKUP_SECTORS_PER_CLUSTER, &error_is_read, + false); if ((ret < 0) && backup_error_action(job, error_is_read, -ret) == BLOCK_ERROR_ACTION_REPORT) { @@ -352,8 +397,10 @@ static void coroutine_fn backup_run(void *opaque) job->bitmap = hbitmap_alloc(end, 0); bdrv_set_enable_write_cache(target, true); - bdrv_set_on_error(target, on_target_error, on_target_error); - bdrv_iostatus_enable(target); + if (target->blk) { + blk_set_on_error(target->blk, on_target_error, on_target_error); + blk_iostatus_enable(target->blk); + } bdrv_add_before_write_notifier(bs, &before_write); @@ -408,7 +455,7 @@ static void coroutine_fn backup_run(void *opaque) } /* FULL sync mode we copy the whole drive. 
*/ ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, - BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false); if (ret < 0) { /* Depending on error action, fail now or retry cluster */ BlockErrorAction action = @@ -428,22 +475,11 @@ static void coroutine_fn backup_run(void *opaque) /* wait until pending backup_do_cow() calls have completed */ qemu_co_rwlock_wrlock(&job->flush_rwlock); qemu_co_rwlock_unlock(&job->flush_rwlock); - - if (job->sync_bitmap) { - BdrvDirtyBitmap *bm; - if (ret < 0 || block_job_is_cancelled(&job->common)) { - /* Merge the successor back into the parent, delete nothing. */ - bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL); - assert(bm); - } else { - /* Everything is fine, delete this bitmap and install the backup. */ - bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL); - assert(bm); - } - } hbitmap_free(job->bitmap); - bdrv_iostatus_disable(target); + if (target->blk) { + blk_iostatus_disable(target->blk); + } bdrv_op_unblock_all(target, job->common.blocker); data = g_malloc(sizeof(*data)); @@ -457,7 +493,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, BlockdevOnError on_source_error, BlockdevOnError on_target_error, BlockCompletionFunc *cb, void *opaque, - Error **errp) + BlockJobTxn *txn, Error **errp) { int64_t len; @@ -472,7 +508,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && - !bdrv_iostatus_is_enabled(bs)) { + (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) { error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error"); return; } @@ -539,6 +575,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target, sync_bitmap : NULL; job->common.len = len; job->common.co = qemu_coroutine_create(backup_run); + block_job_txn_add_job(txn, &job->common); qemu_coroutine_enter(job->common.co, job); return; diff --git 
a/block/blkdebug.c b/block/blkdebug.c index bc247f46f..dee3a0edf 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -30,6 +30,7 @@ #include "qapi/qmp/qdict.h" #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" +#include "sysemu/qtest.h" typedef struct BDRVBlkdebugState { int state; @@ -426,11 +427,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, /* Set initial state */ s->state = 1; - /* Open the backing file */ - assert(bs->file == NULL); - ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image", - bs, &child_file, false, &local_err); - if (ret < 0) { + /* Open the image file */ + bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image", + bs, &child_file, false, &local_err); + if (local_err) { + ret = -EINVAL; error_propagate(errp, local_err); goto out; } @@ -449,7 +450,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, goto out; fail_unref: - bdrv_unref(bs->file); + bdrv_unref_child(bs, bs->file); out: qemu_opts_del(opts); return ret; @@ -510,7 +511,8 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs, return inject_error(bs, cb, opaque, rule); } - return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque); + return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors, + cb, opaque); } static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs, @@ -532,7 +534,8 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs, return inject_error(bs, cb, opaque, rule); } - return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque); + return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors, + cb, opaque); } static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs, @@ -551,7 +554,7 @@ static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs, return inject_error(bs, cb, opaque, rule); } - return bdrv_aio_flush(bs->file, cb, opaque); + return bdrv_aio_flush(bs->file->bs, cb, opaque); } @@ -581,9 
+584,13 @@ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule) remove_rule(rule); QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next); - printf("blkdebug: Suspended request '%s'\n", r.tag); + if (!qtest_enabled()) { + printf("blkdebug: Suspended request '%s'\n", r.tag); + } qemu_coroutine_yield(); - printf("blkdebug: Resuming request '%s'\n", r.tag); + if (!qtest_enabled()) { + printf("blkdebug: Resuming request '%s'\n", r.tag); + } QLIST_REMOVE(&r, next); g_free(r.tag); @@ -716,12 +723,12 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag) static int64_t blkdebug_getlength(BlockDriverState *bs) { - return bdrv_getlength(bs->file); + return bdrv_getlength(bs->file->bs); } static int blkdebug_truncate(BlockDriverState *bs, int64_t offset) { - return bdrv_truncate(bs->file, offset); + return bdrv_truncate(bs->file->bs, offset); } static void blkdebug_refresh_filename(BlockDriverState *bs) @@ -741,24 +748,24 @@ static void blkdebug_refresh_filename(BlockDriverState *bs) } } - if (force_json && !bs->file->full_open_options) { + if (force_json && !bs->file->bs->full_open_options) { /* The config file cannot be recreated, so creating a plain filename * is impossible */ return; } - if (!force_json && bs->file->exact_filename[0]) { + if (!force_json && bs->file->bs->exact_filename[0]) { snprintf(bs->exact_filename, sizeof(bs->exact_filename), "blkdebug:%s:%s", qdict_get_try_str(bs->options, "config") ?: "", - bs->file->exact_filename); + bs->file->bs->exact_filename); } opts = qdict_new(); qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug"))); - QINCREF(bs->file->full_open_options); - qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options)); + QINCREF(bs->file->bs->full_open_options); + qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options)); for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) { if (strcmp(qdict_entry_key(e), "x-image") && diff --git a/block/blkverify.c 
b/block/blkverify.c index d277e6322..c5f8e8dcb 100644 --- a/block/blkverify.c +++ b/block/blkverify.c @@ -14,7 +14,7 @@ #include "qapi/qmp/qstring.h" typedef struct { - BlockDriverState *test_file; + BdrvChild *test_file; } BDRVBlkverifyState; typedef struct BlkverifyAIOCB BlkverifyAIOCB; @@ -123,26 +123,29 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags, } /* Open the raw file */ - assert(bs->file == NULL); - ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options, - "raw", bs, &child_file, false, &local_err); - if (ret < 0) { + bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw", + bs, &child_file, false, &local_err); + if (local_err) { + ret = -EINVAL; error_propagate(errp, local_err); goto fail; } /* Open the test file */ - assert(s->test_file == NULL); - ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options, - "test", bs, &child_format, false, &local_err); - if (ret < 0) { + s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, + "test", bs, &child_format, false, + &local_err); + if (local_err) { + ret = -EINVAL; error_propagate(errp, local_err); - s->test_file = NULL; goto fail; } ret = 0; fail: + if (ret < 0) { + bdrv_unref_child(bs, bs->file); + } qemu_opts_del(opts); return ret; } @@ -151,7 +154,7 @@ static void blkverify_close(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; - bdrv_unref(s->test_file); + bdrv_unref_child(bs, s->test_file); s->test_file = NULL; } @@ -159,7 +162,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; - return bdrv_getlength(s->test_file); + return bdrv_getlength(s->test_file->bs); } static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write, @@ -238,13 +241,13 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs, nb_sectors, cb, opaque); acb->verify = blkverify_verify_readv; - acb->buf = qemu_blockalign(bs->file, qiov->size); + acb->buf = 
qemu_blockalign(bs->file->bs, qiov->size); qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov); qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf); - bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors, + bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors, blkverify_aio_cb, acb); - bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors, + bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors, blkverify_aio_cb, acb); return &acb->common; } @@ -257,9 +260,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs, BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov, nb_sectors, cb, opaque); - bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors, + bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors, blkverify_aio_cb, acb); - bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, + bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors, blkverify_aio_cb, acb); return &acb->common; } @@ -271,7 +274,7 @@ static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs, BDRVBlkverifyState *s = bs->opaque; /* Only flush test file, the raw file is not important */ - return bdrv_aio_flush(s->test_file, cb, opaque); + return bdrv_aio_flush(s->test_file->bs, cb, opaque); } static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, @@ -279,13 +282,13 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs, { BDRVBlkverifyState *s = bs->opaque; - bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate); + bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate); if (perm) { return true; } - return bdrv_recurse_is_first_non_filter(s->test_file, candidate); + return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate); } /* Propagate AioContext changes to ->test_file */ @@ -293,7 +296,7 @@ static void blkverify_detach_aio_context(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; - bdrv_detach_aio_context(s->test_file); + 
bdrv_detach_aio_context(s->test_file->bs); } static void blkverify_attach_aio_context(BlockDriverState *bs, @@ -301,32 +304,38 @@ static void blkverify_attach_aio_context(BlockDriverState *bs, { BDRVBlkverifyState *s = bs->opaque; - bdrv_attach_aio_context(s->test_file, new_context); + bdrv_attach_aio_context(s->test_file->bs, new_context); } static void blkverify_refresh_filename(BlockDriverState *bs) { BDRVBlkverifyState *s = bs->opaque; - /* bs->file has already been refreshed */ - bdrv_refresh_filename(s->test_file); + /* bs->file->bs has already been refreshed */ + bdrv_refresh_filename(s->test_file->bs); - if (bs->file->full_open_options && s->test_file->full_open_options) { + if (bs->file->bs->full_open_options + && s->test_file->bs->full_open_options) + { QDict *opts = qdict_new(); qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify"))); - QINCREF(bs->file->full_open_options); - qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options)); - QINCREF(s->test_file->full_open_options); - qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options)); + QINCREF(bs->file->bs->full_open_options); + qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options)); + QINCREF(s->test_file->bs->full_open_options); + qdict_put_obj(opts, "test", + QOBJECT(s->test_file->bs->full_open_options)); bs->full_open_options = opts; } - if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) { + if (bs->file->bs->exact_filename[0] + && s->test_file->bs->exact_filename[0]) + { snprintf(bs->exact_filename, sizeof(bs->exact_filename), "blkverify:%s:%s", - bs->file->exact_filename, s->test_file->exact_filename); + bs->file->bs->exact_filename, + s->test_file->bs->exact_filename); } } diff --git a/block/block-backend.c b/block/block-backend.c index aee8a1202..419591f26 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -12,12 +12,17 @@ #include "sysemu/block-backend.h" #include "block/block_int.h" +#include "block/blockjob.h" 
+#include "block/throttle-groups.h" #include "sysemu/blockdev.h" +#include "sysemu/sysemu.h" #include "qapi-event.h" /* Number of coroutines to reserve per attached device model */ #define COROUTINE_POOL_RESERVATION 64 +static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb); + struct BlockBackend { char *name; int refcnt; @@ -29,15 +34,31 @@ struct BlockBackend { /* TODO change to DeviceState when all users are qdevified */ const BlockDevOps *dev_ops; void *dev_opaque; + + /* the block size for which the guest device expects atomicity */ + int guest_block_size; + + /* If the BDS tree is removed, some of its options are stored here (which + * can be used to restore those options in the new BDS on insert) */ + BlockBackendRootState root_state; + + /* I/O stats (display with "info blockstats"). */ + BlockAcctStats stats; + + BlockdevOnError on_read_error, on_write_error; + bool iostatus_enabled; + BlockDeviceIoStatus iostatus; }; typedef struct BlockBackendAIOCB { BlockAIOCB common; QEMUBH *bh; + BlockBackend *blk; int ret; } BlockBackendAIOCB; static const AIOCBInfo block_backend_aiocb_info = { + .get_aio_context = blk_aiocb_get_aio_context, .aiocb_size = sizeof(BlockBackendAIOCB), }; @@ -126,7 +147,7 @@ BlockBackend *blk_new_open(const char *name, const char *filename, return NULL; } - ret = bdrv_open(&blk->bs, filename, reference, options, flags, NULL, errp); + ret = bdrv_open(&blk->bs, filename, reference, options, flags, errp); if (ret < 0) { blk_unref(blk); return NULL; @@ -145,12 +166,17 @@ static void blk_delete(BlockBackend *blk) bdrv_unref(blk->bs); blk->bs = NULL; } + if (blk->root_state.throttle_state) { + g_free(blk->root_state.throttle_group); + throttle_group_unref(blk->root_state.throttle_state); + } /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */ if (blk->name[0]) { QTAILQ_REMOVE(&blk_backends, blk, link); } g_free(blk->name); drive_info_del(blk->legacy_dinfo); + block_acct_cleanup(&blk->stats); g_free(blk); } @@ -164,6 
+190,11 @@ static void drive_info_del(DriveInfo *dinfo) g_free(dinfo); } +int blk_get_refcnt(BlockBackend *blk) +{ + return blk ? blk->refcnt : 0; +} + /* * Increment @blk's reference count. * @blk must not be null. @@ -239,6 +270,23 @@ BlockDriverState *blk_bs(BlockBackend *blk) } /* + * Changes the BlockDriverState attached to @blk + */ +void blk_set_bs(BlockBackend *blk, BlockDriverState *bs) +{ + bdrv_ref(bs); + + if (blk->bs) { + blk->bs->blk = NULL; + bdrv_unref(blk->bs); + } + assert(bs->blk == NULL); + + blk->bs = bs; + bs->blk = blk; +} + +/* * Return @blk's DriveInfo if any, else null. */ DriveInfo *blk_legacy_dinfo(BlockBackend *blk) @@ -292,6 +340,29 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk) } /* + * Disassociates the currently associated BlockDriverState from @blk. + */ +void blk_remove_bs(BlockBackend *blk) +{ + blk_update_root_state(blk); + + blk->bs->blk = NULL; + bdrv_unref(blk->bs); + blk->bs = NULL; +} + +/* + * Associates a new BlockDriverState with @blk. + */ +void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs) +{ + assert(!blk->bs && !bs->blk); + bdrv_ref(bs); + blk->bs = bs; + bs->blk = blk; +} + +/* * Attach device model @dev to @blk. * Return 0 on success, -EBUSY when a device model is attached already. 
*/ @@ -303,7 +374,7 @@ int blk_attach_dev(BlockBackend *blk, void *dev) } blk_ref(blk); blk->dev = dev; - bdrv_iostatus_reset(blk->bs); + blk_iostatus_reset(blk); return 0; } @@ -330,7 +401,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev) blk->dev = NULL; blk->dev_ops = NULL; blk->dev_opaque = NULL; - bdrv_set_guest_block_size(blk->bs, 512); + blk->guest_block_size = 512; blk_unref(blk); } @@ -364,18 +435,15 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void blk_dev_change_media_cb(BlockBackend *blk, bool load) { if (blk->dev_ops && blk->dev_ops->change_media_cb) { - bool tray_was_closed = !blk_dev_is_tray_open(blk); + bool tray_was_open, tray_is_open; + tray_was_open = blk_dev_is_tray_open(blk); blk->dev_ops->change_media_cb(blk->dev_opaque, load); - if (tray_was_closed) { - /* tray open */ - qapi_event_send_device_tray_moved(blk_name(blk), - true, &error_abort); - } - if (load) { - /* tray close */ - qapi_event_send_device_tray_moved(blk_name(blk), - false, &error_abort); + tray_is_open = blk_dev_is_tray_open(blk); + + if (tray_was_open != tray_is_open) { + qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open, + &error_abort); } } } @@ -390,6 +458,14 @@ bool blk_dev_has_removable_media(BlockBackend *blk) } /* + * Does @blk's attached device model have a tray? + */ +bool blk_dev_has_tray(BlockBackend *blk) +{ + return blk->dev_ops && blk->dev_ops->is_tray_open; +} + +/* * Notify @blk's attached device model of a media eject request. * If @force is true, the medium is about to be yanked out forcefully. 
*/ @@ -405,7 +481,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force) */ bool blk_dev_is_tray_open(BlockBackend *blk) { - if (blk->dev_ops && blk->dev_ops->is_tray_open) { + if (blk_dev_has_tray(blk)) { return blk->dev_ops->is_tray_open(blk->dev_opaque); } return false; @@ -435,7 +511,47 @@ void blk_dev_resize_cb(BlockBackend *blk) void blk_iostatus_enable(BlockBackend *blk) { - bdrv_iostatus_enable(blk->bs); + blk->iostatus_enabled = true; + blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + +/* The I/O status is only enabled if the drive explicitly + * enables it _and_ the VM is configured to stop on errors */ +bool blk_iostatus_is_enabled(const BlockBackend *blk) +{ + return (blk->iostatus_enabled && + (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || + blk->on_write_error == BLOCKDEV_ON_ERROR_STOP || + blk->on_read_error == BLOCKDEV_ON_ERROR_STOP)); +} + +BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk) +{ + return blk->iostatus; +} + +void blk_iostatus_disable(BlockBackend *blk) +{ + blk->iostatus_enabled = false; +} + +void blk_iostatus_reset(BlockBackend *blk) +{ + if (blk_iostatus_is_enabled(blk)) { + blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK; + if (blk->bs && blk->bs->job) { + block_job_iostatus_reset(blk->bs->job); + } + } +} + +void blk_iostatus_set_err(BlockBackend *blk, int error) +{ + assert(blk_iostatus_is_enabled(blk)); + if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + blk->iostatus = error == ENOSPC ? 
BLOCK_DEVICE_IO_STATUS_NOSPACE : + BLOCK_DEVICE_IO_STATUS_FAILED; + } } static int blk_check_byte_request(BlockBackend *blk, int64_t offset, @@ -447,7 +563,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset, return -EIO; } - if (!blk_is_inserted(blk)) { + if (!blk_is_available(blk)) { return -ENOMEDIUM; } @@ -534,13 +650,15 @@ static void error_callback_bh(void *opaque) qemu_aio_unref(acb); } -static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb, - void *opaque, int ret) +BlockAIOCB *blk_abort_aio_request(BlockBackend *blk, + BlockCompletionFunc *cb, + void *opaque, int ret) { struct BlockBackendAIOCB *acb; QEMUBH *bh; acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque); + acb->blk = blk; acb->ret = ret; bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb); @@ -556,7 +674,7 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags, @@ -585,16 +703,28 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count) int64_t blk_getlength(BlockBackend *blk) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_getlength(blk->bs); } void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr) { - bdrv_get_geometry(blk->bs, nb_sectors_ptr); + if (!blk->bs) { + *nb_sectors_ptr = 0; + } else { + bdrv_get_geometry(blk->bs, nb_sectors_ptr); + } } int64_t blk_nb_sectors(BlockBackend *blk) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_nb_sectors(blk->bs); } @@ -604,7 +734,7 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return 
blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -616,7 +746,7 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque); @@ -625,6 +755,10 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num, BlockAIOCB *blk_aio_flush(BlockBackend *blk, BlockCompletionFunc *cb, void *opaque) { + if (!blk_is_available(blk)) { + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + } + return bdrv_aio_flush(blk->bs, cb, opaque); } @@ -634,7 +768,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk, { int ret = blk_check_request(blk, sector_num, nb_sectors); if (ret < 0) { - return abort_aio_request(blk, cb, opaque, ret); + return blk_abort_aio_request(blk, cb, opaque, ret); } return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque); @@ -666,12 +800,20 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs) int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_ioctl(blk->bs, req, buf); } BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { + if (!blk_is_available(blk)) { + return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM); + } + return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque); } @@ -687,11 +829,19 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) int blk_co_flush(BlockBackend *blk) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_co_flush(blk->bs); } int blk_flush(BlockBackend *blk) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_flush(blk->bs); } @@ -702,7 
+852,9 @@ int blk_flush_all(void) void blk_drain(BlockBackend *blk) { - bdrv_drain(blk->bs); + if (blk->bs) { + bdrv_drain(blk->bs); + } } void blk_drain_all(void) @@ -710,76 +862,178 @@ void blk_drain_all(void) bdrv_drain_all(); } +void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) +{ + blk->on_read_error = on_read_error; + blk->on_write_error = on_write_error; +} + BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read) { - return bdrv_get_on_error(blk->bs, is_read); + return is_read ? blk->on_read_error : blk->on_write_error; } BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, int error) { - return bdrv_get_error_action(blk->bs, is_read, error); + BlockdevOnError on_err = blk_get_on_error(blk, is_read); + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + return (error == ENOSPC) ? + BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_STOP: + return BLOCK_ERROR_ACTION_STOP; + case BLOCKDEV_ON_ERROR_REPORT: + return BLOCK_ERROR_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BLOCK_ERROR_ACTION_IGNORE; + default: + abort(); + } +} + +static void send_qmp_error_event(BlockBackend *blk, + BlockErrorAction action, + bool is_read, int error) +{ + IoOperationType optype; + + optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE; + qapi_event_send_block_io_error(blk_name(blk), optype, action, + blk_iostatus_is_enabled(blk), + error == ENOSPC, strerror(error), + &error_abort); } +/* This is done by device models because, while the block layer knows + * about the error, it does not know whether an operation comes from + * the device or the block layer (from a job, for example). 
+ */ void blk_error_action(BlockBackend *blk, BlockErrorAction action, bool is_read, int error) { - bdrv_error_action(blk->bs, action, is_read, error); + assert(error >= 0); + + if (action == BLOCK_ERROR_ACTION_STOP) { + /* First set the iostatus, so that "info block" returns an iostatus + * that matches the events raised so far (an additional error iostatus + * is fine, but not a lost one). + */ + blk_iostatus_set_err(blk, error); + + /* Then raise the request to stop the VM and the event. + * qemu_system_vmstop_request_prepare has two effects. First, + * it ensures that the STOP event always comes after the + * BLOCK_IO_ERROR event. Second, it ensures that even if management + * can observe the STOP event and do a "cont" before the STOP + * event is issued, the VM will not stop. In this case, vm_start() + * also ensures that the STOP/RESUME pair of events is emitted. + */ + qemu_system_vmstop_request_prepare(); + send_qmp_error_event(blk, action, is_read, error); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else { + send_qmp_error_event(blk, action, is_read, error); + } } int blk_is_read_only(BlockBackend *blk) { - return bdrv_is_read_only(blk->bs); + if (blk->bs) { + return bdrv_is_read_only(blk->bs); + } else { + return blk->root_state.read_only; + } } int blk_is_sg(BlockBackend *blk) { + if (!blk->bs) { + return 0; + } + return bdrv_is_sg(blk->bs); } int blk_enable_write_cache(BlockBackend *blk) { - return bdrv_enable_write_cache(blk->bs); + if (blk->bs) { + return bdrv_enable_write_cache(blk->bs); + } else { + return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB); + } } void blk_set_enable_write_cache(BlockBackend *blk, bool wce) { - bdrv_set_enable_write_cache(blk->bs, wce); + if (blk->bs) { + bdrv_set_enable_write_cache(blk->bs, wce); + } else { + if (wce) { + blk->root_state.open_flags |= BDRV_O_CACHE_WB; + } else { + blk->root_state.open_flags &= ~BDRV_O_CACHE_WB; + } + } } void blk_invalidate_cache(BlockBackend *blk, Error **errp) { + if 
(!blk->bs) { + error_setg(errp, "Device '%s' has no medium", blk->name); + return; + } + bdrv_invalidate_cache(blk->bs, errp); } -int blk_is_inserted(BlockBackend *blk) +bool blk_is_inserted(BlockBackend *blk) +{ + return blk->bs && bdrv_is_inserted(blk->bs); +} + +bool blk_is_available(BlockBackend *blk) { - return bdrv_is_inserted(blk->bs); + return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk); } void blk_lock_medium(BlockBackend *blk, bool locked) { - bdrv_lock_medium(blk->bs, locked); + if (blk->bs) { + bdrv_lock_medium(blk->bs, locked); + } } void blk_eject(BlockBackend *blk, bool eject_flag) { - bdrv_eject(blk->bs, eject_flag); + if (blk->bs) { + bdrv_eject(blk->bs, eject_flag); + } } int blk_get_flags(BlockBackend *blk) { - return bdrv_get_flags(blk->bs); + if (blk->bs) { + return bdrv_get_flags(blk->bs); + } else { + return blk->root_state.open_flags; + } } int blk_get_max_transfer_length(BlockBackend *blk) { - return blk->bs->bl.max_transfer_length; + if (blk->bs) { + return blk->bs->bl.max_transfer_length; + } else { + return 0; + } } void blk_set_guest_block_size(BlockBackend *blk, int align) { - bdrv_set_guest_block_size(blk->bs, align); + blk->guest_block_size = align; } void *blk_blockalign(BlockBackend *blk, size_t size) @@ -789,40 +1043,64 @@ void *blk_blockalign(BlockBackend *blk, size_t size) bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp) { + if (!blk->bs) { + return false; + } + return bdrv_op_is_blocked(blk->bs, op, errp); } void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason) { - bdrv_op_unblock(blk->bs, op, reason); + if (blk->bs) { + bdrv_op_unblock(blk->bs, op, reason); + } } void blk_op_block_all(BlockBackend *blk, Error *reason) { - bdrv_op_block_all(blk->bs, reason); + if (blk->bs) { + bdrv_op_block_all(blk->bs, reason); + } } void blk_op_unblock_all(BlockBackend *blk, Error *reason) { - bdrv_op_unblock_all(blk->bs, reason); + if (blk->bs) { + bdrv_op_unblock_all(blk->bs, reason); + } } 
AioContext *blk_get_aio_context(BlockBackend *blk) { - return bdrv_get_aio_context(blk->bs); + if (blk->bs) { + return bdrv_get_aio_context(blk->bs); + } else { + return qemu_get_aio_context(); + } +} + +static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb) +{ + BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb); + return blk_get_aio_context(blk_acb->blk); } void blk_set_aio_context(BlockBackend *blk, AioContext *new_context) { - bdrv_set_aio_context(blk->bs, new_context); + if (blk->bs) { + bdrv_set_aio_context(blk->bs, new_context); + } } void blk_add_aio_context_notifier(BlockBackend *blk, void (*attached_aio_context)(AioContext *new_context, void *opaque), void (*detach_aio_context)(void *opaque), void *opaque) { - bdrv_add_aio_context_notifier(blk->bs, attached_aio_context, - detach_aio_context, opaque); + if (blk->bs) { + bdrv_add_aio_context_notifier(blk->bs, attached_aio_context, + detach_aio_context, opaque); + } } void blk_remove_aio_context_notifier(BlockBackend *blk, @@ -831,28 +1109,36 @@ void blk_remove_aio_context_notifier(BlockBackend *blk, void (*detach_aio_context)(void *), void *opaque) { - bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context, - detach_aio_context, opaque); + if (blk->bs) { + bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context, + detach_aio_context, opaque); + } } void blk_add_close_notifier(BlockBackend *blk, Notifier *notify) { - bdrv_add_close_notifier(blk->bs, notify); + if (blk->bs) { + bdrv_add_close_notifier(blk->bs, notify); + } } void blk_io_plug(BlockBackend *blk) { - bdrv_io_plug(blk->bs); + if (blk->bs) { + bdrv_io_plug(blk->bs); + } } void blk_io_unplug(BlockBackend *blk) { - bdrv_io_unplug(blk->bs); + if (blk->bs) { + bdrv_io_unplug(blk->bs); + } } BlockAcctStats *blk_get_stats(BlockBackend *blk) { - return bdrv_get_stats(blk->bs); + return &blk->stats; } void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, @@ -885,6 +1171,10 @@ int 
blk_write_compressed(BlockBackend *blk, int64_t sector_num, int blk_truncate(BlockBackend *blk, int64_t offset) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_truncate(blk->bs, offset); } @@ -901,20 +1191,94 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors) int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf, int64_t pos, int size) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_save_vmstate(blk->bs, buf, pos, size); } int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_load_vmstate(blk->bs, buf, pos, size); } int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_probe_blocksizes(blk->bs, bsz); } int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo) { + if (!blk_is_available(blk)) { + return -ENOMEDIUM; + } + return bdrv_probe_geometry(blk->bs, geo); } + +/* + * Updates the BlockBackendRootState object with data from the currently + * attached BlockDriverState. + */ +void blk_update_root_state(BlockBackend *blk) +{ + assert(blk->bs); + + blk->root_state.open_flags = blk->bs->open_flags; + blk->root_state.read_only = blk->bs->read_only; + blk->root_state.detect_zeroes = blk->bs->detect_zeroes; + + if (blk->root_state.throttle_group) { + g_free(blk->root_state.throttle_group); + throttle_group_unref(blk->root_state.throttle_state); + } + if (blk->bs->throttle_state) { + const char *name = throttle_group_get_name(blk->bs); + blk->root_state.throttle_group = g_strdup(name); + blk->root_state.throttle_state = throttle_group_incref(name); + } else { + blk->root_state.throttle_group = NULL; + blk->root_state.throttle_state = NULL; + } +} + +/* + * Applies the information in the root state to the given BlockDriverState. 
This + * does not include the flags which have to be specified for bdrv_open(), use + * blk_get_open_flags_from_root_state() to inquire them. + */ +void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs) +{ + bs->detect_zeroes = blk->root_state.detect_zeroes; + if (blk->root_state.throttle_group) { + bdrv_io_limits_enable(bs, blk->root_state.throttle_group); + } +} + +/* + * Returns the flags to be used for bdrv_open() of a BlockDriverState which is + * supposed to inherit the root state. + */ +int blk_get_open_flags_from_root_state(BlockBackend *blk) +{ + int bs_flags; + + bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR; + bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR; + + return bs_flags; +} + +BlockBackendRootState *blk_get_root_state(BlockBackend *blk) +{ + return &blk->root_state; +} diff --git a/block/bochs.c b/block/bochs.c index 199ac2b9a..18949b9d4 100644 --- a/block/bochs.c +++ b/block/bochs.c @@ -103,7 +103,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, bs->read_only = 1; // no write support yet - ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs)); + ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs)); if (ret < 0) { return ret; } @@ -137,7 +137,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags, return -ENOMEM; } - ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap, + ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap, s->catalog_size * 4); if (ret < 0) { goto fail; @@ -206,7 +206,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) (s->extent_blocks + s->bitmap_blocks)); /* read in bitmap for current extent */ - ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), + ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8), &bitmap_entry, 1); if (ret < 0) { return ret; @@ -229,7 +229,7 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num, if (block_offset < 0) { 
return block_offset; } else if (block_offset > 0) { - ret = bdrv_pread(bs->file, block_offset, buf, 512); + ret = bdrv_pread(bs->file->bs, block_offset, buf, 512); if (ret < 0) { return ret; } diff --git a/block/cloop.c b/block/cloop.c index f328be06f..4190ae06d 100644 --- a/block/cloop.c +++ b/block/cloop.c @@ -66,7 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, bs->read_only = 1; /* read header */ - ret = bdrv_pread(bs->file, 128, &s->block_size, 4); + ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4); if (ret < 0) { return ret; } @@ -92,7 +92,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4); + ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4); if (ret < 0) { return ret; } @@ -123,7 +123,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags, return -ENOMEM; } - ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size); + ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size); if (ret < 0) { goto fail; } @@ -203,8 +203,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num) int ret; uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num]; - ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block, - bytes); + ret = bdrv_pread(bs->file->bs, s->offsets[block_num], + s->compressed_block, bytes); if (ret != bytes) { return -1; } diff --git a/block/commit.c b/block/commit.c index 7312a5bdc..a5d02aa56 100644 --- a/block/commit.c +++ b/block/commit.c @@ -17,6 +17,7 @@ #include "block/blockjob.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" +#include "sysemu/block-backend.h" enum { /* @@ -213,7 +214,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, if ((on_error == BLOCKDEV_ON_ERROR_STOP || on_error == BLOCKDEV_ON_ERROR_ENOSPC) && - !bdrv_iostatus_is_enabled(bs)) { + (!bs->blk || 
!blk_iostatus_is_enabled(bs->blk))) { error_setg(errp, "Invalid parameter combination"); return; } @@ -235,14 +236,14 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, orig_overlay_flags = bdrv_get_flags(overlay_bs); /* convert base & overlay_bs to r/w, if necessary */ - if (!(orig_base_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, base, - orig_base_flags | BDRV_O_RDWR); - } if (!(orig_overlay_flags & BDRV_O_RDWR)) { - reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL, orig_overlay_flags | BDRV_O_RDWR); } + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL, + orig_base_flags | BDRV_O_RDWR); + } if (reopen_queue) { bdrv_reopen_multiple(reopen_queue, &local_err); if (local_err != NULL) { diff --git a/block/curl.c b/block/curl.c index 032cc8ae2..89941826e 100644 --- a/block/curl.c +++ b/block/curl.c @@ -154,18 +154,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action, DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd); switch (action) { case CURL_POLL_IN: - aio_set_fd_handler(s->aio_context, fd, curl_multi_read, - NULL, state); + aio_set_fd_handler(s->aio_context, fd, false, + curl_multi_read, NULL, state); break; case CURL_POLL_OUT: - aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state); + aio_set_fd_handler(s->aio_context, fd, false, + NULL, curl_multi_do, state); break; case CURL_POLL_INOUT: - aio_set_fd_handler(s->aio_context, fd, curl_multi_read, - curl_multi_do, state); + aio_set_fd_handler(s->aio_context, fd, false, + curl_multi_read, curl_multi_do, state); break; case CURL_POLL_REMOVE: - aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL); + aio_set_fd_handler(s->aio_context, fd, false, + NULL, NULL, NULL); break; } diff --git a/block/dmg.c b/block/dmg.c index 9f2528169..546a6f533 100644 --- a/block/dmg.c +++ b/block/dmg.c @@ -85,7 +85,7 @@ static 
int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result) uint64_t buffer; int ret; - ret = bdrv_pread(bs->file, offset, &buffer, 8); + ret = bdrv_pread(bs->file->bs, offset, &buffer, 8); if (ret < 0) { return ret; } @@ -99,7 +99,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result) uint32_t buffer; int ret; - ret = bdrv_pread(bs->file, offset, &buffer, 4); + ret = bdrv_pread(bs->file->bs, offset, &buffer, 4); if (ret < 0) { return ret; } @@ -354,7 +354,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds, offset += 4; buffer = g_realloc(buffer, count); - ret = bdrv_pread(bs->file, offset, buffer, count); + ret = bdrv_pread(bs->file->bs, offset, buffer, count); if (ret < 0) { goto fail; } @@ -391,7 +391,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds, buffer = g_malloc(info_length + 1); buffer[info_length] = '\0'; - ret = bdrv_pread(bs->file, info_begin, buffer, info_length); + ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length); if (ret != info_length) { ret = -EINVAL; goto fail; @@ -446,7 +446,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, ds.max_sectors_per_chunk = 1; /* locate the UDIF trailer */ - offset = dmg_find_koly_offset(bs->file, errp); + offset = dmg_find_koly_offset(bs->file->bs, errp); if (offset < 0) { ret = offset; goto fail; @@ -514,9 +514,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags, } /* initialize zlib engine */ - s->compressed_chunk = qemu_try_blockalign(bs->file, + s->compressed_chunk = qemu_try_blockalign(bs->file->bs, ds.max_compressed_size + 1); - s->uncompressed_chunk = qemu_try_blockalign(bs->file, + s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs, 512 * ds.max_sectors_per_chunk); if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) { ret = -ENOMEM; @@ -592,7 +592,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) case 
0x80000005: { /* zlib compressed */ /* we need to buffer, because only the chunk as whole can be * inflated. */ - ret = bdrv_pread(bs->file, s->offsets[chunk], + ret = bdrv_pread(bs->file->bs, s->offsets[chunk], s->compressed_chunk, s->lengths[chunk]); if (ret != s->lengths[chunk]) { return -1; @@ -616,7 +616,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) case 0x80000006: /* bzip2 compressed */ /* we need to buffer, because only the chunk as whole can be * inflated. */ - ret = bdrv_pread(bs->file, s->offsets[chunk], + ret = bdrv_pread(bs->file->bs, s->offsets[chunk], s->compressed_chunk, s->lengths[chunk]); if (ret != s->lengths[chunk]) { return -1; @@ -641,7 +641,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num) break; #endif /* CONFIG_BZIP2 */ case 1: /* copy */ - ret = bdrv_pread(bs->file, s->offsets[chunk], + ret = bdrv_pread(bs->file->bs, s->offsets[chunk], s->uncompressed_chunk, s->lengths[chunk]); if (ret != s->lengths[chunk]) { return -1; diff --git a/block/gluster.c b/block/gluster.c index 1eb3a8c39..0857c1464 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -429,28 +429,23 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; off_t size = nb_sectors * BDRV_SECTOR_SIZE; off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb->size = size; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = size; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb); if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); 
- ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } static inline bool gluster_supports_zerofill(void) @@ -541,35 +536,30 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; size_t size = nb_sectors * BDRV_SECTOR_SIZE; off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb->size = size; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = size; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); if (write) { ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, - &gluster_finish_aiocb, acb); + gluster_finish_aiocb, &acb); } else { ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, - &gluster_finish_aiocb, acb); + gluster_finish_aiocb, &acb); } if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset) @@ -600,26 +590,21 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs, static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; - acb->size = 0; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = 0; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb); if (ret < 0) { - ret = -errno; - goto out; + return -errno; } 
qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } #ifdef CONFIG_GLUSTERFS_DISCARD @@ -627,28 +612,23 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { int ret; - GlusterAIOCB *acb = g_slice_new(GlusterAIOCB); + GlusterAIOCB acb; BDRVGlusterState *s = bs->opaque; size_t size = nb_sectors * BDRV_SECTOR_SIZE; off_t offset = sector_num * BDRV_SECTOR_SIZE; - acb->size = 0; - acb->ret = 0; - acb->coroutine = qemu_coroutine_self(); - acb->aio_context = bdrv_get_aio_context(bs); + acb.size = 0; + acb.ret = 0; + acb.coroutine = qemu_coroutine_self(); + acb.aio_context = bdrv_get_aio_context(bs); - ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb); + ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb); if (ret < 0) { - ret = -errno; - goto out; + return -errno; } qemu_coroutine_yield(); - ret = acb->ret; - -out: - g_slice_free(GlusterAIOCB, acb); - return ret; + return acb.ret; } #endif diff --git a/block/io.c b/block/io.c index d4bc83b33..e00fb5d69 100644 --- a/block/io.c +++ b/block/io.c @@ -23,6 +23,7 @@ */ #include "trace.h" +#include "sysemu/block-backend.h" #include "block/blockjob.h" #include "block/block_int.h" #include "block/throttle-groups.h" @@ -156,38 +157,38 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) /* Take some limits from the children as a default */ if (bs->file) { - bdrv_refresh_limits(bs->file, &local_err); + bdrv_refresh_limits(bs->file->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; - bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; - bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment; - bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; + bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length; + bs->bl.max_transfer_length = 
bs->file->bs->bl.max_transfer_length; + bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment; + bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment; } else { bs->bl.min_mem_alignment = 512; bs->bl.opt_mem_alignment = getpagesize(); } - if (bs->backing_hd) { - bdrv_refresh_limits(bs->backing_hd, &local_err); + if (bs->backing) { + bdrv_refresh_limits(bs->backing->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return; } bs->bl.opt_transfer_length = MAX(bs->bl.opt_transfer_length, - bs->backing_hd->bl.opt_transfer_length); + bs->backing->bs->bl.opt_transfer_length); bs->bl.max_transfer_length = MIN_NON_ZERO(bs->bl.max_transfer_length, - bs->backing_hd->bl.max_transfer_length); + bs->backing->bs->bl.max_transfer_length); bs->bl.opt_mem_alignment = MAX(bs->bl.opt_mem_alignment, - bs->backing_hd->bl.opt_mem_alignment); + bs->backing->bs->bl.opt_mem_alignment); bs->bl.min_mem_alignment = MAX(bs->bl.min_mem_alignment, - bs->backing_hd->bl.min_mem_alignment); + bs->backing->bs->bl.min_mem_alignment); } /* Then let the driver override it */ @@ -213,8 +214,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) } /* Check if any requests are in-flight (including throttled requests) */ -static bool bdrv_requests_pending(BlockDriverState *bs) +bool bdrv_requests_pending(BlockDriverState *bs) { + BdrvChild *child; + if (!QLIST_EMPTY(&bs->tracked_requests)) { return true; } @@ -224,17 +227,31 @@ static bool bdrv_requests_pending(BlockDriverState *bs) if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) { return true; } - if (bs->file && bdrv_requests_pending(bs->file)) { - return true; - } - if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) { - return true; + + QLIST_FOREACH(child, &bs->children, next) { + if (bdrv_requests_pending(child->bs)) { + return true; + } } + return false; } +static void bdrv_drain_recurse(BlockDriverState *bs) +{ + BdrvChild *child; + + if (bs->drv && bs->drv->bdrv_drain) { + bs->drv->bdrv_drain(bs); + 
} + QLIST_FOREACH(child, &bs->children, next) { + bdrv_drain_recurse(child->bs); + } +} + /* - * Wait for pending requests to complete on a single BlockDriverState subtree + * Wait for pending requests to complete on a single BlockDriverState subtree, + * and suspend block driver's internal I/O until next request arrives. * * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState * AioContext. @@ -247,6 +264,7 @@ void bdrv_drain(BlockDriverState *bs) { bool busy = true; + bdrv_drain_recurse(bs); while (busy) { /* Keep iterating */ bdrv_flush_io_queue(bs); @@ -344,13 +362,14 @@ static void tracked_request_end(BdrvTrackedRequest *req) static void tracked_request_begin(BdrvTrackedRequest *req, BlockDriverState *bs, int64_t offset, - unsigned int bytes, bool is_write) + unsigned int bytes, + enum BdrvTrackedRequestType type) { *req = (BdrvTrackedRequest){ .bs = bs, .offset = offset, .bytes = bytes, - .is_write = is_write, + .type = type, .co = qemu_coroutine_self(), .serialising = false, .overlap_offset = offset, @@ -844,7 +863,9 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, mark_request_serialising(req, bdrv_get_cluster_size(bs)); } - wait_serialising_requests(req); + if (!(flags & BDRV_REQ_NO_SERIALISING)) { + wait_serialising_requests(req); + } if (flags & BDRV_REQ_COPY_ON_READ) { int pnum; @@ -932,7 +953,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, return ret; } - if (bs->copy_on_read) { + /* Don't do copy-on-read if we read data before write operation */ + if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) { flags |= BDRV_REQ_COPY_ON_READ; } @@ -966,7 +988,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, bytes = ROUND_UP(bytes, align); } - tracked_request_begin(&req, bs, offset, bytes, false); + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align, use_local_qiov ? 
&local_qiov : qiov, flags); @@ -1001,6 +1023,15 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0); } +int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors); + + return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, + BDRV_REQ_NO_SERIALISING); +} + int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -1127,13 +1158,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, if (ret < 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO); + bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags); } else { - BLKDBG_EVENT(bs, BLKDBG_PWRITEV); + bdrv_debug_event(bs, BLKDBG_PWRITEV); ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE); + bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE); if (ret == 0 && !bs->enable_write_cache) { ret = bdrv_co_flush(bs); @@ -1141,7 +1172,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, bdrv_set_dirty(bs, sector_num, nb_sectors); - block_acct_highest_sector(&bs->stats, sector_num, nb_sectors); + if (bs->wr_highest_offset < offset + bytes) { + bs->wr_highest_offset = offset + bytes; + } if (ret >= 0) { bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors); @@ -1182,13 +1215,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, /* RMW the unaligned part before head. 
*/ mark_request_serialising(req, align); wait_serialising_requests(req); - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, align, &local_qiov, 0); if (ret < 0) { goto fail; } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); memset(buf + head_padding_bytes, 0, zero_bytes); ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, @@ -1220,13 +1253,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, /* RMW the unaligned part after tail. */ mark_request_serialising(req, align); wait_serialising_requests(req); - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ret = bdrv_aligned_preadv(bs, req, offset, align, align, &local_qiov, 0); if (ret < 0) { goto fail; } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); memset(buf, 0, bytes); ret = bdrv_aligned_pwritev(bs, req, offset, align, @@ -1276,7 +1309,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, * Pad qiov with the read parts and be sure to have a tracked request not * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle. 
*/ - tracked_request_begin(&req, bs, offset, bytes, true); + tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE); if (!qiov) { ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); @@ -1297,13 +1330,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, }; qemu_iovec_init_external(&head_qiov, &head_iov, 1); - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align, align, &head_qiov, 0); if (ret < 0) { goto fail; } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); qemu_iovec_init(&local_qiov, qiov->niov + 2); qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); @@ -1331,13 +1364,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, }; qemu_iovec_init_external(&tail_qiov, &tail_iov, 1); - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align, align, &tail_qiov, 0); if (ret < 0) { goto fail; } - BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); if (!use_local_qiov) { qemu_iovec_init(&local_qiov, qiov->niov + 1); @@ -1486,7 +1519,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (ret & BDRV_BLOCK_RAW) { assert(ret & BDRV_BLOCK_OFFSET_VALID); - return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, + return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, *pnum, pnum); } @@ -1495,8 +1528,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, } else { if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; - } else if (bs->backing_hd) { - BlockDriverState *bs2 = bs->backing_hd; + } else if (bs->backing) { + BlockDriverState *bs2 = bs->backing->bs; int64_t nb_sectors2 = bdrv_nb_sectors(bs2); if 
(nb_sectors2 >= 0 && sector_num >= nb_sectors2) { ret |= BDRV_BLOCK_ZERO; @@ -1509,7 +1542,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, (ret & BDRV_BLOCK_OFFSET_VALID)) { int file_pnum; - ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS, + ret2 = bdrv_co_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS, *pnum, &file_pnum); if (ret2 >= 0) { /* Ignore errors. This is just providing extra information, it @@ -1541,7 +1574,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs, int64_t ret = 0; assert(bs != base); - for (p = bs; p != base; p = p->backing_hd) { + for (p = bs; p != base; p = backing_bs(p)) { ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum); if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) { break; @@ -1604,7 +1637,7 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { - return bdrv_get_block_status_above(bs, bs->backing_hd, + return bdrv_get_block_status_above(bs, backing_bs(bs), sector_num, nb_sectors, pnum); } @@ -1662,7 +1695,7 @@ int bdrv_is_allocated_above(BlockDriverState *top, n = pnum_inter; } - intermediate = intermediate->backing_hd; + intermediate = backing_bs(intermediate); } *pnum = n; @@ -1713,7 +1746,7 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) } else if (drv->bdrv_save_vmstate) { return drv->bdrv_save_vmstate(bs, qiov, pos); } else if (bs->file) { - return bdrv_writev_vmstate(bs->file, qiov, pos); + return bdrv_writev_vmstate(bs->file->bs, qiov, pos); } return -ENOTSUP; @@ -1728,7 +1761,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, if (drv->bdrv_load_vmstate) return drv->bdrv_load_vmstate(bs, buf, pos, size); if (bs->file) - return bdrv_load_vmstate(bs->file, buf, pos, size); + return bdrv_load_vmstate(bs->file->bs, buf, pos, size); return -ENOTSUP; } @@ -1893,7 +1926,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest 
*reqs, } } - block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1); + if (bs->blk) { + block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE, + num_reqs - outidx - 1); + } return outidx + 1; } @@ -2208,7 +2244,7 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, { BlockAIOCB *acb; - acb = g_slice_alloc(aiocb_info->aiocb_size); + acb = g_malloc(aiocb_info->aiocb_size); acb->aiocb_info = aiocb_info; acb->bs = bs; acb->cb = cb; @@ -2228,7 +2264,7 @@ void qemu_aio_unref(void *p) BlockAIOCB *acb = p; assert(acb->refcnt > 0); if (--acb->refcnt == 0) { - g_slice_free1(acb->aiocb_info->aiocb_size, acb); + g_free(acb); } } @@ -2298,18 +2334,20 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque) int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; + BdrvTrackedRequest req; if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || bdrv_is_sg(bs)) { return 0; } + tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH); /* Write back cached data to the OS even with cache=unsafe */ BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); if (bs->drv->bdrv_co_flush_to_os) { ret = bs->drv->bdrv_co_flush_to_os(bs); if (ret < 0) { - return ret; + goto out; } } @@ -2349,14 +2387,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) ret = 0; } if (ret < 0) { - return ret; + goto out; } /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH * in the case of cache=unsafe, so there are no useless flushes. */ flush_parent: - return bdrv_co_flush(bs->file); + ret = bs->file ? 
bdrv_co_flush(bs->file->bs) : 0; +out: + tracked_request_end(&req); + return ret; } int bdrv_flush(BlockDriverState *bs) @@ -2399,6 +2440,7 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque) int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { + BdrvTrackedRequest req; int max_discard, ret; if (!bs->drv) { @@ -2421,6 +2463,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, return 0; } + tracked_request_begin(&req, bs, sector_num, nb_sectors, + BDRV_TRACKED_DISCARD); bdrv_set_dirty(bs, sector_num, nb_sectors); max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS); @@ -2454,20 +2498,24 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors, bdrv_co_io_em_complete, &co); if (acb == NULL) { - return -EIO; + ret = -EIO; + goto out; } else { qemu_coroutine_yield(); ret = co.ret; } } if (ret && ret != -ENOTSUP) { - return ret; + goto out; } sector_num += num; nb_sectors -= num; } - return 0; + ret = 0; +out: + tracked_request_end(&req); + return ret; } int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) @@ -2496,26 +2544,109 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) return rwco.ret; } -/* needed for generic scsi interface */ +typedef struct { + CoroutineIOCompletion *co; + QEMUBH *bh; +} BdrvIoctlCompletionData; -int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +static void bdrv_ioctl_bh_cb(void *opaque) +{ + BdrvIoctlCompletionData *data = opaque; + + bdrv_co_io_em_complete(data->co, -ENOTSUP); + qemu_bh_delete(data->bh); +} + +static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf) { BlockDriver *drv = bs->drv; + BdrvTrackedRequest tracked_req; + CoroutineIOCompletion co = { + .coroutine = qemu_coroutine_self(), + }; + BlockAIOCB *acb; - if (drv && drv->bdrv_ioctl) - return drv->bdrv_ioctl(bs, 
req, buf); - return -ENOTSUP; + tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL); + if (!drv || !drv->bdrv_aio_ioctl) { + co.ret = -ENOTSUP; + goto out; + } + + acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co); + if (!acb) { + BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1); + data->bh = aio_bh_new(bdrv_get_aio_context(bs), + bdrv_ioctl_bh_cb, data); + data->co = &co; + qemu_bh_schedule(data->bh); + } + qemu_coroutine_yield(); +out: + tracked_request_end(&tracked_req); + return co.ret; +} + +typedef struct { + BlockDriverState *bs; + int req; + void *buf; + int ret; +} BdrvIoctlCoData; + +static void coroutine_fn bdrv_co_ioctl_entry(void *opaque) +{ + BdrvIoctlCoData *data = opaque; + data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf); +} + +/* needed for generic scsi interface */ +int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) +{ + BdrvIoctlCoData data = { + .bs = bs, + .req = req, + .buf = buf, + .ret = -EINPROGRESS, + }; + + if (qemu_in_coroutine()) { + /* Fast-path if already in coroutine context */ + bdrv_co_ioctl_entry(&data); + } else { + Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry); + qemu_coroutine_enter(co, &data); + } + while (data.ret == -EINPROGRESS) { + aio_poll(bdrv_get_aio_context(bs), true); + } + return data.ret; +} + +static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque) +{ + BlockAIOCBCoroutine *acb = opaque; + acb->req.error = bdrv_co_do_ioctl(acb->common.bs, + acb->req.req, acb->req.buf); + bdrv_co_complete(acb); } BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) { - BlockDriver *drv = bs->drv; + BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info, + bs, cb, opaque); + Coroutine *co; - if (drv && drv->bdrv_aio_ioctl) - return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque); - return NULL; + acb->need_bh = true; + acb->req.error = -EINPROGRESS; + 
acb->req.req = req; + acb->req.buf = buf; + co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry); + qemu_coroutine_enter(co, acb); + + bdrv_co_maybe_schedule_bh(acb); + return &acb->common; } void *qemu_blockalign(BlockDriverState *bs, size_t size) @@ -2584,7 +2715,7 @@ void bdrv_io_plug(BlockDriverState *bs) if (drv && drv->bdrv_io_plug) { drv->bdrv_io_plug(bs); } else if (bs->file) { - bdrv_io_plug(bs->file); + bdrv_io_plug(bs->file->bs); } } @@ -2594,7 +2725,7 @@ void bdrv_io_unplug(BlockDriverState *bs) if (drv && drv->bdrv_io_unplug) { drv->bdrv_io_unplug(bs); } else if (bs->file) { - bdrv_io_unplug(bs->file); + bdrv_io_unplug(bs->file->bs); } } @@ -2604,7 +2735,24 @@ void bdrv_flush_io_queue(BlockDriverState *bs) if (drv && drv->bdrv_flush_io_queue) { drv->bdrv_flush_io_queue(bs); } else if (bs->file) { - bdrv_flush_io_queue(bs->file); + bdrv_flush_io_queue(bs->file->bs); } bdrv_start_throttled_reqs(bs); } + +void bdrv_drained_begin(BlockDriverState *bs) +{ + if (!bs->quiesce_counter++) { + aio_disable_external(bdrv_get_aio_context(bs)); + } + bdrv_drain(bs); +} + +void bdrv_drained_end(BlockDriverState *bs) +{ + assert(bs->quiesce_counter > 0); + if (--bs->quiesce_counter > 0) { + return; + } + aio_enable_external(bdrv_get_aio_context(bs)); +} diff --git a/block/iscsi.c b/block/iscsi.c index 93f1ee4c6..bd1f1bfcd 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -84,6 +84,7 @@ typedef struct IscsiTask { IscsiLun *iscsilun; QEMUTimer retry_timer; bool force_next_flush; + int err_code; } IscsiTask; typedef struct IscsiAIOCB { @@ -96,6 +97,7 @@ typedef struct IscsiAIOCB { int status; int64_t sector_num; int nb_sectors; + int ret; #ifdef __linux__ sg_io_hdr_t *ioh; #endif @@ -169,19 +171,70 @@ static inline unsigned exp_random(double mean) return -mean * log((double)rand() / RAND_MAX); } -/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced - * in libiscsi 1.10.0 as part of an enum. The LIBISCSI_API_VERSION - * macro was introduced in 1.11.0. 
So use the API_VERSION macro as - * a hint that the macros are defined and define them ourselves - * otherwise to keep the required libiscsi version at 1.9.0 */ -#if !defined(LIBISCSI_API_VERSION) -#define QEMU_SCSI_STATUS_TASK_SET_FULL 0x28 -#define QEMU_SCSI_STATUS_TIMEOUT 0x0f000002 -#else -#define QEMU_SCSI_STATUS_TASK_SET_FULL SCSI_STATUS_TASK_SET_FULL -#define QEMU_SCSI_STATUS_TIMEOUT SCSI_STATUS_TIMEOUT +/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in + * libiscsi 1.10.0, together with other constants we need. Use it as + * a hint that we have to define them ourselves if needed, to keep the + * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for + * the test because SCSI_STATUS_* is an enum. + * + * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes + * an enum, check against the LIBISCSI_API_VERSION macro, which was + * introduced in 1.11.0. If it is present, there is no need to define + * anything. + */ +#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \ + !defined(LIBISCSI_API_VERSION) +#define SCSI_STATUS_TASK_SET_FULL 0x28 +#define SCSI_STATUS_TIMEOUT 0x0f000002 +#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600 +#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00 #endif +static int iscsi_translate_sense(struct scsi_sense *sense) +{ + int ret; + + switch (sense->key) { + case SCSI_SENSE_NOT_READY: + return -EBUSY; + case SCSI_SENSE_DATA_PROTECTION: + return -EACCES; + case SCSI_SENSE_COMMAND_ABORTED: + return -ECANCELED; + case SCSI_SENSE_ILLEGAL_REQUEST: + /* Parse ASCQ */ + break; + default: + return -EIO; + } + switch (sense->ascq) { + case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR: + case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE: + case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB: + case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST: + ret = -EINVAL; + break; + case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE: + ret = -ENOSPC; + break; + case 
SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED: + ret = -ENOTSUP; + break; + case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT: + case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED: + case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN: + ret = -ENOMEDIUM; + break; + case SCSI_SENSE_ASCQ_WRITE_PROTECTED: + ret = -EACCES; + break; + default: + ret = -EIO; + break; + } + return ret; +} + static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -203,11 +256,11 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, goto out; } if (status == SCSI_STATUS_BUSY || - status == QEMU_SCSI_STATUS_TIMEOUT || - status == QEMU_SCSI_STATUS_TASK_SET_FULL) { + status == SCSI_STATUS_TIMEOUT || + status == SCSI_STATUS_TASK_SET_FULL) { unsigned retry_time = exp_random(iscsi_retry_times[iTask->retries - 1]); - if (status == QEMU_SCSI_STATUS_TIMEOUT) { + if (status == SCSI_STATUS_TIMEOUT) { /* make sure the request is rescheduled AFTER the * reconnect is initiated */ retry_time = EVENT_INTERVAL * 2; @@ -226,6 +279,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, return; } } + iTask->err_code = iscsi_translate_sense(&task->sense); error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); } else { iTask->iscsilun->force_next_flush |= iTask->force_next_flush; @@ -291,8 +345,8 @@ iscsi_set_events(IscsiLun *iscsilun) int ev = iscsi_which_events(iscsi); if (ev != iscsilun->events) { - aio_set_fd_handler(iscsilun->aio_context, - iscsi_get_fd(iscsi), + aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi), + false, (ev & POLLIN) ? iscsi_process_read : NULL, (ev & POLLOUT) ? 
iscsi_process_write : NULL, iscsilun); @@ -455,7 +509,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors); @@ -644,7 +698,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } return 0; @@ -683,7 +737,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } return 0; @@ -703,7 +757,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, if (status < 0) { error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s", iscsi_get_error(iscsi)); - acb->status = -EIO; + acb->status = iscsi_translate_sense(&acb->task->sense); } acb->ioh->driver_status = 0; @@ -726,6 +780,38 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status, iscsi_schedule_bh(acb); } +static void iscsi_ioctl_bh_completion(void *opaque) +{ + IscsiAIOCB *acb = opaque; + + qemu_bh_delete(acb->bh); + acb->common.cb(acb->common.opaque, acb->ret); + qemu_aio_unref(acb); +} + +static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf) +{ + BlockDriverState *bs = acb->common.bs; + IscsiLun *iscsilun = bs->opaque; + int ret = 0; + + switch (req) { + case SG_GET_VERSION_NUM: + *(int *)buf = 30000; + break; + case SG_GET_SCSI_ID: + ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; + break; + default: + ret = -EINVAL; + } + assert(!acb->bh); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), + iscsi_ioctl_bh_completion, acb); + acb->ret = ret; + qemu_bh_schedule(acb->bh); +} + static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, BlockCompletionFunc *cb, void *opaque) @@ -735,8 +821,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, struct iscsi_data data; IscsiAIOCB *acb; - assert(req == SG_IO); - acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque); acb->iscsilun = iscsilun; @@ -745,6 +829,11 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, 
acb->buf = NULL; acb->ioh = buf; + if (req != SG_IO) { + iscsi_ioctl_handle_emulated(acb, req, buf); + return &acb->common; + } + acb->task = malloc(sizeof(struct scsi_task)); if (acb->task == NULL) { error_report("iSCSI: Failed to allocate task for scsi command. %s", @@ -809,38 +898,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs, return &acb->common; } -static void ioctl_cb(void *opaque, int status) -{ - int *p_status = opaque; - *p_status = status; -} - -static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - IscsiLun *iscsilun = bs->opaque; - int status; - - switch (req) { - case SG_GET_VERSION_NUM: - *(int *)buf = 30000; - break; - case SG_GET_SCSI_ID: - ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type; - break; - case SG_IO: - status = -EINPROGRESS; - iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); - - while (status == -EINPROGRESS) { - aio_poll(iscsilun->aio_context, true); - } - - return 0; - default: - return -1; - } - return 0; -} #endif static int64_t @@ -905,7 +962,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors); @@ -999,7 +1056,7 @@ retry: } if (iTask.status != SCSI_STATUS_GOOD) { - return -EIO; + return iTask.err_code; } if (flags & BDRV_REQ_MAY_UNMAP) { @@ -1280,9 +1337,8 @@ static void iscsi_detach_aio_context(BlockDriverState *bs) { IscsiLun *iscsilun = bs->opaque; - aio_set_fd_handler(iscsilun->aio_context, - iscsi_get_fd(iscsilun->iscsi), - NULL, NULL, NULL); + aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi), + false, NULL, NULL, NULL); iscsilun->events = 0; if (iscsilun->nop_timer) { @@ -1772,7 +1828,6 @@ static BlockDriver bdrv_iscsi = { .bdrv_co_flush_to_disk = iscsi_co_flush, #ifdef __linux__ - .bdrv_ioctl = iscsi_ioctl, .bdrv_aio_ioctl = iscsi_aio_ioctl, #endif diff --git a/block/linux-aio.c b/block/linux-aio.c index c991443c5..88b0520a8 100644 --- 
a/block/linux-aio.c +++ b/block/linux-aio.c @@ -287,7 +287,7 @@ void laio_detach_aio_context(void *s_, AioContext *old_context) { struct qemu_laio_state *s = s_; - aio_set_event_notifier(old_context, &s->e, NULL); + aio_set_event_notifier(old_context, &s->e, false, NULL); qemu_bh_delete(s->completion_bh); } @@ -296,7 +296,8 @@ void laio_attach_aio_context(void *s_, AioContext *new_context) struct qemu_laio_state *s = s_; s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s); - aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb); + aio_set_event_notifier(new_context, &s->e, false, + qemu_laio_completion_cb); } void *laio_init(void) diff --git a/block/mirror.c b/block/mirror.c index b2fb4b9b1..0e8f5565a 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -14,6 +14,7 @@ #include "trace.h" #include "block/blockjob.h" #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" #include "qemu/bitmap.h" @@ -113,7 +114,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret) } qemu_iovec_destroy(&op->qiov); - g_slice_free(MirrorOp, op); + g_free(op); if (s->waiting_for_io) { qemu_coroutine_enter(s->common.co, NULL); @@ -246,6 +247,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight); break; } + if (IOV_MAX < nb_chunks + added_chunks) { + trace_mirror_break_iov_max(s, nb_chunks, added_chunks); + break; + } /* We have enough free space to copy these sectors. */ bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks); @@ -260,7 +265,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s) } while (delay_ns == 0 && next_sector < end); /* Allocate a MirrorOp that is used as an AIO callback. 
*/ - op = g_slice_new(MirrorOp); + op = g_new(MirrorOp, 1); op->s = s; op->sector_num = sector_num; op->nb_sectors = nb_sectors; @@ -349,6 +354,11 @@ static void mirror_exit(BlockJob *job, void *opaque) MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); MirrorExitData *data = opaque; AioContext *replace_aio_context = NULL; + BlockDriverState *src = s->common.bs; + + /* Make sure that the source BDS doesn't go away before we called + * block_job_completed(). */ + bdrv_ref(src); if (s->to_replace) { replace_aio_context = bdrv_get_aio_context(s->to_replace); @@ -363,14 +373,7 @@ static void mirror_exit(BlockJob *job, void *opaque) if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) { bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL); } - bdrv_swap(s->target, to_replace); - if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) { - /* drop the bs loop chain formed by the swap: break the loop then - * trigger the unref from the top one */ - BlockDriverState *p = s->base->backing_hd; - bdrv_set_backing_hd(s->base, NULL); - bdrv_unref(p); - } + bdrv_replace_in_backing_chain(to_replace, s->target); } if (s->to_replace) { bdrv_op_unblock_all(s->to_replace, s->replace_blocker); @@ -381,9 +384,12 @@ static void mirror_exit(BlockJob *job, void *opaque) aio_context_release(replace_aio_context); } g_free(s->replaces); + bdrv_op_unblock_all(s->target, s->common.blocker); bdrv_unref(s->target); block_job_completed(&s->common, data->ret); g_free(data); + bdrv_drained_end(src); + bdrv_unref(src); } static void coroutine_fn mirror_run(void *opaque) @@ -427,7 +433,7 @@ static void coroutine_fn mirror_run(void *opaque) */ bdrv_get_backing_filename(s->target, backing_filename, sizeof(backing_filename)); - if (backing_filename[0] && !s->target->backing_hd) { + if (backing_filename[0] && !s->target->backing) { ret = bdrv_get_info(s->target, &bdi); if (ret < 0) { goto immediate_exit; @@ -451,6 +457,8 @@ static void coroutine_fn mirror_run(void *opaque) if 
(!s->is_none_mode) { /* First part, loop on the sectors and initialize the dirty bitmap. */ BlockDriverState *base = s->base; + bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target); + for (sector_num = 0; sector_num < end; ) { /* Just to make sure we are not exceeding int limit. */ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS, @@ -473,7 +481,7 @@ static void coroutine_fn mirror_run(void *opaque) } assert(n > 0); - if (ret == 1) { + if (ret == 1 || mark_all_dirty) { bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n); } sector_num += n; @@ -594,10 +602,15 @@ immediate_exit: g_free(s->cow_bitmap); g_free(s->in_flight_bitmap); bdrv_release_dirty_bitmap(bs, s->dirty_bitmap); - bdrv_iostatus_disable(s->target); + if (s->target->blk) { + blk_iostatus_disable(s->target->blk); + } data = g_malloc(sizeof(*data)); data->ret = ret; + /* Before we switch to target in mirror_exit, make sure data doesn't + * change. */ + bdrv_drained_begin(s->common.bs); block_job_defer_to_main_loop(&s->common, mirror_exit, data); } @@ -616,7 +629,9 @@ static void mirror_iostatus_reset(BlockJob *job) { MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); - bdrv_iostatus_reset(s->target); + if (s->target->blk) { + blk_iostatus_reset(s->target->blk); + } } static void mirror_complete(BlockJob *job, Error **errp) @@ -631,8 +646,7 @@ static void mirror_complete(BlockJob *job, Error **errp) return; } if (!s->synced) { - error_setg(errp, QERR_BLOCK_JOB_NOT_READY, - bdrv_get_device_name(job->bs)); + error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id); return; } @@ -640,9 +654,9 @@ static void mirror_complete(BlockJob *job, Error **errp) if (s->replaces) { AioContext *replace_aio_context; - s->to_replace = check_to_replace_node(s->replaces, &local_err); + s->to_replace = bdrv_find_node(s->replaces); if (!s->to_replace) { - error_propagate(errp, local_err); + error_setg(errp, "Node name '%s' not found", s->replaces); return; } @@ -700,7 +714,7 @@ static void 
mirror_start_job(BlockDriverState *bs, BlockDriverState *target, if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && - !bdrv_iostatus_is_enabled(bs)) { + (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) { error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error"); return; } @@ -732,12 +746,17 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target, s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp); if (!s->dirty_bitmap) { g_free(s->replaces); - block_job_release(bs); + block_job_unref(&s->common); return; } + + bdrv_op_block_all(s->target, s->common.blocker); + bdrv_set_enable_write_cache(s->target, true); - bdrv_set_on_error(s->target, on_target_error, on_target_error); - bdrv_iostatus_enable(s->target); + if (s->target->blk) { + blk_set_on_error(s->target->blk, on_target_error, on_target_error); + blk_iostatus_enable(s->target->blk); + } s->common.co = qemu_coroutine_create(mirror_run); trace_mirror_start(bs, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); @@ -760,7 +779,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, return; } is_none_mode = mode == MIRROR_SYNC_MODE_NONE; - base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL; + base = mode == MIRROR_SYNC_MODE_TOP ? 
backing_bs(bs) : NULL; mirror_start_job(bs, target, replaces, speed, granularity, buf_size, on_source_error, on_target_error, unmap, cb, opaque, errp, diff --git a/block/nbd-client.c b/block/nbd-client.c index e1bb9198c..b7fd17a11 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -124,7 +124,7 @@ static int nbd_co_send_request(BlockDriverState *bs, s->send_coroutine = qemu_coroutine_self(); aio_context = bdrv_get_aio_context(bs); - aio_set_fd_handler(aio_context, s->sock, + aio_set_fd_handler(aio_context, s->sock, false, nbd_reply_ready, nbd_restart_write, bs); if (qiov) { if (!s->is_unix) { @@ -144,7 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs, } else { rc = nbd_send_request(s->sock, request); } - aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs); + aio_set_fd_handler(aio_context, s->sock, false, + nbd_reply_ready, NULL, bs); s->send_coroutine = NULL; qemu_co_mutex_unlock(&s->send_mutex); return rc; @@ -348,14 +349,15 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num, void nbd_client_detach_aio_context(BlockDriverState *bs) { aio_set_fd_handler(bdrv_get_aio_context(bs), - nbd_get_client_session(bs)->sock, NULL, NULL, NULL); + nbd_get_client_session(bs)->sock, + false, NULL, NULL, NULL); } void nbd_client_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock, - nbd_reply_ready, NULL, bs); + false, nbd_reply_ready, NULL, bs); } void nbd_client_close(BlockDriverState *bs) diff --git a/block/nbd.c b/block/nbd.c index 217618612..cd6a58777 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -43,7 +43,6 @@ typedef struct BDRVNBDState { NbdClientSession client; - QemuOpts *socket_opts; } BDRVNBDState; static int nbd_parse_uri(const char *filename, QDict *options) @@ -190,10 +189,10 @@ out: g_free(file); } -static void nbd_config(BDRVNBDState *s, QDict *options, char **export, - Error **errp) +static SocketAddress 
*nbd_config(BDRVNBDState *s, QDict *options, char **export, + Error **errp) { - Error *local_err = NULL; + SocketAddress *saddr; if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) { if (qdict_haskey(options, "path")) { @@ -201,28 +200,37 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export, } else { error_setg(errp, "one of path and host must be specified."); } - return; + return NULL; } - s->client.is_unix = qdict_haskey(options, "path"); - s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0, - &error_abort); + saddr = g_new0(SocketAddress, 1); - qemu_opts_absorb_qdict(s->socket_opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - return; + if (qdict_haskey(options, "path")) { + saddr->type = SOCKET_ADDRESS_KIND_UNIX; + saddr->u.q_unix = g_new0(UnixSocketAddress, 1); + saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path")); + qdict_del(options, "path"); + } else { + saddr->type = SOCKET_ADDRESS_KIND_INET; + saddr->u.inet = g_new0(InetSocketAddress, 1); + saddr->u.inet->host = g_strdup(qdict_get_str(options, "host")); + if (!qdict_get_try_str(options, "port")) { + saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT); + } else { + saddr->u.inet->port = g_strdup(qdict_get_str(options, "port")); + } + qdict_del(options, "host"); + qdict_del(options, "port"); } - if (!qemu_opt_get(s->socket_opts, "port")) { - qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT, - &error_abort); - } + s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX; *export = g_strdup(qdict_get_try_str(options, "export")); if (*export) { qdict_del(options, "export"); } + + return saddr; } NbdClientSession *nbd_get_client_session(BlockDriverState *bs) @@ -231,26 +239,24 @@ NbdClientSession *nbd_get_client_session(BlockDriverState *bs) return &s->client; } -static int nbd_establish_connection(BlockDriverState *bs, Error **errp) +static int nbd_establish_connection(BlockDriverState *bs, + 
SocketAddress *saddr, + Error **errp) { BDRVNBDState *s = bs->opaque; int sock; - if (s->client.is_unix) { - sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL); - } else { - sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL); - if (sock >= 0) { - socket_set_nodelay(sock); - } - } + sock = socket_connect(saddr, errp, NULL, NULL); - /* Failed to establish connection */ if (sock < 0) { logout("Failed to establish connection to NBD server\n"); return -EIO; } + if (!s->client.is_unix) { + socket_set_nodelay(sock); + } + return sock; } @@ -260,19 +266,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags, BDRVNBDState *s = bs->opaque; char *export = NULL; int result, sock; - Error *local_err = NULL; + SocketAddress *saddr; /* Pop the config into our state object. Exit if invalid. */ - nbd_config(s, options, &export, &local_err); - if (local_err) { - error_propagate(errp, local_err); + saddr = nbd_config(s, options, &export, errp); + if (!saddr) { return -EINVAL; } /* establish TCP connection, return error if it fails * TODO: Configurable retry-until-timeout behaviour. 
*/ - sock = nbd_establish_connection(bs, errp); + sock = nbd_establish_connection(bs, saddr, errp); + qapi_free_SocketAddress(saddr); if (sock < 0) { g_free(export); return sock; @@ -315,9 +321,6 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num, static void nbd_close(BlockDriverState *bs) { - BDRVNBDState *s = bs->opaque; - - qemu_opts_del(s->socket_opts); nbd_client_close(bs); } diff --git a/block/nfs.c b/block/nfs.c index 02eb4e464..fd79f8994 100644 --- a/block/nfs.c +++ b/block/nfs.c @@ -43,6 +43,7 @@ typedef struct NFSClient { int events; bool has_zero_init; AioContext *aio_context; + blkcnt_t st_blocks; } NFSClient; typedef struct NFSRPC { @@ -62,11 +63,10 @@ static void nfs_set_events(NFSClient *client) { int ev = nfs_which_events(client->context); if (ev != client->events) { - aio_set_fd_handler(client->aio_context, - nfs_get_fd(client->context), + aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), + false, (ev & POLLIN) ? nfs_process_read : NULL, - (ev & POLLOUT) ? nfs_process_write : NULL, - client); + (ev & POLLOUT) ? 
nfs_process_write : NULL, client); } client->events = ev; @@ -241,9 +241,8 @@ static void nfs_detach_aio_context(BlockDriverState *bs) { NFSClient *client = bs->opaque; - aio_set_fd_handler(client->aio_context, - nfs_get_fd(client->context), - NULL, NULL, NULL); + aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), + false, NULL, NULL, NULL); client->events = 0; } @@ -262,9 +261,8 @@ static void nfs_client_close(NFSClient *client) if (client->fh) { nfs_close(client->context, client->fh); } - aio_set_fd_handler(client->aio_context, - nfs_get_fd(client->context), - NULL, NULL, NULL); + aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context), + false, NULL, NULL, NULL); nfs_destroy_context(client->context); } memset(client, 0, sizeof(NFSClient)); @@ -374,6 +372,7 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename, } ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE); + client->st_blocks = st.st_blocks; client->has_zero_init = S_ISREG(st.st_mode); goto out; fail: @@ -464,6 +463,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs) NFSRPC task = {0}; struct stat st; + if (bdrv_is_read_only(bs) && + !(bs->open_flags & BDRV_O_NOCACHE)) { + return client->st_blocks * 512; + } + task.st = &st; if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb, &task) != 0) { @@ -484,6 +488,34 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset) return nfs_ftruncate(client->context, client->fh, offset); } +/* Note that this will not re-establish a connection with the NFS server + * - it is effectively a NOP. 
*/ +static int nfs_reopen_prepare(BDRVReopenState *state, + BlockReopenQueue *queue, Error **errp) +{ + NFSClient *client = state->bs->opaque; + struct stat st; + int ret = 0; + + if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) { + error_setg(errp, "Cannot open a read-only mount as read-write"); + return -EACCES; + } + + /* Update cache for read-only reopens */ + if (!(state->flags & BDRV_O_RDWR)) { + ret = nfs_fstat(client->context, client->fh, &st); + if (ret < 0) { + error_setg(errp, "Failed to fstat file: %s", + nfs_get_error(client->context)); + return ret; + } + client->st_blocks = st.st_blocks; + } + + return 0; +} + static BlockDriver bdrv_nfs = { .format_name = "nfs", .protocol_name = "nfs", @@ -499,6 +531,7 @@ static BlockDriver bdrv_nfs = { .bdrv_file_open = nfs_file_open, .bdrv_close = nfs_file_close, .bdrv_create = nfs_file_create, + .bdrv_reopen_prepare = nfs_reopen_prepare, .bdrv_co_readv = nfs_co_readv, .bdrv_co_writev = nfs_co_writev, diff --git a/block/parallels.c b/block/parallels.c index 046b56844..f689fdeaf 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -202,13 +202,13 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx; space = to_allocate * s->tracks; - if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) { + if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) { int ret; space += s->prealloc_size; if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { - ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0); + ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0); } else { - ret = bdrv_truncate(bs->file, + ret = bdrv_truncate(bs->file->bs, (s->data_end + space) << BDRV_SECTOR_BITS); } if (ret < 0) { @@ -220,7 +220,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); 
s->data_end += s->tracks; bitmap_set(s->bat_dirty_bmap, - bat_entry_off(idx) / s->bat_dirty_block, 1); + bat_entry_off(idx + i) / s->bat_dirty_block, 1); } return bat2sect(s, idx) + sector_num % s->tracks; @@ -244,7 +244,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) if (off + to_write > s->header_size) { to_write = s->header_size - off; } - ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write); + ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off, + to_write); if (ret < 0) { qemu_co_mutex_unlock(&s->lock); return ret; @@ -303,7 +304,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs, qemu_iovec_reset(&hd_qiov); qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); - ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); + ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov); if (ret < 0) { break; } @@ -343,7 +344,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, qemu_iovec_reset(&hd_qiov); qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); - ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); + ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov); if (ret < 0) { break; } @@ -369,7 +370,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, bool flush_bat = false; int cluster_size = s->tracks << BDRV_SECTOR_BITS; - size = bdrv_getlength(bs->file); + size = bdrv_getlength(bs->file->bs); if (size < 0) { res->check_errors++; return size; @@ -424,7 +425,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, } if (flush_bat) { - ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size); + ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size); if (ret < 0) { res->check_errors++; return ret; @@ -440,7 +441,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, size - res->image_end_offset); res->leaks += count; if (fix & BDRV_FIX_LEAKS) { - ret = bdrv_truncate(bs->file, 
res->image_end_offset); + ret = bdrv_truncate(bs->file->bs, res->image_end_offset); if (ret < 0) { res->check_errors++; return ret; @@ -476,7 +477,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) file = NULL; ret = bdrv_open(&file, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -546,12 +547,13 @@ static int parallels_probe(const uint8_t *buf, int buf_size, static int parallels_update_header(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; - unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader)); + unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs), + sizeof(ParallelsHeader)); if (size > s->header_size) { size = s->header_size; } - return bdrv_pwrite_sync(bs->file, 0, s->header, size); + return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size); } static int parallels_open(BlockDriverState *bs, QDict *options, int flags, @@ -564,7 +566,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; char *buf; - ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); + ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph)); if (ret < 0) { goto fail; } @@ -603,8 +605,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } size = bat_entry_off(s->bat_size); - s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file)); - s->header = qemu_try_blockalign(bs->file, s->header_size); + s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); + s->header = qemu_try_blockalign(bs->file->bs, s->header_size); if (s->header == NULL) { ret = -ENOMEM; goto fail; @@ -619,7 +621,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->header_size = size; } - ret = bdrv_pread(bs->file, 0, s->header, s->header_size); + ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size); if (ret < 
0) { goto fail; } @@ -663,8 +665,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, if (local_err != NULL) { goto fail_options; } - if (!bdrv_has_zero_init(bs->file) || - bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) { + if (!bdrv_has_zero_init(bs->file->bs) || + bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) { s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; } @@ -707,7 +709,7 @@ static void parallels_close(BlockDriverState *bs) } if (bs->open_flags & BDRV_O_RDWR) { - bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS); + bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS); } g_free(s->bat_dirty_bmap); diff --git a/block/qapi.c b/block/qapi.c index 2ce509711..267f147fe 100644 --- a/block/qapi.c +++ b/block/qapi.c @@ -64,7 +64,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) info->backing_file_depth = bdrv_get_backing_file_depth(bs); info->detect_zeroes = bs->detect_zeroes; - if (bs->io_limits_enabled) { + if (bs->throttle_state) { ThrottleConfig cfg; throttle_group_get_config(bs, &cfg); @@ -110,8 +110,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp) qapi_free_BlockDeviceInfo(info); return NULL; } - if (bs0->drv && bs0->backing_hd) { - bs0 = bs0->backing_hd; + if (bs0->drv && bs0->backing) { + bs0 = bs0->backing->bs; (*p_image_info)->has_backing_image = true; p_image_info = &((*p_image_info)->backing_image); } else { @@ -301,17 +301,17 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info, info->tray_open = blk_dev_is_tray_open(blk); } - if (bdrv_iostatus_is_enabled(bs)) { + if (blk_iostatus_is_enabled(blk)) { info->has_io_status = true; - info->io_status = bs->iostatus; + info->io_status = blk_iostatus(blk); } - if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { + if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) { info->has_dirty_bitmaps = true; info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs); } - if (bs->drv) { + if (bs && 
bs->drv) { info->has_inserted = true; info->inserted = bdrv_block_device_info(bs, errp); if (info->inserted == NULL) { @@ -344,27 +344,82 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs, } s->stats = g_malloc0(sizeof(*s->stats)); - s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ]; - s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE]; - s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ]; - s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE]; - s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ]; - s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE]; - s->stats->wr_highest_offset = - bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE; - s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH]; - s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE]; - s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ]; - s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH]; + if (bs->blk) { + BlockAcctStats *stats = blk_get_stats(bs->blk); + BlockAcctTimedStats *ts = NULL; + + s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ]; + s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE]; + s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ]; + s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE]; + + s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ]; + s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE]; + s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH]; + + s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ]; + s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE]; + s->stats->invalid_flush_operations = + stats->invalid_ops[BLOCK_ACCT_FLUSH]; + + s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ]; + s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE]; + s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH]; + s->stats->wr_total_time_ns = 
stats->total_time_ns[BLOCK_ACCT_WRITE]; + s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ]; + s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH]; + + s->stats->has_idle_time_ns = stats->last_access_time_ns > 0; + if (s->stats->has_idle_time_ns) { + s->stats->idle_time_ns = block_acct_idle_time_ns(stats); + } + + s->stats->account_invalid = stats->account_invalid; + s->stats->account_failed = stats->account_failed; + + while ((ts = block_acct_interval_next(stats, ts))) { + BlockDeviceTimedStatsList *timed_stats = + g_malloc0(sizeof(*timed_stats)); + BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats)); + timed_stats->next = s->stats->timed_stats; + timed_stats->value = dev_stats; + s->stats->timed_stats = timed_stats; + + TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ]; + TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE]; + TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH]; + + dev_stats->interval_length = ts->interval_length; + + dev_stats->min_rd_latency_ns = timed_average_min(rd); + dev_stats->max_rd_latency_ns = timed_average_max(rd); + dev_stats->avg_rd_latency_ns = timed_average_avg(rd); + + dev_stats->min_wr_latency_ns = timed_average_min(wr); + dev_stats->max_wr_latency_ns = timed_average_max(wr); + dev_stats->avg_wr_latency_ns = timed_average_avg(wr); + + dev_stats->min_flush_latency_ns = timed_average_min(fl); + dev_stats->max_flush_latency_ns = timed_average_max(fl); + dev_stats->avg_flush_latency_ns = timed_average_avg(fl); + + dev_stats->avg_rd_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_READ); + dev_stats->avg_wr_queue_depth = + block_acct_queue_depth(ts, BLOCK_ACCT_WRITE); + } + } + + s->stats->wr_highest_offset = bs->wr_highest_offset; if (bs->file) { s->has_parent = true; - s->parent = bdrv_query_stats(bs->file, query_backing); + s->parent = bdrv_query_stats(bs->file->bs, query_backing); } - if (query_backing && bs->backing_hd) { + if (query_backing && bs->backing) { s->has_backing = true; 
- s->backing = bdrv_query_stats(bs->backing_hd, query_backing); + s->backing = bdrv_query_stats(bs->backing->bs, query_backing); } return s; @@ -381,7 +436,9 @@ BlockInfoList *qmp_query_block(Error **errp) bdrv_query_info(blk, &info->value, &local_err); if (local_err) { error_propagate(errp, local_err); - goto err; + g_free(info); + qapi_free_BlockInfoList(head); + return NULL; } *p_next = info; @@ -389,10 +446,6 @@ BlockInfoList *qmp_query_block(Error **errp) } return head; - - err: - qapi_free_BlockInfoList(head); - return NULL; } BlockStatsList *qmp_query_blockstats(bool has_query_nodes, diff --git a/block/qcow.c b/block/qcow.c index 01fba54ce..635085e27 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -100,7 +100,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, int ret; QCowHeader header; - ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header)); if (ret < 0) { goto fail; } @@ -193,7 +193,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, + ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)); if (ret < 0) { goto fail; @@ -205,7 +205,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, /* alloc L2 cache (max. 
64k * 16 * 8 = 8 MB) */ s->l2_cache = - qemu_try_blockalign(bs->file, + qemu_try_blockalign(bs->file->bs, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t)); if (s->l2_cache == NULL) { error_setg(errp, "Could not allocate L2 table cache"); @@ -224,7 +224,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - ret = bdrv_pread(bs->file, header.backing_file_offset, + ret = bdrv_pread(bs->file->bs, header.backing_file_offset, bs->backing_file, len); if (ret < 0) { goto fail; @@ -369,13 +369,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, if (!allocate) return 0; /* allocate a new l2 entry */ - l2_offset = bdrv_getlength(bs->file); + l2_offset = bdrv_getlength(bs->file->bs); /* round to cluster size */ l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); /* update the L1 entry */ s->l1_table[l1_index] = l2_offset; tmp = cpu_to_be64(l2_offset); - if (bdrv_pwrite_sync(bs->file, + if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset + l1_index * sizeof(tmp), &tmp, sizeof(tmp)) < 0) return 0; @@ -405,11 +405,12 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, l2_table = s->l2_cache + (min_index << s->l2_bits); if (new_l2_table) { memset(l2_table, 0, s->l2_size * sizeof(uint64_t)); - if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table, + if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) < 0) return 0; } else { - if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) != + if (bdrv_pread(bs->file->bs, l2_offset, l2_table, + s->l2_size * sizeof(uint64_t)) != s->l2_size * sizeof(uint64_t)) return 0; } @@ -430,20 +431,21 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, overwritten */ if (decompress_cluster(bs, cluster_offset) < 0) return 0; - cluster_offset = bdrv_getlength(bs->file); + cluster_offset = bdrv_getlength(bs->file->bs); cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); /* write 
the cluster content */ - if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) != + if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache, + s->cluster_size) != s->cluster_size) return -1; } else { - cluster_offset = bdrv_getlength(bs->file); + cluster_offset = bdrv_getlength(bs->file->bs); if (allocate == 1) { /* round to cluster size */ cluster_offset = (cluster_offset + s->cluster_size - 1) & ~(s->cluster_size - 1); - bdrv_truncate(bs->file, cluster_offset + s->cluster_size); + bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size); /* if encrypted, we must initialize the cluster content which won't be written */ if (bs->encrypted && @@ -463,7 +465,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, errno = EIO; return -1; } - if (bdrv_pwrite(bs->file, cluster_offset + i * 512, + if (bdrv_pwrite(bs->file->bs, + cluster_offset + i * 512, s->cluster_data, 512) != 512) return -1; } @@ -477,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs, /* update L2 table */ tmp = cpu_to_be64(cluster_offset); l2_table[l2_index] = tmp; - if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp), + if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp), &tmp, sizeof(tmp)) < 0) return 0; } @@ -546,7 +549,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) if (s->cluster_cache_offset != coffset) { csize = cluster_offset >> (63 - s->cluster_bits); csize &= (s->cluster_size - 1); - ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize); + ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize); if (ret != csize) return -1; if (decompress_buffer(s->cluster_cache, s->cluster_size, @@ -594,13 +597,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, } if (!cluster_offset) { - if (bs->backing_hd) { + if (bs->backing) { /* read from the base image */ hd_iov.iov_base = (void *)buf; hd_iov.iov_len = n * 512; 
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->backing_hd, sector_num, + ret = bdrv_co_readv(bs->backing->bs, sector_num, n, &hd_qiov); qemu_co_mutex_lock(&s->lock); if (ret < 0) { @@ -625,7 +628,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num, hd_iov.iov_len = n * 512; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file, + ret = bdrv_co_readv(bs->file->bs, (cluster_offset >> 9) + index_in_cluster, n, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -727,7 +730,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num, hd_iov.iov_len = n * 512; qemu_iovec_init_external(&hd_qiov, &hd_iov, 1); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_writev(bs->file, + ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + index_in_cluster, n, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -793,7 +796,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp) qcow_bs = NULL; ret = bdrv_open(&qcow_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto cleanup; @@ -879,10 +882,10 @@ static int qcow_make_empty(BlockDriverState *bs) int ret; memset(s->l1_table, 0, l1_length); - if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table, + if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table, l1_length) < 0) return -1; - ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length); + ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length); if (ret < 0) return ret; @@ -962,7 +965,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num, } cluster_offset &= s->cluster_offset_mask; - ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); + ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len); if 
(ret < 0) { goto fail; } diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c index 53b8afc3d..86dd7f2bd 100644 --- a/block/qcow2-cache.c +++ b/block/qcow2-cache.c @@ -22,16 +22,24 @@ * THE SOFTWARE. */ +/* Needed for CONFIG_MADVISE */ +#include "config-host.h" + +#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) +#include <sys/mman.h> +#endif + #include "block/block_int.h" #include "qemu-common.h" +#include "qemu/osdep.h" #include "qcow2.h" #include "trace.h" typedef struct Qcow2CachedTable { int64_t offset; - bool dirty; uint64_t lru_counter; int ref; + bool dirty; } Qcow2CachedTable; struct Qcow2Cache { @@ -41,34 +49,85 @@ struct Qcow2Cache { bool depends_on_flush; void *table_array; uint64_t lru_counter; + uint64_t cache_clean_lru_counter; }; static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs, Qcow2Cache *c, int table) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; return (uint8_t *) c->table_array + (size_t) table * s->cluster_size; } static inline int qcow2_cache_get_table_idx(BlockDriverState *bs, Qcow2Cache *c, void *table) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array; int idx = table_offset / s->cluster_size; assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0); return idx; } +static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c, + int i, int num_tables) +{ +#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID + BDRVQcow2State *s = bs->opaque; + void *t = qcow2_cache_get_table_addr(bs, c, i); + int align = getpagesize(); + size_t mem_size = (size_t) s->cluster_size * num_tables; + size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t; + size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align); + if (length > 0) { + qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED); + } +#endif +} + +static inline bool can_clean_entry(Qcow2Cache *c, int i) +{ + 
Qcow2CachedTable *t = &c->entries[i]; + return t->ref == 0 && !t->dirty && t->offset != 0 && + t->lru_counter <= c->cache_clean_lru_counter; +} + +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c) +{ + int i = 0; + while (i < c->size) { + int to_clean = 0; + + /* Skip the entries that we don't need to clean */ + while (i < c->size && !can_clean_entry(c, i)) { + i++; + } + + /* And count how many we can clean in a row */ + while (i < c->size && can_clean_entry(c, i)) { + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + i++; + to_clean++; + } + + if (to_clean > 0) { + qcow2_cache_table_release(bs, c, i - to_clean, to_clean); + } + } + + c->cache_clean_lru_counter = c->lru_counter; +} + Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; Qcow2Cache *c; c = g_new0(Qcow2Cache, 1); c->size = num_tables; c->entries = g_try_new0(Qcow2CachedTable, num_tables); - c->table_array = qemu_try_blockalign(bs->file, + c->table_array = qemu_try_blockalign(bs->file->bs, (size_t) num_tables * s->cluster_size); if (!c->entries || !c->table_array) { @@ -113,7 +172,7 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c) static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret = 0; if (!c->entries[i].dirty || !c->entries[i].offset) { @@ -126,7 +185,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) if (c->depends) { ret = qcow2_cache_flush_dependency(bs, c); } else if (c->depends_on_flush) { - ret = bdrv_flush(bs->file); + ret = bdrv_flush(bs->file->bs); if (ret >= 0) { c->depends_on_flush = false; } @@ -157,7 +216,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE); } - ret = bdrv_pwrite(bs->file, c->entries[i].offset, + ret = 
bdrv_pwrite(bs->file->bs, c->entries[i].offset, qcow2_cache_get_table_addr(bs, c, i), s->cluster_size); if (ret < 0) { return ret; @@ -170,7 +229,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i) int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int result = 0; int ret; int i; @@ -185,7 +244,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c) } if (result == 0) { - ret = bdrv_flush(bs->file); + ret = bdrv_flush(bs->file->bs); if (ret < 0) { result = ret; } @@ -237,6 +296,8 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) c->entries[i].lru_counter = 0; } + qcow2_cache_table_release(bs, c, 0, c->size); + c->lru_counter = 0; return 0; @@ -245,7 +306,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c) static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, void **table, bool read_from_disk) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int i; int ret; int lookup_index; @@ -295,7 +356,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c, BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD); } - ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i), + ret = bdrv_pread(bs->file->bs, offset, + qcow2_cache_get_table_addr(bs, c, i), s->cluster_size); if (ret < 0) { return ret; diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index 7e94fe70e..24a60e223 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -32,7 +32,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, bool exact_size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int new_l1_size2, ret, i; uint64_t *new_l1_table; int64_t old_l1_table_offset, old_l1_size; @@ -72,7 +72,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, #endif new_l1_size2 = sizeof(uint64_t) * new_l1_size; - new_l1_table = 
qemu_try_blockalign(bs->file, + new_l1_table = qemu_try_blockalign(bs->file->bs, align_offset(new_l1_size2, 512)); if (new_l1_table == NULL) { return -ENOMEM; @@ -105,7 +105,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE); for(i = 0; i < s->l1_size; i++) new_l1_table[i] = cpu_to_be64(new_l1_table[i]); - ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2); + ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset, + new_l1_table, new_l1_size2); if (ret < 0) goto fail; for(i = 0; i < s->l1_size; i++) @@ -115,7 +116,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE); cpu_to_be32w((uint32_t*)data, new_l1_size); stq_be_p(data + 4, new_l1_table_offset); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data)); + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size), + data, sizeof(data)); if (ret < 0) { goto fail; } @@ -148,7 +150,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, static int l2_load(BlockDriverState *bs, uint64_t l2_offset, uint64_t **l2_table) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table); @@ -163,7 +165,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset, #define L1_ENTRIES_PER_SECTOR (512 / 8) int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 }; int l1_start_index; int i, ret; @@ -182,8 +184,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) } BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE); - ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index, - buf, sizeof(buf)); + ret = bdrv_pwrite_sync(bs->file->bs, + s->l1_table_offset + 8 * l1_start_index, + buf, 
sizeof(buf)); if (ret < 0) { return ret; } @@ -203,7 +206,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index) static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t old_l2_offset; uint64_t *l2_table = NULL; int64_t l2_offset; @@ -309,7 +312,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, if (!offset) return 0; - assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED); + assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL); for (i = 0; i < nb_clusters; i++) { uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask; @@ -321,14 +324,16 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size, return i; } -static int count_contiguous_free_clusters(int nb_clusters, uint64_t *l2_table) +static int count_contiguous_clusters_by_type(int nb_clusters, + uint64_t *l2_table, + int wanted_type) { int i; for (i = 0; i < nb_clusters; i++) { int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i])); - if (type != QCOW2_CLUSTER_UNALLOCATED) { + if (type != wanted_type) { break; } } @@ -339,7 +344,7 @@ static int count_contiguous_free_clusters(int nb_clusters, uint64_t *l2_table) /* The crypt function is compatible with the linux cryptoloop algorithm for < 4 GB images. 
NOTE: out_buf == in_buf is supported */ -int qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, +int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *out_buf, const uint8_t *in_buf, int nb_sectors, bool enc, Error **errp) @@ -387,7 +392,7 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, uint64_t cluster_offset, int n_start, int n_end) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QEMUIOVector qiov; struct iovec iov; int n, ret; @@ -440,7 +445,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs, } BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE); - ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov); + ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n, + &qiov); if (ret < 0) { goto out; } @@ -469,7 +475,7 @@ out: int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int l2_index; uint64_t l1_index, l2_offset, *l2_table; int l1_bits, c; @@ -499,7 +505,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, *cluster_offset = 0; - /* seek the the l2 offset in the l1 table */ + /* seek to the l2 offset in the l1 table */ l1_index = offset >> l1_bits; if (l1_index >= s->l1_size) { @@ -550,13 +556,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, ret = -EIO; goto fail; } - c = count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], QCOW_OFLAG_ZERO); + c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], + QCOW2_CLUSTER_ZERO); *cluster_offset = 0; break; case QCOW2_CLUSTER_UNALLOCATED: /* how many empty clusters ? 
*/ - c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]); + c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index], + QCOW2_CLUSTER_UNALLOCATED); *cluster_offset = 0; break; case QCOW2_CLUSTER_NORMAL: @@ -609,13 +616,13 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, uint64_t **new_l2_table, int *new_l2_index) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int l2_index; uint64_t l1_index, l2_offset; uint64_t *l2_table = NULL; int ret; - /* seek the the l2 offset in the l1 table */ + /* seek to the l2 offset in the l1 table */ l1_index = offset >> (s->l2_bits + s->cluster_bits); if (l1_index >= s->l1_size) { @@ -683,7 +690,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset, int compressed_size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int l2_index, ret; uint64_t *l2_table; int64_t cluster_offset; @@ -728,7 +735,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; if (r->nb_sectors == 0) { @@ -757,7 +764,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int i, j = 0, l2_index, ret; uint64_t *old_cluster, *l2_table; uint64_t cluster_offset = m->alloc_offset; @@ -817,7 +824,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) /* * If this was a COW, we need to decrease the refcount of the old cluster. - * Also flush bs->file to get the right order for L2 and refcount update. * * Don't discard clusters that reach a refcount of 0 (e.g. compressed * clusters), the next write will reuse them anyway. 
@@ -840,7 +846,7 @@ err: * write, but require COW to be performed (this includes yet unallocated space, * which must copy from the backing file) */ -static int count_cow_clusters(BDRVQcowState *s, int nb_clusters, +static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters, uint64_t *l2_table, int l2_index) { int i; @@ -886,7 +892,7 @@ out: static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, uint64_t *cur_bytes, QCowL2Meta **m) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowL2Meta *old_alloc; uint64_t bytes = *cur_bytes; @@ -959,7 +965,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int l2_index; uint64_t cluster_offset; uint64_t *l2_table; @@ -1067,7 +1073,7 @@ out: static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, uint64_t *host_offset, uint64_t *nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, *host_offset, *nb_clusters); @@ -1115,7 +1121,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int l2_index; uint64_t *l2_table; uint64_t entry; @@ -1268,7 +1274,7 @@ fail: int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *host_offset, QCowL2Meta **m) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t start, remaining; uint64_t cluster_offset; uint64_t cur_bytes; @@ -1402,7 +1408,7 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size, int 
qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret, csize, nb_csectors, sector_offset; uint64_t coffset; @@ -1412,7 +1418,8 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset) sector_offset = coffset & 511; csize = nb_csectors * 512 - sector_offset; BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED); - ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors); + ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data, + nb_csectors); if (ret < 0) { return ret; } @@ -1434,7 +1441,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters, enum qcow2_discard_type type, bool full_discard) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l2_table; int l2_index; int ret; @@ -1469,7 +1476,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, */ switch (qcow2_get_cluster_type(old_l2_entry)) { case QCOW2_CLUSTER_UNALLOCATED: - if (full_discard || !bs->backing_hd) { + if (full_discard || !bs->backing) { continue; } break; @@ -1508,7 +1515,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors, enum qcow2_discard_type type, bool full_discard) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t end_offset; uint64_t nb_clusters; int ret; @@ -1554,7 +1561,7 @@ fail: static int zero_single_l2(BlockDriverState *bs, uint64_t offset, uint64_t nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l2_table; int l2_index; int ret; @@ -1591,7 +1598,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t nb_clusters; int ret; @@ -1636,7 
+1643,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, int64_t l1_entries, BlockDriverAmendStatusCB *status_cb) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bool is_active_l1 = (l1_table == s->l1_table); uint64_t *l2_table = NULL; int ret; @@ -1645,7 +1652,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, if (!is_active_l1) { /* inactive L2 tables require a buffer to be stored in when loading * them from disk */ - l2_table = qemu_try_blockalign(bs->file, s->cluster_size); + l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size); if (l2_table == NULL) { return -ENOMEM; } @@ -1679,8 +1686,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, (void **)&l2_table); } else { /* load inactive L2 tables from disk */ - ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE, - (void *)l2_table, s->cluster_sectors); + ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE, + (void *)l2_table, s->cluster_sectors); } if (ret < 0) { goto fail; @@ -1703,7 +1710,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, } if (!preallocated) { - if (!bs->backing_hd) { + if (!bs->backing) { /* not backed; therefore we can simply deallocate the * cluster */ l2_table[j] = 0; @@ -1754,7 +1761,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } - ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE, + ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE, s->cluster_sectors, 0); if (ret < 0) { if (!preallocated) { @@ -1787,8 +1794,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table, goto fail; } - ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE, - (void *)l2_table, s->cluster_sectors); + ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE, + (void *)l2_table, s->cluster_sectors); if (ret < 0) { goto fail; } @@ -1823,7 
+1830,7 @@ fail: int qcow2_expand_zero_clusters(BlockDriverState *bs, BlockDriverAmendStatusCB *status_cb) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l1_table = NULL; int64_t l1_entries = 0, visited_l1_entries = 0; int ret; @@ -1861,8 +1868,9 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs, l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE); - ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset / - BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors); + ret = bdrv_read(bs->file->bs, + s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE, + (void *)l1_table, l1_sectors); if (ret < 0) { goto fail; } diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c index 0b6c302ee..820f412ab 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c @@ -82,7 +82,7 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = { int qcow2_refcount_init(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int refcount_table_size2, i; int ret; @@ -101,7 +101,7 @@ int qcow2_refcount_init(BlockDriverState *bs) goto fail; } BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD); - ret = bdrv_pread(bs->file, s->refcount_table_offset, + ret = bdrv_pread(bs->file->bs, s->refcount_table_offset, s->refcount_table, refcount_table_size2); if (ret < 0) { goto fail; @@ -116,7 +116,7 @@ int qcow2_refcount_init(BlockDriverState *bs) void qcow2_refcount_close(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; g_free(s->refcount_table); } @@ -214,7 +214,7 @@ static int load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset, void **refcount_block) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD); @@ -231,7 +231,7 @@ static int load_refcount_block(BlockDriverState *bs, int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, uint64_t *refcount) { - BDRVQcowState 
*s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t refcount_table_index, block_index; int64_t refcount_block_offset; int ret; @@ -274,7 +274,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index, * Rounds the refcount table size up to avoid growing the table for each single * refcount block that is allocated. */ -static unsigned int next_refcount_table_size(BDRVQcowState *s, +static unsigned int next_refcount_table_size(BDRVQcow2State *s, unsigned int min_size) { unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1; @@ -290,7 +290,7 @@ static unsigned int next_refcount_table_size(BDRVQcowState *s, /* Checks if two offsets are described by the same refcount block */ -static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a, +static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a, uint64_t offset_b) { uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits); @@ -308,7 +308,7 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a, static int alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index, void **refcount_block) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int refcount_table_index; int ret; @@ -431,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs, if (refcount_table_index < s->refcount_table_size) { uint64_t data64 = cpu_to_be64(new_block); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP); - ret = bdrv_pwrite_sync(bs->file, + ret = bdrv_pwrite_sync(bs->file->bs, s->refcount_table_offset + refcount_table_index * sizeof(uint64_t), &data64, sizeof(data64)); if (ret < 0) { @@ -535,7 +535,7 @@ static int alloc_refcount_block(BlockDriverState *bs, /* Write refcount blocks to disk */ BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS); - ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks, + ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks, blocks_clusters * s->cluster_size); 
g_free(new_blocks); new_blocks = NULL; @@ -549,7 +549,7 @@ static int alloc_refcount_block(BlockDriverState *bs, } BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE); - ret = bdrv_pwrite_sync(bs->file, table_offset, new_table, + ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table, table_size * sizeof(uint64_t)); if (ret < 0) { goto fail_table; @@ -560,12 +560,16 @@ static int alloc_refcount_block(BlockDriverState *bs, } /* Hook up the new refcount table in the qcow2 header */ - uint8_t data[12]; - cpu_to_be64w((uint64_t*)data, table_offset); - cpu_to_be32w((uint32_t*)(data + 8), table_clusters); + struct QEMU_PACKED { + uint64_t d64; + uint32_t d32; + } data; + cpu_to_be64w(&data.d64, table_offset); + cpu_to_be32w(&data.d32, table_clusters); BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset), - data, sizeof(data)); + ret = bdrv_pwrite_sync(bs->file->bs, + offsetof(QCowHeader, refcount_table_offset), + &data, sizeof(data)); if (ret < 0) { goto fail_table; } @@ -605,7 +609,7 @@ fail_block: void qcow2_process_discards(BlockDriverState *bs, int ret) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; Qcow2DiscardRegion *d, *next; QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) { @@ -613,7 +617,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret) /* Discard is optional, ignore the return value */ if (ret >= 0) { - bdrv_discard(bs->file, + bdrv_discard(bs->file->bs, d->offset >> BDRV_SECTOR_BITS, d->bytes >> BDRV_SECTOR_BITS); } @@ -625,7 +629,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret) static void update_refcount_discard(BlockDriverState *bs, uint64_t offset, uint64_t length) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; Qcow2DiscardRegion *d, *p, *next; QTAILQ_FOREACH(d, &s->discards, next) { @@ -682,7 +686,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs, bool 
decrease, enum qcow2_discard_type type) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t start, last, cluster_offset; void *refcount_block = NULL; int64_t old_table_index = -1; @@ -793,7 +797,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, uint64_t addend, bool decrease, enum qcow2_discard_type type) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend, @@ -815,7 +819,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs, /* return < 0 if error */ static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t i, nb_clusters, refcount; int ret; @@ -878,7 +882,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size) int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, int64_t nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t cluster_index, refcount; uint64_t i; int ret; @@ -916,7 +920,7 @@ int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset, contiguous sectors. 
size must be <= cluster_size */ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t offset; size_t free_in_cluster; int ret; @@ -949,11 +953,17 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size) if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) { offset = new_cluster; + free_in_cluster = s->cluster_size; + } else { + free_in_cluster += s->cluster_size; } } assert(offset); ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER); + if (ret < 0) { + offset = 0; + } } while (ret == -EAGAIN); if (ret < 0) { return ret; @@ -992,7 +1002,7 @@ void qcow2_free_clusters(BlockDriverState *bs, void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, int nb_clusters, enum qcow2_discard_type type) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; switch (qcow2_get_cluster_type(l2_entry)) { case QCOW2_CLUSTER_COMPRESSED: @@ -1036,7 +1046,7 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry, int qcow2_update_snapshot_refcount(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int addend) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount; bool l1_allocated = false; int64_t old_offset, old_l2_offset; @@ -1062,7 +1072,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs, } l1_allocated = true; - ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); + ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2); if (ret < 0) { goto fail; } @@ -1215,7 +1225,8 @@ fail: cpu_to_be64s(&l1_table[i]); } - ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2); + ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset, + l1_table, l1_size2); for (i = 0; i < l1_size; i++) { be64_to_cpus(&l1_table[i]); @@ -1233,7 +1244,7 @@ fail: /* refcount checking functions */ -static size_t 
refcount_array_byte_size(BDRVQcowState *s, uint64_t entries) +static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries) { /* This assertion holds because there is no way we can address more than * 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because @@ -1256,7 +1267,7 @@ static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries) * refcount array buffer will be aligned to a cluster boundary, and the newly * allocated area will be zeroed. */ -static int realloc_refcount_array(BDRVQcowState *s, void **array, +static int realloc_refcount_array(BDRVQcow2State *s, void **array, int64_t *size, int64_t new_size) { int64_t old_byte_size, new_byte_size; @@ -1298,7 +1309,7 @@ static int realloc_refcount_array(BDRVQcowState *s, void **array, /* * Increases the refcount for a range of clusters in a given refcount table. * This is used to construct a temporary refcount table out of L1 and L2 tables - * which can be compared the the refcount table saved in the image. + * which can be compared to the refcount table saved in the image. * * Modifies the number of errors in res. 
*/ @@ -1308,7 +1319,7 @@ static int inc_refcounts(BlockDriverState *bs, int64_t *refcount_table_size, int64_t offset, int64_t size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t start, last, cluster_offset, k, refcount; int ret; @@ -1361,7 +1372,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, int64_t *refcount_table_size, int64_t l2_offset, int flags) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l2_table, l2_entry; uint64_t next_contiguous_offset = 0; int i, l2_size, nb_csectors, ret; @@ -1370,7 +1381,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res, l2_size = s->l2_size * sizeof(uint64_t); l2_table = g_malloc(l2_size); - ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size); + ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size); if (ret < 0) { fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n"); res->check_errors++; @@ -1481,7 +1492,7 @@ static int check_refcounts_l1(BlockDriverState *bs, int64_t l1_table_offset, int l1_size, int flags) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l1_table = NULL, l2_offset, l1_size2; int i, ret; @@ -1502,7 +1513,7 @@ static int check_refcounts_l1(BlockDriverState *bs, res->check_errors++; goto fail; } - ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2); + ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2); if (ret < 0) { fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n"); res->check_errors++; @@ -1558,7 +1569,7 @@ fail: static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size); int ret; uint64_t refcount; @@ -1600,7 +1611,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, } } - ret = bdrv_pread(bs->file, l2_offset, l2_table, + 
ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)); if (ret < 0) { fprintf(stderr, "ERROR: Could not read L2 table: %s\n", @@ -1652,7 +1663,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, goto fail; } - ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size); + ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table, + s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR: Could not write L2 table: %s\n", strerror(-ret)); @@ -1677,7 +1689,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix, bool *rebuild, void **refcount_table, int64_t *nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t i, size; int ret; @@ -1707,11 +1719,11 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res, goto resize_fail; } - ret = bdrv_truncate(bs->file, offset + s->cluster_size); + ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size); if (ret < 0) { goto resize_fail; } - size = bdrv_getlength(bs->file); + size = bdrv_getlength(bs->file->bs); if (size < 0) { ret = size; goto resize_fail; @@ -1780,7 +1792,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix, bool *rebuild, void **refcount_table, int64_t *nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t i; QCowSnapshot *sn; int ret; @@ -1844,7 +1856,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res, int64_t *highest_cluster, void *refcount_table, int64_t nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t i; uint64_t refcount1, refcount2; int ret; @@ -1921,7 +1933,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, int64_t *imrt_nb_clusters, int64_t *first_free_cluster) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t cluster = *first_free_cluster, i; bool first_gap = true; int 
contiguous_free_clusters; @@ -1991,7 +2003,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0; int64_t refblock_offset, refblock_start, refblock_index; uint32_t reftable_size = 0; @@ -2085,7 +2097,7 @@ write_refblocks: on_disk_refblock = (void *)((char *) *refcount_table + refblock_index * s->cluster_size); - ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE, + ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE, on_disk_refblock, s->cluster_sectors); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); @@ -2134,7 +2146,7 @@ write_refblocks: } assert(reftable_size < INT_MAX / sizeof(uint64_t)); - ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, + ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable, reftable_size * sizeof(uint64_t)); if (ret < 0) { fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); @@ -2146,8 +2158,8 @@ write_refblocks: reftable_offset); cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters, size_to_clusters(s, reftable_size * sizeof(uint64_t))); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, - refcount_table_offset), + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, + refcount_table_offset), &reftable_offset_and_clusters, sizeof(reftable_offset_and_clusters)); if (ret < 0) { @@ -2178,14 +2190,14 @@ fail: int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; BdrvCheckResult pre_compare_res; int64_t size, highest_cluster, nb_clusters; void *refcount_table = NULL; bool rebuild = false; int ret; - size = bdrv_getlength(bs->file); + size = bdrv_getlength(bs->file->bs); if (size < 0) { res->check_errors++; return size; @@ -2315,7 +2327,7 @@ 
fail: int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, int64_t size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int chk = s->overlap_check & ~ign; int i, j; @@ -2394,7 +2406,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, return -ENOMEM; } - ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2); + ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2); if (ret < 0) { g_free(l1); return ret; diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c index b6f58c13e..def720164 100644 --- a/block/qcow2-snapshot.c +++ b/block/qcow2-snapshot.c @@ -29,7 +29,7 @@ void qcow2_free_snapshots(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int i; for(i = 0; i < s->nb_snapshots; i++) { @@ -43,7 +43,7 @@ void qcow2_free_snapshots(BlockDriverState *bs) int qcow2_read_snapshots(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshotHeader h; QCowSnapshotExtraData extra; QCowSnapshot *sn; @@ -64,7 +64,7 @@ int qcow2_read_snapshots(BlockDriverState *bs) for(i = 0; i < s->nb_snapshots; i++) { /* Read statically sized part of the snapshot header */ offset = align_offset(offset, 8); - ret = bdrv_pread(bs->file, offset, &h, sizeof(h)); + ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h)); if (ret < 0) { goto fail; } @@ -83,7 +83,7 @@ int qcow2_read_snapshots(BlockDriverState *bs) name_size = be16_to_cpu(h.name_size); /* Read extra data */ - ret = bdrv_pread(bs->file, offset, &extra, + ret = bdrv_pread(bs->file->bs, offset, &extra, MIN(sizeof(extra), extra_data_size)); if (ret < 0) { goto fail; @@ -102,7 +102,7 @@ int qcow2_read_snapshots(BlockDriverState *bs) /* Read snapshot ID */ sn->id_str = g_malloc(id_str_size + 1); - ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size); + ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size); if (ret < 0) { goto fail; } @@ -111,7 +111,7 @@ int 
qcow2_read_snapshots(BlockDriverState *bs) /* Read snapshot name */ sn->name = g_malloc(name_size + 1); - ret = bdrv_pread(bs->file, offset, sn->name, name_size); + ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size); if (ret < 0) { goto fail; } @@ -136,7 +136,7 @@ fail: /* add at the end of the file a new list of snapshots */ static int qcow2_write_snapshots(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshot *sn; QCowSnapshotHeader h; QCowSnapshotExtraData extra; @@ -214,25 +214,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs) h.name_size = cpu_to_be16(name_size); offset = align_offset(offset, 8); - ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h)); + ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h)); if (ret < 0) { goto fail; } offset += sizeof(h); - ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra)); + ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra)); if (ret < 0) { goto fail; } offset += sizeof(extra); - ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size); + ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size); if (ret < 0) { goto fail; } offset += id_str_size; - ret = bdrv_pwrite(bs->file, offset, sn->name, name_size); + ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size); if (ret < 0) { goto fail; } @@ -254,7 +254,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs) header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots); header_data.snapshots_offset = cpu_to_be64(snapshots_offset); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots), + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots), &header_data, sizeof(header_data)); if (ret < 0) { goto fail; @@ -278,7 +278,7 @@ fail: static void find_new_snapshot_id(BlockDriverState *bs, char *id_str, int id_str_size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshot *sn; int i; unsigned long id, id_max = 0; @@ 
-296,7 +296,7 @@ static int find_snapshot_by_id_and_name(BlockDriverState *bs, const char *id, const char *name) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int i; if (id && name) { @@ -338,7 +338,7 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs, /* if no id is provided, a new one is constructed */ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshot *new_snapshot_list = NULL; QCowSnapshot *old_snapshot_list = NULL; QCowSnapshot sn1, *sn = &sn1; @@ -396,7 +396,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info) goto fail; } - ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table, + ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table, s->l1_size * sizeof(uint64_t)); if (ret < 0) { goto fail; @@ -461,7 +461,7 @@ fail: /* copy the snapshot 'snapshot_name' into the current disk image */ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshot *sn; int i, snapshot_index; int cur_l1_bytes, sn_l1_bytes; @@ -509,7 +509,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) goto fail; } - ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes); + ret = bdrv_pread(bs->file->bs, sn->l1_table_offset, + sn_l1_table, sn_l1_bytes); if (ret < 0) { goto fail; } @@ -526,7 +527,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id) goto fail; } - ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table, + ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table, cur_l1_bytes); if (ret < 0) { goto fail; @@ -587,7 +588,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs, const char *name, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshot sn; int snapshot_index, ret; @@ -650,7 +651,7 
@@ int qcow2_snapshot_delete(BlockDriverState *bs, int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QEMUSnapshotInfo *sn_tab, *sn_info; QCowSnapshot *sn; int i; @@ -683,7 +684,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, Error **errp) { int i, snapshot_index; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowSnapshot *sn; uint64_t *new_l1_table; int new_l1_bytes; @@ -706,13 +707,14 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs, return -EFBIG; } new_l1_bytes = sn->l1_size * sizeof(uint64_t); - new_l1_table = qemu_try_blockalign(bs->file, + new_l1_table = qemu_try_blockalign(bs->file->bs, align_offset(new_l1_bytes, 512)); if (new_l1_table == NULL) { return -ENOMEM; } - ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes); + ret = bdrv_pread(bs->file->bs, sn->l1_table_offset, + new_l1_table, new_l1_bytes); if (ret < 0) { error_setg(errp, "Failed to read l1 table for snapshot"); qemu_vfree(new_l1_table); diff --git a/block/qcow2.c b/block/qcow2.c index 76c331b38..88f56c886 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -85,7 +85,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uint64_t end_offset, void **p_feature_table, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowExtension ext; uint64_t offset; int ret; @@ -104,7 +104,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, printf("attempting to read extended header in offset %lu\n", offset); #endif - ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext)); + ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext)); if (ret < 0) { error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: " "pread fail from offset %" PRIu64, offset); @@ -132,7 +132,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, sizeof(bs->backing_format)); 
return 2; } - ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len); + ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len); if (ret < 0) { error_setg_errno(errp, -ret, "ERROR: ext_backing_format: " "Could not read format name"); @@ -148,7 +148,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, case QCOW2_EXT_MAGIC_FEATURE_TABLE: if (p_feature_table != NULL) { void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature)); - ret = bdrv_pread(bs->file, offset , feature_table, ext.len); + ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len); if (ret < 0) { error_setg_errno(errp, -ret, "ERROR: ext_feature_table: " "Could not read table"); @@ -169,7 +169,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, uext->len = ext.len; QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next); - ret = bdrv_pread(bs->file, offset , uext->data, uext->len); + ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len); if (ret < 0) { error_setg_errno(errp, -ret, "ERROR: unknown extension: " "Could not read data"); @@ -187,7 +187,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset, static void cleanup_unknown_header_ext(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; Qcow2UnknownHeaderExtension *uext, *next; QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) { @@ -249,7 +249,7 @@ static void report_unsupported_feature(BlockDriverState *bs, */ int qcow2_mark_dirty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t val; int ret; @@ -260,12 +260,12 @@ int qcow2_mark_dirty(BlockDriverState *bs) } val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY); - ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features), + ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features), &val, sizeof(val)); if (ret < 0) { return 
ret; } - ret = bdrv_flush(bs->file); + ret = bdrv_flush(bs->file->bs); if (ret < 0) { return ret; } @@ -282,7 +282,7 @@ int qcow2_mark_dirty(BlockDriverState *bs) */ static int qcow2_mark_clean(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) { int ret; @@ -304,7 +304,7 @@ static int qcow2_mark_clean(BlockDriverState *bs) */ int qcow2_mark_corrupt(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT; return qcow2_update_header(bs); @@ -316,7 +316,7 @@ int qcow2_mark_corrupt(BlockDriverState *bs) */ int qcow2_mark_consistent(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) { int ret = bdrv_flush(bs); @@ -351,7 +351,7 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result, static int validate_table_offset(BlockDriverState *bs, uint64_t offset, uint64_t entries, size_t entry_len) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t size; /* Use signed INT64_MAX as the maximum even for uint64_t header fields, @@ -467,6 +467,11 @@ static QemuOptsList qcow2_runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Maximum refcount block cache size", }, + { + .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL, + .type = QEMU_OPT_NUMBER, + .help = "Clean unused cache entries after this time (in seconds)", + }, { /* end of list */ } }, }; @@ -482,11 +487,54 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { [QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2, }; +static void cache_clean_timer_cb(void *opaque) +{ + BlockDriverState *bs = opaque; + BDRVQcow2State *s = bs->opaque; + qcow2_cache_clean_unused(bs, s->l2_table_cache); + qcow2_cache_clean_unused(bs, s->refcount_block_cache); + timer_mod(s->cache_clean_timer, 
qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); +} + +static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_interval > 0) { + s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL, + SCALE_MS, cache_clean_timer_cb, + bs); + timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + + (int64_t) s->cache_clean_interval * 1000); + } +} + +static void cache_clean_timer_del(BlockDriverState *bs) +{ + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_timer) { + timer_del(s->cache_clean_timer); + timer_free(s->cache_clean_timer); + s->cache_clean_timer = NULL; + } +} + +static void qcow2_detach_aio_context(BlockDriverState *bs) +{ + cache_clean_timer_del(bs); +} + +static void qcow2_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + cache_clean_timer_init(bs, new_context); +} + static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, uint64_t *l2_cache_size, uint64_t *refcount_cache_size, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t combined_cache_size; bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set; @@ -541,22 +589,246 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts, } } +typedef struct Qcow2ReopenState { + Qcow2Cache *l2_table_cache; + Qcow2Cache *refcount_block_cache; + bool use_lazy_refcounts; + int overlap_check; + bool discard_passthrough[QCOW2_DISCARD_MAX]; + uint64_t cache_clean_interval; +} Qcow2ReopenState; + +static int qcow2_update_options_prepare(BlockDriverState *bs, + Qcow2ReopenState *r, + QDict *options, int flags, + Error **errp) +{ + BDRVQcow2State *s = bs->opaque; + QemuOpts *opts = NULL; + const char *opt_overlap_check, *opt_overlap_check_template; + int overlap_check_template = 0; + uint64_t l2_cache_size, refcount_cache_size; + int i; + Error *local_err = NULL; + int ret; + + opts = 
qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + /* get L2 table/refcount block cache size from command line options */ + read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + l2_cache_size /= s->cluster_size; + if (l2_cache_size < MIN_L2_CACHE_SIZE) { + l2_cache_size = MIN_L2_CACHE_SIZE; + } + if (l2_cache_size > INT_MAX) { + error_setg(errp, "L2 cache size too big"); + ret = -EINVAL; + goto fail; + } + + refcount_cache_size /= s->cluster_size; + if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { + refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; + } + if (refcount_cache_size > INT_MAX) { + error_setg(errp, "Refcount cache size too big"); + ret = -EINVAL; + goto fail; + } + + /* alloc new L2 table/refcount block cache, flush old one */ + if (s->l2_table_cache) { + ret = qcow2_cache_flush(bs, s->l2_table_cache); + if (ret) { + error_setg_errno(errp, -ret, "Failed to flush the L2 table cache"); + goto fail; + } + } + + if (s->refcount_block_cache) { + ret = qcow2_cache_flush(bs, s->refcount_block_cache); + if (ret) { + error_setg_errno(errp, -ret, + "Failed to flush the refcount block cache"); + goto fail; + } + } + + r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); + r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); + if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) { + error_setg(errp, "Could not allocate metadata caches"); + ret = -ENOMEM; + goto fail; + } + + /* New interval for cache cleanup timer */ + r->cache_clean_interval = + qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL, + s->cache_clean_interval); + if (r->cache_clean_interval > UINT_MAX) { + error_setg(errp, "Cache clean interval too big"); + ret = -EINVAL; + goto fail; + } 
+ + /* lazy-refcounts; flush if going from enabled to disabled */ + r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, + (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); + if (r->use_lazy_refcounts && s->qcow_version < 3) { + error_setg(errp, "Lazy refcounts require a qcow2 image with at least " + "qemu 1.1 compatibility level"); + ret = -EINVAL; + goto fail; + } + + if (s->use_lazy_refcounts && !r->use_lazy_refcounts) { + ret = qcow2_mark_clean(bs); + if (ret < 0) { + error_setg_errno(errp, -ret, "Failed to disable lazy refcounts"); + goto fail; + } + } + + /* Overlap check options */ + opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); + opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); + if (opt_overlap_check_template && opt_overlap_check && + strcmp(opt_overlap_check_template, opt_overlap_check)) + { + error_setg(errp, "Conflicting values for qcow2 options '" + QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE + "' ('%s')", opt_overlap_check, opt_overlap_check_template); + ret = -EINVAL; + goto fail; + } + if (!opt_overlap_check) { + opt_overlap_check = opt_overlap_check_template ?: "cached"; + } + + if (!strcmp(opt_overlap_check, "none")) { + overlap_check_template = 0; + } else if (!strcmp(opt_overlap_check, "constant")) { + overlap_check_template = QCOW2_OL_CONSTANT; + } else if (!strcmp(opt_overlap_check, "cached")) { + overlap_check_template = QCOW2_OL_CACHED; + } else if (!strcmp(opt_overlap_check, "all")) { + overlap_check_template = QCOW2_OL_ALL; + } else { + error_setg(errp, "Unsupported value '%s' for qcow2 option " + "'overlap-check'. 
Allowed are any of the following: " + "none, constant, cached, all", opt_overlap_check); + ret = -EINVAL; + goto fail; + } + + r->overlap_check = 0; + for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { + /* overlap-check defines a template bitmask, but every flag may be + * overwritten through the associated boolean option */ + r->overlap_check |= + qemu_opt_get_bool(opts, overlap_bool_option_names[i], + overlap_check_template & (1 << i)) << i; + } + + r->discard_passthrough[QCOW2_DISCARD_NEVER] = false; + r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; + r->discard_passthrough[QCOW2_DISCARD_REQUEST] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, + flags & BDRV_O_UNMAP); + r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); + r->discard_passthrough[QCOW2_DISCARD_OTHER] = + qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); + + ret = 0; +fail: + qemu_opts_del(opts); + opts = NULL; + return ret; +} + +static void qcow2_update_options_commit(BlockDriverState *bs, + Qcow2ReopenState *r) +{ + BDRVQcow2State *s = bs->opaque; + int i; + + if (s->l2_table_cache) { + qcow2_cache_destroy(bs, s->l2_table_cache); + } + if (s->refcount_block_cache) { + qcow2_cache_destroy(bs, s->refcount_block_cache); + } + s->l2_table_cache = r->l2_table_cache; + s->refcount_block_cache = r->refcount_block_cache; + + s->overlap_check = r->overlap_check; + s->use_lazy_refcounts = r->use_lazy_refcounts; + + for (i = 0; i < QCOW2_DISCARD_MAX; i++) { + s->discard_passthrough[i] = r->discard_passthrough[i]; + } + + if (s->cache_clean_interval != r->cache_clean_interval) { + cache_clean_timer_del(bs); + s->cache_clean_interval = r->cache_clean_interval; + cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); + } +} + +static void qcow2_update_options_abort(BlockDriverState *bs, + Qcow2ReopenState *r) +{ + if (r->l2_table_cache) { + qcow2_cache_destroy(bs, r->l2_table_cache); + } + if (r->refcount_block_cache) { + 
qcow2_cache_destroy(bs, r->refcount_block_cache); + } +} + +static int qcow2_update_options(BlockDriverState *bs, QDict *options, + int flags, Error **errp) +{ + Qcow2ReopenState r = {}; + int ret; + + ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); + if (ret >= 0) { + qcow2_update_options_commit(bs, &r); + } else { + qcow2_update_options_abort(bs, &r); + } + + return ret; +} + static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; unsigned int len, i; int ret = 0; QCowHeader header; - QemuOpts *opts = NULL; Error *local_err = NULL; uint64_t ext_end; uint64_t l1_vm_state_index; - const char *opt_overlap_check, *opt_overlap_check_template; - int overlap_check_template = 0; - uint64_t l2_cache_size, refcount_cache_size; - ret = bdrv_pread(bs->file, 0, &header, sizeof(header)); + ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read qcow2 header"); goto fail; @@ -631,7 +903,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (header.header_length > sizeof(header)) { s->unknown_header_fields_size = header.header_length - sizeof(header); s->unknown_header_fields = g_malloc(s->unknown_header_fields_size); - ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields, + ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields, s->unknown_header_fields_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read unknown qcow2 header " @@ -784,14 +1056,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (s->l1_size > 0) { - s->l1_table = qemu_try_blockalign(bs->file, + s->l1_table = qemu_try_blockalign(bs->file->bs, align_offset(s->l1_size * sizeof(uint64_t), 512)); if (s->l1_table == NULL) { error_setg(errp, "Could not allocate L1 table"); ret = -ENOMEM; goto fail; } - ret = bdrv_pread(bs->file, 
s->l1_table_offset, s->l1_table, + ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table, s->l1_size * sizeof(uint64_t)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read L1 table"); @@ -802,55 +1074,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } } - /* get L2 table/refcount block cache size from command line options */ - opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort); - qemu_opts_absorb_qdict(opts, options, &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; - } - - read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size, - &local_err); - if (local_err) { - error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; - } - - l2_cache_size /= s->cluster_size; - if (l2_cache_size < MIN_L2_CACHE_SIZE) { - l2_cache_size = MIN_L2_CACHE_SIZE; - } - if (l2_cache_size > INT_MAX) { - error_setg(errp, "L2 cache size too big"); - ret = -EINVAL; - goto fail; - } - - refcount_cache_size /= s->cluster_size; - if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) { - refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE; - } - if (refcount_cache_size > INT_MAX) { - error_setg(errp, "Refcount cache size too big"); - ret = -EINVAL; - goto fail; - } - - /* alloc L2 table/refcount block cache */ - s->l2_table_cache = qcow2_cache_create(bs, l2_cache_size); - s->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size); - if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) { - error_setg(errp, "Could not allocate metadata caches"); - ret = -ENOMEM; + /* Parse driver-specific options */ + ret = qcow2_update_options(bs, options, flags, errp); + if (ret < 0) { goto fail; } s->cluster_cache = g_malloc(s->cluster_size); /* one more sector for decompressed data alignment */ - s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS + s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512); if 
(s->cluster_data == NULL) { error_setg(errp, "Could not allocate temporary cluster buffer"); @@ -887,7 +1119,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, ret = -EINVAL; goto fail; } - ret = bdrv_pread(bs->file, header.backing_file_offset, + ret = bdrv_pread(bs->file->bs, header.backing_file_offset, bs->backing_file, len); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read backing file name"); @@ -932,70 +1164,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, } } - /* Enable lazy_refcounts according to image and command line options */ - s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS, - (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS)); - - s->discard_passthrough[QCOW2_DISCARD_NEVER] = false; - s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true; - s->discard_passthrough[QCOW2_DISCARD_REQUEST] = - qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST, - flags & BDRV_O_UNMAP); - s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] = - qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true); - s->discard_passthrough[QCOW2_DISCARD_OTHER] = - qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false); - - opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP); - opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE); - if (opt_overlap_check_template && opt_overlap_check && - strcmp(opt_overlap_check_template, opt_overlap_check)) - { - error_setg(errp, "Conflicting values for qcow2 options '" - QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE - "' ('%s')", opt_overlap_check, opt_overlap_check_template); - ret = -EINVAL; - goto fail; - } - if (!opt_overlap_check) { - opt_overlap_check = opt_overlap_check_template ?: "cached"; - } - - if (!strcmp(opt_overlap_check, "none")) { - overlap_check_template = 0; - } else if (!strcmp(opt_overlap_check, "constant")) { - overlap_check_template = QCOW2_OL_CONSTANT; - } else if (!strcmp(opt_overlap_check, 
"cached")) { - overlap_check_template = QCOW2_OL_CACHED; - } else if (!strcmp(opt_overlap_check, "all")) { - overlap_check_template = QCOW2_OL_ALL; - } else { - error_setg(errp, "Unsupported value '%s' for qcow2 option " - "'overlap-check'. Allowed are either of the following: " - "none, constant, cached, all", opt_overlap_check); - ret = -EINVAL; - goto fail; - } - - s->overlap_check = 0; - for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) { - /* overlap-check defines a template bitmask, but every flag may be - * overwritten through the associated boolean option */ - s->overlap_check |= - qemu_opt_get_bool(opts, overlap_bool_option_names[i], - overlap_check_template & (1 << i)) << i; - } - - qemu_opts_del(opts); - opts = NULL; - - if (s->use_lazy_refcounts && s->qcow_version < 3) { - error_setg(errp, "Lazy refcounts require a qcow2 image with at least " - "qemu 1.1 compatibility level"); - ret = -EINVAL; - goto fail; - } - #ifdef DEBUG_ALLOC { BdrvCheckResult result = {0}; @@ -1005,7 +1173,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, return ret; fail: - qemu_opts_del(opts); g_free(s->unknown_header_fields); cleanup_unknown_header_ext(bs); qcow2_free_snapshots(bs); @@ -1013,6 +1180,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; + cache_clean_timer_del(bs); if (s->l2_table_cache) { qcow2_cache_destroy(bs, s->l2_table_cache); } @@ -1026,14 +1194,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bs->bl.write_zeroes_alignment = s->cluster_sectors; } static int qcow2_set_key(BlockDriverState *bs, const char *key) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint8_t keybuf[16]; int len, i; Error *err = NULL; @@ -1066,32 +1234,58 
@@ static int qcow2_set_key(BlockDriverState *bs, const char *key) return 0; } -/* We have no actual commit/abort logic for qcow2, but we need to write out any - * unwritten data if we reopen read-only. */ static int qcow2_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue, Error **errp) { + Qcow2ReopenState *r; int ret; + r = g_new0(Qcow2ReopenState, 1); + state->opaque = r; + + ret = qcow2_update_options_prepare(state->bs, r, state->options, + state->flags, errp); + if (ret < 0) { + goto fail; + } + + /* We need to write out any unwritten data if we reopen read-only. */ if ((state->flags & BDRV_O_RDWR) == 0) { ret = bdrv_flush(state->bs); if (ret < 0) { - return ret; + goto fail; } ret = qcow2_mark_clean(state->bs); if (ret < 0) { - return ret; + goto fail; } } return 0; + +fail: + qcow2_update_options_abort(state->bs, r); + g_free(r); + return ret; +} + +static void qcow2_reopen_commit(BDRVReopenState *state) +{ + qcow2_update_options_commit(state->bs, state->opaque); + g_free(state->opaque); +} + +static void qcow2_reopen_abort(BDRVReopenState *state) +{ + qcow2_update_options_abort(state->bs, state->opaque); + g_free(state->opaque); } static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t cluster_offset; int index_in_cluster, ret; int64_t status = 0; @@ -1138,7 +1332,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov, static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, int remaining_sectors, QEMUIOVector *qiov) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int index_in_cluster, n1; int ret; int cur_nr_sectors; /* number of sectors in current iteration */ @@ -1175,9 +1369,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, switch (ret) { case QCOW2_CLUSTER_UNALLOCATED: - if (bs->backing_hd) { + if 
(bs->backing) { /* read from the base image */ - n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, + n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov, sector_num, cur_nr_sectors); if (n1 > 0) { QEMUIOVector local_qiov; @@ -1188,7 +1382,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->backing_hd, sector_num, + ret = bdrv_co_readv(bs->backing->bs, sector_num, n1, &local_qiov); qemu_co_mutex_lock(&s->lock); @@ -1235,8 +1429,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, */ if (!cluster_data) { cluster_data = - qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS - * s->cluster_size); + qemu_try_blockalign(bs->file->bs, + QCOW_MAX_CRYPT_CLUSTERS + * s->cluster_size); if (cluster_data == NULL) { ret = -ENOMEM; goto fail; @@ -1252,7 +1447,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file, + ret = bdrv_co_readv(bs->file->bs, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -1300,7 +1495,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, int remaining_sectors, QEMUIOVector *qiov) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int index_in_cluster; int ret; int cur_nr_sectors; /* number of sectors in current iteration */ @@ -1349,7 +1544,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, Error *err = NULL; assert(s->cipher); if (!cluster_data) { - cluster_data = qemu_try_blockalign(bs->file, + cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); if (cluster_data == NULL) { @@ -1386,7 +1581,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs, BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); 
trace_qcow2_writev_data(qemu_coroutine_self(), (cluster_offset >> 9) + index_in_cluster); - ret = bdrv_co_writev(bs->file, + ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + index_in_cluster, cur_nr_sectors, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -1446,7 +1641,7 @@ fail: static void qcow2_close(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_vfree(s->l1_table); /* else pre-write overlap checks in cache_destroy may crash */ s->l1_table = NULL; @@ -1471,6 +1666,7 @@ static void qcow2_close(BlockDriverState *bs) } } + cache_clean_timer_del(bs); qcow2_cache_destroy(bs, s->l2_table_cache); qcow2_cache_destroy(bs, s->refcount_block_cache); @@ -1491,7 +1687,7 @@ static void qcow2_close(BlockDriverState *bs) static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int flags = s->flags; QCryptoCipher *cipher = NULL; QDict *options; @@ -1508,13 +1704,13 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp) qcow2_close(bs); - bdrv_invalidate_cache(bs->file, &local_err); + bdrv_invalidate_cache(bs->file->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return; } - memset(s, 0, sizeof(BDRVQcowState)); + memset(s, 0, sizeof(BDRVQcow2State)); options = qdict_clone_shallow(bs->options); ret = qcow2_open(bs, options, flags, &local_err); @@ -1561,7 +1757,7 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s, */ int qcow2_update_header(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; QCowHeader *header; char *buf; size_t buflen = s->cluster_size; @@ -1716,7 +1912,7 @@ int qcow2_update_header(BlockDriverState *bs) } /* Write the new header */ - ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size); + ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size); if (ret < 0) { goto fail; } @@ -1730,7 +1926,7 @@ fail: static int 
qcow2_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: ""); pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: ""); @@ -1796,7 +1992,8 @@ static int preallocate(BlockDriverState *bs) if (host_offset != 0) { uint8_t buf[BDRV_SECTOR_SIZE]; memset(buf, 0, BDRV_SECTOR_SIZE); - ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1, + ret = bdrv_write(bs->file->bs, + (host_offset >> BDRV_SECTOR_BITS) + num - 1, buf, 1); if (ret < 0) { return ret; @@ -1812,8 +2009,10 @@ static int qcow2_create2(const char *filename, int64_t total_size, QemuOpts *opts, int version, int refcount_order, Error **errp) { - /* Calculate cluster_bits */ int cluster_bits; + QDict *options; + + /* Calculate cluster_bits */ cluster_bits = ctz32(cluster_size); if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS || (1 << cluster_bits) != cluster_size) @@ -1912,7 +2111,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, bs = NULL; ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); return ret; @@ -1971,9 +2170,11 @@ static int qcow2_create2(const char *filename, int64_t total_size, * refcount of the cluster that is occupied by the header and the refcount * table) */ - ret = bdrv_open(&bs, filename, NULL, NULL, + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow2")); + ret = bdrv_open(&bs, filename, NULL, options, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - &bdrv_qcow2, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -2009,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size, /* And if we're supposed to preallocate metadata, do that now */ if 
(prealloc != PREALLOC_MODE_OFF) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_co_mutex_lock(&s->lock); ret = preallocate(bs); qemu_co_mutex_unlock(&s->lock); @@ -2023,9 +2224,11 @@ static int qcow2_create2(const char *filename, int64_t total_size, bs = NULL; /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */ - ret = bdrv_open(&bs, filename, NULL, NULL, + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow2")); + ret = bdrv_open(&bs, filename, NULL, options, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING, - &bdrv_qcow2, &local_err); + &local_err); if (local_err) { error_propagate(errp, local_err); goto out; @@ -2142,7 +2345,7 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { int ret; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; /* Emulate misaligned zero writes */ if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) { @@ -2162,7 +2365,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { int ret; - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; qemu_co_mutex_lock(&s->lock); ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS, @@ -2173,7 +2376,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs, static int qcow2_truncate(BlockDriverState *bs, int64_t offset) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t new_l1_size; int ret; @@ -2202,7 +2405,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) /* write updated header.size */ offset = cpu_to_be64(offset); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size), + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size), &offset, sizeof(uint64_t)); if (ret < 0) { return ret; @@ -2217,7 +2420,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset) 
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; z_stream strm; int ret, out_len; uint8_t *out_buf; @@ -2226,8 +2429,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, if (nb_sectors == 0) { /* align end of file to a sector boundary to ease reading with sector based I/Os */ - cluster_offset = bdrv_getlength(bs->file); - return bdrv_truncate(bs->file, cluster_offset); + cluster_offset = bdrv_getlength(bs->file->bs); + return bdrv_truncate(bs->file->bs, cluster_offset); } if (nb_sectors != s->cluster_sectors) { @@ -2294,7 +2497,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED); - ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len); + ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len); if (ret < 0) { goto fail; } @@ -2308,7 +2511,7 @@ fail: static int make_completely_empty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret, l1_clusters; int64_t offset; uint64_t *new_reftable = NULL; @@ -2343,7 +2546,7 @@ static int make_completely_empty(BlockDriverState *bs) /* After this call, neither the in-memory nor the on-disk refcount * information accurately describe the actual references */ - ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE, + ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE, l1_clusters * s->cluster_sectors, 0); if (ret < 0) { goto fail_broken_refcounts; @@ -2357,7 +2560,7 @@ static int make_completely_empty(BlockDriverState *bs) * overwrite parts of the existing refcount and L1 table, which is not * an issue because the dirty flag is set, complete data loss is in fact * desired and partial data loss is consequently fine as well */ - ret = bdrv_write_zeroes(bs->file, s->cluster_size / 
BDRV_SECTOR_SIZE, + ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE, (2 + l1_clusters) * s->cluster_size / BDRV_SECTOR_SIZE, 0); /* This call (even if it failed overall) may have overwritten on-disk @@ -2377,7 +2580,7 @@ static int make_completely_empty(BlockDriverState *bs) cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size); cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size); cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1); - ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset), + ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset), &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls)); if (ret < 0) { goto fail_broken_refcounts; @@ -2408,7 +2611,7 @@ static int make_completely_empty(BlockDriverState *bs) /* Enter the first refblock into the reftable */ rt_entry = cpu_to_be64(2 * s->cluster_size); - ret = bdrv_pwrite_sync(bs->file, s->cluster_size, + ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size, &rt_entry, sizeof(rt_entry)); if (ret < 0) { goto fail_broken_refcounts; @@ -2433,7 +2636,7 @@ static int make_completely_empty(BlockDriverState *bs) goto fail; } - ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size); + ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size); if (ret < 0) { goto fail; } @@ -2456,7 +2659,7 @@ fail: static int qcow2_make_empty(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; uint64_t start_sector; int sector_step = INT_MAX / BDRV_SECTOR_SIZE; int l1_clusters, ret = 0; @@ -2497,7 +2700,7 @@ static int qcow2_make_empty(BlockDriverState *bs) static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int ret; qemu_co_mutex_lock(&s->lock); @@ -2521,7 +2724,7 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs) static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { - 
BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bdi->unallocated_blocks_are_zero = true; bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3); bdi->cluster_size = s->cluster_size; @@ -2531,22 +2734,20 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1); *spec_info = (ImageInfoSpecific){ - .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2, - { - .qcow2 = g_new(ImageInfoSpecificQCow2, 1), - }, + .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2, + .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1), }; if (s->qcow_version == 2) { - *spec_info->qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ .compat = g_strdup("0.10"), .refcount_bits = s->refcount_bits, }; } else if (s->qcow_version == 3) { - *spec_info->qcow2 = (ImageInfoSpecificQCow2){ + *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){ .compat = g_strdup("1.1"), .lazy_refcounts = s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS, @@ -2564,11 +2765,11 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs) #if 0 static void dump_refcounts(BlockDriverState *bs) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t nb_clusters, k, k1, size; int refcount; - size = bdrv_getlength(bs->file); + size = bdrv_getlength(bs->file->bs); nb_clusters = size_to_clusters(s, size); for(k = 0; k < nb_clusters;) { k1 = k; @@ -2585,7 +2786,7 @@ static void dump_refcounts(BlockDriverState *bs) static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int64_t total_sectors = bs->total_sectors; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; @@ -2606,7 +2807,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, static int 
qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, int64_t pos, int size) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; bool zero_beyond_eof = bs->zero_beyond_eof; int ret; @@ -2625,7 +2826,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf, static int qcow2_downgrade(BlockDriverState *bs, int target_version, BlockDriverAmendStatusCB *status_cb) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int current_version = s->qcow_version; int ret; @@ -2689,7 +2890,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version, static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, BlockDriverAmendStatusCB *status_cb) { - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; int old_version = s->qcow_version, new_version = old_version; uint64_t new_size = 0; const char *backing_file = NULL, *backing_format = NULL; @@ -2836,7 +3037,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts, void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset, int64_t size, const char *message_format, ...) 
{ - BDRVQcowState *s = bs->opaque; + BDRVQcow2State *s = bs->opaque; const char *node_name; char *message; va_list ap; @@ -2937,11 +3138,13 @@ static QemuOptsList qcow2_create_opts = { BlockDriver bdrv_qcow2 = { .format_name = "qcow2", - .instance_size = sizeof(BDRVQcowState), + .instance_size = sizeof(BDRVQcow2State), .bdrv_probe = qcow2_probe, .bdrv_open = qcow2_open, .bdrv_close = qcow2_close, .bdrv_reopen_prepare = qcow2_reopen_prepare, + .bdrv_reopen_commit = qcow2_reopen_commit, + .bdrv_reopen_abort = qcow2_reopen_abort, .bdrv_create = qcow2_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_get_block_status = qcow2_co_get_block_status, @@ -2977,6 +3180,9 @@ BlockDriver bdrv_qcow2 = { .create_opts = &qcow2_create_opts, .bdrv_check = qcow2_check, .bdrv_amend_options = qcow2_amend_options, + + .bdrv_detach_aio_context = qcow2_detach_aio_context, + .bdrv_attach_aio_context = qcow2_attach_aio_context, }; static void bdrv_qcow2_init(void) diff --git a/block/qcow2.h b/block/qcow2.h index 4b5a6afc8..b8c500b9d 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -26,7 +26,7 @@ #define BLOCK_QCOW2_H #include "crypto/cipher.h" -#include "block/coroutine.h" +#include "qemu/coroutine.h" //#define DEBUG_ALLOC //#define DEBUG_ALLOC2 @@ -96,6 +96,7 @@ #define QCOW2_OPT_CACHE_SIZE "cache-size" #define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size" #define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size" +#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval" typedef struct QCowHeader { uint32_t magic; @@ -221,7 +222,7 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array, typedef void Qcow2SetRefcountFunc(void *refcount_array, uint64_t index, uint64_t value); -typedef struct BDRVQcowState { +typedef struct BDRVQcow2State { int cluster_bits; int cluster_size; int cluster_sectors; @@ -239,6 +240,8 @@ typedef struct BDRVQcowState { Qcow2Cache* l2_table_cache; Qcow2Cache* refcount_block_cache; + QEMUTimer *cache_clean_timer; + unsigned 
cache_clean_interval; uint8_t *cluster_cache; uint8_t *cluster_data; @@ -290,9 +293,7 @@ typedef struct BDRVQcowState { * override) */ char *image_backing_file; char *image_backing_format; -} BDRVQcowState; - -struct QCowAIOCB; +} BDRVQcow2State; typedef struct Qcow2COWRegion { /** @@ -402,28 +403,28 @@ typedef enum QCow2MetadataOverlap { #define REFT_OFFSET_MASK 0xfffffffffffffe00ULL -static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset) +static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset) { return offset & ~(s->cluster_size - 1); } -static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset) +static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset) { return offset & (s->cluster_size - 1); } -static inline uint64_t size_to_clusters(BDRVQcowState *s, uint64_t size) +static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size) { return (size + (s->cluster_size - 1)) >> s->cluster_bits; } -static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size) +static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size) { int shift = s->cluster_bits + s->l2_bits; return (size + (1ULL << shift) - 1) >> shift; } -static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset) +static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset) { return (offset >> s->cluster_bits) & (s->l2_size - 1); } @@ -434,12 +435,12 @@ static inline int64_t align_offset(int64_t offset, int n) return offset; } -static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s) +static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s) { return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits); } -static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s) +static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s) { return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits; } @@ -458,7 +459,7 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry) } /* 
Check whether refcounts are eager or lazy */ -static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s) +static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s) { return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY); } @@ -534,7 +535,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index); void qcow2_l2_cache_reset(BlockDriverState *bs); int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset); -int qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num, +int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num, uint8_t *out_buf, const uint8_t *in_buf, int nb_sectors, bool enc, Error **errp); @@ -581,6 +582,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c, Qcow2Cache *dependency); void qcow2_cache_depends_on_flush(Qcow2Cache *c); +void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c); int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset, diff --git a/block/qed-table.c b/block/qed-table.c index 513aa872c..f4219b8ac 100644 --- a/block/qed-table.c +++ b/block/qed-table.c @@ -63,7 +63,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size, qemu_iovec_init_external(qiov, &read_table_cb->iov, 1); - bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov, + bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov, qiov->size / BDRV_SECTOR_SIZE, qed_read_table_cb, read_table_cb); } @@ -152,7 +152,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table, /* Adjust for offset into table */ offset += start * sizeof(uint64_t); - bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, + bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, &write_table_cb->qiov, write_table_cb->qiov.size / 
BDRV_SECTOR_SIZE, qed_write_table_cb, write_table_cb); diff --git a/block/qed.c b/block/qed.c index 954ed007c..9b8889503 100644 --- a/block/qed.c +++ b/block/qed.c @@ -82,7 +82,7 @@ int qed_write_header_sync(BDRVQEDState *s) int ret; qed_header_cpu_to_le(&s->header, &le); - ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le)); + ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le)); if (ret != sizeof(le)) { return ret; } @@ -119,7 +119,7 @@ static void qed_write_header_read_cb(void *opaque, int ret) /* Update header */ qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf); - bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov, + bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov, write_header_cb->nsectors, qed_write_header_cb, write_header_cb); } @@ -152,7 +152,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb, write_header_cb->iov.iov_len = len; qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1); - bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors, + bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors, qed_write_header_read_cb, write_header_cb); } @@ -354,12 +354,6 @@ static void qed_cancel_need_check_timer(BDRVQEDState *s) timer_del(s->need_check_timer); } -static void bdrv_qed_rebind(BlockDriverState *bs) -{ - BDRVQEDState *s = bs->opaque; - s->bs = bs; -} - static void bdrv_qed_detach_aio_context(BlockDriverState *bs) { BDRVQEDState *s = bs->opaque; @@ -381,6 +375,18 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, } } +static void bdrv_qed_drain(BlockDriverState *bs) +{ + BDRVQEDState *s = bs->opaque; + + /* Cancel timer and start doing I/O that were meant to happen as if it + * fired, that way we get bdrv_drain() taking care of the ongoing requests + * correctly. 
*/ + qed_cancel_need_check_timer(s); + qed_plug_allocating_write_reqs(s); + bdrv_aio_flush(s->bs, qed_clear_need_check, s); +} + static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -392,7 +398,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, s->bs = bs; QSIMPLEQ_INIT(&s->allocating_write_reqs); - ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header)); + ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header)); if (ret < 0) { return ret; } @@ -416,7 +422,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } /* Round down file size to the last cluster */ - file_size = bdrv_getlength(bs->file); + file_size = bdrv_getlength(bs->file->bs); if (file_size < 0) { return file_size; } @@ -452,7 +458,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, return -EINVAL; } - ret = qed_read_string(bs->file, s->header.backing_filename_offset, + ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset, s->header.backing_filename_size, bs->backing_file, sizeof(bs->backing_file)); if (ret < 0) { @@ -471,7 +477,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, * feature is no longer valid. */ if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 && - !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) { + !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) { s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK; ret = qed_write_header_sync(s); @@ -480,7 +486,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, } /* From here on only known autoclear feature bits are valid */ - bdrv_flush(bs->file); + bdrv_flush(bs->file->bs); } s->l1_table = qed_alloc_table(s); @@ -498,7 +504,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags, * potentially inconsistent images to be opened read-only. 
This can * aid data recovery from an otherwise inconsistent image. */ - if (!bdrv_is_read_only(bs->file) && + if (!bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) { BdrvCheckResult result = {0}; @@ -541,7 +547,7 @@ static void bdrv_qed_close(BlockDriverState *bs) bdrv_qed_detach_aio_context(bs); /* Ensure writes reach stable storage */ - bdrv_flush(bs->file); + bdrv_flush(bs->file->bs); /* Clean shutdown, no check required on next open */ if (s->header.features & QED_F_NEED_CHECK) { @@ -583,7 +589,7 @@ static int qed_create(const char *filename, uint32_t cluster_size, bs = NULL; ret = bdrv_open(&bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL, + BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, &local_err); if (ret < 0) { error_propagate(errp, local_err); @@ -772,8 +778,8 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* If there is a backing file, get its length. Treat the absence of a * backing file like a zero length backing file. 
*/ - if (s->bs->backing_hd) { - int64_t l = bdrv_getlength(s->bs->backing_hd); + if (s->bs->backing) { + int64_t l = bdrv_getlength(s->bs->backing->bs); if (l < 0) { cb(opaque, l); return; @@ -802,7 +808,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, qemu_iovec_concat(*backing_qiov, qiov, 0, size); BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); - bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, + bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE, *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque); } @@ -839,7 +845,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret) } BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE); - bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE, + bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE, ©_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE, qed_copy_from_backing_file_cb, copy_cb); } @@ -1055,7 +1061,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret) QEDAIOCB *acb = opaque; BDRVQEDState *s = acb_to_s(acb); - if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) { + if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) { qed_aio_complete(acb, -EIO); } } @@ -1081,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret) if (acb->find_cluster_ret == QED_CLUSTER_FOUND) { next_fn = qed_aio_next_io; } else { - if (s->bs->backing_hd) { + if (s->bs->backing) { next_fn = qed_aio_write_flush_before_l2_update; } else { next_fn = qed_aio_write_l2_update_cb; @@ -1089,7 +1095,7 @@ static void qed_aio_write_main(void *opaque, int ret) } BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO); - bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE, + bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, next_fn, acb); } @@ -1139,7 +1145,7 @@ static void qed_aio_write_prefill(void *opaque, int ret) static bool 
qed_should_set_need_check(BDRVQEDState *s) { /* The flush before L2 update path ensures consistency */ - if (s->bs->backing_hd) { + if (s->bs->backing) { return false; } @@ -1321,7 +1327,7 @@ static void qed_aio_read_data(void *opaque, int ret, } BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE, + bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE, &acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE, qed_aio_next_io, acb); return; @@ -1443,7 +1449,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs, struct iovec iov; /* Refuse if there are untouched backing file sectors */ - if (bs->backing_hd) { + if (bs->backing) { if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) { return -ENOTSUP; } @@ -1580,7 +1586,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs, } /* Write new header */ - ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len); + ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len); g_free(buffer); if (ret == 0) { memcpy(&s->header, &new_header, sizeof(new_header)); @@ -1596,7 +1602,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp) bdrv_qed_close(bs); - bdrv_invalidate_cache(bs->file, &local_err); + bdrv_invalidate_cache(bs->file->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -1664,7 +1670,6 @@ static BlockDriver bdrv_qed = { .supports_backing = true, .bdrv_probe = bdrv_qed_probe, - .bdrv_rebind = bdrv_qed_rebind, .bdrv_open = bdrv_qed_open, .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, @@ -1683,6 +1688,7 @@ static BlockDriver bdrv_qed = { .bdrv_check = bdrv_qed_check, .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, + .bdrv_drain = bdrv_qed_drain, }; static void bdrv_qed_init(void) diff --git a/block/quorum.c b/block/quorum.c index 2f6c45f76..e640688eb 100644 --- a/block/quorum.c +++ 
b/block/quorum.c @@ -64,7 +64,7 @@ typedef struct QuorumVotes { /* the following structure holds the state of one quorum instance */ typedef struct BDRVQuorumState { - BlockDriverState **bs; /* children BlockDriverStates */ + BdrvChild **children; /* children BlockDriverStates */ int num_children; /* children count */ int threshold; /* if less than threshold children reads gave the * same result a quorum error occurs. @@ -336,7 +336,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s, continue; } QLIST_FOREACH(item, &version->items, next) { - quorum_report_bad(acb, s->bs[item->index]->node_name, 0); + quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0); } } } @@ -369,8 +369,9 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb, continue; } QLIST_FOREACH(item, &version->items, next) { - bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov, - acb->nb_sectors, quorum_rewrite_aio_cb, acb); + bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num, + acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb, + acb); } } @@ -639,14 +640,15 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb) int i; for (i = 0; i < s->num_children; i++) { - acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size); + acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size); qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov); qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf); } for (i = 0; i < s->num_children; i++) { - bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov, - acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]); + acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num, + &acb->qcrs[i].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[i]); } return &acb->common; @@ -656,14 +658,15 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb) { BDRVQuorumState *s = acb->common.bs->opaque; - acb->qcrs[acb->child_iter].buf = 
qemu_blockalign(s->bs[acb->child_iter], - acb->qiov->size); + acb->qcrs[acb->child_iter].buf = + qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size); qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov); qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov, acb->qcrs[acb->child_iter].buf); - bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num, - &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, - quorum_aio_cb, &acb->qcrs[acb->child_iter]); + acb->qcrs[acb->child_iter].aiocb = + bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num, + &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors, + quorum_aio_cb, &acb->qcrs[acb->child_iter]); return &acb->common; } @@ -702,8 +705,8 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs, int i; for (i = 0; i < s->num_children; i++) { - acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov, - nb_sectors, &quorum_aio_cb, + acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num, + qiov, nb_sectors, &quorum_aio_cb, &acb->qcrs[i]); } @@ -717,12 +720,12 @@ static int64_t quorum_getlength(BlockDriverState *bs) int i; /* check that all file have the same length */ - result = bdrv_getlength(s->bs[0]); + result = bdrv_getlength(s->children[0]->bs); if (result < 0) { return result; } for (i = 1; i < s->num_children; i++) { - int64_t value = bdrv_getlength(s->bs[i]); + int64_t value = bdrv_getlength(s->children[i]->bs); if (value < 0) { return value; } @@ -741,7 +744,7 @@ static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp) int i; for (i = 0; i < s->num_children; i++) { - bdrv_invalidate_cache(s->bs[i], &local_err); + bdrv_invalidate_cache(s->children[i]->bs, &local_err); if (local_err) { error_propagate(errp, local_err); return; @@ -762,7 +765,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs) error_votes.compare = quorum_64bits_compare; for (i = 0; i < s->num_children; i++) { - result = bdrv_co_flush(s->bs[i]); + result = 
bdrv_co_flush(s->children[i]->bs); result_value.l = result; quorum_count_vote(&error_votes, &result_value, i); } @@ -782,7 +785,7 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs, int i; for (i = 0; i < s->num_children; i++) { - bool perm = bdrv_recurse_is_first_non_filter(s->bs[i], + bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs, candidate); if (perm) { return true; @@ -889,6 +892,12 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, } s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0); + /* and validate it against s->num_children */ + ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err); + if (ret < 0) { + goto exit; + } + ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN)); if (ret < 0) { error_setg(&local_err, "Please set read-pattern as fifo or quorum"); @@ -897,12 +906,6 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, s->read_pattern = ret; if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) { - /* and validate it against s->num_children */ - ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err); - if (ret < 0) { - goto exit; - } - /* is the driver in blkverify mode */ if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) && s->num_children == 2 && s->threshold == 2) { @@ -922,8 +925,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, } } - /* allocate the children BlockDriverState array */ - s->bs = g_new0(BlockDriverState *, s->num_children); + /* allocate the children array */ + s->children = g_new0(BdrvChild *, s->num_children); opened = g_new0(bool, s->num_children); for (i = 0; i < s->num_children; i++) { @@ -931,9 +934,10 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags, ret = snprintf(indexstr, 32, "children.%d", i); assert(ret < 32); - ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs, - &child_format, false, &local_err); - 
if (ret < 0) { + s->children[i] = bdrv_open_child(NULL, options, indexstr, bs, + &child_format, false, &local_err); + if (local_err) { + ret = -EINVAL; goto close_exit; } @@ -949,9 +953,9 @@ close_exit: if (!opened[i]) { continue; } - bdrv_unref(s->bs[i]); + bdrv_unref_child(bs, s->children[i]); } - g_free(s->bs); + g_free(s->children); g_free(opened); exit: qemu_opts_del(opts); @@ -968,10 +972,10 @@ static void quorum_close(BlockDriverState *bs) int i; for (i = 0; i < s->num_children; i++) { - bdrv_unref(s->bs[i]); + bdrv_unref_child(bs, s->children[i]); } - g_free(s->bs); + g_free(s->children); } static void quorum_detach_aio_context(BlockDriverState *bs) @@ -980,7 +984,7 @@ static void quorum_detach_aio_context(BlockDriverState *bs) int i; for (i = 0; i < s->num_children; i++) { - bdrv_detach_aio_context(s->bs[i]); + bdrv_detach_aio_context(s->children[i]->bs); } } @@ -991,7 +995,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs, int i; for (i = 0; i < s->num_children; i++) { - bdrv_attach_aio_context(s->bs[i], new_context); + bdrv_attach_aio_context(s->children[i]->bs, new_context); } } @@ -1003,16 +1007,17 @@ static void quorum_refresh_filename(BlockDriverState *bs) int i; for (i = 0; i < s->num_children; i++) { - bdrv_refresh_filename(s->bs[i]); - if (!s->bs[i]->full_open_options) { + bdrv_refresh_filename(s->children[i]->bs); + if (!s->children[i]->bs->full_open_options) { return; } } children = qlist_new(); for (i = 0; i < s->num_children; i++) { - QINCREF(s->bs[i]->full_open_options); - qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options)); + QINCREF(s->children[i]->bs->full_open_options); + qlist_append_obj(children, + QOBJECT(s->children[i]->bs->full_open_options)); } opts = qdict_new(); diff --git a/block/raw-posix.c b/block/raw-posix.c index 855febed5..2fff1843c 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -127,11 +127,6 @@ do { \ #define FTYPE_FILE 0 #define FTYPE_CD 1 -#define FTYPE_FD 2 - -/* if the FD is not 
accessed during that time (in ns), we try to - reopen it to see if the disk has been changed */ -#define FD_OPEN_TIMEOUT (1000000000) #define MAX_BLOCKSIZE 4096 @@ -141,13 +136,6 @@ typedef struct BDRVRawState { int open_flags; size_t buf_align; -#if defined(__linux__) - /* linux floppy specific */ - int64_t fd_open_time; - int64_t fd_error_time; - int fd_got_error; - int fd_media_changed; -#endif #ifdef CONFIG_LINUX_AIO int use_aio; void *aio_ctx; @@ -519,7 +507,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, "future QEMU versions.\n", bs->filename); } -#endif +#else + if (bdrv_flags & BDRV_O_NATIVE_AIO) { + error_printf("WARNING: aio=native was specified for '%s', but " + "is not supported in this build. Falling back to " + "aio=threads.\n" + " This will become an error condition in " + "future QEMU versions.\n", + bs->filename); + } +#endif /* !defined(CONFIG_LINUX_AIO) */ s->has_discard = true; s->has_write_zeroes = true; @@ -626,7 +623,7 @@ static int raw_reopen_prepare(BDRVReopenState *state, } #endif - if (s->type == FTYPE_FD || s->type == FTYPE_CD) { + if (s->type == FTYPE_CD) { raw_s->open_flags |= O_NONBLOCK; } @@ -670,11 +667,17 @@ static int raw_reopen_prepare(BDRVReopenState *state, /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */ if (raw_s->fd == -1) { - assert(!(raw_s->open_flags & O_CREAT)); - raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags); - if (raw_s->fd == -1) { - error_setg_errno(errp, errno, "Could not reopen file"); - ret = -1; + const char *normalized_filename = state->bs->filename; + ret = raw_normalize_devicepath(&normalized_filename); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not normalize device path"); + } else { + assert(!(raw_s->open_flags & O_CREAT)); + raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags); + if (raw_s->fd == -1) { + error_setg_errno(errp, errno, "Could not reopen file"); + ret = -1; + } } } @@ -780,7 +783,6 @@ static int 
hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { BDRVRawState *s = bs->opaque; struct hd_geometry ioctl_geo = {0}; - uint32_t blksize; /* If DASD, get its geometry */ if (check_for_dasd(s->fd) < 0) { @@ -800,12 +802,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo) } geo->heads = ioctl_geo.heads; geo->sectors = ioctl_geo.sectors; - if (!probe_physical_blocksize(s->fd, &blksize)) { - /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */ - geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE) - / (geo->heads * geo->sectors); - return 0; - } geo->cylinders = ioctl_geo.cylinders; return 0; @@ -1253,7 +1249,7 @@ static int aio_worker(void *arg) break; } - g_slice_free(RawPosixAIOData, aiocb); + g_free(aiocb); return ret; } @@ -1261,7 +1257,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, int type) { - RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); ThreadPool *pool; acb->bs = bs; @@ -1286,7 +1282,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque, int type) { - RawPosixAIOData *acb = g_slice_new(RawPosixAIOData); + RawPosixAIOData *acb = g_new(RawPosixAIOData, 1); ThreadPool *pool; acb->bs = bs; @@ -1642,7 +1638,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } - fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, + fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0644); if (fd < 0) { result = -errno; @@ -1973,8 +1969,8 @@ BlockDriver bdrv_file = { #if defined(__APPLE__) && defined(__MACH__) static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ); -static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ); - +static kern_return_t 
GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, + CFIndex maxPathSize, int flags); kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) { kern_return_t kernResult; @@ -2001,7 +1997,8 @@ kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator ) return kernResult; } -kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize ) +kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath, + CFIndex maxPathSize, int flags) { io_object_t nextMedia; kern_return_t kernResult = KERN_FAILURE; @@ -2014,7 +2011,9 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma if ( bsdPathAsCFString ) { size_t devPathLength; strcpy( bsdPath, _PATH_DEV ); - strcat( bsdPath, "r" ); + if (flags & BDRV_O_NOCACHE) { + strcat(bsdPath, "r"); + } devPathLength = strlen( bsdPath ); if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) { kernResult = KERN_SUCCESS; @@ -2126,8 +2125,8 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, int fd; kernResult = FindEjectableCDMedia( &mediaIterator ); - kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) ); - + kernResult = GetBSDPath(mediaIterator, bsdPath, sizeof(bsdPath), + flags); if ( bsdPath[ 0 ] != '\0' ) { strcat(bsdPath,"s0"); /* some CDs don't have a partition 0 */ @@ -2172,53 +2171,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, } #if defined(__linux__) -/* Note: we do not have a reliable method to detect if the floppy is - present. The current method is to try to open the floppy at every - I/O and to keep it opened during a few hundreds of ms. 
*/ -static int fd_open(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int last_media_present; - - if (s->type != FTYPE_FD) - return 0; - last_media_present = (s->fd >= 0); - if (s->fd >= 0 && - (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) { - qemu_close(s->fd); - s->fd = -1; - DPRINTF("Floppy closed\n"); - } - if (s->fd < 0) { - if (s->fd_got_error && - (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) { - DPRINTF("No floppy (open delayed)\n"); - return -EIO; - } - s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK); - if (s->fd < 0) { - s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - s->fd_got_error = 1; - if (last_media_present) - s->fd_media_changed = 1; - DPRINTF("No floppy\n"); - return -EIO; - } - DPRINTF("Floppy opened\n"); - } - if (!last_media_present) - s->fd_media_changed = 1; - s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); - s->fd_got_error = 0; - return 0; -} - -static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - BDRVRawState *s = bs->opaque; - - return ioctl(s->fd, req, buf); -} static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, unsigned long int req, void *buf, @@ -2231,7 +2183,7 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, if (fd_open(bs) < 0) return NULL; - acb = g_slice_new(RawPosixAIOData); + acb = g_new(RawPosixAIOData, 1); acb->bs = bs; acb->aio_type = QEMU_AIO_IOCTL; acb->aio_fildes = s->fd; @@ -2241,8 +2193,8 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs, pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque); } +#endif /* linux */ -#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) static int fd_open(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; @@ -2252,14 +2204,6 @@ static int fd_open(BlockDriverState *bs) return 0; return -EIO; } -#else /* !linux && !FreeBSD */ - -static int 
fd_open(BlockDriverState *bs) -{ - return 0; -} - -#endif /* !linux && !FreeBSD */ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors, @@ -2303,17 +2247,22 @@ static int hdev_create(const char *filename, QemuOpts *opts, int64_t total_size = 0; bool has_prefix; - /* This function is used by all three protocol block drivers and therefore - * any of these three prefixes may be given. + /* This function is used by both protocol block drivers and therefore either + * of these prefixes may be given. * The return value has to be stored somewhere, otherwise this is an error * due to -Werror=unused-value. */ has_prefix = strstart(filename, "host_device:", &filename) || - strstart(filename, "host_cdrom:" , &filename) || - strstart(filename, "host_floppy:", &filename); + strstart(filename, "host_cdrom:" , &filename); (void)has_prefix; + ret = raw_normalize_devicepath(&filename); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not normalize device path"); + return ret; + } + /* Read out options */ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), BDRV_SECTOR_SIZE); @@ -2379,160 +2328,10 @@ static BlockDriver bdrv_host_device = { /* generic scsi device */ #ifdef __linux__ - .bdrv_ioctl = hdev_ioctl, .bdrv_aio_ioctl = hdev_aio_ioctl, #endif }; -#ifdef __linux__ -static void floppy_parse_filename(const char *filename, QDict *options, - Error **errp) -{ - /* The prefix is optional, just as for "file". 
*/ - strstart(filename, "host_floppy:", &filename); - - qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename))); -} - -static int floppy_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) -{ - BDRVRawState *s = bs->opaque; - Error *local_err = NULL; - int ret; - - s->type = FTYPE_FD; - - /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */ - ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err); - if (ret) { - if (local_err) { - error_propagate(errp, local_err); - } - return ret; - } - - /* close fd so that we can reopen it as needed */ - qemu_close(s->fd); - s->fd = -1; - s->fd_media_changed = 1; - - error_report("Host floppy pass-through is deprecated"); - error_printf("Support for it will be removed in a future release.\n"); - return 0; -} - -static int floppy_probe_device(const char *filename) -{ - int fd, ret; - int prio = 0; - struct floppy_struct fdparam; - struct stat st; - - if (strstart(filename, "/dev/fd", NULL) && - !strstart(filename, "/dev/fdset/", NULL) && - !strstart(filename, "/dev/fd/", NULL)) { - prio = 50; - } - - fd = qemu_open(filename, O_RDONLY | O_NONBLOCK); - if (fd < 0) { - goto out; - } - ret = fstat(fd, &st); - if (ret == -1 || !S_ISBLK(st.st_mode)) { - goto outc; - } - - /* Attempt to detect via a floppy specific ioctl */ - ret = ioctl(fd, FDGETPRM, &fdparam); - if (ret >= 0) - prio = 100; - -outc: - qemu_close(fd); -out: - return prio; -} - - -static int floppy_is_inserted(BlockDriverState *bs) -{ - return fd_open(bs) >= 0; -} - -static int floppy_media_changed(BlockDriverState *bs) -{ - BDRVRawState *s = bs->opaque; - int ret; - - /* - * XXX: we do not have a true media changed indication. - * It does not work if the floppy is changed without trying to read it. 
- */ - fd_open(bs); - ret = s->fd_media_changed; - s->fd_media_changed = 0; - DPRINTF("Floppy changed=%d\n", ret); - return ret; -} - -static void floppy_eject(BlockDriverState *bs, bool eject_flag) -{ - BDRVRawState *s = bs->opaque; - int fd; - - if (s->fd >= 0) { - qemu_close(s->fd); - s->fd = -1; - } - fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK); - if (fd >= 0) { - if (ioctl(fd, FDEJECT, 0) < 0) - perror("FDEJECT"); - qemu_close(fd); - } -} - -static BlockDriver bdrv_host_floppy = { - .format_name = "host_floppy", - .protocol_name = "host_floppy", - .instance_size = sizeof(BDRVRawState), - .bdrv_needs_filename = true, - .bdrv_probe_device = floppy_probe_device, - .bdrv_parse_filename = floppy_parse_filename, - .bdrv_file_open = floppy_open, - .bdrv_close = raw_close, - .bdrv_reopen_prepare = raw_reopen_prepare, - .bdrv_reopen_commit = raw_reopen_commit, - .bdrv_reopen_abort = raw_reopen_abort, - .bdrv_create = hdev_create, - .create_opts = &raw_create_opts, - - .bdrv_aio_readv = raw_aio_readv, - .bdrv_aio_writev = raw_aio_writev, - .bdrv_aio_flush = raw_aio_flush, - .bdrv_refresh_limits = raw_refresh_limits, - .bdrv_io_plug = raw_aio_plug, - .bdrv_io_unplug = raw_aio_unplug, - .bdrv_flush_io_queue = raw_aio_flush_io_queue, - - .bdrv_truncate = raw_truncate, - .bdrv_getlength = raw_getlength, - .has_variable_length = true, - .bdrv_get_allocated_file_size - = raw_get_allocated_file_size, - - .bdrv_detach_aio_context = raw_detach_aio_context, - .bdrv_attach_aio_context = raw_attach_aio_context, - - /* removable device support */ - .bdrv_is_inserted = floppy_is_inserted, - .bdrv_media_changed = floppy_media_changed, - .bdrv_eject = floppy_eject, -}; -#endif - #if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) static void cdrom_parse_filename(const char *filename, QDict *options, Error **errp) @@ -2588,15 +2387,13 @@ out: return prio; } -static int cdrom_is_inserted(BlockDriverState *bs) +static bool 
cdrom_is_inserted(BlockDriverState *bs) { BDRVRawState *s = bs->opaque; int ret; ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT); - if (ret == CDS_DISC_OK) - return 1; - return 0; + return ret == CDS_DISC_OK; } static void cdrom_eject(BlockDriverState *bs, bool eject_flag) @@ -2663,7 +2460,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_lock_medium = cdrom_lock_medium, /* generic scsi device */ - .bdrv_ioctl = hdev_ioctl, .bdrv_aio_ioctl = hdev_aio_ioctl, }; #endif /* __linux__ */ @@ -2722,7 +2518,7 @@ static int cdrom_reopen(BlockDriverState *bs) return 0; } -static int cdrom_is_inserted(BlockDriverState *bs) +static bool cdrom_is_inserted(BlockDriverState *bs) { return raw_getlength(bs) > 0; } @@ -2810,7 +2606,6 @@ static void bdrv_file_init(void) bdrv_register(&bdrv_file); bdrv_register(&bdrv_host_device); #ifdef __linux__ - bdrv_register(&bdrv_host_floppy); bdrv_register(&bdrv_host_cdrom); #endif #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) diff --git a/block/raw-win32.c b/block/raw-win32.c index 68f2338ac..2d0907a82 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -119,9 +119,9 @@ static int aio_worker(void *arg) case QEMU_AIO_WRITE: count = handle_aiocb_rw(aiocb); if (count == aiocb->aio_nbytes) { - count = 0; + ret = 0; } else { - count = -EINVAL; + ret = -EINVAL; } break; case QEMU_AIO_FLUSH: @@ -135,7 +135,7 @@ static int aio_worker(void *arg) break; } - g_slice_free(RawWin32AIOData, aiocb); + g_free(aiocb); return ret; } @@ -143,7 +143,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile, int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, BlockCompletionFunc *cb, void *opaque, int type) { - RawWin32AIOData *acb = g_slice_new(RawWin32AIOData); + RawWin32AIOData *acb = g_new(RawWin32AIOData, 1); ThreadPool *pool; acb->bs = bs; diff --git a/block/raw_bsd.c b/block/raw_bsd.c index e3d2d0468..915d6fd0e 100644 --- a/block/raw_bsd.c +++ b/block/raw_bsd.c @@ -52,7 +52,7 @@ static int coroutine_fn 
raw_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); - return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov); + return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov); } static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, @@ -75,7 +75,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, return 0; } - buf = qemu_try_blockalign(bs->file, 512); + buf = qemu_try_blockalign(bs->file->bs, 512); if (!buf) { ret = -ENOMEM; goto fail; @@ -102,7 +102,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num, } BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); - ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov); + ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov); fail: if (qiov == &local_qiov) { @@ -125,58 +125,48 @@ static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { - return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags); + return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags); } static int coroutine_fn raw_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - return bdrv_co_discard(bs->file, sector_num, nb_sectors); + return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors); } static int64_t raw_getlength(BlockDriverState *bs) { - return bdrv_getlength(bs->file); + return bdrv_getlength(bs->file->bs); } static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) { - return bdrv_get_info(bs->file, bdi); + return bdrv_get_info(bs->file->bs, bdi); } static void raw_refresh_limits(BlockDriverState *bs, Error **errp) { - bs->bl = bs->file->bl; + bs->bl = bs->file->bs->bl; } static int raw_truncate(BlockDriverState *bs, int64_t offset) { - return bdrv_truncate(bs->file, offset); -} - -static int raw_is_inserted(BlockDriverState *bs) -{ - 
return bdrv_is_inserted(bs->file); + return bdrv_truncate(bs->file->bs, offset); } static int raw_media_changed(BlockDriverState *bs) { - return bdrv_media_changed(bs->file); + return bdrv_media_changed(bs->file->bs); } static void raw_eject(BlockDriverState *bs, bool eject_flag) { - bdrv_eject(bs->file, eject_flag); + bdrv_eject(bs->file->bs, eject_flag); } static void raw_lock_medium(BlockDriverState *bs, bool locked) { - bdrv_lock_medium(bs->file, locked); -} - -static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) -{ - return bdrv_ioctl(bs->file, req, buf); + bdrv_lock_medium(bs->file->bs, locked); } static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs, @@ -184,12 +174,12 @@ static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs, BlockCompletionFunc *cb, void *opaque) { - return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque); + return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque); } static int raw_has_zero_init(BlockDriverState *bs) { - return bdrv_has_zero_init(bs->file); + return bdrv_has_zero_init(bs->file->bs); } static int raw_create(const char *filename, QemuOpts *opts, Error **errp) @@ -207,7 +197,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp) static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { - bs->sg = bs->file->sg; + bs->sg = bs->file->bs->sg; if (bs->probed && !bdrv_is_read_only(bs)) { fprintf(stderr, @@ -217,7 +207,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, "raw images, write operations on block 0 will be restricted.\n" " Specify the 'raw' format explicitly to remove the " "restrictions.\n", - bs->file->filename); + bs->file->bs->filename); } return 0; @@ -237,12 +227,12 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { - return bdrv_probe_blocksizes(bs->file, bsz); + return bdrv_probe_blocksizes(bs->file->bs, bsz); } static 
int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) { - return bdrv_probe_geometry(bs->file, geo); + return bdrv_probe_geometry(bs->file->bs, geo); } BlockDriver bdrv_raw = { @@ -264,11 +254,9 @@ BlockDriver bdrv_raw = { .bdrv_refresh_limits = &raw_refresh_limits, .bdrv_probe_blocksizes = &raw_probe_blocksizes, .bdrv_probe_geometry = &raw_probe_geometry, - .bdrv_is_inserted = &raw_is_inserted, .bdrv_media_changed = &raw_media_changed, .bdrv_eject = &raw_eject, .bdrv_lock_medium = &raw_lock_medium, - .bdrv_ioctl = &raw_ioctl, .bdrv_aio_ioctl = &raw_aio_ioctl, .create_opts = &raw_create_opts, .bdrv_has_zero_init = &raw_has_zero_init diff --git a/block/sheepdog.c b/block/sheepdog.c index 9585beb73..d80e4ed18 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -28,7 +28,6 @@ #define SD_OP_READ_OBJ 0x02 #define SD_OP_WRITE_OBJ 0x03 /* 0x04 is used internally by Sheepdog */ -#define SD_OP_DISCARD_OBJ 0x05 #define SD_OP_NEW_VDI 0x11 #define SD_OP_LOCK_VDI 0x12 @@ -318,7 +317,7 @@ enum AIOCBState { AIOCB_DISCARD_OBJ, }; -#define AIOCBOverwrapping(x, y) \ +#define AIOCBOverlapping(x, y) \ (!(x->max_affect_data_idx < y->min_affect_data_idx \ || y->max_affect_data_idx < x->min_affect_data_idx)) @@ -342,6 +341,15 @@ struct SheepdogAIOCB { uint32_t min_affect_data_idx; uint32_t max_affect_data_idx; + /* + * The difference between affect_data_idx and dirty_data_idx: + * affect_data_idx represents range of index of all request types. + * dirty_data_idx represents range of index updated by COW requests. + * dirty_data_idx is used for updating an inode object. 
+ */ + uint32_t min_dirty_data_idx; + uint32_t max_dirty_data_idx; + QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; }; @@ -351,9 +359,6 @@ typedef struct BDRVSheepdogState { SheepdogInode inode; - uint32_t min_dirty_data_idx; - uint32_t max_dirty_data_idx; - char name[SD_MAX_VDI_LEN]; bool is_snapshot; uint32_t cache_flags; @@ -373,10 +378,15 @@ typedef struct BDRVSheepdogState { QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; - CoQueue overwrapping_queue; + CoQueue overlapping_queue; QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head; } BDRVSheepdogState; +typedef struct BDRVSheepdogReopenState { + int fd; + int cache_flags; +} BDRVSheepdogReopenState; + static const char * sd_strerror(int err) { int i; @@ -556,6 +566,9 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov, acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; + return acb; } @@ -638,14 +651,16 @@ static coroutine_fn void do_co_req(void *opaque) unsigned int *rlen = srco->rlen; co = qemu_coroutine_self(); - aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co); + aio_set_fd_handler(srco->aio_context, sockfd, false, + NULL, restart_co_req, co); ret = send_co_req(sockfd, hdr, data, wlen); if (ret < 0) { goto out; } - aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co); + aio_set_fd_handler(srco->aio_context, sockfd, false, + restart_co_req, NULL, co); ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr)); if (ret != sizeof(*hdr)) { @@ -670,7 +685,8 @@ static coroutine_fn void do_co_req(void *opaque) out: /* there is at most one request for this sockfd, so it is safe to * set each handler to NULL. 
*/ - aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL); + aio_set_fd_handler(srco->aio_context, sockfd, false, + NULL, NULL, NULL); srco->ret = ret; srco->finished = true; @@ -722,7 +738,8 @@ static coroutine_fn void reconnect_to_sdog(void *opaque) BDRVSheepdogState *s = opaque; AIOReq *aio_req, *next; - aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); + aio_set_fd_handler(s->aio_context, s->fd, false, NULL, + NULL, NULL); close(s->fd); s->fd = -1; @@ -819,8 +836,8 @@ static void coroutine_fn aio_read_response(void *opaque) */ if (rsp.result == SD_RES_SUCCESS) { s->inode.data_vdi_id[idx] = s->inode.vdi_id; - s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); - s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); + acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx); + acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx); } } break; @@ -847,10 +864,6 @@ static void coroutine_fn aio_read_response(void *opaque) rsp.result = SD_RES_SUCCESS; s->discard_supported = false; break; - case SD_RES_SUCCESS: - idx = data_oid_to_idx(aio_req->oid); - s->inode.data_vdi_id[idx] = 0; - break; default: break; } @@ -929,7 +942,8 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp) return fd; } - aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s); + aio_set_fd_handler(s->aio_context, fd, false, + co_read_response, NULL, s); return fd; } @@ -1165,7 +1179,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, hdr.flags = SD_FLAG_CMD_WRITE | flags; break; case AIOCB_DISCARD_OBJ: - hdr.opcode = SD_OP_DISCARD_OBJ; + hdr.opcode = SD_OP_WRITE_OBJ; + hdr.flags = SD_FLAG_CMD_WRITE | flags; + s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0; + offset = offsetof(SheepdogInode, + data_vdi_id[data_oid_to_idx(oid)]); + oid = vid_to_vdi_oid(s->inode.vdi_id); + wlen = datalen = sizeof(uint32_t); break; } @@ -1184,7 +1204,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq 
*aio_req, qemu_co_mutex_lock(&s->lock); s->co_send = qemu_coroutine_self(); - aio_set_fd_handler(s->aio_context, s->fd, + aio_set_fd_handler(s->aio_context, s->fd, false, co_read_response, co_write_request, s); socket_set_cork(s->fd, 1); @@ -1203,7 +1223,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req, } out: socket_set_cork(s->fd, 0); - aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s); + aio_set_fd_handler(s->aio_context, s->fd, false, + co_read_response, NULL, s); s->co_send = NULL; qemu_co_mutex_unlock(&s->lock); } @@ -1353,7 +1374,8 @@ static void sd_detach_aio_context(BlockDriverState *bs) { BDRVSheepdogState *s = bs->opaque; - aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL); + aio_set_fd_handler(s->aio_context, s->fd, false, NULL, + NULL, NULL); } static void sd_attach_aio_context(BlockDriverState *bs, @@ -1362,7 +1384,8 @@ static void sd_attach_aio_context(BlockDriverState *bs, BDRVSheepdogState *s = bs->opaque; s->aio_context = new_context; - aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s); + aio_set_fd_handler(new_context, s->fd, false, + co_read_response, NULL, s); } /* TODO Convert to fine grained options */ @@ -1466,18 +1489,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags, } memcpy(&s->inode, buf, sizeof(s->inode)); - s->min_dirty_data_idx = UINT32_MAX; - s->max_dirty_data_idx = 0; bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); - qemu_co_queue_init(&s->overwrapping_queue); + qemu_co_queue_init(&s->overlapping_queue); qemu_opts_del(opts); g_free(buf); return 0; out: - aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, + false, NULL, NULL, NULL); if (s->fd >= 0) { closesocket(s->fd); } @@ -1486,6 +1508,70 @@ out: return ret; } +static int sd_reopen_prepare(BDRVReopenState *state, 
BlockReopenQueue *queue, + Error **errp) +{ + BDRVSheepdogState *s = state->bs->opaque; + BDRVSheepdogReopenState *re_s; + int ret = 0; + + re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1); + + re_s->cache_flags = SD_FLAG_CMD_CACHE; + if (state->flags & BDRV_O_NOCACHE) { + re_s->cache_flags = SD_FLAG_CMD_DIRECT; + } + + re_s->fd = get_sheep_fd(s, errp); + if (re_s->fd < 0) { + ret = re_s->fd; + return ret; + } + + return ret; +} + +static void sd_reopen_commit(BDRVReopenState *state) +{ + BDRVSheepdogReopenState *re_s = state->opaque; + BDRVSheepdogState *s = state->bs->opaque; + + if (s->fd) { + aio_set_fd_handler(s->aio_context, s->fd, false, + NULL, NULL, NULL); + closesocket(s->fd); + } + + s->fd = re_s->fd; + s->cache_flags = re_s->cache_flags; + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + +static void sd_reopen_abort(BDRVReopenState *state) +{ + BDRVSheepdogReopenState *re_s = state->opaque; + BDRVSheepdogState *s = state->bs->opaque; + + if (re_s == NULL) { + return; + } + + if (re_s->fd) { + aio_set_fd_handler(s->aio_context, re_s->fd, false, + NULL, NULL, NULL); + closesocket(re_s->fd); + } + + g_free(state->opaque); + state->opaque = NULL; + + return; +} + static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot, Error **errp) { @@ -1554,7 +1640,7 @@ static int sd_prealloc(const char *filename, Error **errp) int ret; ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, errp); + errp); if (ret < 0) { goto out_with_err_set; } @@ -1746,8 +1832,7 @@ static int sd_create(const char *filename, QemuOpts *opts, } bs = NULL; - ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL, - errp); + ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp); if (ret < 0) { goto out; } @@ -1861,7 +1946,8 @@ static void sd_close(BlockDriverState *bs) error_report("%s, %s", sd_strerror(rsp->result), s->name); } - aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, 
NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, + false, NULL, NULL, NULL); closesocket(s->fd); g_free(s->host_spec); } @@ -1923,16 +2009,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb) AIOReq *aio_req; uint32_t offset, data_len, mn, mx; - mn = s->min_dirty_data_idx; - mx = s->max_dirty_data_idx; + mn = acb->min_dirty_data_idx; + mx = acb->max_dirty_data_idx; if (mn <= mx) { /* we need to update the vdi object. */ offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + mn * sizeof(s->inode.data_vdi_id[0]); data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); - s->min_dirty_data_idx = UINT32_MAX; - s->max_dirty_data_idx = 0; + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); @@ -2141,7 +2227,9 @@ static int coroutine_fn sd_co_rw_vector(void *p) } aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create, - old_oid, done); + old_oid, + acb->aiocb_type == AIOCB_DISCARD_OBJ ? 
+ 0 : done); QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings); add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, @@ -2158,12 +2246,12 @@ out: return 1; } -static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) +static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) { SheepdogAIOCB *cb; QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { - if (AIOCBOverwrapping(aiocb, cb)) { + if (AIOCBOverlapping(aiocb, cb)) { return true; } } @@ -2192,15 +2280,15 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num, acb->aiocb_type = AIOCB_WRITE_UDATA; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2208,7 +2296,7 @@ retry: qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2225,15 +2313,15 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num, acb->aio_done_func = sd_finish_aiocb; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2241,7 +2329,7 @@ retry: qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + 
qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2577,28 +2665,36 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { SheepdogAIOCB *acb; - QEMUIOVector dummy; BDRVSheepdogState *s = bs->opaque; int ret; + QEMUIOVector discard_iov; + struct iovec iov; + uint32_t zero = 0; if (!s->discard_supported) { return 0; } - acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors); + memset(&discard_iov, 0, sizeof(discard_iov)); + memset(&iov, 0, sizeof(iov)); + iov.iov_base = &zero; + iov.iov_len = sizeof(zero); + discard_iov.iov = &iov; + discard_iov.niov = 1; + acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors); acb->aiocb_type = AIOCB_DISCARD_OBJ; acb->aio_done_func = sd_finish_aiocb; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2606,7 +2702,7 @@ retry: qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2703,6 +2799,9 @@ static BlockDriver bdrv_sheepdog = { .instance_size = sizeof(BDRVSheepdogState), .bdrv_needs_filename = true, .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, @@ -2736,6 +2835,9 @@ static BlockDriver bdrv_sheepdog_tcp = { .instance_size = sizeof(BDRVSheepdogState), .bdrv_needs_filename = true, .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + 
.bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, @@ -2769,6 +2871,9 @@ static BlockDriver bdrv_sheepdog_unix = { .instance_size = sizeof(BDRVSheepdogState), .bdrv_needs_filename = true, .bdrv_file_open = sd_open, + .bdrv_reopen_prepare = sd_reopen_prepare, + .bdrv_reopen_commit = sd_reopen_commit, + .bdrv_reopen_abort = sd_reopen_abort, .bdrv_close = sd_close, .bdrv_create = sd_create, .bdrv_has_zero_init = bdrv_has_zero_init_1, diff --git a/block/snapshot.c b/block/snapshot.c index 49e143e99..6e9fa8da9 100644 --- a/block/snapshot.c +++ b/block/snapshot.c @@ -149,7 +149,7 @@ int bdrv_can_snapshot(BlockDriverState *bs) if (!drv->bdrv_snapshot_create) { if (bs->file != NULL) { - return bdrv_can_snapshot(bs->file); + return bdrv_can_snapshot(bs->file->bs); } return 0; } @@ -168,7 +168,7 @@ int bdrv_snapshot_create(BlockDriverState *bs, return drv->bdrv_snapshot_create(bs, sn_info); } if (bs->file) { - return bdrv_snapshot_create(bs->file, sn_info); + return bdrv_snapshot_create(bs->file->bs, sn_info); } return -ENOTSUP; } @@ -188,10 +188,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs, if (bs->file) { drv->bdrv_close(bs); - ret = bdrv_snapshot_goto(bs->file, snapshot_id); + ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id); open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL); if (open_ret < 0) { - bdrv_unref(bs->file); + bdrv_unref(bs->file->bs); bs->drv = NULL; return open_ret; } @@ -245,7 +245,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs, return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); } if (bs->file) { - return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp); + return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp); } error_setg(errp, "Block format '%s' used by device '%s' " "does not support internal snapshot deletion", @@ -253,9 +253,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs, 
return -ENOTSUP; } -void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, - const char *id_or_name, - Error **errp) +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp) { int ret; Error *local_err = NULL; @@ -270,6 +270,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, if (ret < 0) { error_propagate(errp, local_err); } + return ret; } int bdrv_snapshot_list(BlockDriverState *bs, @@ -283,7 +284,7 @@ int bdrv_snapshot_list(BlockDriverState *bs, return drv->bdrv_snapshot_list(bs, psn_info); } if (bs->file) { - return bdrv_snapshot_list(bs->file, psn_info); + return bdrv_snapshot_list(bs->file->bs, psn_info); } return -ENOTSUP; } @@ -356,3 +357,130 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, return ret; } + + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers) */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs) +{ + bool ok = true; + BlockDriverState *bs = NULL; + + while (ok && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) { + ok = bdrv_can_snapshot(bs); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return ok; +} + +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs, + Error **err) +{ + int ret = 0; + BlockDriverState *bs = NULL; + QEMUSnapshotInfo sn1, *snapshot = &sn1; + + while (ret == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs) && + bdrv_snapshot_find(bs, snapshot, name) >= 0) { + ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return ret; +} + + +int bdrv_all_goto_snapshot(const char *name, BlockDriverState 
**first_bad_bs) +{ + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + err = bdrv_snapshot_goto(bs, name); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs) +{ + QEMUSnapshotInfo sn; + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bdrv_can_snapshot(bs)) { + err = bdrv_snapshot_find(bs, &sn, name); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs) +{ + int err = 0; + BlockDriverState *bs = NULL; + + while (err == 0 && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + if (bs == vm_state_bs) { + sn->vm_state_size = vm_state_size; + err = bdrv_snapshot_create(bs, sn); + } else if (bdrv_can_snapshot(bs)) { + sn->vm_state_size = 0; + err = bdrv_snapshot_create(bs, sn); + } + aio_context_release(ctx); + } + + *first_bad_bs = bs; + return err; +} + +BlockDriverState *bdrv_all_find_vmstate_bs(void) +{ + bool not_found = true; + BlockDriverState *bs = NULL; + + while (not_found && (bs = bdrv_next(bs))) { + AioContext *ctx = bdrv_get_aio_context(bs); + + aio_context_acquire(ctx); + not_found = !bdrv_can_snapshot(bs); + aio_context_release(ctx); + } + return bs; +} diff --git a/block/ssh.c b/block/ssh.c index 8d0673903..af025c08a 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -193,7 +193,7 @@ sftp_error_report(BDRVSSHState *s, const char *fs, ...) 
static int parse_uri(const char *filename, QDict *options, Error **errp) { URI *uri = NULL; - QueryParams *qp = NULL; + QueryParams *qp; int i; uri = uri_parse(filename); @@ -249,9 +249,6 @@ static int parse_uri(const char *filename, QDict *options, Error **errp) return 0; err: - if (qp) { - query_params_free(qp); - } if (uri) { uri_free(uri); } @@ -803,14 +800,15 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs) rd_handler, wr_handler); aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, - rd_handler, wr_handler, co); + false, rd_handler, wr_handler, co); } static coroutine_fn void clear_fd_handler(BDRVSSHState *s, BlockDriverState *bs) { DPRINTF("s->sock=%d", s->sock); - aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL); + aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, + false, NULL, NULL, NULL); } /* A non-blocking call returned EAGAIN, so yield, ensuring the diff --git a/block/stream.c b/block/stream.c index ab0bd057f..25af7eff6 100644 --- a/block/stream.c +++ b/block/stream.c @@ -16,6 +16,7 @@ #include "block/blockjob.h" #include "qapi/qmp/qerror.h" #include "qemu/ratelimit.h" +#include "sysemu/block-backend.h" enum { /* @@ -52,34 +53,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs, return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov); } -static void close_unused_images(BlockDriverState *top, BlockDriverState *base, - const char *base_id) -{ - BlockDriverState *intermediate; - intermediate = top->backing_hd; - - /* Must assign before bdrv_delete() to prevent traversing dangling pointer - * while we delete backing image instances. 
- */ - bdrv_set_backing_hd(top, base); - - while (intermediate) { - BlockDriverState *unused; - - /* reached base */ - if (intermediate == base) { - break; - } - - unused = intermediate; - intermediate = intermediate->backing_hd; - bdrv_set_backing_hd(unused, NULL); - bdrv_unref(unused); - } - - bdrv_refresh_limits(top, NULL); -} - typedef struct { int ret; bool reached_end; @@ -101,7 +74,7 @@ static void stream_complete(BlockJob *job, void *opaque) } } data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt); - close_unused_images(job->bs, base, base_id); + bdrv_set_backing_hd(job->bs, base); } g_free(s->backing_file_str); @@ -121,7 +94,7 @@ static void coroutine_fn stream_run(void *opaque) int n = 0; void *buf; - if (!bs->backing_hd) { + if (!bs->backing) { block_job_completed(&s->common, 0); return; } @@ -166,7 +139,7 @@ wait: } else if (ret >= 0) { /* Copy if allocated in the intermediate images. Limit to the * known-unallocated area [sector_num, sector_num+n). */ - ret = bdrv_is_allocated_above(bs->backing_hd, base, + ret = bdrv_is_allocated_above(backing_bs(bs), base, sector_num, n, &n); /* Finish early if end of backing file has been reached */ @@ -250,7 +223,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, if ((on_error == BLOCKDEV_ON_ERROR_STOP || on_error == BLOCKDEV_ON_ERROR_ENOSPC) && - !bdrv_iostatus_is_enabled(bs)) { + (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) { error_setg(errp, QERR_INVALID_PARAMETER, "on-error"); return; } diff --git a/block/throttle-groups.c b/block/throttle-groups.c index 1abc6fcae..13b5baa5d 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -33,8 +33,7 @@ * its own locking. * * This locking is however handled internally in this file, so it's - * mostly transparent to outside users (but see the documentation in - * throttle_groups_lock()). + * transparent to outside users. 
* * The whole ThrottleGroup structure is private and invisible to * outside users, that only use it through its ThrottleState. @@ -76,9 +75,9 @@ static QTAILQ_HEAD(, ThrottleGroup) throttle_groups = * created. * * @name: the name of the ThrottleGroup - * @ret: the ThrottleGroup + * @ret: the ThrottleState member of the ThrottleGroup */ -static ThrottleGroup *throttle_group_incref(const char *name) +ThrottleState *throttle_group_incref(const char *name) { ThrottleGroup *tg = NULL; ThrottleGroup *iter; @@ -108,7 +107,7 @@ static ThrottleGroup *throttle_group_incref(const char *name) qemu_mutex_unlock(&throttle_groups_lock); - return tg; + return &tg->ts; } /* Decrease the reference count of a ThrottleGroup. @@ -116,10 +115,12 @@ static ThrottleGroup *throttle_group_incref(const char *name) * When the reference count reaches zero the ThrottleGroup is * destroyed. * - * @tg: The ThrottleGroup to unref + * @ts: The ThrottleGroup to unref, given by its ThrottleState member */ -static void throttle_group_unref(ThrottleGroup *tg) +void throttle_group_unref(ThrottleState *ts) { + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); + qemu_mutex_lock(&throttle_groups_lock); if (--tg->refcount == 0) { QTAILQ_REMOVE(&throttle_groups, tg, list); @@ -401,7 +402,8 @@ static void write_timer_cb(void *opaque) void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) { int i; - ThrottleGroup *tg = throttle_group_incref(groupname); + ThrottleState *ts = throttle_group_incref(groupname); + ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); int clock_type = QEMU_CLOCK_REALTIME; if (qtest_enabled()) { @@ -409,7 +411,7 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname) clock_type = QEMU_CLOCK_VIRTUAL; } - bs->throttle_state = &tg->ts; + bs->throttle_state = ts; qemu_mutex_lock(&tg->lock); /* If the ThrottleGroup is new set this BlockDriverState as the token */ @@ -435,6 +437,9 @@ void throttle_group_register_bs(BlockDriverState 
*bs, const char *groupname) * list, destroying the timers and setting the throttle_state pointer * to NULL. * + * The BlockDriverState must not have pending throttled requests, so + * the caller has to drain them first. + * * The group will be destroyed if it's empty after this operation. * * @bs: the BlockDriverState to remove @@ -444,6 +449,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs) ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); int i; + assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0); + assert(qemu_co_queue_empty(&bs->throttled_reqs[0])); + assert(qemu_co_queue_empty(&bs->throttled_reqs[1])); + qemu_mutex_lock(&tg->lock); for (i = 0; i < 2; i++) { if (tg->tokens[i] == bs) { @@ -461,38 +470,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs) throttle_timers_destroy(&bs->throttle_timers); qemu_mutex_unlock(&tg->lock); - throttle_group_unref(tg); + throttle_group_unref(&tg->ts); bs->throttle_state = NULL; } -/* Acquire the lock of this throttling group. - * - * You won't normally need to use this. None of the functions from the - * ThrottleGroup API require you to acquire the lock since all of them - * deal with it internally. - * - * This should only be used in exceptional cases when you want to - * access the protected fields of a BlockDriverState directly - * (e.g. bdrv_swap()). - * - * @bs: a BlockDriverState that is member of the group - */ -void throttle_group_lock(BlockDriverState *bs) -{ - ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); - qemu_mutex_lock(&tg->lock); -} - -/* Release the lock of this throttling group. - * - * See the comments in throttle_group_lock(). 
- */ -void throttle_group_unlock(BlockDriverState *bs) -{ - ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts); - qemu_mutex_unlock(&tg->lock); -} - static void throttle_groups_init(void) { qemu_mutex_init(&throttle_groups_lock); diff --git a/block/vdi.c b/block/vdi.c index 7642ef359..17f435fad 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -53,7 +53,7 @@ #include "block/block_int.h" #include "qemu/module.h" #include "migration/migration.h" -#include "block/coroutine.h" +#include "qemu/coroutine.h" #if defined(CONFIG_UUID) #include <uuid/uuid.h> @@ -399,7 +399,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, logout("\n"); - ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1); + ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1); if (ret < 0) { goto fail; } @@ -490,13 +490,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags, bmap_size = header.blocks_in_image * sizeof(uint32_t); bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE); - s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE); + s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE); if (s->bmap == NULL) { ret = -ENOMEM; goto fail; } - ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size); + ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap, + bmap_size); if (ret < 0) { goto fail_free_bmap; } @@ -585,7 +586,7 @@ static int vdi_co_read(BlockDriverState *bs, uint64_t offset = s->header.offset_data / SECTOR_SIZE + (uint64_t)bmap_entry * s->block_sectors + sector_in_block; - ret = bdrv_read(bs->file, offset, buf, n_sectors); + ret = bdrv_read(bs->file->bs, offset, buf, n_sectors); } logout("%u sectors read\n", n_sectors); @@ -653,7 +654,7 @@ static int vdi_co_write(BlockDriverState *bs, * acquire the lock and thus the padded cluster is written before * the other coroutines can write to the affected area. 
*/ qemu_co_mutex_lock(&s->write_lock); - ret = bdrv_write(bs->file, offset, block, s->block_sectors); + ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors); qemu_co_mutex_unlock(&s->write_lock); } else { uint64_t offset = s->header.offset_data / SECTOR_SIZE + @@ -669,7 +670,7 @@ static int vdi_co_write(BlockDriverState *bs, * that that write operation has returned (there may be other writes * in flight, but they do not concern this very operation). */ qemu_co_mutex_unlock(&s->write_lock); - ret = bdrv_write(bs->file, offset, buf, n_sectors); + ret = bdrv_write(bs->file->bs, offset, buf, n_sectors); } nb_sectors -= n_sectors; @@ -694,7 +695,7 @@ static int vdi_co_write(BlockDriverState *bs, assert(VDI_IS_ALLOCATED(bmap_first)); *header = s->header; vdi_header_to_le(header); - ret = bdrv_write(bs->file, 0, block, 1); + ret = bdrv_write(bs->file->bs, 0, block, 1); g_free(block); block = NULL; @@ -712,7 +713,7 @@ static int vdi_co_write(BlockDriverState *bs, base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE; logout("will write %u block map sectors starting from entry %u\n", n_sectors, bmap_first); - ret = bdrv_write(bs->file, offset, base, n_sectors); + ret = bdrv_write(bs->file->bs, offset, base, n_sectors); } return ret; @@ -764,7 +765,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; diff --git a/block/vhdx-log.c b/block/vhdx-log.c index 47fec63c6..47ae4b135 100644 --- a/block/vhdx-log.c +++ b/block/vhdx-log.c @@ -81,7 +81,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log, offset = log->offset + read; - ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader)); + ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader)); if (ret < 0) { goto exit; } @@ -141,7 +141,7 @@ static int 
vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log, } offset = log->offset + read; - ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE); + ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE); if (ret < 0) { goto exit; } @@ -191,7 +191,8 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log, /* full */ break; } - ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE); + ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp, + VHDX_LOG_SECTOR_SIZE); if (ret < 0) { goto exit; } @@ -353,7 +354,7 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s, } desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count); - desc_entries = qemu_try_blockalign(bs->file, + desc_entries = qemu_try_blockalign(bs->file->bs, desc_sectors * VHDX_LOG_SECTOR_SIZE); if (desc_entries == NULL) { ret = -ENOMEM; @@ -462,7 +463,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc, /* count is only > 1 if we are writing zeroes */ for (i = 0; i < count; i++) { - ret = bdrv_pwrite_sync(bs->file, file_offset, buffer, + ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer, VHDX_LOG_SECTOR_SIZE); if (ret < 0) { goto exit; @@ -509,7 +510,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, /* if the log shows a FlushedFileOffset larger than our current file * size, then that means the file has been truncated / corrupted, and * we must refused to open it / use it */ - if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) { + if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) { ret = -EINVAL; goto exit; } @@ -539,12 +540,12 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s, goto exit; } } - if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) { + if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) { new_file_size = desc_entries->hdr.last_file_offset; if (new_file_size % (1024*1024)) { 
/* round up to nearest 1MB boundary */ new_file_size = ((new_file_size >> 20) + 1) << 20; - bdrv_truncate(bs->file, new_file_size); + bdrv_truncate(bs->file->bs, new_file_size); } } qemu_vfree(desc_entries); @@ -908,8 +909,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, .sequence_number = s->log.sequence, .descriptor_count = sectors, .reserved = 0, - .flushed_file_offset = bdrv_getlength(bs->file), - .last_file_offset = bdrv_getlength(bs->file), + .flushed_file_offset = bdrv_getlength(bs->file->bs), + .last_file_offset = bdrv_getlength(bs->file->bs), }; new_hdr.log_guid = header->log_guid; @@ -940,7 +941,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, if (i == 0 && leading_length) { /* partial sector at the front of the buffer */ - ret = bdrv_pread(bs->file, file_offset, merged_sector, + ret = bdrv_pread(bs->file->bs, file_offset, merged_sector, VHDX_LOG_SECTOR_SIZE); if (ret < 0) { goto exit; @@ -950,7 +951,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s, sector_write = merged_sector; } else if (i == sectors - 1 && trailing_length) { /* partial sector at the end of the buffer */ - ret = bdrv_pread(bs->file, + ret = bdrv_pread(bs->file->bs, file_offset, merged_sector + trailing_length, VHDX_LOG_SECTOR_SIZE - trailing_length); diff --git a/block/vhdx.c b/block/vhdx.c index 0776de717..2fe9a5e0c 100644 --- a/block/vhdx.c +++ b/block/vhdx.c @@ -375,7 +375,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s, inactive_header->log_guid = *log_guid; } - ret = vhdx_write_header(bs->file, inactive_header, header_offset, true); + ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true); if (ret < 0) { goto exit; } @@ -427,7 +427,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, /* We have to read the whole VHDX_HEADER_SIZE instead of * sizeof(VHDXHeader), because the checksum is over the whole * region */ - ret = bdrv_pread(bs->file, 
VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE); + ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer, + VHDX_HEADER_SIZE); if (ret < 0) { goto fail; } @@ -443,7 +444,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s, } } - ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE); + ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer, + VHDX_HEADER_SIZE); if (ret < 0) { goto fail; } @@ -516,7 +518,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s) * whole block */ buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE); - ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer, + ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer, VHDX_HEADER_BLOCK_SIZE); if (ret < 0) { goto fail; @@ -629,7 +631,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE); - ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer, + ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer, VHDX_METADATA_TABLE_MAX_SIZE); if (ret < 0) { goto exit; @@ -732,7 +734,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) goto exit; } - ret = bdrv_pread(bs->file, + ret = bdrv_pread(bs->file->bs, s->metadata_entries.file_parameters_entry.offset + s->metadata_rt.file_offset, &s->params, @@ -767,7 +769,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) /* determine virtual disk size, logical sector size, * and phys sector size */ - ret = bdrv_pread(bs->file, + ret = bdrv_pread(bs->file->bs, s->metadata_entries.virtual_disk_size_entry.offset + s->metadata_rt.file_offset, &s->virtual_disk_size, @@ -775,7 +777,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) if (ret < 0) { goto exit; } - ret = bdrv_pread(bs->file, + ret = bdrv_pread(bs->file->bs, s->metadata_entries.logical_sector_size_entry.offset + s->metadata_rt.file_offset, 
&s->logical_sector_size, @@ -783,7 +785,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s) if (ret < 0) { goto exit; } - ret = bdrv_pread(bs->file, + ret = bdrv_pread(bs->file->bs, s->metadata_entries.phys_sector_size_entry.offset + s->metadata_rt.file_offset, &s->physical_sector_size, @@ -906,7 +908,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, QLIST_INIT(&s->regions); /* validate the file signature */ - ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t)); + ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t)); if (ret < 0) { goto fail; } @@ -959,13 +961,13 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags, } /* s->bat is freed in vhdx_close() */ - s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length); + s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length); if (s->bat == NULL) { ret = -ENOMEM; goto fail; } - ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length); + ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length); if (ret < 0) { goto fail; } @@ -1118,7 +1120,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num, break; case PAYLOAD_BLOCK_FULLY_PRESENT: qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_readv(bs->file, + ret = bdrv_co_readv(bs->file->bs, sinfo.file_offset >> BDRV_SECTOR_BITS, sinfo.sectors_avail, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -1156,12 +1158,12 @@ exit: static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s, uint64_t *new_offset) { - *new_offset = bdrv_getlength(bs->file); + *new_offset = bdrv_getlength(bs->file->bs); /* per the spec, the address for a block is in units of 1MB */ *new_offset = ROUND_UP(*new_offset, 1024 * 1024); - return bdrv_truncate(bs->file, *new_offset + s->block_size); + return bdrv_truncate(bs->file->bs, *new_offset + s->block_size); } /* @@ -1260,7 +1262,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t 
sector_num, /* Queue another write of zero buffers if the underlying file * does not zero-fill on file extension */ - if (bdrv_has_zero_init(bs->file) == 0) { + if (bdrv_has_zero_init(bs->file->bs) == 0) { use_zero_buffers = true; /* zero fill the front, if any */ @@ -1327,7 +1329,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num, } /* block exists, so we can just overwrite it */ qemu_co_mutex_unlock(&s->lock); - ret = bdrv_co_writev(bs->file, + ret = bdrv_co_writev(bs->file->bs, sinfo.file_offset >> BDRV_SECTOR_BITS, sectors_to_write, &hd_qiov); qemu_co_mutex_lock(&s->lock); @@ -1454,7 +1456,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs, uint32_t offset = 0; void *buffer = NULL; void *entry_buffer; - VHDXMetadataTableHeader *md_table;; + VHDXMetadataTableHeader *md_table; VHDXMetadataTableEntry *md_table_entry; /* Metadata entries */ @@ -1842,7 +1844,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp) bs = NULL; ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; diff --git a/block/vmdk.c b/block/vmdk.c index fbaab67c8..e46271a80 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -87,7 +87,7 @@ typedef struct { #define L2_CACHE_SIZE 16 typedef struct VmdkExtent { - BlockDriverState *file; + BdrvChild *file; bool flat; bool compressed; bool has_marker; @@ -222,7 +222,7 @@ static void vmdk_free_extents(BlockDriverState *bs) g_free(e->l1_backup_table); g_free(e->type); if (e->file != bs->file) { - bdrv_unref(e->file); + bdrv_unref_child(bs, e->file); } } g_free(s->extents); @@ -248,7 +248,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent) BDRVVmdkState *s = bs->opaque; int ret; - ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { return 0; } @@ -278,7 +278,7 @@ static 
int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) BDRVVmdkState *s = bs->opaque; int ret; - ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { return ret; } @@ -297,7 +297,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) pstrcat(desc, sizeof(desc), tmp_desc); } - ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE); + ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { return ret; } @@ -308,10 +308,11 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid) static int vmdk_is_cid_valid(BlockDriverState *bs) { BDRVVmdkState *s = bs->opaque; - BlockDriverState *p_bs = bs->backing_hd; uint32_t cur_pcid; - if (!s->cid_checked && p_bs) { + if (!s->cid_checked && bs->backing) { + BlockDriverState *p_bs = bs->backing->bs; + cur_pcid = vmdk_read_cid(p_bs, 0); if (s->parent_cid != cur_pcid) { /* CID not valid */ @@ -340,7 +341,7 @@ static int vmdk_parent_open(BlockDriverState *bs) int ret; desc[DESC_SIZE] = '\0'; - ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE); + ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE); if (ret < 0) { return ret; } @@ -367,7 +368,7 @@ static int vmdk_parent_open(BlockDriverState *bs) /* Create and append extent to the extent array. Return the added VmdkExtent * address. return NULL if allocation failed. 
*/ static int vmdk_add_extent(BlockDriverState *bs, - BlockDriverState *file, bool flat, int64_t sectors, + BdrvChild *file, bool flat, int64_t sectors, int64_t l1_offset, int64_t l1_backup_offset, uint32_t l1_size, int l2_size, uint64_t cluster_sectors, @@ -392,7 +393,7 @@ static int vmdk_add_extent(BlockDriverState *bs, return -EFBIG; } - nb_sectors = bdrv_nb_sectors(file); + nb_sectors = bdrv_nb_sectors(file->bs); if (nb_sectors < 0) { return nb_sectors; } @@ -439,14 +440,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, return -ENOMEM; } - ret = bdrv_pread(extent->file, + ret = bdrv_pread(extent->file->bs, extent->l1_table_offset, extent->l1_table, l1_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read l1 table from extent '%s'", - extent->file->filename); + extent->file->bs->filename); goto fail_l1; } for (i = 0; i < extent->l1_size; i++) { @@ -459,14 +460,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, ret = -ENOMEM; goto fail_l1; } - ret = bdrv_pread(extent->file, + ret = bdrv_pread(extent->file->bs, extent->l1_backup_table_offset, extent->l1_backup_table, l1_size); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read l1 backup table from extent '%s'", - extent->file->filename); + extent->file->bs->filename); goto fail_l1b; } for (i = 0; i < extent->l1_size; i++) { @@ -485,7 +486,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent, } static int vmdk_open_vmfs_sparse(BlockDriverState *bs, - BlockDriverState *file, + BdrvChild *file, int flags, Error **errp) { int ret; @@ -493,11 +494,11 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs, VMDK3Header header; VmdkExtent *extent; - ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); + ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read header from file '%s'", - file->filename); + file->bs->filename); return ret; } ret = 
vmdk_add_extent(bs, file, false, @@ -559,7 +560,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset, } static int vmdk_open_vmdk4(BlockDriverState *bs, - BlockDriverState *file, + BdrvChild *file, int flags, QDict *options, Error **errp) { int ret; @@ -569,18 +570,19 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, VmdkExtent *extent; BDRVVmdkState *s = bs->opaque; int64_t l1_backup_offset = 0; + bool compressed; - ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header)); + ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header)); if (ret < 0) { error_setg_errno(errp, -ret, "Could not read header from file '%s'", - file->filename); + file->bs->filename); return -EINVAL; } if (header.capacity == 0) { uint64_t desc_offset = le64_to_cpu(header.desc_offset); if (desc_offset) { - char *buf = vmdk_read_desc(file, desc_offset << 9, errp); + char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp); if (!buf) { return -EINVAL; } @@ -620,8 +622,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, } QEMU_PACKED eos_marker; } QEMU_PACKED footer; - ret = bdrv_pread(file, - bs->file->total_sectors * 512 - 1536, + ret = bdrv_pread(file->bs, + bs->file->bs->total_sectors * 512 - 1536, &footer, sizeof(footer)); if (ret < 0) { error_setg_errno(errp, -ret, "Failed to read footer"); @@ -643,6 +645,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, header = footer.header; } + compressed = + le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE; if (le32_to_cpu(header.version) > 3) { char buf[64]; snprintf(buf, sizeof(buf), "VMDK version %" PRId32, @@ -650,7 +654,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bdrv_get_device_or_node_name(bs), "vmdk", buf); return -ENOTSUP; - } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) { + } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) && + !compressed) { /* VMware KB 2064959 explains 
that version 3 added support for * persistent changed block tracking (CBT), and backup software can * read it as version=1 if it doesn't care about the changed area @@ -675,7 +680,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs, if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) { l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9; } - if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) { + if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) { error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes", (int64_t)(le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE)); @@ -739,8 +744,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name, } /* Open an extent file and append to bs array */ -static int vmdk_open_sparse(BlockDriverState *bs, - BlockDriverState *file, int flags, +static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags, char *buf, QDict *options, Error **errp) { uint32_t magic; @@ -773,10 +777,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, int64_t sectors = 0; int64_t flat_offset; char *extent_path; - BlockDriverState *extent_file; + BdrvChild *extent_file; BDRVVmdkState *s = bs->opaque; VmdkExtent *extent; char extent_opt_prefix[32]; + Error *local_err = NULL; while (*p) { /* parse extent line in one of below formats: @@ -819,22 +824,22 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, !desc_file_path[0]) { error_setg(errp, "Cannot use relative extent paths with VMDK " - "descriptor file '%s'", bs->file->filename); + "descriptor file '%s'", bs->file->bs->filename); return -EINVAL; } extent_path = g_malloc0(PATH_MAX); path_combine(extent_path, PATH_MAX, desc_file_path, fname); - extent_file = NULL; ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents); assert(ret < 32); - ret = bdrv_open_image(&extent_file, extent_path, options, - extent_opt_prefix, bs, &child_file, false, errp); + extent_file = 
bdrv_open_child(extent_path, options, extent_opt_prefix, + bs, &child_file, false, &local_err); g_free(extent_path); - if (ret) { - return ret; + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; } /* save to extents array */ @@ -844,13 +849,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, ret = vmdk_add_extent(bs, extent_file, true, sectors, 0, 0, 0, 0, 0, &extent, errp); if (ret < 0) { - bdrv_unref(extent_file); + bdrv_unref_child(bs, extent_file); return ret; } extent->flat_start_offset = flat_offset << 9; } else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) { /* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/ - char *buf = vmdk_read_desc(extent_file, 0, errp); + char *buf = vmdk_read_desc(extent_file->bs, 0, errp); if (!buf) { ret = -EINVAL; } else { @@ -859,13 +864,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs, } g_free(buf); if (ret) { - bdrv_unref(extent_file); + bdrv_unref_child(bs, extent_file); return ret; } extent = &s->extents[s->num_extents - 1]; } else { error_setg(errp, "Unsupported extent type '%s'", type); - bdrv_unref(extent_file); + bdrv_unref_child(bs, extent_file); return -ENOTSUP; } extent->type = g_strdup(type); @@ -905,7 +910,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf, } s->create_type = g_strdup(ct); s->desc_offset = 0; - ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp); + ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options, + errp); exit: return ret; } @@ -918,7 +924,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, BDRVVmdkState *s = bs->opaque; uint32_t magic; - buf = vmdk_read_desc(bs->file, 0, errp); + buf = vmdk_read_desc(bs->file->bs, 0, errp); if (!buf) { return -EINVAL; } @@ -927,7 +933,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags, switch (magic) { case VMDK3_MAGIC: case VMDK4_MAGIC: - ret = 
vmdk_open_sparse(bs, bs->file, flags, buf, options, errp); + ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, + errp); s->desc_offset = 0x200; break; default: @@ -1004,7 +1011,7 @@ static int get_whole_cluster(BlockDriverState *bs, cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS; whole_grain = qemu_blockalign(bs, cluster_bytes); - if (!bs->backing_hd) { + if (!bs->backing) { memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS); memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0, cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS)); @@ -1013,22 +1020,22 @@ static int get_whole_cluster(BlockDriverState *bs, assert(skip_end_sector <= extent->cluster_sectors); /* we will be here if it's first write on non-exist grain(cluster). * try to read from parent image, if exist */ - if (bs->backing_hd && !vmdk_is_cid_valid(bs)) { + if (bs->backing && !vmdk_is_cid_valid(bs)) { ret = VMDK_ERROR; goto exit; } /* Read backing data before skip range */ if (skip_start_sector > 0) { - if (bs->backing_hd) { - ret = bdrv_read(bs->backing_hd, sector_num, + if (bs->backing) { + ret = bdrv_read(bs->backing->bs, sector_num, whole_grain, skip_start_sector); if (ret < 0) { ret = VMDK_ERROR; goto exit; } } - ret = bdrv_write(extent->file, cluster_sector_num, whole_grain, + ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain, skip_start_sector); if (ret < 0) { ret = VMDK_ERROR; @@ -1037,8 +1044,8 @@ static int get_whole_cluster(BlockDriverState *bs, } /* Read backing data after skip range */ if (skip_end_sector < extent->cluster_sectors) { - if (bs->backing_hd) { - ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector, + if (bs->backing) { + ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector, whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), extent->cluster_sectors - skip_end_sector); if (ret < 0) { @@ -1046,7 +1053,7 @@ static int get_whole_cluster(BlockDriverState *bs, goto exit; } } - ret = bdrv_write(extent->file, 
cluster_sector_num + skip_end_sector, + ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector, whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), extent->cluster_sectors - skip_end_sector); if (ret < 0) { @@ -1066,7 +1073,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, offset = cpu_to_le32(offset); /* update L2 table */ if (bdrv_pwrite_sync( - extent->file, + extent->file->bs, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(offset)), &offset, sizeof(offset)) < 0) { @@ -1076,7 +1083,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, if (extent->l1_backup_table_offset != 0) { m_data->l2_offset = extent->l1_backup_table[m_data->l1_index]; if (bdrv_pwrite_sync( - extent->file, + extent->file->bs, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(offset)), &offset, sizeof(offset)) < 0) { @@ -1166,7 +1173,7 @@ static int get_cluster_offset(BlockDriverState *bs, } l2_table = extent->l2_cache + (min_index * extent->l2_size); if (bdrv_pread( - extent->file, + extent->file->bs, (int64_t)l2_offset * 512, l2_table, extent->l2_size * sizeof(uint32_t) @@ -1320,12 +1327,16 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset, write_len = buf_len + sizeof(VmdkGrainMarker); } write_offset = cluster_offset + offset_in_cluster, - ret = bdrv_pwrite(extent->file, write_offset, write_buf, write_len); + ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len); write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE); - extent->next_cluster_sector = MAX(extent->next_cluster_sector, - write_end_sector); + if (extent->compressed) { + extent->next_cluster_sector = write_end_sector; + } else { + extent->next_cluster_sector = MAX(extent->next_cluster_sector, + write_end_sector); + } if (ret != write_len) { ret = ret < 0 ? 
ret : -EIO; @@ -1351,7 +1362,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, if (!extent->compressed) { - ret = bdrv_pread(extent->file, + ret = bdrv_pread(extent->file->bs, cluster_offset + offset_in_cluster, buf, nb_sectors * 512); if (ret == nb_sectors * 512) { @@ -1365,7 +1376,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset, buf_bytes = cluster_bytes * 2; cluster_buf = g_malloc(buf_bytes); uncomp_buf = g_malloc(cluster_bytes); - ret = bdrv_pread(extent->file, + ret = bdrv_pread(extent->file->bs, cluster_offset, cluster_buf, buf_bytes); if (ret < 0) { @@ -1427,11 +1438,11 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num, } if (ret != VMDK_OK) { /* if not allocated, try to read from parent image, if exist */ - if (bs->backing_hd && ret != VMDK_ZEROED) { + if (bs->backing && ret != VMDK_ZEROED) { if (!vmdk_is_cid_valid(bs)) { return -EINVAL; } - ret = bdrv_read(bs->backing_hd, sector_num, buf, n); + ret = bdrv_read(bs->backing->bs, sector_num, buf, n); if (ret < 0) { return ret; } @@ -1632,7 +1643,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, assert(bs == NULL); ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -1647,7 +1658,13 @@ static int vmdk_create_extent(const char *filename, int64_t filesize, } magic = cpu_to_be32(VMDK4_MAGIC); memset(&header, 0, sizeof(header)); - header.version = zeroed_grain ? 2 : 1; + if (compress) { + header.version = 3; + } else if (zeroed_grain) { + header.version = 2; + } else { + header.version = 1; + } header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0) | (zeroed_grain ? 
VMDK4_FLAG_ZERO_GRAIN : 0); @@ -1905,8 +1922,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) ret = -ENOENT; goto exit; } - ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL, - errp); + ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp); g_free(full_backing); if (ret != 0) { goto exit; @@ -1977,7 +1993,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp) } assert(new_bs == NULL); ret = bdrv_open(&new_bs, filename, NULL, NULL, - BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); + BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err); if (ret < 0) { error_propagate(errp, local_err); goto exit; @@ -2032,7 +2048,7 @@ static coroutine_fn int vmdk_co_flush(BlockDriverState *bs) int ret = 0; for (i = 0; i < s->num_extents; i++) { - err = bdrv_co_flush(s->extents[i].file); + err = bdrv_co_flush(s->extents[i].file->bs); if (err < 0) { ret = err; } @@ -2047,7 +2063,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) int64_t r; BDRVVmdkState *s = bs->opaque; - ret = bdrv_get_allocated_file_size(bs->file); + ret = bdrv_get_allocated_file_size(bs->file->bs); if (ret < 0) { return ret; } @@ -2055,7 +2071,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) if (s->extents[i].file == bs->file) { continue; } - r = bdrv_get_allocated_file_size(s->extents[i].file); + r = bdrv_get_allocated_file_size(s->extents[i].file->bs); if (r < 0) { return r; } @@ -2073,7 +2089,7 @@ static int vmdk_has_zero_init(BlockDriverState *bs) * return 0. 
*/ for (i = 0; i < s->num_extents; i++) { if (s->extents[i].flat) { - if (!bdrv_has_zero_init(s->extents[i].file)) { + if (!bdrv_has_zero_init(s->extents[i].file->bs)) { return 0; } } @@ -2086,7 +2102,7 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent) ImageInfo *info = g_new0(ImageInfo, 1); *info = (ImageInfo){ - .filename = g_strdup(extent->file->filename), + .filename = g_strdup(extent->file->bs->filename), .format = g_strdup(extent->type), .virtual_size = extent->sectors * BDRV_SECTOR_SIZE, .compressed = extent->compressed, @@ -2132,7 +2148,9 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result, PRId64 "\n", sector_num); break; } - if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) { + if (ret == VMDK_OK && + cluster_offset >= bdrv_getlength(extent->file->bs)) + { fprintf(stderr, "ERROR: cluster offset for sector %" PRId64 " points after EOF\n", sector_num); @@ -2153,19 +2171,19 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs) ImageInfoList **next; *spec_info = (ImageInfoSpecific){ - .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK, + .type = IMAGE_INFO_SPECIFIC_KIND_VMDK, { .vmdk = g_new0(ImageInfoSpecificVmdk, 1), }, }; - *spec_info->vmdk = (ImageInfoSpecificVmdk) { + *spec_info->u.vmdk = (ImageInfoSpecificVmdk) { .create_type = g_strdup(s->create_type), .cid = s->cid, .parent_cid = s->parent_cid, }; - next = &spec_info->vmdk->extents; + next = &spec_info->u.vmdk->extents; for (i = 0; i < s->num_extents; i++) { *next = g_new0(ImageInfoList, 1); (*next)->value = vmdk_get_extent_info(&s->extents[i]); @@ -2208,7 +2226,7 @@ static void vmdk_detach_aio_context(BlockDriverState *bs) int i; for (i = 0; i < s->num_extents; i++) { - bdrv_detach_aio_context(s->extents[i].file); + bdrv_detach_aio_context(s->extents[i].file->bs); } } @@ -2219,7 +2237,7 @@ static void vmdk_attach_aio_context(BlockDriverState *bs, int i; for (i = 0; i < s->num_extents; i++) { - bdrv_attach_aio_context(s->extents[i].file, 
new_context); + bdrv_attach_aio_context(s->extents[i].file->bs, new_context); } } diff --git a/block/vpc.c b/block/vpc.c index 3e385d9fb..299d37309 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -172,14 +172,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, int disk_type = VHD_DYNAMIC; int ret; - ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE); + ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE); if (ret < 0) { goto fail; } footer = (VHDFooter *) s->footer_buf; if (strncmp(footer->creator, "conectix", 8)) { - int64_t offset = bdrv_getlength(bs->file); + int64_t offset = bdrv_getlength(bs->file->bs); if (offset < 0) { ret = offset; goto fail; @@ -189,7 +189,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } /* If a fixed disk, the footer is found only at the end of the file */ - ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf, + ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf, HEADER_SIZE); if (ret < 0) { goto fail; @@ -232,7 +232,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } if (disk_type == VHD_DYNAMIC) { - ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, + ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE); if (ret < 0) { goto fail; @@ -280,7 +280,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, pagetable_size = (uint64_t) s->max_table_entries * 4; - s->pagetable = qemu_try_blockalign(bs->file, pagetable_size); + s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size); if (s->pagetable == NULL) { ret = -ENOMEM; goto fail; @@ -288,7 +288,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, s->bat_offset = be64_to_cpu(dyndisk_header->table_offset); - ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable, pagetable_size); + ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable, + pagetable_size); if (ret < 0) { goto fail; } @@ 
-308,7 +309,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags, } } - if (s->free_data_block_offset > bdrv_getlength(bs->file)) { + if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) { error_setg(errp, "block-vpc: free_data_block_offset points after " "the end of file. The image has been truncated."); ret = -EINVAL; @@ -383,7 +384,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs, s->last_bitmap_offset = bitmap_offset; memset(bitmap, 0xff, s->bitmap_size); - bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size); + bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size); } return block_offset; @@ -401,7 +402,7 @@ static int rewrite_footer(BlockDriverState* bs) BDRVVPCState *s = bs->opaque; int64_t offset = s->free_data_block_offset; - ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE); + ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE); if (ret < 0) return ret; @@ -436,7 +437,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) // Initialize the block's bitmap memset(bitmap, 0xff, s->bitmap_size); - ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap, + ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap, s->bitmap_size); if (ret < 0) { return ret; @@ -451,7 +452,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num) // Write BAT entry to disk bat_offset = s->bat_offset + (4 * index); bat_value = cpu_to_be32(s->pagetable[index]); - ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4); + ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4); if (ret < 0) goto fail; @@ -485,7 +486,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num, VHDFooter *footer = (VHDFooter *) s->footer_buf; if (be32_to_cpu(footer->type) == VHD_FIXED) { - return bdrv_read(bs->file, sector_num, buf, nb_sectors); + return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors); } 
while (nb_sectors > 0) { offset = get_sector_offset(bs, sector_num, 0); @@ -499,7 +500,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num, if (offset == -1) { memset(buf, 0, sectors * BDRV_SECTOR_SIZE); } else { - ret = bdrv_pread(bs->file, offset, buf, + ret = bdrv_pread(bs->file->bs, offset, buf, sectors * BDRV_SECTOR_SIZE); if (ret != sectors * BDRV_SECTOR_SIZE) { return -1; @@ -534,7 +535,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num, VHDFooter *footer = (VHDFooter *) s->footer_buf; if (be32_to_cpu(footer->type) == VHD_FIXED) { - return bdrv_write(bs->file, sector_num, buf, nb_sectors); + return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors); } while (nb_sectors > 0) { offset = get_sector_offset(bs, sector_num, 1); @@ -551,7 +552,8 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num, return -1; } - ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE); + ret = bdrv_pwrite(bs->file->bs, offset, buf, + sectors * BDRV_SECTOR_SIZE); if (ret != sectors * BDRV_SECTOR_SIZE) { return -1; } @@ -794,7 +796,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp) goto out; } ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL, - NULL, &local_err); + &local_err); if (ret < 0) { error_propagate(errp, local_err); goto out; @@ -878,7 +880,7 @@ static int vpc_has_zero_init(BlockDriverState *bs) VHDFooter *footer = (VHDFooter *) s->footer_buf; if (be32_to_cpu(footer->type) == VHD_FIXED) { - return bdrv_has_zero_init(bs->file); + return bdrv_has_zero_init(bs->file->bs); } else { return 1; } diff --git a/block/vvfat.c b/block/vvfat.c index 206869712..b184eca6f 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -985,12 +985,6 @@ static BDRVVVFATState *vvv = NULL; static int enable_write_target(BDRVVVFATState *s, Error **errp); static int is_consistent(BDRVVVFATState *s); -static void vvfat_rebind(BlockDriverState *bs) -{ - BDRVVVFATState *s = bs->opaque; - s->bs = bs; 
-} - static QemuOptsList runtime_opts = { .name = "vvfat", .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), @@ -2923,9 +2917,12 @@ static BlockDriver vvfat_write_target = { static int enable_write_target(BDRVVVFATState *s, Error **errp) { BlockDriver *bdrv_qcow = NULL; + BlockDriverState *backing; QemuOpts *opts = NULL; int ret; int size = sector2cluster(s, s->sector_count); + QDict *options; + s->used_clusters = calloc(size, 1); array_init(&(s->commits), sizeof(commit_t)); @@ -2956,9 +2953,11 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp) } s->qcow = NULL; - ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL, + options = qdict_new(); + qdict_put(options, "driver", qstring_from_str("qcow")); + ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options, BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, - bdrv_qcow, errp); + errp); if (ret < 0) { goto err; } @@ -2967,10 +2966,13 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp) unlink(s->qcow_filename); #endif - bdrv_set_backing_hd(s->bs, bdrv_new()); - s->bs->backing_hd->drv = &vvfat_write_target; - s->bs->backing_hd->opaque = g_new(void *, 1); - *(void**)s->bs->backing_hd->opaque = s; + backing = bdrv_new(); + bdrv_set_backing_hd(s->bs, backing); + bdrv_unref(backing); + + s->bs->backing->bs->drv = &vvfat_write_target; + s->bs->backing->bs->opaque = g_new(void *, 1); + *(void**)s->bs->backing->bs->opaque = s; return 0; @@ -3004,7 +3006,6 @@ static BlockDriver bdrv_vvfat = { .bdrv_parse_filename = vvfat_parse_filename, .bdrv_file_open = vvfat_open, .bdrv_close = vvfat_close, - .bdrv_rebind = vvfat_rebind, .bdrv_read = vvfat_co_read, .bdrv_write = vvfat_co_write, diff --git a/block/win32-aio.c b/block/win32-aio.c index 64e86827b..bbf2f01c1 100644 --- a/block/win32-aio.c +++ b/block/win32-aio.c @@ -174,7 +174,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile) void win32_aio_detach_aio_context(QEMUWin32AIOState *aio, AioContext *old_context) { - 
aio_set_event_notifier(old_context, &aio->e, NULL); + aio_set_event_notifier(old_context, &aio->e, false, NULL); aio->is_aio_context_attached = false; } @@ -182,7 +182,8 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio, AioContext *new_context) { aio->is_aio_context_attached = true; - aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb); + aio_set_event_notifier(new_context, &aio->e, false, + win32_aio_completion_cb); } QEMUWin32AIOState *win32_aio_init(void) diff --git a/block/write-threshold.c b/block/write-threshold.c index a53c1f5e6..0fe38917c 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -11,7 +11,7 @@ */ #include "block/block_int.h" -#include "block/coroutine.h" +#include "qemu/coroutine.h" #include "block/write-threshold.h" #include "qemu/notify.h" #include "qapi-event.h" |