author     Yonghee Han <onstudy@samsung.com>   2016-07-27 16:42:54 +0900
committer  Yonghee Han <onstudy@samsung.com>   2016-07-27 00:56:08 -0700
commit     a03c4728275d119af5f66c4a69e8d9d5a1730031 (patch)
tree       2b4ed9542884bf8b947076c55c4ef1814217cb69 /block
parent     3158f4a51894e46ecb593bffbfd12824e1d6534a (diff)
Imported Upstream version 2.5.1.1 (upstream/2.5.1.1)
Change-Id: Ie290b0e68882590d8a64fab165a943940b7c98ed
Diffstat (limited to 'block')
-rw-r--r--  block/accounting.c      | 123
-rw-r--r--  block/backup.c          |  87
-rw-r--r--  block/blkdebug.c        |  43
-rw-r--r--  block/blkverify.c       |  71
-rw-r--r--  block/block-backend.c   | 464
-rw-r--r--  block/bochs.c           |   8
-rw-r--r--  block/cloop.c           |  10
-rw-r--r--  block/commit.c          |  13
-rw-r--r--  block/curl.c            |  14
-rw-r--r--  block/dmg.c             |  20
-rw-r--r--  block/gluster.c         |  86
-rw-r--r--  block/io.c              | 280
-rw-r--r--  block/iscsi.c           | 175
-rw-r--r--  block/linux-aio.c       |   5
-rw-r--r--  block/mirror.c          |  65
-rw-r--r--  block/nbd-client.c      |  10
-rw-r--r--  block/nbd.c             |  71
-rw-r--r--  block/nfs.c             |  53
-rw-r--r--  block/parallels.c       |  42
-rw-r--r--  block/qapi.c            | 107
-rw-r--r--  block/qcow.c            |  49
-rw-r--r--  block/qcow2-cache.c     |  86
-rw-r--r--  block/qcow2-cluster.c   | 108
-rw-r--r--  block/qcow2-refcount.c  | 120
-rw-r--r--  block/qcow2-snapshot.c  |  50
-rw-r--r--  block/qcow2.c           | 600
-rw-r--r--  block/qcow2.h           |  30
-rw-r--r--  block/qed-table.c       |   4
-rw-r--r--  block/qed.c             |  66
-rw-r--r--  block/quorum.c          |  85
-rw-r--r--  block/raw-posix.c       | 303
-rw-r--r--  block/raw-win32.c       |   8
-rw-r--r--  block/raw_bsd.c         |  48
-rw-r--r--  block/sheepdog.c        | 207
-rw-r--r--  block/snapshot.c        | 146
-rw-r--r--  block/ssh.c             |  10
-rw-r--r--  block/stream.c          |  37
-rw-r--r--  block/throttle-groups.c |  57
-rw-r--r--  block/vdi.c             |  21
-rw-r--r--  block/vhdx-log.c        |  25
-rw-r--r--  block/vhdx.c            |  40
-rw-r--r--  block/vmdk.c            | 168
-rw-r--r--  block/vpc.c             |  36
-rw-r--r--  block/vvfat.c           |  27
-rw-r--r--  block/win32-aio.c       |   5
-rw-r--r--  block/write-threshold.c |   2
46 files changed, 2605 insertions(+), 1480 deletions(-)
diff --git a/block/accounting.c b/block/accounting.c
index 01d594ffd..185025ec1 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -2,6 +2,7 @@
* QEMU System Emulator block accounting
*
* Copyright (c) 2011 Christoph Hellwig
+ * Copyright (c) 2015 Igalia, S.L.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -25,6 +26,54 @@
#include "block/accounting.h"
#include "block/block_int.h"
#include "qemu/timer.h"
+#include "sysemu/qtest.h"
+
+static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
+static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
+
+void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+ bool account_failed)
+{
+ stats->account_invalid = account_invalid;
+ stats->account_failed = account_failed;
+
+ if (qtest_enabled()) {
+ clock_type = QEMU_CLOCK_VIRTUAL;
+ }
+}
+
+void block_acct_cleanup(BlockAcctStats *stats)
+{
+ BlockAcctTimedStats *s, *next;
+ QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
+ g_free(s);
+ }
+}
+
+void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
+{
+ BlockAcctTimedStats *s;
+ unsigned i;
+
+ s = g_new0(BlockAcctTimedStats, 1);
+ s->interval_length = interval_length;
+ QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
+
+ for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
+ timed_average_init(&s->latency[i], clock_type,
+ (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
+ }
+}
+
+BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
+ BlockAcctTimedStats *s)
+{
+ if (s == NULL) {
+ return QSLIST_FIRST(&stats->intervals);
+ } else {
+ return QSLIST_NEXT(s, entries);
+ }
+}
void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
int64_t bytes, enum BlockAcctType type)
@@ -32,26 +81,69 @@ void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
assert(type < BLOCK_MAX_IOTYPE);
cookie->bytes = bytes;
- cookie->start_time_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ cookie->start_time_ns = qemu_clock_get_ns(clock_type);
cookie->type = type;
}
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
+ BlockAcctTimedStats *s;
+ int64_t time_ns = qemu_clock_get_ns(clock_type);
+ int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+ if (qtest_enabled()) {
+ latency_ns = qtest_latency_ns;
+ }
+
assert(cookie->type < BLOCK_MAX_IOTYPE);
stats->nr_bytes[cookie->type] += cookie->bytes;
stats->nr_ops[cookie->type]++;
- stats->total_time_ns[cookie->type] +=
- qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - cookie->start_time_ns;
+ stats->total_time_ns[cookie->type] += latency_ns;
+ stats->last_access_time_ns = time_ns;
+
+ QSLIST_FOREACH(s, &stats->intervals, entries) {
+ timed_average_account(&s->latency[cookie->type], latency_ns);
+ }
}
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
+{
+ assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+ stats->failed_ops[cookie->type]++;
+
+ if (stats->account_failed) {
+ BlockAcctTimedStats *s;
+ int64_t time_ns = qemu_clock_get_ns(clock_type);
+ int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+ if (qtest_enabled()) {
+ latency_ns = qtest_latency_ns;
+ }
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
- unsigned int nb_sectors)
+ stats->total_time_ns[cookie->type] += latency_ns;
+ stats->last_access_time_ns = time_ns;
+
+ QSLIST_FOREACH(s, &stats->intervals, entries) {
+ timed_average_account(&s->latency[cookie->type], latency_ns);
+ }
+ }
+}
+
+void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
{
- if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
- stats->wr_highest_sector = sector_num + nb_sectors - 1;
+ assert(type < BLOCK_MAX_IOTYPE);
+
+ /* block_acct_done() and block_acct_failed() update
+ * total_time_ns[], but this one does not. The reason is that
+ * invalid requests are accounted during their submission,
+ * therefore there's no actual I/O involved. */
+
+ stats->invalid_ops[type]++;
+
+ if (stats->account_invalid) {
+ stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
}
}
@@ -61,3 +153,20 @@ void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
assert(type < BLOCK_MAX_IOTYPE);
stats->merged[type] += num_requests;
}
+
+int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
+{
+ return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns;
+}
+
+double block_acct_queue_depth(BlockAcctTimedStats *stats,
+ enum BlockAcctType type)
+{
+ uint64_t sum, elapsed;
+
+ assert(type < BLOCK_MAX_IOTYPE);
+
+ sum = timed_average_sum(&stats->latency[type], &elapsed);
+
+ return (double) sum / elapsed;
+}
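
[Annotation, not part of the patch] The hunks above turn accounting.c into a small self-contained API: per-backend init, optional timed-average intervals, and separate paths for completed, failed and invalid requests. As a reading aid, here is a minimal sketch of how a caller might drive it, using only the signatures visible in this diff; the wrapper function names and the 60-second interval are hypothetical.

#include "block/accounting.h"

/* Hypothetical caller; only the block_acct_* signatures and the
 * BLOCK_ACCT_WRITE type come from the patch above. */
static void example_setup(BlockAcctStats *stats)
{
    block_acct_init(stats, true, true);   /* account invalid and failed ops */
    block_acct_add_interval(stats, 60);   /* keep a 60-second timed average */
}

static void example_write(BlockAcctStats *stats, int64_t bytes, int ret)
{
    BlockAcctCookie cookie;

    block_acct_start(stats, &cookie, bytes, BLOCK_ACCT_WRITE);
    /* ... submit the request; ret is its result ... */
    if (ret < 0) {
        block_acct_failed(stats, &cookie);  /* counted; latency only if enabled */
    } else {
        block_acct_done(stats, &cookie);    /* updates totals and timed averages */
    }
}
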
diff --git a/block/backup.c b/block/backup.c
index 965654d52..705bb7766 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -21,6 +21,7 @@
#include "block/blockjob.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
#define BACKUP_CLUSTER_BITS 16
#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
@@ -89,7 +90,8 @@ static void cow_request_end(CowRequest *req)
static int coroutine_fn backup_do_cow(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
- bool *error_is_read)
+ bool *error_is_read,
+ bool is_write_notifier)
{
BackupBlockJob *job = (BackupBlockJob *)bs->job;
CowRequest cow_request;
@@ -129,8 +131,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
- &bounce_qiov);
+ if (is_write_notifier) {
+ ret = bdrv_co_readv_no_serialising(bs,
+ start * BACKUP_SECTORS_PER_CLUSTER,
+ n, &bounce_qiov);
+ } else {
+ ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+ &bounce_qiov);
+ }
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
@@ -190,7 +198,7 @@ static int coroutine_fn backup_before_write_notify(
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
+ return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -208,7 +216,41 @@ static void backup_iostatus_reset(BlockJob *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
- bdrv_iostatus_reset(s->target);
+ if (s->target->blk) {
+ blk_iostatus_reset(s->target->blk);
+ }
+}
+
+static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDriverState *bs = job->common.bs;
+
+ if (ret < 0 || block_job_is_cancelled(&job->common)) {
+ /* Merge the successor back into the parent, delete nothing. */
+ bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+ assert(bm);
+ } else {
+ /* Everything is fine, delete this bitmap and install the backup. */
+ bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+ assert(bm);
+ }
+}
+
+static void backup_commit(BlockJob *job)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ if (s->sync_bitmap) {
+ backup_cleanup_sync_bitmap(s, 0);
+ }
+}
+
+static void backup_abort(BlockJob *job)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ if (s->sync_bitmap) {
+ backup_cleanup_sync_bitmap(s, -1);
+ }
}
static const BlockJobDriver backup_job_driver = {
@@ -216,6 +258,8 @@ static const BlockJobDriver backup_job_driver = {
.job_type = BLOCK_JOB_TYPE_BACKUP,
.set_speed = backup_set_speed,
.iostatus_reset = backup_iostatus_reset,
+ .commit = backup_commit,
+ .abort = backup_abort,
};
static BlockErrorAction backup_error_action(BackupBlockJob *job,
@@ -303,7 +347,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
return ret;
}
ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+ BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
+ false);
if ((ret < 0) &&
backup_error_action(job, error_is_read, -ret) ==
BLOCK_ERROR_ACTION_REPORT) {
@@ -352,8 +397,10 @@ static void coroutine_fn backup_run(void *opaque)
job->bitmap = hbitmap_alloc(end, 0);
bdrv_set_enable_write_cache(target, true);
- bdrv_set_on_error(target, on_target_error, on_target_error);
- bdrv_iostatus_enable(target);
+ if (target->blk) {
+ blk_set_on_error(target->blk, on_target_error, on_target_error);
+ blk_iostatus_enable(target->blk);
+ }
bdrv_add_before_write_notifier(bs, &before_write);
@@ -408,7 +455,7 @@ static void coroutine_fn backup_run(void *opaque)
}
/* FULL sync mode we copy the whole drive. */
ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+ BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
@@ -428,22 +475,11 @@ static void coroutine_fn backup_run(void *opaque)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
-
- if (job->sync_bitmap) {
- BdrvDirtyBitmap *bm;
- if (ret < 0 || block_job_is_cancelled(&job->common)) {
- /* Merge the successor back into the parent, delete nothing. */
- bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
- assert(bm);
- } else {
- /* Everything is fine, delete this bitmap and install the backup. */
- bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
- assert(bm);
- }
- }
hbitmap_free(job->bitmap);
- bdrv_iostatus_disable(target);
+ if (target->blk) {
+ blk_iostatus_disable(target->blk);
+ }
bdrv_op_unblock_all(target, job->common.blocker);
data = g_malloc(sizeof(*data));
@@ -457,7 +493,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
- Error **errp)
+ BlockJobTxn *txn, Error **errp)
{
int64_t len;
@@ -472,7 +508,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
@@ -539,6 +575,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
sync_bitmap : NULL;
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
+ block_job_txn_add_job(txn, &job->common);
qemu_coroutine_enter(job->common.co, job);
return;
diff --git a/block/blkdebug.c b/block/blkdebug.c
index bc247f46f..dee3a0edf 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -30,6 +30,7 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
+#include "sysemu/qtest.h"
typedef struct BDRVBlkdebugState {
int state;
@@ -426,11 +427,11 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
/* Set initial state */
s->state = 1;
- /* Open the backing file */
- assert(bs->file == NULL);
- ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
- bs, &child_file, false, &local_err);
- if (ret < 0) {
+ /* Open the image file */
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
goto out;
}
@@ -449,7 +450,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
fail_unref:
- bdrv_unref(bs->file);
+ bdrv_unref_child(bs, bs->file);
out:
qemu_opts_del(opts);
return ret;
@@ -510,7 +511,8 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+ return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
}
static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
@@ -532,7 +534,8 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+ return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+ cb, opaque);
}
static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
@@ -551,7 +554,7 @@ static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_flush(bs->file, cb, opaque);
+ return bdrv_aio_flush(bs->file->bs, cb, opaque);
}
@@ -581,9 +584,13 @@ static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
remove_rule(rule);
QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
- printf("blkdebug: Suspended request '%s'\n", r.tag);
+ if (!qtest_enabled()) {
+ printf("blkdebug: Suspended request '%s'\n", r.tag);
+ }
qemu_coroutine_yield();
- printf("blkdebug: Resuming request '%s'\n", r.tag);
+ if (!qtest_enabled()) {
+ printf("blkdebug: Resuming request '%s'\n", r.tag);
+ }
QLIST_REMOVE(&r, next);
g_free(r.tag);
@@ -716,12 +723,12 @@ static bool blkdebug_debug_is_suspended(BlockDriverState *bs, const char *tag)
static int64_t blkdebug_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file);
+ return bdrv_getlength(bs->file->bs);
}
static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
{
- return bdrv_truncate(bs->file, offset);
+ return bdrv_truncate(bs->file->bs, offset);
}
static void blkdebug_refresh_filename(BlockDriverState *bs)
@@ -741,24 +748,24 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
}
}
- if (force_json && !bs->file->full_open_options) {
+ if (force_json && !bs->file->bs->full_open_options) {
/* The config file cannot be recreated, so creating a plain filename
* is impossible */
return;
}
- if (!force_json && bs->file->exact_filename[0]) {
+ if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s",
qdict_get_try_str(bs->options, "config") ?: "",
- bs->file->exact_filename);
+ bs->file->bs->exact_filename);
}
opts = qdict_new();
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
if (strcmp(qdict_entry_key(e), "x-image") &&
diff --git a/block/blkverify.c b/block/blkverify.c
index d277e6322..c5f8e8dcb 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -14,7 +14,7 @@
#include "qapi/qmp/qstring.h"
typedef struct {
- BlockDriverState *test_file;
+ BdrvChild *test_file;
} BDRVBlkverifyState;
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
@@ -123,26 +123,29 @@ static int blkverify_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Open the raw file */
- assert(bs->file == NULL);
- ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
- "raw", bs, &child_file, false, &local_err);
- if (ret < 0) {
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
goto fail;
}
/* Open the test file */
- assert(s->test_file == NULL);
- ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
- "test", bs, &child_format, false, &local_err);
- if (ret < 0) {
+ s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
+ "test", bs, &child_format, false,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
- s->test_file = NULL;
goto fail;
}
ret = 0;
fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
qemu_opts_del(opts);
return ret;
}
@@ -151,7 +154,7 @@ static void blkverify_close(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_unref(s->test_file);
+ bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
}
@@ -159,7 +162,7 @@ static int64_t blkverify_getlength(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- return bdrv_getlength(s->test_file);
+ return bdrv_getlength(s->test_file->bs);
}
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
@@ -238,13 +241,13 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
nb_sectors, cb, opaque);
acb->verify = blkverify_verify_readv;
- acb->buf = qemu_blockalign(bs->file, qiov->size);
+ acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
- bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
+ bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
- bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
+ bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -257,9 +260,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
nb_sectors, cb, opaque);
- bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
+ bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
- bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -271,7 +274,7 @@ static BlockAIOCB *blkverify_aio_flush(BlockDriverState *bs,
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
- return bdrv_aio_flush(s->test_file, cb, opaque);
+ return bdrv_aio_flush(s->test_file->bs, cb, opaque);
}
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -279,13 +282,13 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
{
BDRVBlkverifyState *s = bs->opaque;
- bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+ bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
if (perm) {
return true;
}
- return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
+ return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
}
/* Propagate AioContext changes to ->test_file */
@@ -293,7 +296,7 @@ static void blkverify_detach_aio_context(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_detach_aio_context(s->test_file);
+ bdrv_detach_aio_context(s->test_file->bs);
}
static void blkverify_attach_aio_context(BlockDriverState *bs,
@@ -301,32 +304,38 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_attach_aio_context(s->test_file, new_context);
+ bdrv_attach_aio_context(s->test_file->bs, new_context);
}
static void blkverify_refresh_filename(BlockDriverState *bs)
{
BDRVBlkverifyState *s = bs->opaque;
- /* bs->file has already been refreshed */
- bdrv_refresh_filename(s->test_file);
+ /* bs->file->bs has already been refreshed */
+ bdrv_refresh_filename(s->test_file->bs);
- if (bs->file->full_open_options && s->test_file->full_open_options) {
+ if (bs->file->bs->full_open_options
+ && s->test_file->bs->full_open_options)
+ {
QDict *opts = qdict_new();
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options));
- QINCREF(s->test_file->full_open_options);
- qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+ QINCREF(s->test_file->bs->full_open_options);
+ qdict_put_obj(opts, "test",
+ QOBJECT(s->test_file->bs->full_open_options));
bs->full_open_options = opts;
}
- if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) {
+ if (bs->file->bs->exact_filename[0]
+ && s->test_file->bs->exact_filename[0])
+ {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkverify:%s:%s",
- bs->file->exact_filename, s->test_file->exact_filename);
+ bs->file->bs->exact_filename,
+ s->test_file->bs->exact_filename);
}
}
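
[Annotation, not part of the patch] blkdebug and blkverify above follow the same conversion: the child is now a BdrvChild opened with bdrv_open_child(), dereferenced through ->bs, and released with bdrv_unref_child(). A condensed sketch of that pattern, with error handling trimmed; the function name is illustrative and the usual block_int.h context is assumed.

/* Hypothetical filter-driver open; bdrv_open_child()/bdrv_unref_child() and
 * child_file are as used in the hunks above. */
static int example_filter_open(BlockDriverState *bs, QDict *options,
                               QemuOpts *opts, Error **errp)
{
    Error *local_err = NULL;

    bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
                               bs, &child_file, false, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return -EINVAL;
    }

    /* later accesses go through the child's BlockDriverState,
     * e.g. bdrv_getlength(bs->file->bs); teardown uses
     * bdrv_unref_child(bs, bs->file) instead of bdrv_unref(). */
    return 0;
}
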
diff --git a/block/block-backend.c b/block/block-backend.c
index aee8a1202..419591f26 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -12,12 +12,17 @@
#include "sysemu/block-backend.h"
#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
#include "qapi-event.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
struct BlockBackend {
char *name;
int refcnt;
@@ -29,15 +34,31 @@ struct BlockBackend {
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
void *dev_opaque;
+
+ /* the block size for which the guest device expects atomicity */
+ int guest_block_size;
+
+ /* If the BDS tree is removed, some of its options are stored here (which
+ * can be used to restore those options in the new BDS on insert) */
+ BlockBackendRootState root_state;
+
+ /* I/O stats (display with "info blockstats"). */
+ BlockAcctStats stats;
+
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
};
typedef struct BlockBackendAIOCB {
BlockAIOCB common;
QEMUBH *bh;
+ BlockBackend *blk;
int ret;
} BlockBackendAIOCB;
static const AIOCBInfo block_backend_aiocb_info = {
+ .get_aio_context = blk_aiocb_get_aio_context,
.aiocb_size = sizeof(BlockBackendAIOCB),
};
@@ -126,7 +147,7 @@ BlockBackend *blk_new_open(const char *name, const char *filename,
return NULL;
}
- ret = bdrv_open(&blk->bs, filename, reference, options, flags, NULL, errp);
+ ret = bdrv_open(&blk->bs, filename, reference, options, flags, errp);
if (ret < 0) {
blk_unref(blk);
return NULL;
@@ -145,12 +166,17 @@ static void blk_delete(BlockBackend *blk)
bdrv_unref(blk->bs);
blk->bs = NULL;
}
+ if (blk->root_state.throttle_state) {
+ g_free(blk->root_state.throttle_group);
+ throttle_group_unref(blk->root_state.throttle_state);
+ }
/* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
if (blk->name[0]) {
QTAILQ_REMOVE(&blk_backends, blk, link);
}
g_free(blk->name);
drive_info_del(blk->legacy_dinfo);
+ block_acct_cleanup(&blk->stats);
g_free(blk);
}
@@ -164,6 +190,11 @@ static void drive_info_del(DriveInfo *dinfo)
g_free(dinfo);
}
+int blk_get_refcnt(BlockBackend *blk)
+{
+ return blk ? blk->refcnt : 0;
+}
+
/*
* Increment @blk's reference count.
* @blk must not be null.
@@ -239,6 +270,23 @@ BlockDriverState *blk_bs(BlockBackend *blk)
}
/*
+ * Changes the BlockDriverState attached to @blk
+ */
+void blk_set_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+ bdrv_ref(bs);
+
+ if (blk->bs) {
+ blk->bs->blk = NULL;
+ bdrv_unref(blk->bs);
+ }
+ assert(bs->blk == NULL);
+
+ blk->bs = bs;
+ bs->blk = blk;
+}
+
+/*
* Return @blk's DriveInfo if any, else null.
*/
DriveInfo *blk_legacy_dinfo(BlockBackend *blk)
@@ -292,6 +340,29 @@ void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
}
/*
+ * Disassociates the currently associated BlockDriverState from @blk.
+ */
+void blk_remove_bs(BlockBackend *blk)
+{
+ blk_update_root_state(blk);
+
+ blk->bs->blk = NULL;
+ bdrv_unref(blk->bs);
+ blk->bs = NULL;
+}
+
+/*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+ assert(!blk->bs && !bs->blk);
+ bdrv_ref(bs);
+ blk->bs = bs;
+ bs->blk = blk;
+}
+
+/*
* Attach device model @dev to @blk.
* Return 0 on success, -EBUSY when a device model is attached already.
*/
@@ -303,7 +374,7 @@ int blk_attach_dev(BlockBackend *blk, void *dev)
}
blk_ref(blk);
blk->dev = dev;
- bdrv_iostatus_reset(blk->bs);
+ blk_iostatus_reset(blk);
return 0;
}
@@ -330,7 +401,7 @@ void blk_detach_dev(BlockBackend *blk, void *dev)
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
- bdrv_set_guest_block_size(blk->bs, 512);
+ blk->guest_block_size = 512;
blk_unref(blk);
}
@@ -364,18 +435,15 @@ void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops,
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
- bool tray_was_closed = !blk_dev_is_tray_open(blk);
+ bool tray_was_open, tray_is_open;
+ tray_was_open = blk_dev_is_tray_open(blk);
blk->dev_ops->change_media_cb(blk->dev_opaque, load);
- if (tray_was_closed) {
- /* tray open */
- qapi_event_send_device_tray_moved(blk_name(blk),
- true, &error_abort);
- }
- if (load) {
- /* tray close */
- qapi_event_send_device_tray_moved(blk_name(blk),
- false, &error_abort);
+ tray_is_open = blk_dev_is_tray_open(blk);
+
+ if (tray_was_open != tray_is_open) {
+ qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
+ &error_abort);
}
}
}
@@ -390,6 +458,14 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
}
/*
+ * Does @blk's attached device model have a tray?
+ */
+bool blk_dev_has_tray(BlockBackend *blk)
+{
+ return blk->dev_ops && blk->dev_ops->is_tray_open;
+}
+
+/*
* Notify @blk's attached device model of a media eject request.
* If @force is true, the medium is about to be yanked out forcefully.
*/
@@ -405,7 +481,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
- if (blk->dev_ops && blk->dev_ops->is_tray_open) {
+ if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
return false;
@@ -435,7 +511,47 @@ void blk_dev_resize_cb(BlockBackend *blk)
void blk_iostatus_enable(BlockBackend *blk)
{
- bdrv_iostatus_enable(blk->bs);
+ blk->iostatus_enabled = true;
+ blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool blk_iostatus_is_enabled(const BlockBackend *blk)
+{
+ return (blk->iostatus_enabled &&
+ (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+ blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
+ blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
+}
+
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
+{
+ return blk->iostatus;
+}
+
+void blk_iostatus_disable(BlockBackend *blk)
+{
+ blk->iostatus_enabled = false;
+}
+
+void blk_iostatus_reset(BlockBackend *blk)
+{
+ if (blk_iostatus_is_enabled(blk)) {
+ blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+ if (blk->bs && blk->bs->job) {
+ block_job_iostatus_reset(blk->bs->job);
+ }
+ }
+}
+
+void blk_iostatus_set_err(BlockBackend *blk, int error)
+{
+ assert(blk_iostatus_is_enabled(blk));
+ if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+ BLOCK_DEVICE_IO_STATUS_FAILED;
+ }
}
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
@@ -447,7 +563,7 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return -EIO;
}
- if (!blk_is_inserted(blk)) {
+ if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
@@ -534,13 +650,15 @@ static void error_callback_bh(void *opaque)
qemu_aio_unref(acb);
}
-static BlockAIOCB *abort_aio_request(BlockBackend *blk, BlockCompletionFunc *cb,
- void *opaque, int ret)
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+ BlockCompletionFunc *cb,
+ void *opaque, int ret)
{
struct BlockBackendAIOCB *acb;
QEMUBH *bh;
acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+ acb->blk = blk;
acb->ret = ret;
bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
@@ -556,7 +674,7 @@ BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ return blk_abort_aio_request(blk, cb, opaque, ret);
}
return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
@@ -585,16 +703,28 @@ int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
int64_t blk_getlength(BlockBackend *blk)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_getlength(blk->bs);
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
- bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+ if (!blk->bs) {
+ *nb_sectors_ptr = 0;
+ } else {
+ bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+ }
}
int64_t blk_nb_sectors(BlockBackend *blk)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_nb_sectors(blk->bs);
}
@@ -604,7 +734,7 @@ BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ return blk_abort_aio_request(blk, cb, opaque, ret);
}
return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
@@ -616,7 +746,7 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ return blk_abort_aio_request(blk, cb, opaque, ret);
}
return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
@@ -625,6 +755,10 @@ BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
+ if (!blk_is_available(blk)) {
+ return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+ }
+
return bdrv_aio_flush(blk->bs, cb, opaque);
}
@@ -634,7 +768,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk,
{
int ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
- return abort_aio_request(blk, cb, opaque, ret);
+ return blk_abort_aio_request(blk, cb, opaque, ret);
}
return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
@@ -666,12 +800,20 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_ioctl(blk->bs, req, buf);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
+ if (!blk_is_available(blk)) {
+ return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+ }
+
return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
}
@@ -687,11 +829,19 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_co_flush(BlockBackend *blk)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_co_flush(blk->bs);
}
int blk_flush(BlockBackend *blk)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_flush(blk->bs);
}
@@ -702,7 +852,9 @@ int blk_flush_all(void)
void blk_drain(BlockBackend *blk)
{
- bdrv_drain(blk->bs);
+ if (blk->bs) {
+ bdrv_drain(blk->bs);
+ }
}
void blk_drain_all(void)
@@ -710,76 +862,178 @@ void blk_drain_all(void)
bdrv_drain_all();
}
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error)
+{
+ blk->on_read_error = on_read_error;
+ blk->on_write_error = on_write_error;
+}
+
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
- return bdrv_get_on_error(blk->bs, is_read);
+ return is_read ? blk->on_read_error : blk->on_write_error;
}
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
- return bdrv_get_error_action(blk->bs, is_read, error);
+ BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ return (error == ENOSPC) ?
+ BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_STOP:
+ return BLOCK_ERROR_ACTION_STOP;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ return BLOCK_ERROR_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ return BLOCK_ERROR_ACTION_IGNORE;
+ default:
+ abort();
+ }
+}
+
+static void send_qmp_error_event(BlockBackend *blk,
+ BlockErrorAction action,
+ bool is_read, int error)
+{
+ IoOperationType optype;
+
+ optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+ qapi_event_send_block_io_error(blk_name(blk), optype, action,
+ blk_iostatus_is_enabled(blk),
+ error == ENOSPC, strerror(error),
+ &error_abort);
}
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
- bdrv_error_action(blk->bs, action, is_read, error);
+ assert(error >= 0);
+
+ if (action == BLOCK_ERROR_ACTION_STOP) {
+ /* First set the iostatus, so that "info block" returns an iostatus
+ * that matches the events raised so far (an additional error iostatus
+ * is fine, but not a lost one).
+ */
+ blk_iostatus_set_err(blk, error);
+
+ /* Then raise the request to stop the VM and the event.
+ * qemu_system_vmstop_request_prepare has two effects. First,
+ * it ensures that the STOP event always comes after the
+ * BLOCK_IO_ERROR event. Second, it ensures that even if management
+ * can observe the STOP event and do a "cont" before the STOP
+ * event is issued, the VM will not stop. In this case, vm_start()
+ * also ensures that the STOP/RESUME pair of events is emitted.
+ */
+ qemu_system_vmstop_request_prepare();
+ send_qmp_error_event(blk, action, is_read, error);
+ qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+ } else {
+ send_qmp_error_event(blk, action, is_read, error);
+ }
}
int blk_is_read_only(BlockBackend *blk)
{
- return bdrv_is_read_only(blk->bs);
+ if (blk->bs) {
+ return bdrv_is_read_only(blk->bs);
+ } else {
+ return blk->root_state.read_only;
+ }
}
int blk_is_sg(BlockBackend *blk)
{
+ if (!blk->bs) {
+ return 0;
+ }
+
return bdrv_is_sg(blk->bs);
}
int blk_enable_write_cache(BlockBackend *blk)
{
- return bdrv_enable_write_cache(blk->bs);
+ if (blk->bs) {
+ return bdrv_enable_write_cache(blk->bs);
+ } else {
+ return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
+ }
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- bdrv_set_enable_write_cache(blk->bs, wce);
+ if (blk->bs) {
+ bdrv_set_enable_write_cache(blk->bs, wce);
+ } else {
+ if (wce) {
+ blk->root_state.open_flags |= BDRV_O_CACHE_WB;
+ } else {
+ blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
+ }
+ }
}
void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
+ if (!blk->bs) {
+ error_setg(errp, "Device '%s' has no medium", blk->name);
+ return;
+ }
+
bdrv_invalidate_cache(blk->bs, errp);
}
-int blk_is_inserted(BlockBackend *blk)
+bool blk_is_inserted(BlockBackend *blk)
+{
+ return blk->bs && bdrv_is_inserted(blk->bs);
+}
+
+bool blk_is_available(BlockBackend *blk)
{
- return bdrv_is_inserted(blk->bs);
+ return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void blk_lock_medium(BlockBackend *blk, bool locked)
{
- bdrv_lock_medium(blk->bs, locked);
+ if (blk->bs) {
+ bdrv_lock_medium(blk->bs, locked);
+ }
}
void blk_eject(BlockBackend *blk, bool eject_flag)
{
- bdrv_eject(blk->bs, eject_flag);
+ if (blk->bs) {
+ bdrv_eject(blk->bs, eject_flag);
+ }
}
int blk_get_flags(BlockBackend *blk)
{
- return bdrv_get_flags(blk->bs);
+ if (blk->bs) {
+ return bdrv_get_flags(blk->bs);
+ } else {
+ return blk->root_state.open_flags;
+ }
}
int blk_get_max_transfer_length(BlockBackend *blk)
{
- return blk->bs->bl.max_transfer_length;
+ if (blk->bs) {
+ return blk->bs->bl.max_transfer_length;
+ } else {
+ return 0;
+ }
}
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
- bdrv_set_guest_block_size(blk->bs, align);
+ blk->guest_block_size = align;
}
void *blk_blockalign(BlockBackend *blk, size_t size)
@@ -789,40 +1043,64 @@ void *blk_blockalign(BlockBackend *blk, size_t size)
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
+ if (!blk->bs) {
+ return false;
+ }
+
return bdrv_op_is_blocked(blk->bs, op, errp);
}
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
- bdrv_op_unblock(blk->bs, op, reason);
+ if (blk->bs) {
+ bdrv_op_unblock(blk->bs, op, reason);
+ }
}
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
- bdrv_op_block_all(blk->bs, reason);
+ if (blk->bs) {
+ bdrv_op_block_all(blk->bs, reason);
+ }
}
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
- bdrv_op_unblock_all(blk->bs, reason);
+ if (blk->bs) {
+ bdrv_op_unblock_all(blk->bs, reason);
+ }
}
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- return bdrv_get_aio_context(blk->bs);
+ if (blk->bs) {
+ return bdrv_get_aio_context(blk->bs);
+ } else {
+ return qemu_get_aio_context();
+ }
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+ BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+ return blk_get_aio_context(blk_acb->blk);
}
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
- bdrv_set_aio_context(blk->bs, new_context);
+ if (blk->bs) {
+ bdrv_set_aio_context(blk->bs, new_context);
+ }
}
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
- bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
- detach_aio_context, opaque);
+ if (blk->bs) {
+ bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+ detach_aio_context, opaque);
+ }
}
void blk_remove_aio_context_notifier(BlockBackend *blk,
@@ -831,28 +1109,36 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*detach_aio_context)(void *),
void *opaque)
{
- bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
- detach_aio_context, opaque);
+ if (blk->bs) {
+ bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+ detach_aio_context, opaque);
+ }
}
void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
{
- bdrv_add_close_notifier(blk->bs, notify);
+ if (blk->bs) {
+ bdrv_add_close_notifier(blk->bs, notify);
+ }
}
void blk_io_plug(BlockBackend *blk)
{
- bdrv_io_plug(blk->bs);
+ if (blk->bs) {
+ bdrv_io_plug(blk->bs);
+ }
}
void blk_io_unplug(BlockBackend *blk)
{
- bdrv_io_unplug(blk->bs);
+ if (blk->bs) {
+ bdrv_io_unplug(blk->bs);
+ }
}
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
- return bdrv_get_stats(blk->bs);
+ return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -885,6 +1171,10 @@ int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
int blk_truncate(BlockBackend *blk, int64_t offset)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_truncate(blk->bs, offset);
}
@@ -901,20 +1191,94 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_save_vmstate(blk->bs, buf, pos, size);
}
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_load_vmstate(blk->bs, buf, pos, size);
}
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_probe_blocksizes(blk->bs, bsz);
}
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
return bdrv_probe_geometry(blk->bs, geo);
}
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+ assert(blk->bs);
+
+ blk->root_state.open_flags = blk->bs->open_flags;
+ blk->root_state.read_only = blk->bs->read_only;
+ blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+
+ if (blk->root_state.throttle_group) {
+ g_free(blk->root_state.throttle_group);
+ throttle_group_unref(blk->root_state.throttle_state);
+ }
+ if (blk->bs->throttle_state) {
+ const char *name = throttle_group_get_name(blk->bs);
+ blk->root_state.throttle_group = g_strdup(name);
+ blk->root_state.throttle_state = throttle_group_incref(name);
+ } else {
+ blk->root_state.throttle_group = NULL;
+ blk->root_state.throttle_state = NULL;
+ }
+}
+
+/*
+ * Applies the information in the root state to the given BlockDriverState. This
+ * does not include the flags which have to be specified for bdrv_open(), use
+ * blk_get_open_flags_from_root_state() to inquire them.
+ */
+void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
+{
+ bs->detect_zeroes = blk->root_state.detect_zeroes;
+ if (blk->root_state.throttle_group) {
+ bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
+ }
+}
+
+/*
+ * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
+ * supposed to inherit the root state.
+ */
+int blk_get_open_flags_from_root_state(BlockBackend *blk)
+{
+ int bs_flags;
+
+ bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
+ bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
+
+ return bs_flags;
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+ return &blk->root_state;
+}
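
[Annotation, not part of the patch] The error handling moved into BlockBackend above is what device models call on I/O failure. A hedged sketch of the typical call sequence; the helper name is hypothetical, while blk_get_error_action(), blk_error_action() and the BLOCK_ERROR_ACTION_* values are the ones introduced in this hunk.

/* Hypothetical device-model error path built on the BlockBackend API above. */
static bool example_handle_rw_error(BlockBackend *blk, bool is_read, int error)
{
    BlockErrorAction action = blk_get_error_action(blk, is_read, error);

    /* Sets the iostatus, emits BLOCK_IO_ERROR and, for ACTION_STOP,
     * requests a VM stop, as described in the comments above. */
    blk_error_action(blk, action, is_read, error);

    /* Caller retries or fails the request depending on the action. */
    return action != BLOCK_ERROR_ACTION_IGNORE;
}
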
diff --git a/block/bochs.c b/block/bochs.c
index 199ac2b9a..18949b9d4 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -103,7 +103,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = 1; // no write support yet
- ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
+ ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
if (ret < 0) {
return ret;
}
@@ -137,7 +137,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
+ ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);
if (ret < 0) {
goto fail;
@@ -206,7 +206,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
- ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+ ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
&bitmap_entry, 1);
if (ret < 0) {
return ret;
@@ -229,7 +229,7 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num,
if (block_offset < 0) {
return block_offset;
} else if (block_offset > 0) {
- ret = bdrv_pread(bs->file, block_offset, buf, 512);
+ ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
if (ret < 0) {
return ret;
}
diff --git a/block/cloop.c b/block/cloop.c
index f328be06f..4190ae06d 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -66,7 +66,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
bs->read_only = 1;
/* read header */
- ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
+ ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
if (ret < 0) {
return ret;
}
@@ -92,7 +92,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
+ ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
return ret;
}
@@ -123,7 +123,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
+ ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
goto fail;
}
@@ -203,8 +203,8 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
int ret;
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
- ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
- bytes);
+ ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+ s->compressed_block, bytes);
if (ret != bytes) {
return -1;
}
diff --git a/block/commit.c b/block/commit.c
index 7312a5bdc..a5d02aa56 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -17,6 +17,7 @@
#include "block/blockjob.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
enum {
/*
@@ -213,7 +214,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, "Invalid parameter combination");
return;
}
@@ -235,14 +236,14 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
orig_overlay_flags = bdrv_get_flags(overlay_bs);
/* convert base & overlay_bs to r/w, if necessary */
- if (!(orig_base_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, base,
- orig_base_flags | BDRV_O_RDWR);
- }
if (!(orig_overlay_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+ reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
orig_overlay_flags | BDRV_O_RDWR);
}
+ if (!(orig_base_flags & BDRV_O_RDWR)) {
+ reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
+ orig_base_flags | BDRV_O_RDWR);
+ }
if (reopen_queue) {
bdrv_reopen_multiple(reopen_queue, &local_err);
if (local_err != NULL) {
diff --git a/block/curl.c b/block/curl.c
index 032cc8ae2..89941826e 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -154,18 +154,20 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
- aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
- NULL, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ curl_multi_read, NULL, state);
break;
case CURL_POLL_OUT:
- aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ NULL, curl_multi_do, state);
break;
case CURL_POLL_INOUT:
- aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
- curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ curl_multi_read, curl_multi_do, state);
break;
case CURL_POLL_REMOVE:
- aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ NULL, NULL, NULL);
break;
}
diff --git a/block/dmg.c b/block/dmg.c
index 9f2528169..546a6f533 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -85,7 +85,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
uint64_t buffer;
int ret;
- ret = bdrv_pread(bs->file, offset, &buffer, 8);
+ ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
if (ret < 0) {
return ret;
}
@@ -99,7 +99,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
uint32_t buffer;
int ret;
- ret = bdrv_pread(bs->file, offset, &buffer, 4);
+ ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
if (ret < 0) {
return ret;
}
@@ -354,7 +354,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
offset += 4;
buffer = g_realloc(buffer, count);
- ret = bdrv_pread(bs->file, offset, buffer, count);
+ ret = bdrv_pread(bs->file->bs, offset, buffer, count);
if (ret < 0) {
goto fail;
}
@@ -391,7 +391,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
buffer = g_malloc(info_length + 1);
buffer[info_length] = '\0';
- ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
+ ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
if (ret != info_length) {
ret = -EINVAL;
goto fail;
@@ -446,7 +446,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
ds.max_sectors_per_chunk = 1;
/* locate the UDIF trailer */
- offset = dmg_find_koly_offset(bs->file, errp);
+ offset = dmg_find_koly_offset(bs->file->bs, errp);
if (offset < 0) {
ret = offset;
goto fail;
@@ -514,9 +514,9 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
}
/* initialize zlib engine */
- s->compressed_chunk = qemu_try_blockalign(bs->file,
+ s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
ds.max_compressed_size + 1);
- s->uncompressed_chunk = qemu_try_blockalign(bs->file,
+ s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
512 * ds.max_sectors_per_chunk);
if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
ret = -ENOMEM;
@@ -592,7 +592,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
- ret = bdrv_pread(bs->file, s->offsets[chunk],
+ ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -616,7 +616,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000006: /* bzip2 compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
- ret = bdrv_pread(bs->file, s->offsets[chunk],
+ ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -641,7 +641,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
break;
#endif /* CONFIG_BZIP2 */
case 1: /* copy */
- ret = bdrv_pread(bs->file, s->offsets[chunk],
+ ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
diff --git a/block/gluster.c b/block/gluster.c
index 1eb3a8c39..0857c1464 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -429,28 +429,23 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
off_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = size;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = size;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
static inline bool gluster_supports_zerofill(void)
@@ -541,35 +536,30 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = size;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = size;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- &gluster_finish_aiocb, acb);
+ gluster_finish_aiocb, &acb);
} else {
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- &gluster_finish_aiocb, acb);
+ gluster_finish_aiocb, &acb);
}
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -600,26 +590,21 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- acb->size = 0;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = 0;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+ ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
#ifdef CONFIG_GLUSTERFS_DISCARD
@@ -627,28 +612,23 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = 0;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = 0;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
#endif
diff --git a/block/io.c b/block/io.c
index d4bc83b33..e00fb5d69 100644
--- a/block/io.c
+++ b/block/io.c
@@ -23,6 +23,7 @@
*/
#include "trace.h"
+#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "block/throttle-groups.h"
@@ -156,38 +157,38 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
/* Take some limits from the children as a default */
if (bs->file) {
- bdrv_refresh_limits(bs->file, &local_err);
+ bdrv_refresh_limits(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
- bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
- bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
- bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment;
- bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
+ bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
+ bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
+ bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
+ bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
} else {
bs->bl.min_mem_alignment = 512;
bs->bl.opt_mem_alignment = getpagesize();
}
- if (bs->backing_hd) {
- bdrv_refresh_limits(bs->backing_hd, &local_err);
+ if (bs->backing) {
+ bdrv_refresh_limits(bs->backing->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
bs->bl.opt_transfer_length =
MAX(bs->bl.opt_transfer_length,
- bs->backing_hd->bl.opt_transfer_length);
+ bs->backing->bs->bl.opt_transfer_length);
bs->bl.max_transfer_length =
MIN_NON_ZERO(bs->bl.max_transfer_length,
- bs->backing_hd->bl.max_transfer_length);
+ bs->backing->bs->bl.max_transfer_length);
bs->bl.opt_mem_alignment =
MAX(bs->bl.opt_mem_alignment,
- bs->backing_hd->bl.opt_mem_alignment);
+ bs->backing->bs->bl.opt_mem_alignment);
bs->bl.min_mem_alignment =
MAX(bs->bl.min_mem_alignment,
- bs->backing_hd->bl.min_mem_alignment);
+ bs->backing->bs->bl.min_mem_alignment);
}
/* Then let the driver override it */
@@ -213,8 +214,10 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs)
}
/* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
+bool bdrv_requests_pending(BlockDriverState *bs)
{
+ BdrvChild *child;
+
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
@@ -224,17 +227,31 @@ static bool bdrv_requests_pending(BlockDriverState *bs)
if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
return true;
}
- if (bs->file && bdrv_requests_pending(bs->file)) {
- return true;
- }
- if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
- return true;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ if (bdrv_requests_pending(child->bs)) {
+ return true;
+ }
}
+
return false;
}
+static void bdrv_drain_recurse(BlockDriverState *bs)
+{
+ BdrvChild *child;
+
+ if (bs->drv && bs->drv->bdrv_drain) {
+ bs->drv->bdrv_drain(bs);
+ }
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_drain_recurse(child->bs);
+ }
+}
+
/*
- * Wait for pending requests to complete on a single BlockDriverState subtree
+ * Wait for pending requests to complete on a single BlockDriverState subtree,
+ * and suspend block driver's internal I/O until next request arrives.
*
* Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
* AioContext.
@@ -247,6 +264,7 @@ void bdrv_drain(BlockDriverState *bs)
{
bool busy = true;
+ bdrv_drain_recurse(bs);
while (busy) {
/* Keep iterating */
bdrv_flush_io_queue(bs);
@@ -344,13 +362,14 @@ static void tracked_request_end(BdrvTrackedRequest *req)
static void tracked_request_begin(BdrvTrackedRequest *req,
BlockDriverState *bs,
int64_t offset,
- unsigned int bytes, bool is_write)
+ unsigned int bytes,
+ enum BdrvTrackedRequestType type)
{
*req = (BdrvTrackedRequest){
.bs = bs,
.offset = offset,
.bytes = bytes,
- .is_write = is_write,
+ .type = type,
.co = qemu_coroutine_self(),
.serialising = false,
.overlap_offset = offset,
@@ -844,7 +863,9 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
mark_request_serialising(req, bdrv_get_cluster_size(bs));
}
- wait_serialising_requests(req);
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
+ wait_serialising_requests(req);
+ }
if (flags & BDRV_REQ_COPY_ON_READ) {
int pnum;
@@ -932,7 +953,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
return ret;
}
- if (bs->copy_on_read) {
+ /* Don't do copy-on-read if we read data before write operation */
+ if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
flags |= BDRV_REQ_COPY_ON_READ;
}
@@ -966,7 +988,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
bytes = ROUND_UP(bytes, align);
}
- tracked_request_begin(&req, bs, offset, bytes, false);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
@@ -1001,6 +1023,15 @@ int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
}
+int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
+ BDRV_REQ_NO_SERIALISING);
+}
+
int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
{
@@ -1127,13 +1158,13 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
} else {
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
if (ret == 0 && !bs->enable_write_cache) {
ret = bdrv_co_flush(bs);
@@ -1141,7 +1172,9 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
bdrv_set_dirty(bs, sector_num, nb_sectors);
- block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
+ if (bs->wr_highest_offset < offset + bytes) {
+ bs->wr_highest_offset = offset + bytes;
+ }
if (ret >= 0) {
bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
@@ -1182,13 +1215,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
/* RMW the unaligned part before head. */
mark_request_serialising(req, align);
wait_serialising_requests(req);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
@@ -1220,13 +1253,13 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
/* RMW the unaligned part after tail. */
mark_request_serialising(req, align);
wait_serialising_requests(req);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, req, offset, align,
align, &local_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
ret = bdrv_aligned_pwritev(bs, req, offset, align,
@@ -1276,7 +1309,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
* Pad qiov with the read parts and be sure to have a tracked request not
* only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
*/
- tracked_request_begin(&req, bs, offset, bytes, true);
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (!qiov) {
ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
@@ -1297,13 +1330,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
};
qemu_iovec_init_external(&head_qiov, &head_iov, 1);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
align, &head_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
qemu_iovec_init(&local_qiov, qiov->niov + 2);
qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
@@ -1331,13 +1364,13 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
};
qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
align, &tail_qiov, 0);
if (ret < 0) {
goto fail;
}
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
if (!use_local_qiov) {
qemu_iovec_init(&local_qiov, qiov->niov + 1);
@@ -1486,7 +1519,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID);
- return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+ return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
*pnum, pnum);
}
@@ -1495,8 +1528,8 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
} else {
if (bdrv_unallocated_blocks_are_zero(bs)) {
ret |= BDRV_BLOCK_ZERO;
- } else if (bs->backing_hd) {
- BlockDriverState *bs2 = bs->backing_hd;
+ } else if (bs->backing) {
+ BlockDriverState *bs2 = bs->backing->bs;
int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
ret |= BDRV_BLOCK_ZERO;
@@ -1509,7 +1542,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
(ret & BDRV_BLOCK_OFFSET_VALID)) {
int file_pnum;
- ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
+ ret2 = bdrv_co_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
*pnum, &file_pnum);
if (ret2 >= 0) {
/* Ignore errors. This is just providing extra information, it
@@ -1541,7 +1574,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
int64_t ret = 0;
assert(bs != base);
- for (p = bs; p != base; p = p->backing_hd) {
+ for (p = bs; p != base; p = backing_bs(p)) {
ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum);
if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
break;
@@ -1604,7 +1637,7 @@ int64_t bdrv_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum)
{
- return bdrv_get_block_status_above(bs, bs->backing_hd,
+ return bdrv_get_block_status_above(bs, backing_bs(bs),
sector_num, nb_sectors, pnum);
}
@@ -1662,7 +1695,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
n = pnum_inter;
}
- intermediate = intermediate->backing_hd;
+ intermediate = backing_bs(intermediate);
}
*pnum = n;
@@ -1713,7 +1746,7 @@ int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
} else if (drv->bdrv_save_vmstate) {
return drv->bdrv_save_vmstate(bs, qiov, pos);
} else if (bs->file) {
- return bdrv_writev_vmstate(bs->file, qiov, pos);
+ return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
}
return -ENOTSUP;
@@ -1728,7 +1761,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
if (drv->bdrv_load_vmstate)
return drv->bdrv_load_vmstate(bs, buf, pos, size);
if (bs->file)
- return bdrv_load_vmstate(bs->file, buf, pos, size);
+ return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
return -ENOTSUP;
}
@@ -1893,7 +1926,10 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
}
}
- block_acct_merge_done(&bs->stats, BLOCK_ACCT_WRITE, num_reqs - outidx - 1);
+ if (bs->blk) {
+ block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
+ num_reqs - outidx - 1);
+ }
return outidx + 1;
}
@@ -2208,7 +2244,7 @@ void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
{
BlockAIOCB *acb;
- acb = g_slice_alloc(aiocb_info->aiocb_size);
+ acb = g_malloc(aiocb_info->aiocb_size);
acb->aiocb_info = aiocb_info;
acb->bs = bs;
acb->cb = cb;
@@ -2228,7 +2264,7 @@ void qemu_aio_unref(void *p)
BlockAIOCB *acb = p;
assert(acb->refcnt > 0);
if (--acb->refcnt == 0) {
- g_slice_free1(acb->aiocb_info->aiocb_size, acb);
+ g_free(acb);
}
}
@@ -2298,18 +2334,20 @@ static void coroutine_fn bdrv_flush_co_entry(void *opaque)
int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
int ret;
+ BdrvTrackedRequest req;
if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
bdrv_is_sg(bs)) {
return 0;
}
+ tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
/* Write back cached data to the OS even with cache=unsafe */
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) {
ret = bs->drv->bdrv_co_flush_to_os(bs);
if (ret < 0) {
- return ret;
+ goto out;
}
}
@@ -2349,14 +2387,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
ret = 0;
}
if (ret < 0) {
- return ret;
+ goto out;
}
/* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
* in the case of cache=unsafe, so there are no useless flushes.
*/
flush_parent:
- return bdrv_co_flush(bs->file);
+ ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
+out:
+ tracked_request_end(&req);
+ return ret;
}
int bdrv_flush(BlockDriverState *bs)
@@ -2399,6 +2440,7 @@ static void coroutine_fn bdrv_discard_co_entry(void *opaque)
int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
+ BdrvTrackedRequest req;
int max_discard, ret;
if (!bs->drv) {
@@ -2421,6 +2463,8 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
+ tracked_request_begin(&req, bs, sector_num, nb_sectors,
+ BDRV_TRACKED_DISCARD);
bdrv_set_dirty(bs, sector_num, nb_sectors);
max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
@@ -2454,20 +2498,24 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
bdrv_co_io_em_complete, &co);
if (acb == NULL) {
- return -EIO;
+ ret = -EIO;
+ goto out;
} else {
qemu_coroutine_yield();
ret = co.ret;
}
}
if (ret && ret != -ENOTSUP) {
- return ret;
+ goto out;
}
sector_num += num;
nb_sectors -= num;
}
- return 0;
+ ret = 0;
+out:
+ tracked_request_end(&req);
+ return ret;
}
int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
@@ -2496,26 +2544,109 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
return rwco.ret;
}
-/* needed for generic scsi interface */
+typedef struct {
+ CoroutineIOCompletion *co;
+ QEMUBH *bh;
+} BdrvIoctlCompletionData;
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+static void bdrv_ioctl_bh_cb(void *opaque)
+{
+ BdrvIoctlCompletionData *data = opaque;
+
+ bdrv_co_io_em_complete(data->co, -ENOTSUP);
+ qemu_bh_delete(data->bh);
+}
+
+static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest tracked_req;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ BlockAIOCB *acb;
- if (drv && drv->bdrv_ioctl)
- return drv->bdrv_ioctl(bs, req, buf);
- return -ENOTSUP;
+ tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
+ if (!drv || !drv->bdrv_aio_ioctl) {
+ co.ret = -ENOTSUP;
+ goto out;
+ }
+
+ acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
+ if (!acb) {
+ BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
+ data->bh = aio_bh_new(bdrv_get_aio_context(bs),
+ bdrv_ioctl_bh_cb, data);
+ data->co = &co;
+ qemu_bh_schedule(data->bh);
+ }
+ qemu_coroutine_yield();
+out:
+ tracked_request_end(&tracked_req);
+ return co.ret;
+}
+
+typedef struct {
+ BlockDriverState *bs;
+ int req;
+ void *buf;
+ int ret;
+} BdrvIoctlCoData;
+
+static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
+{
+ BdrvIoctlCoData *data = opaque;
+ data->ret = bdrv_co_do_ioctl(data->bs, data->req, data->buf);
+}
+
+/* needed for generic scsi interface */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+ BdrvIoctlCoData data = {
+ .bs = bs,
+ .req = req,
+ .buf = buf,
+ .ret = -EINPROGRESS,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_co_ioctl_entry(&data);
+ } else {
+ Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
+ qemu_coroutine_enter(co, &data);
+ }
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(bs), true);
+ }
+ return data.ret;
+}
+
+static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
+{
+ BlockAIOCBCoroutine *acb = opaque;
+ acb->req.error = bdrv_co_do_ioctl(acb->common.bs,
+ acb->req.req, acb->req.buf);
+ bdrv_co_complete(acb);
}
BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- BlockDriver *drv = bs->drv;
+ BlockAIOCBCoroutine *acb = qemu_aio_get(&bdrv_em_co_aiocb_info,
+ bs, cb, opaque);
+ Coroutine *co;
- if (drv && drv->bdrv_aio_ioctl)
- return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
- return NULL;
+ acb->need_bh = true;
+ acb->req.error = -EINPROGRESS;
+ acb->req.req = req;
+ acb->req.buf = buf;
+ co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
+ qemu_coroutine_enter(co, acb);
+
+ bdrv_co_maybe_schedule_bh(acb);
+ return &acb->common;
}
void *qemu_blockalign(BlockDriverState *bs, size_t size)
@@ -2584,7 +2715,7 @@ void bdrv_io_plug(BlockDriverState *bs)
if (drv && drv->bdrv_io_plug) {
drv->bdrv_io_plug(bs);
} else if (bs->file) {
- bdrv_io_plug(bs->file);
+ bdrv_io_plug(bs->file->bs);
}
}
@@ -2594,7 +2725,7 @@ void bdrv_io_unplug(BlockDriverState *bs)
if (drv && drv->bdrv_io_unplug) {
drv->bdrv_io_unplug(bs);
} else if (bs->file) {
- bdrv_io_unplug(bs->file);
+ bdrv_io_unplug(bs->file->bs);
}
}
@@ -2604,7 +2735,24 @@ void bdrv_flush_io_queue(BlockDriverState *bs)
if (drv && drv->bdrv_flush_io_queue) {
drv->bdrv_flush_io_queue(bs);
} else if (bs->file) {
- bdrv_flush_io_queue(bs->file);
+ bdrv_flush_io_queue(bs->file->bs);
}
bdrv_start_throttled_reqs(bs);
}
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+ if (!bs->quiesce_counter++) {
+ aio_disable_external(bdrv_get_aio_context(bs));
+ }
+ bdrv_drain(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+ assert(bs->quiesce_counter > 0);
+ if (--bs->quiesce_counter > 0) {
+ return;
+ }
+ aio_enable_external(bdrv_get_aio_context(bs));
+}
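Among the io.c changes above, bdrv_drained_begin()/bdrv_drained_end() introduce a nesting quiesce counter: external event sources are disabled only by the outermost begin and re-enabled only once the counter drops back to zero. A minimal self-contained sketch of that counter discipline follows; the Ctx struct and the external_disabled flag are stand-ins for the real aio_disable_external()/aio_enable_external() calls.

/* Minimal sketch of the quiesce counter (invented Ctx type; external_disabled
 * stands in for aio_disable_external()/aio_enable_external()). */
#include <assert.h>
#include <stdio.h>

typedef struct {
    int quiesce_counter;
    int external_disabled;
} Ctx;

static void drained_begin(Ctx *c)
{
    if (!c->quiesce_counter++) {
        c->external_disabled = 1;    /* only the outermost begin disables */
    }
    /* ... drain pending requests here ... */
}

static void drained_end(Ctx *c)
{
    assert(c->quiesce_counter > 0);
    if (--c->quiesce_counter > 0) {
        return;                      /* still nested: stay disabled */
    }
    c->external_disabled = 0;        /* outermost end re-enables */
}

int main(void)
{
    Ctx c = {0};

    drained_begin(&c);
    drained_begin(&c);                                      /* nested section */
    drained_end(&c);
    printf("still disabled: %d\n", c.external_disabled);    /* prints 1 */
    drained_end(&c);
    printf("enabled again:  %d\n", !c.external_disabled);   /* prints 1 */
    return 0;
}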
diff --git a/block/iscsi.c b/block/iscsi.c
index 93f1ee4c6..bd1f1bfcd 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -84,6 +84,7 @@ typedef struct IscsiTask {
IscsiLun *iscsilun;
QEMUTimer retry_timer;
bool force_next_flush;
+ int err_code;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -96,6 +97,7 @@ typedef struct IscsiAIOCB {
int status;
int64_t sector_num;
int nb_sectors;
+ int ret;
#ifdef __linux__
sg_io_hdr_t *ioh;
#endif
@@ -169,19 +171,70 @@ static inline unsigned exp_random(double mean)
return -mean * log((double)rand() / RAND_MAX);
}
-/* SCSI_STATUS_TASK_SET_FULL and SCSI_STATUS_TIMEOUT were introduced
- * in libiscsi 1.10.0 as part of an enum. The LIBISCSI_API_VERSION
- * macro was introduced in 1.11.0. So use the API_VERSION macro as
- * a hint that the macros are defined and define them ourselves
- * otherwise to keep the required libiscsi version at 1.9.0 */
-#if !defined(LIBISCSI_API_VERSION)
-#define QEMU_SCSI_STATUS_TASK_SET_FULL 0x28
-#define QEMU_SCSI_STATUS_TIMEOUT 0x0f000002
-#else
-#define QEMU_SCSI_STATUS_TASK_SET_FULL SCSI_STATUS_TASK_SET_FULL
-#define QEMU_SCSI_STATUS_TIMEOUT SCSI_STATUS_TIMEOUT
+/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
+ * libiscsi 1.10.0, together with other constants we need. Use it as
+ * a hint that we have to define them ourselves if needed, to keep the
+ * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for
+ * the test because SCSI_STATUS_* is an enum.
+ *
+ * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
+ * an enum, check against the LIBISCSI_API_VERSION macro, which was
+ * introduced in 1.11.0. If it is present, there is no need to define
+ * anything.
+ */
+#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
+ !defined(LIBISCSI_API_VERSION)
+#define SCSI_STATUS_TASK_SET_FULL 0x28
+#define SCSI_STATUS_TIMEOUT 0x0f000002
+#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600
+#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00
#endif
+static int iscsi_translate_sense(struct scsi_sense *sense)
+{
+ int ret;
+
+ switch (sense->key) {
+ case SCSI_SENSE_NOT_READY:
+ return -EBUSY;
+ case SCSI_SENSE_DATA_PROTECTION:
+ return -EACCES;
+ case SCSI_SENSE_COMMAND_ABORTED:
+ return -ECANCELED;
+ case SCSI_SENSE_ILLEGAL_REQUEST:
+ /* Parse ASCQ */
+ break;
+ default:
+ return -EIO;
+ }
+ switch (sense->ascq) {
+ case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
+ case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
+ case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
+ case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
+ ret = -EINVAL;
+ break;
+ case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
+ ret = -ENOSPC;
+ break;
+ case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
+ ret = -ENOTSUP;
+ break;
+ case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
+ case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
+ case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
+ ret = -ENOMEDIUM;
+ break;
+ case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
+ ret = -EACCES;
+ break;
+ default:
+ ret = -EIO;
+ break;
+ }
+ return ret;
+}
+
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -203,11 +256,11 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
goto out;
}
if (status == SCSI_STATUS_BUSY ||
- status == QEMU_SCSI_STATUS_TIMEOUT ||
- status == QEMU_SCSI_STATUS_TASK_SET_FULL) {
+ status == SCSI_STATUS_TIMEOUT ||
+ status == SCSI_STATUS_TASK_SET_FULL) {
unsigned retry_time =
exp_random(iscsi_retry_times[iTask->retries - 1]);
- if (status == QEMU_SCSI_STATUS_TIMEOUT) {
+ if (status == SCSI_STATUS_TIMEOUT) {
/* make sure the request is rescheduled AFTER the
* reconnect is initiated */
retry_time = EVENT_INTERVAL * 2;
@@ -226,6 +279,7 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
return;
}
}
+ iTask->err_code = iscsi_translate_sense(&task->sense);
error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
} else {
iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
@@ -291,8 +345,8 @@ iscsi_set_events(IscsiLun *iscsilun)
int ev = iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
- aio_set_fd_handler(iscsilun->aio_context,
- iscsi_get_fd(iscsi),
+ aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
+ false,
(ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun);
@@ -455,7 +509,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
@@ -644,7 +698,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
return 0;
@@ -683,7 +737,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
return 0;
@@ -703,7 +757,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
if (status < 0) {
error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
iscsi_get_error(iscsi));
- acb->status = -EIO;
+ acb->status = iscsi_translate_sense(&acb->task->sense);
}
acb->ioh->driver_status = 0;
@@ -726,6 +780,38 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
iscsi_schedule_bh(acb);
}
+static void iscsi_ioctl_bh_completion(void *opaque)
+{
+ IscsiAIOCB *acb = opaque;
+
+ qemu_bh_delete(acb->bh);
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_unref(acb);
+}
+
+static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
+{
+ BlockDriverState *bs = acb->common.bs;
+ IscsiLun *iscsilun = bs->opaque;
+ int ret = 0;
+
+ switch (req) {
+ case SG_GET_VERSION_NUM:
+ *(int *)buf = 30000;
+ break;
+ case SG_GET_SCSI_ID:
+ ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ assert(!acb->bh);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
+ iscsi_ioctl_bh_completion, acb);
+ acb->ret = ret;
+ qemu_bh_schedule(acb->bh);
+}
+
static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
@@ -735,8 +821,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
struct iscsi_data data;
IscsiAIOCB *acb;
- assert(req == SG_IO);
-
acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
acb->iscsilun = iscsilun;
@@ -745,6 +829,11 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb->buf = NULL;
acb->ioh = buf;
+ if (req != SG_IO) {
+ iscsi_ioctl_handle_emulated(acb, req, buf);
+ return &acb->common;
+ }
+
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
@@ -809,38 +898,6 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return &acb->common;
}
-static void ioctl_cb(void *opaque, int status)
-{
- int *p_status = opaque;
- *p_status = status;
-}
-
-static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- IscsiLun *iscsilun = bs->opaque;
- int status;
-
- switch (req) {
- case SG_GET_VERSION_NUM:
- *(int *)buf = 30000;
- break;
- case SG_GET_SCSI_ID:
- ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
- break;
- case SG_IO:
- status = -EINPROGRESS;
- iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
-
- while (status == -EINPROGRESS) {
- aio_poll(iscsilun->aio_context, true);
- }
-
- return 0;
- default:
- return -1;
- }
- return 0;
-}
#endif
static int64_t
@@ -905,7 +962,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
@@ -999,7 +1056,7 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1280,9 +1337,8 @@ static void iscsi_detach_aio_context(BlockDriverState *bs)
{
IscsiLun *iscsilun = bs->opaque;
- aio_set_fd_handler(iscsilun->aio_context,
- iscsi_get_fd(iscsilun->iscsi),
- NULL, NULL, NULL);
+ aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
+ false, NULL, NULL, NULL);
iscsilun->events = 0;
if (iscsilun->nop_timer) {
@@ -1772,7 +1828,6 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
- .bdrv_ioctl = iscsi_ioctl,
.bdrv_aio_ioctl = iscsi_aio_ioctl,
#endif
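The iscsi.c hunks replace blanket -EIO returns with iscsi_translate_sense(), so callers see an errno that reflects the SCSI sense data. The snippet below is a condensed, standalone illustration of that idea using a few of the mappings from the patch (NOT READY -> EBUSY, DATA PROTECTION -> EACCES, COMMAND ABORTED -> ECANCELED, anything else -> EIO); the enum is a local stand-in, not libiscsi's sense-key type.

/* Condensed illustration of sense-to-errno translation; the enum is a local
 * stand-in for the SCSI sense key, not a libiscsi type. */
#include <errno.h>
#include <stdio.h>

enum sense_key {
    KEY_NOT_READY,
    KEY_DATA_PROTECTION,
    KEY_COMMAND_ABORTED,
    KEY_OTHER
};

static int translate_sense(enum sense_key key)
{
    switch (key) {
    case KEY_NOT_READY:        return -EBUSY;     /* device not ready */
    case KEY_DATA_PROTECTION:  return -EACCES;    /* e.g. write protected */
    case KEY_COMMAND_ABORTED:  return -ECANCELED;
    default:                   return -EIO;       /* everything else */
    }
}

int main(void)
{
    printf("NOT READY        -> %d\n", translate_sense(KEY_NOT_READY));
    printf("DATA PROTECTION  -> %d\n", translate_sense(KEY_DATA_PROTECTION));
    printf("COMMAND ABORTED  -> %d\n", translate_sense(KEY_COMMAND_ABORTED));
    return 0;
}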
diff --git a/block/linux-aio.c b/block/linux-aio.c
index c991443c5..88b0520a8 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -287,7 +287,7 @@ void laio_detach_aio_context(void *s_, AioContext *old_context)
{
struct qemu_laio_state *s = s_;
- aio_set_event_notifier(old_context, &s->e, NULL);
+ aio_set_event_notifier(old_context, &s->e, false, NULL);
qemu_bh_delete(s->completion_bh);
}
@@ -296,7 +296,8 @@ void laio_attach_aio_context(void *s_, AioContext *new_context)
struct qemu_laio_state *s = s_;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
- aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+ aio_set_event_notifier(new_context, &s->e, false,
+ qemu_laio_completion_cb);
}
void *laio_init(void)
diff --git a/block/mirror.c b/block/mirror.c
index b2fb4b9b1..0e8f5565a 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -14,6 +14,7 @@
#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
@@ -113,7 +114,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
}
qemu_iovec_destroy(&op->qiov);
- g_slice_free(MirrorOp, op);
+ g_free(op);
if (s->waiting_for_io) {
qemu_coroutine_enter(s->common.co, NULL);
@@ -246,6 +247,10 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
break;
}
+ if (IOV_MAX < nb_chunks + added_chunks) {
+ trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
+ break;
+ }
/* We have enough free space to copy these sectors. */
bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
@@ -260,7 +265,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
} while (delay_ns == 0 && next_sector < end);
/* Allocate a MirrorOp that is used as an AIO callback. */
- op = g_slice_new(MirrorOp);
+ op = g_new(MirrorOp, 1);
op->s = s;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
@@ -349,6 +354,11 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
+ BlockDriverState *src = s->common.bs;
+
+ /* Make sure that the source BDS doesn't go away before we called
+ * block_job_completed(). */
+ bdrv_ref(src);
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -363,14 +373,7 @@ static void mirror_exit(BlockJob *job, void *opaque)
if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
}
- bdrv_swap(s->target, to_replace);
- if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
- /* drop the bs loop chain formed by the swap: break the loop then
- * trigger the unref from the top one */
- BlockDriverState *p = s->base->backing_hd;
- bdrv_set_backing_hd(s->base, NULL);
- bdrv_unref(p);
- }
+ bdrv_replace_in_backing_chain(to_replace, s->target);
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -381,9 +384,12 @@ static void mirror_exit(BlockJob *job, void *opaque)
aio_context_release(replace_aio_context);
}
g_free(s->replaces);
+ bdrv_op_unblock_all(s->target, s->common.blocker);
bdrv_unref(s->target);
block_job_completed(&s->common, data->ret);
g_free(data);
+ bdrv_drained_end(src);
+ bdrv_unref(src);
}
static void coroutine_fn mirror_run(void *opaque)
@@ -427,7 +433,7 @@ static void coroutine_fn mirror_run(void *opaque)
*/
bdrv_get_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
- if (backing_filename[0] && !s->target->backing_hd) {
+ if (backing_filename[0] && !s->target->backing) {
ret = bdrv_get_info(s->target, &bdi);
if (ret < 0) {
goto immediate_exit;
@@ -451,6 +457,8 @@ static void coroutine_fn mirror_run(void *opaque)
if (!s->is_none_mode) {
/* First part, loop on the sectors and initialize the dirty bitmap. */
BlockDriverState *base = s->base;
+ bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
+
for (sector_num = 0; sector_num < end; ) {
/* Just to make sure we are not exceeding int limit. */
int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
@@ -473,7 +481,7 @@ static void coroutine_fn mirror_run(void *opaque)
}
assert(n > 0);
- if (ret == 1) {
+ if (ret == 1 || mark_all_dirty) {
bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
}
sector_num += n;
@@ -594,10 +602,15 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
- bdrv_iostatus_disable(s->target);
+ if (s->target->blk) {
+ blk_iostatus_disable(s->target->blk);
+ }
data = g_malloc(sizeof(*data));
data->ret = ret;
+ /* Before we switch to target in mirror_exit, make sure data doesn't
+ * change. */
+ bdrv_drained_begin(s->common.bs);
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
@@ -616,7 +629,9 @@ static void mirror_iostatus_reset(BlockJob *job)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- bdrv_iostatus_reset(s->target);
+ if (s->target->blk) {
+ blk_iostatus_reset(s->target->blk);
+ }
}
static void mirror_complete(BlockJob *job, Error **errp)
@@ -631,8 +646,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
return;
}
if (!s->synced) {
- error_setg(errp, QERR_BLOCK_JOB_NOT_READY,
- bdrv_get_device_name(job->bs));
+ error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
return;
}
@@ -640,9 +654,9 @@ static void mirror_complete(BlockJob *job, Error **errp)
if (s->replaces) {
AioContext *replace_aio_context;
- s->to_replace = check_to_replace_node(s->replaces, &local_err);
+ s->to_replace = bdrv_find_node(s->replaces);
if (!s->to_replace) {
- error_propagate(errp, local_err);
+ error_setg(errp, "Node name '%s' not found", s->replaces);
return;
}
@@ -700,7 +714,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
return;
}
@@ -732,12 +746,17 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
g_free(s->replaces);
- block_job_release(bs);
+ block_job_unref(&s->common);
return;
}
+
+ bdrv_op_block_all(s->target, s->common.blocker);
+
bdrv_set_enable_write_cache(s->target, true);
- bdrv_set_on_error(s->target, on_target_error, on_target_error);
- bdrv_iostatus_enable(s->target);
+ if (s->target->blk) {
+ blk_set_on_error(s->target->blk, on_target_error, on_target_error);
+ blk_iostatus_enable(s->target->blk);
+ }
s->common.co = qemu_coroutine_create(mirror_run);
trace_mirror_start(bs, s, s->common.co, opaque);
qemu_coroutine_enter(s->common.co, s);
@@ -760,7 +779,7 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
return;
}
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
- base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
+ base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
mirror_start_job(bs, target, replaces,
speed, granularity, buf_size,
on_source_error, on_target_error, unmap, cb, opaque, errp,
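One of the mirror.c changes caps how many dirty chunks may be coalesced into a single operation at IOV_MAX, since readv/writev-style calls reject longer iovec arrays. The toy program below exercises only that guard; the chunk counts are invented and nothing else from the mirror job is modelled.

/* Sketch of the IOV_MAX guard: stop coalescing chunks before the iovec
 * count of one request would exceed the limit. */
#include <limits.h>
#include <stdio.h>
#ifndef IOV_MAX
#define IOV_MAX 1024      /* fallback if the header does not define it */
#endif

int main(void)
{
    int nb_chunks = 0;          /* iovecs already queued for this request */
    int available = 5000;       /* dirty chunks that could still be added */

    while (available > 0) {
        int added_chunks = 64;  /* pretend each pass finds 64 more chunks */

        if (IOV_MAX < nb_chunks + added_chunks) {
            break;              /* same check as the mirror.c patch */
        }
        nb_chunks += added_chunks;
        available -= added_chunks;
    }
    printf("request uses %d iovecs (IOV_MAX=%d)\n", nb_chunks, IOV_MAX);
    return 0;
}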
diff --git a/block/nbd-client.c b/block/nbd-client.c
index e1bb9198c..b7fd17a11 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -124,7 +124,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
- aio_set_fd_handler(aio_context, s->sock,
+ aio_set_fd_handler(aio_context, s->sock, false,
nbd_reply_ready, nbd_restart_write, bs);
if (qiov) {
if (!s->is_unix) {
@@ -144,7 +144,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
} else {
rc = nbd_send_request(s->sock, request);
}
- aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
+ aio_set_fd_handler(aio_context, s->sock, false,
+ nbd_reply_ready, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -348,14 +349,15 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
- nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
+ nbd_get_client_session(bs)->sock,
+ false, NULL, NULL, NULL);
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
- nbd_reply_ready, NULL, bs);
+ false, nbd_reply_ready, NULL, bs);
}
void nbd_client_close(BlockDriverState *bs)
diff --git a/block/nbd.c b/block/nbd.c
index 217618612..cd6a58777 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -43,7 +43,6 @@
typedef struct BDRVNBDState {
NbdClientSession client;
- QemuOpts *socket_opts;
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -190,10 +189,10 @@ out:
g_free(file);
}
-static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
- Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
+ Error **errp)
{
- Error *local_err = NULL;
+ SocketAddress *saddr;
if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
if (qdict_haskey(options, "path")) {
@@ -201,28 +200,37 @@ static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
} else {
error_setg(errp, "one of path and host must be specified.");
}
- return;
+ return NULL;
}
- s->client.is_unix = qdict_haskey(options, "path");
- s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
- &error_abort);
+ saddr = g_new0(SocketAddress, 1);
- qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
+ if (qdict_haskey(options, "path")) {
+ saddr->type = SOCKET_ADDRESS_KIND_UNIX;
+ saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
+ saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path"));
+ qdict_del(options, "path");
+ } else {
+ saddr->type = SOCKET_ADDRESS_KIND_INET;
+ saddr->u.inet = g_new0(InetSocketAddress, 1);
+ saddr->u.inet->host = g_strdup(qdict_get_str(options, "host"));
+ if (!qdict_get_try_str(options, "port")) {
+ saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+ } else {
+ saddr->u.inet->port = g_strdup(qdict_get_str(options, "port"));
+ }
+ qdict_del(options, "host");
+ qdict_del(options, "port");
}
- if (!qemu_opt_get(s->socket_opts, "port")) {
- qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT,
- &error_abort);
- }
+ s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
*export = g_strdup(qdict_get_try_str(options, "export"));
if (*export) {
qdict_del(options, "export");
}
+
+ return saddr;
}
NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
@@ -231,26 +239,24 @@ NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
return &s->client;
}
-static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
+static int nbd_establish_connection(BlockDriverState *bs,
+ SocketAddress *saddr,
+ Error **errp)
{
BDRVNBDState *s = bs->opaque;
int sock;
- if (s->client.is_unix) {
- sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
- } else {
- sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
- if (sock >= 0) {
- socket_set_nodelay(sock);
- }
- }
+ sock = socket_connect(saddr, errp, NULL, NULL);
- /* Failed to establish connection */
if (sock < 0) {
logout("Failed to establish connection to NBD server\n");
return -EIO;
}
+ if (!s->client.is_unix) {
+ socket_set_nodelay(sock);
+ }
+
return sock;
}
@@ -260,19 +266,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
BDRVNBDState *s = bs->opaque;
char *export = NULL;
int result, sock;
- Error *local_err = NULL;
+ SocketAddress *saddr;
/* Pop the config into our state object. Exit if invalid. */
- nbd_config(s, options, &export, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ saddr = nbd_config(s, options, &export, errp);
+ if (!saddr) {
return -EINVAL;
}
/* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
- sock = nbd_establish_connection(bs, errp);
+ sock = nbd_establish_connection(bs, saddr, errp);
+ qapi_free_SocketAddress(saddr);
if (sock < 0) {
g_free(export);
return sock;
@@ -315,9 +321,6 @@ static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
static void nbd_close(BlockDriverState *bs)
{
- BDRVNBDState *s = bs->opaque;
-
- qemu_opts_del(s->socket_opts);
nbd_client_close(bs);
}
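The nbd.c rework replaces QemuOpts parsing with building a SocketAddress directly from the "path" or "host"/"port" options, defaulting the TCP port when it is absent. The sketch below mirrors that option handling with a plain struct instead of the QAPI type; the Addr type and build_addr() are illustrative names only (NBD's default port really is 10809).

/* Illustrative option handling: exactly one of path/host must be given;
 * TCP connections default the port when it is missing. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define DEFAULT_PORT 10809   /* NBD_DEFAULT_PORT */

typedef struct {
    int is_unix;
    char path[128];
    char host[128];
    int port;
} Addr;

static int build_addr(const char *path, const char *host, const char *port,
                      Addr *out)
{
    if ((path != NULL) == (host != NULL)) {
        fprintf(stderr, "exactly one of path and host must be specified\n");
        return -1;
    }
    memset(out, 0, sizeof(*out));
    if (path) {
        out->is_unix = 1;
        snprintf(out->path, sizeof(out->path), "%s", path);
    } else {
        snprintf(out->host, sizeof(out->host), "%s", host);
        out->port = port ? atoi(port) : DEFAULT_PORT;
    }
    return 0;
}

int main(void)
{
    Addr a;

    if (build_addr(NULL, "localhost", NULL, &a) == 0) {
        printf("tcp %s:%d\n", a.host, a.port);   /* tcp localhost:10809 */
    }
    return 0;
}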
diff --git a/block/nfs.c b/block/nfs.c
index 02eb4e464..fd79f8994 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -43,6 +43,7 @@ typedef struct NFSClient {
int events;
bool has_zero_init;
AioContext *aio_context;
+ blkcnt_t st_blocks;
} NFSClient;
typedef struct NFSRPC {
@@ -62,11 +63,10 @@ static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false,
(ev & POLLIN) ? nfs_process_read : NULL,
- (ev & POLLOUT) ? nfs_process_write : NULL,
- client);
+ (ev & POLLOUT) ? nfs_process_write : NULL, client);
}
client->events = ev;
@@ -241,9 +241,8 @@ static void nfs_detach_aio_context(BlockDriverState *bs)
{
NFSClient *client = bs->opaque;
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
- NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false, NULL, NULL, NULL);
client->events = 0;
}
@@ -262,9 +261,8 @@ static void nfs_client_close(NFSClient *client)
if (client->fh) {
nfs_close(client->context, client->fh);
}
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
- NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false, NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
@@ -374,6 +372,7 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
}
ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+ client->st_blocks = st.st_blocks;
client->has_zero_init = S_ISREG(st.st_mode);
goto out;
fail:
@@ -464,6 +463,11 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
NFSRPC task = {0};
struct stat st;
+ if (bdrv_is_read_only(bs) &&
+ !(bs->open_flags & BDRV_O_NOCACHE)) {
+ return client->st_blocks * 512;
+ }
+
task.st = &st;
if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
@@ -484,6 +488,34 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
return nfs_ftruncate(client->context, client->fh, offset);
}
+/* Note that this will not re-establish a connection with the NFS server
+ * - it is effectively a NOP. */
+static int nfs_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ NFSClient *client = state->bs->opaque;
+ struct stat st;
+ int ret = 0;
+
+ if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
+ error_setg(errp, "Cannot open a read-only mount as read-write");
+ return -EACCES;
+ }
+
+ /* Update cache for read-only reopens */
+ if (!(state->flags & BDRV_O_RDWR)) {
+ ret = nfs_fstat(client->context, client->fh, &st);
+ if (ret < 0) {
+ error_setg(errp, "Failed to fstat file: %s",
+ nfs_get_error(client->context));
+ return ret;
+ }
+ client->st_blocks = st.st_blocks;
+ }
+
+ return 0;
+}
+
static BlockDriver bdrv_nfs = {
.format_name = "nfs",
.protocol_name = "nfs",
@@ -499,6 +531,7 @@ static BlockDriver bdrv_nfs = {
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
.bdrv_create = nfs_file_create,
+ .bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
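The nfs.c change caches st_blocks at open and reopen time and serves bdrv_get_allocated_file_size() from it for read-only, cached images, avoiding an extra fstat round trip to the server. As a reminder of the unit involved, st_blocks counts 512-byte blocks regardless of the filesystem block size; the small POSIX program below computes allocated size the same way (it stats itself by default and is not NFS-specific).

/* POSIX sketch: allocated size from st_blocks (always 512-byte units). */
#include <stdio.h>
#include <sys/stat.h>

static long long allocated_bytes(const struct stat *st)
{
    return (long long)st->st_blocks * 512;
}

int main(int argc, char **argv)
{
    const char *path = argc > 1 ? argv[1] : argv[0];  /* stat ourselves */
    struct stat st;

    if (stat(path, &st) != 0) {
        perror("stat");
        return 1;
    }
    printf("%s: %lld bytes allocated, %lld bytes apparent size\n",
           path, allocated_bytes(&st), (long long)st.st_size);
    return 0;
}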
diff --git a/block/parallels.c b/block/parallels.c
index 046b56844..f689fdeaf 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -202,13 +202,13 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
space = to_allocate * s->tracks;
- if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) {
+ if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
int ret;
space += s->prealloc_size;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
- ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0);
+ ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
} else {
- ret = bdrv_truncate(bs->file,
+ ret = bdrv_truncate(bs->file->bs,
(s->data_end + space) << BDRV_SECTOR_BITS);
}
if (ret < 0) {
@@ -220,7 +220,7 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
s->data_end += s->tracks;
bitmap_set(s->bat_dirty_bmap,
- bat_entry_off(idx) / s->bat_dirty_block, 1);
+ bat_entry_off(idx + i) / s->bat_dirty_block, 1);
}
return bat2sect(s, idx) + sector_num % s->tracks;
@@ -244,7 +244,8 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
if (off + to_write > s->header_size) {
to_write = s->header_size - off;
}
- ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write);
+ ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+ to_write);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -303,7 +304,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
- ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
+ ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -343,7 +344,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
- ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
+ ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -369,7 +370,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
bool flush_bat = false;
int cluster_size = s->tracks << BDRV_SECTOR_BITS;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -424,7 +425,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
}
if (flush_bat) {
- ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
+ ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -440,7 +441,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
size - res->image_end_offset);
res->leaks += count;
if (fix & BDRV_FIX_LEAKS) {
- ret = bdrv_truncate(bs->file, res->image_end_offset);
+ ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -476,7 +477,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
file = NULL;
ret = bdrv_open(&file, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
@@ -546,12 +547,13 @@ static int parallels_probe(const uint8_t *buf, int buf_size,
static int parallels_update_header(BlockDriverState *bs)
{
BDRVParallelsState *s = bs->opaque;
- unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader));
+ unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
+ sizeof(ParallelsHeader));
if (size > s->header_size) {
size = s->header_size;
}
- return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+ return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
}
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -564,7 +566,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
- ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
+ ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
}
@@ -603,8 +605,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
}
size = bat_entry_off(s->bat_size);
- s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file));
- s->header = qemu_try_blockalign(bs->file, s->header_size);
+ s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
+ s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
if (s->header == NULL) {
ret = -ENOMEM;
goto fail;
@@ -619,7 +621,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->header_size = size;
}
- ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
+ ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
if (ret < 0) {
goto fail;
}
@@ -663,8 +665,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
if (local_err != NULL) {
goto fail_options;
}
- if (!bdrv_has_zero_init(bs->file) ||
- bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) {
+ if (!bdrv_has_zero_init(bs->file->bs) ||
+ bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
}
@@ -707,7 +709,7 @@ static void parallels_close(BlockDriverState *bs)
}
if (bs->open_flags & BDRV_O_RDWR) {
- bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS);
+ bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
}
g_free(s->bat_dirty_bmap);
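The parallels.c hunk in allocate_clusters() is a fix: when several BAT entries are allocated in one call, the dirty-block bitmap must be marked at bat_entry_off(idx + i) for each entry, not repeatedly at bat_entry_off(idx). The sketch below replays that loop with made-up header, entry, and bitmap sizes to show which blocks end up marked.

/* Toy replay of the fixed loop; all sizes here are invented for the demo. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define HEADER_SIZE   4096          /* made-up header before the BAT */
#define ENTRY_SIZE    4             /* each BAT entry is 4 bytes */
#define DIRTY_BLOCK   512           /* granularity of the dirty bitmap */

static uint8_t dirty[64];           /* 1 bit per DIRTY_BLOCK-sized block */

static void mark_dirty(unsigned block)
{
    dirty[block / 8] |= 1u << (block % 8);
}

static unsigned bat_entry_off(unsigned idx)
{
    return HEADER_SIZE + idx * ENTRY_SIZE;
}

int main(void)
{
    unsigned idx = 100, to_allocate = 300, i;

    memset(dirty, 0, sizeof(dirty));
    for (i = 0; i < to_allocate; i++) {
        /* ... update BAT entry idx + i ... */
        mark_dirty(bat_entry_off(idx + i) / DIRTY_BLOCK);  /* idx + i, not idx */
    }
    printf("first dirty block: %u\n", bat_entry_off(idx) / DIRTY_BLOCK);
    printf("last dirty block:  %u\n",
           bat_entry_off(idx + to_allocate - 1) / DIRTY_BLOCK);
    return 0;
}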
diff --git a/block/qapi.c b/block/qapi.c
index 2ce509711..267f147fe 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -64,7 +64,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
info->detect_zeroes = bs->detect_zeroes;
- if (bs->io_limits_enabled) {
+ if (bs->throttle_state) {
ThrottleConfig cfg;
throttle_group_get_config(bs, &cfg);
@@ -110,8 +110,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
qapi_free_BlockDeviceInfo(info);
return NULL;
}
- if (bs0->drv && bs0->backing_hd) {
- bs0 = bs0->backing_hd;
+ if (bs0->drv && bs0->backing) {
+ bs0 = bs0->backing->bs;
(*p_image_info)->has_backing_image = true;
p_image_info = &((*p_image_info)->backing_image);
} else {
@@ -301,17 +301,17 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
info->tray_open = blk_dev_is_tray_open(blk);
}
- if (bdrv_iostatus_is_enabled(bs)) {
+ if (blk_iostatus_is_enabled(blk)) {
info->has_io_status = true;
- info->io_status = bs->iostatus;
+ info->io_status = blk_iostatus(blk);
}
- if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+ if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
info->has_dirty_bitmaps = true;
info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
}
- if (bs->drv) {
+ if (bs && bs->drv) {
info->has_inserted = true;
info->inserted = bdrv_block_device_info(bs, errp);
if (info->inserted == NULL) {
@@ -344,27 +344,82 @@ static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
}
s->stats = g_malloc0(sizeof(*s->stats));
- s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
- s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
- s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
- s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
- s->stats->rd_merged = bs->stats.merged[BLOCK_ACCT_READ];
- s->stats->wr_merged = bs->stats.merged[BLOCK_ACCT_WRITE];
- s->stats->wr_highest_offset =
- bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
- s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
- s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
- s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
- s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
+ if (bs->blk) {
+ BlockAcctStats *stats = blk_get_stats(bs->blk);
+ BlockAcctTimedStats *ts = NULL;
+
+ s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+ s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+ s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+ s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+
+ s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+ s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+ s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+ s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+ s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+ s->stats->invalid_flush_operations =
+ stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+ s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
+ s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+ s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+ s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+ s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+ s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+ s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
+ if (s->stats->has_idle_time_ns) {
+ s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
+ }
+
+ s->stats->account_invalid = stats->account_invalid;
+ s->stats->account_failed = stats->account_failed;
+
+ while ((ts = block_acct_interval_next(stats, ts))) {
+ BlockDeviceTimedStatsList *timed_stats =
+ g_malloc0(sizeof(*timed_stats));
+ BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+ timed_stats->next = s->stats->timed_stats;
+ timed_stats->value = dev_stats;
+ s->stats->timed_stats = timed_stats;
+
+ TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+ TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+ TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+
+ dev_stats->interval_length = ts->interval_length;
+
+ dev_stats->min_rd_latency_ns = timed_average_min(rd);
+ dev_stats->max_rd_latency_ns = timed_average_max(rd);
+ dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+
+ dev_stats->min_wr_latency_ns = timed_average_min(wr);
+ dev_stats->max_wr_latency_ns = timed_average_max(wr);
+ dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+
+ dev_stats->min_flush_latency_ns = timed_average_min(fl);
+ dev_stats->max_flush_latency_ns = timed_average_max(fl);
+ dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+
+ dev_stats->avg_rd_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+ dev_stats->avg_wr_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+ }
+ }
+
+ s->stats->wr_highest_offset = bs->wr_highest_offset;
if (bs->file) {
s->has_parent = true;
- s->parent = bdrv_query_stats(bs->file, query_backing);
+ s->parent = bdrv_query_stats(bs->file->bs, query_backing);
}
- if (query_backing && bs->backing_hd) {
+ if (query_backing && bs->backing) {
s->has_backing = true;
- s->backing = bdrv_query_stats(bs->backing_hd, query_backing);
+ s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
}
return s;
@@ -381,7 +436,9 @@ BlockInfoList *qmp_query_block(Error **errp)
bdrv_query_info(blk, &info->value, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- goto err;
+ g_free(info);
+ qapi_free_BlockInfoList(head);
+ return NULL;
}
*p_next = info;
@@ -389,10 +446,6 @@ BlockInfoList *qmp_query_block(Error **errp)
}
return head;
-
- err:
- qapi_free_BlockInfoList(head);
- return NULL;
}
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
diff --git a/block/qcow.c b/block/qcow.c
index 01fba54ce..635085e27 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -100,7 +100,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
QCowHeader header;
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
}
@@ -193,7 +193,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -205,7 +205,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
/* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
s->l2_cache =
- qemu_try_blockalign(bs->file,
+ qemu_try_blockalign(bs->file->bs,
s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
if (s->l2_cache == NULL) {
error_setg(errp, "Could not allocate L2 table cache");
@@ -224,7 +224,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- ret = bdrv_pread(bs->file, header.backing_file_offset,
+ ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
goto fail;
@@ -369,13 +369,13 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
if (!allocate)
return 0;
/* allocate a new l2 entry */
- l2_offset = bdrv_getlength(bs->file);
+ l2_offset = bdrv_getlength(bs->file->bs);
/* round to cluster size */
l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite_sync(bs->file,
+ if (bdrv_pwrite_sync(bs->file->bs,
s->l1_table_offset + l1_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
@@ -405,11 +405,12 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l2_table = s->l2_cache + (min_index << s->l2_bits);
if (new_l2_table) {
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
+ if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
- if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
}
@@ -430,20 +431,21 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
overwritten */
if (decompress_cluster(bs, cluster_offset) < 0)
return 0;
- cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = bdrv_getlength(bs->file->bs);
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
- if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
+ s->cluster_size) !=
s->cluster_size)
return -1;
} else {
- cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = bdrv_getlength(bs->file->bs);
if (allocate == 1) {
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
- bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+ bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
if (bs->encrypted &&
@@ -463,7 +465,8 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
errno = EIO;
return -1;
}
- if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+ if (bdrv_pwrite(bs->file->bs,
+ cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
}
@@ -477,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
- if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
+ if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
}
@@ -546,7 +549,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
if (s->cluster_cache_offset != coffset) {
csize = cluster_offset >> (63 - s->cluster_bits);
csize &= (s->cluster_size - 1);
- ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
+ ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
if (ret != csize)
return -1;
if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -594,13 +597,13 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
}
if (!cluster_offset) {
- if (bs->backing_hd) {
+ if (bs->backing) {
/* read from the base image */
hd_iov.iov_base = (void *)buf;
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ ret = bdrv_co_readv(bs->backing->bs, sector_num,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
@@ -625,7 +628,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
+ ret = bdrv_co_readv(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -727,7 +730,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
+ ret = bdrv_co_writev(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -793,7 +796,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
qcow_bs = NULL;
ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto cleanup;
@@ -879,10 +882,10 @@ static int qcow_make_empty(BlockDriverState *bs)
int ret;
memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
+ if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
@@ -962,7 +965,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
}
cluster_offset &= s->cluster_offset_mask;
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
if (ret < 0) {
goto fail;
}
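
The get_cluster_offset() hunks above keep the qcow v1 allocation strategy: a new L2 table or data cluster is placed at the current end of the image file, rounded up to the next cluster boundary, which relies on cluster_size being a power of two. A standalone sketch of that rounding step (illustrative values, not QEMU code):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Round an offset up to the next multiple of cluster_size.
     * cluster_size must be a power of two, as it is in qcow/qcow2. */
    static uint64_t round_to_cluster(uint64_t offset, uint64_t cluster_size)
    {
        return (offset + cluster_size - 1) & ~(cluster_size - 1);
    }

    int main(void)
    {
        uint64_t cluster_size = 4096;    /* illustrative cluster size */
        uint64_t file_length = 123456;   /* e.g. a bdrv_getlength() result */

        /* The new cluster starts at the first aligned offset at or past EOF. */
        printf("new cluster at %" PRIu64 "\n",
               round_to_cluster(file_length, cluster_size));
        return 0;
    }
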
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 53b8afc3d..86dd7f2bd 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -22,16 +22,24 @@
* THE SOFTWARE.
*/
+/* Needed for CONFIG_MADVISE */
+#include "config-host.h"
+
+#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
+#include <sys/mman.h>
+#endif
+
#include "block/block_int.h"
#include "qemu-common.h"
+#include "qemu/osdep.h"
#include "qcow2.h"
#include "trace.h"
typedef struct Qcow2CachedTable {
int64_t offset;
- bool dirty;
uint64_t lru_counter;
int ref;
+ bool dirty;
} Qcow2CachedTable;
struct Qcow2Cache {
@@ -41,34 +49,85 @@ struct Qcow2Cache {
bool depends_on_flush;
void *table_array;
uint64_t lru_counter;
+ uint64_t cache_clean_lru_counter;
};
static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
Qcow2Cache *c, int table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
}
static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
Qcow2Cache *c, void *table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
int idx = table_offset / s->cluster_size;
assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
return idx;
}
+static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
+ int i, int num_tables)
+{
+#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
+ BDRVQcow2State *s = bs->opaque;
+ void *t = qcow2_cache_get_table_addr(bs, c, i);
+ int align = getpagesize();
+ size_t mem_size = (size_t) s->cluster_size * num_tables;
+ size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
+ size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
+ if (length > 0) {
+ qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
+ }
+#endif
+}
+
+static inline bool can_clean_entry(Qcow2Cache *c, int i)
+{
+ Qcow2CachedTable *t = &c->entries[i];
+ return t->ref == 0 && !t->dirty && t->offset != 0 &&
+ t->lru_counter <= c->cache_clean_lru_counter;
+}
+
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int i = 0;
+ while (i < c->size) {
+ int to_clean = 0;
+
+ /* Skip the entries that we don't need to clean */
+ while (i < c->size && !can_clean_entry(c, i)) {
+ i++;
+ }
+
+ /* And count how many we can clean in a row */
+ while (i < c->size && can_clean_entry(c, i)) {
+ c->entries[i].offset = 0;
+ c->entries[i].lru_counter = 0;
+ i++;
+ to_clean++;
+ }
+
+ if (to_clean > 0) {
+ qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+ }
+ }
+
+ c->cache_clean_lru_counter = c->lru_counter;
+}
+
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2Cache *c;
c = g_new0(Qcow2Cache, 1);
c->size = num_tables;
c->entries = g_try_new0(Qcow2CachedTable, num_tables);
- c->table_array = qemu_try_blockalign(bs->file,
+ c->table_array = qemu_try_blockalign(bs->file->bs,
(size_t) num_tables * s->cluster_size);
if (!c->entries || !c->table_array) {
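
qcow2_cache_table_release() above can only hand whole pages back to the kernel, so it rounds the start of the released region up and its length down to the page size before calling qemu_madvise(DONTNEED). A standalone sketch of that trimming, using the Linux/BSD madvise() call instead of QEMU's wrapper (the helper name is illustrative):

    #define _DEFAULT_SOURCE
    #include <stdint.h>
    #include <stddef.h>
    #include <unistd.h>
    #include <sys/mman.h>

    /* Discard the page-aligned interior of [buf, buf + len).  Only pages
     * that lie entirely inside the buffer are released, so neighbouring
     * cache tables in the same allocation are never touched. */
    static void release_interior(void *buf, size_t len)
    {
        size_t page = (size_t)sysconf(_SC_PAGESIZE);
        uintptr_t start = (uintptr_t)buf;

        /* Bytes from 'start' up to the first page boundary inside the buffer. */
        size_t head = ((start + page - 1) & ~(page - 1)) - start;
        if (head >= len) {
            return;                         /* buffer spans no full page */
        }

        size_t aligned_len = (len - head) & ~(page - 1);
        if (aligned_len > 0) {
            madvise((uint8_t *)buf + head, aligned_len, MADV_DONTNEED);
        }
    }

Note that the cache only does this for entries it has already invalidated (offset reset to 0): on anonymous memory the discarded pages read back as zeroes.
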
@@ -113,7 +172,7 @@ static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret = 0;
if (!c->entries[i].dirty || !c->entries[i].offset) {
@@ -126,7 +185,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
if (c->depends) {
ret = qcow2_cache_flush_dependency(bs, c);
} else if (c->depends_on_flush) {
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret >= 0) {
c->depends_on_flush = false;
}
@@ -157,7 +216,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
- ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+ ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
if (ret < 0) {
return ret;
@@ -170,7 +229,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int result = 0;
int ret;
int i;
@@ -185,7 +244,7 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
}
if (result == 0) {
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
result = ret;
}
@@ -237,6 +296,8 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
c->entries[i].lru_counter = 0;
}
+ qcow2_cache_table_release(bs, c, 0, c->size);
+
c->lru_counter = 0;
return 0;
@@ -245,7 +306,7 @@ int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
uint64_t offset, void **table, bool read_from_disk)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
int ret;
int lookup_index;
@@ -295,7 +356,8 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
- ret = bdrv_pread(bs->file, offset, qcow2_cache_get_table_addr(bs, c, i),
+ ret = bdrv_pread(bs->file->bs, offset,
+ qcow2_cache_get_table_addr(bs, c, i),
s->cluster_size);
if (ret < 0) {
return ret;
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 7e94fe70e..24a60e223 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -32,7 +32,7 @@
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
int64_t old_l1_table_offset, old_l1_size;
@@ -72,7 +72,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = qemu_try_blockalign(bs->file,
+ new_l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(new_l1_size2, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
@@ -105,7 +105,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+ ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+ new_l1_table, new_l1_size2);
if (ret < 0)
goto fail;
for(i = 0; i < s->l1_size; i++)
@@ -115,7 +116,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
cpu_to_be32w((uint32_t*)data, new_l1_size);
stq_be_p(data + 4, new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+ data, sizeof(data));
if (ret < 0) {
goto fail;
}
@@ -148,7 +150,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
uint64_t **l2_table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
@@ -163,7 +165,7 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
int l1_start_index;
int i, ret;
@@ -182,8 +184,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf));
+ ret = bdrv_pwrite_sync(bs->file->bs,
+ s->l1_table_offset + 8 * l1_start_index,
+ buf, sizeof(buf));
if (ret < 0) {
return ret;
}
@@ -203,7 +206,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t old_l2_offset;
uint64_t *l2_table = NULL;
int64_t l2_offset;
@@ -309,7 +312,7 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
if (!offset)
return 0;
- assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);
+ assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
for (i = 0; i < nb_clusters; i++) {
uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -321,14 +324,16 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
return i;
}
-static int count_contiguous_free_clusters(int nb_clusters, uint64_t *l2_table)
+static int count_contiguous_clusters_by_type(int nb_clusters,
+ uint64_t *l2_table,
+ int wanted_type)
{
int i;
for (i = 0; i < nb_clusters; i++) {
int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
- if (type != QCOW2_CLUSTER_UNALLOCATED) {
+ if (type != wanted_type) {
break;
}
}
@@ -339,7 +344,7 @@ static int count_contiguous_free_clusters(int nb_clusters, uint64_t *l2_table)
/* The crypt function is compatible with the linux cryptoloop
algorithm for < 4 GB images. NOTE: out_buf == in_buf is
supported */
-int qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc,
Error **errp)
@@ -387,7 +392,7 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
uint64_t cluster_offset,
int n_start, int n_end)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
int n, ret;
@@ -440,7 +445,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+ ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
+ &qiov);
if (ret < 0) {
goto out;
}
@@ -469,7 +475,7 @@ out:
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *cluster_offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
@@ -499,7 +505,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
*cluster_offset = 0;
- /* seek the the l2 offset in the l1 table */
+ /* seek to the l2 offset in the l1 table */
l1_index = offset >> l1_bits;
if (l1_index >= s->l1_size) {
@@ -550,13 +556,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
ret = -EIO;
goto fail;
}
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], QCOW_OFLAG_ZERO);
+ c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+ QCOW2_CLUSTER_ZERO);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_UNALLOCATED:
/* how many empty clusters ? */
- c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+ c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+ QCOW2_CLUSTER_UNALLOCATED);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_NORMAL:
@@ -609,13 +616,13 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
uint64_t **new_l2_table,
int *new_l2_index)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset;
uint64_t *l2_table = NULL;
int ret;
- /* seek the the l2 offset in the l1 table */
+ /* seek to the l2 offset in the l1 table */
l1_index = offset >> (s->l2_bits + s->cluster_bits);
if (l1_index >= s->l1_size) {
@@ -683,7 +690,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index, ret;
uint64_t *l2_table;
int64_t cluster_offset;
@@ -728,7 +735,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
if (r->nb_sectors == 0) {
@@ -757,7 +764,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, *l2_table;
uint64_t cluster_offset = m->alloc_offset;
@@ -817,7 +824,6 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
/*
* If this was a COW, we need to decrease the refcount of the old cluster.
- * Also flush bs->file to get the right order for L2 and refcount update.
*
* Don't discard clusters that reach a refcount of 0 (e.g. compressed
* clusters), the next write will reuse them anyway.
@@ -840,7 +846,7 @@ err:
* write, but require COW to be performed (this includes yet unallocated space,
* which must copy from the backing file)
*/
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
uint64_t *l2_table, int l2_index)
{
int i;
@@ -886,7 +892,7 @@ out:
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *cur_bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowL2Meta *old_alloc;
uint64_t bytes = *cur_bytes;
@@ -959,7 +965,7 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t cluster_offset;
uint64_t *l2_table;
@@ -1067,7 +1073,7 @@ out:
static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
*host_offset, *nb_clusters);
@@ -1115,7 +1121,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t *l2_table;
uint64_t entry;
@@ -1268,7 +1274,7 @@ fail:
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
int *num, uint64_t *host_offset, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start, remaining;
uint64_t cluster_offset;
uint64_t cur_bytes;
@@ -1402,7 +1408,7 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret, csize, nb_csectors, sector_offset;
uint64_t coffset;
@@ -1412,7 +1418,8 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+ ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+ nb_csectors);
if (ret < 0) {
return ret;
}
@@ -1434,7 +1441,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
uint64_t nb_clusters, enum qcow2_discard_type type,
bool full_discard)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table;
int l2_index;
int ret;
@@ -1469,7 +1476,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
*/
switch (qcow2_get_cluster_type(old_l2_entry)) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (full_discard || !bs->backing_hd) {
+ if (full_discard || !bs->backing) {
continue;
}
break;
@@ -1508,7 +1515,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type, bool full_discard)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t end_offset;
uint64_t nb_clusters;
int ret;
@@ -1554,7 +1561,7 @@ fail:
static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
uint64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table;
int l2_index;
int ret;
@@ -1591,7 +1598,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t nb_clusters;
int ret;
@@ -1636,7 +1643,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
int64_t l1_entries,
BlockDriverAmendStatusCB *status_cb)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
uint64_t *l2_table = NULL;
int ret;
@@ -1645,7 +1652,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+ l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
if (l2_table == NULL) {
return -ENOMEM;
}
@@ -1679,8 +1686,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
(void **)&l2_table);
} else {
/* load inactive L2 tables from disk */
- ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
+ ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
}
if (ret < 0) {
goto fail;
@@ -1703,7 +1710,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
}
if (!preallocated) {
- if (!bs->backing_hd) {
+ if (!bs->backing) {
/* not backed; therefore we can simply deallocate the
* cluster */
l2_table[j] = 0;
@@ -1754,7 +1761,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
s->cluster_sectors, 0);
if (ret < 0) {
if (!preallocated) {
@@ -1787,8 +1794,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
+ ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ (void *)l2_table, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
@@ -1823,7 +1830,7 @@ fail:
int qcow2_expand_zero_clusters(BlockDriverState *bs,
BlockDriverAmendStatusCB *status_cb)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL;
int64_t l1_entries = 0, visited_l1_entries = 0;
int ret;
@@ -1861,8 +1868,9 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
- ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
- BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+ ret = bdrv_read(bs->file->bs,
+ s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
+ (void *)l1_table, l1_sectors);
if (ret < 0) {
goto fail;
}
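
Several hunks in this file (qcow2_grow_l1_table() and qcow2_write_l1_entry()) follow the same pattern when updating the L1 table on disk: the affected entries are converted to big-endian in a scratch copy, written in one sector-sized request, and the in-memory table stays host-endian. A standalone sketch of preparing one such 512-byte sector (portable byte-swapping in place of QEMU's cpu_to_be64; names are illustrative):

    #include <stdint.h>
    #include <string.h>

    #define ENTRIES_PER_SECTOR (512 / 8)

    /* Portable conversion of a 64-bit value to big-endian byte order. */
    static uint64_t to_be64(uint64_t v)
    {
        uint8_t b[8];
        uint64_t out;

        for (int i = 0; i < 8; i++) {
            b[i] = (uint8_t)(v >> (56 - 8 * i));
        }
        memcpy(&out, b, sizeof(out));
        return out;
    }

    /* Fill 'out' with the 512-byte sector of the table that contains
     * 'index', byte-swapping the copies and leaving the in-memory
     * (host-endian) table untouched.  The actual write (bdrv_pwrite_sync
     * in QEMU) is not part of this sketch. */
    static void pack_sector(const uint64_t *table, int table_size, int index,
                            uint64_t out[ENTRIES_PER_SECTOR])
    {
        int start = index - (index % ENTRIES_PER_SECTOR);

        memset(out, 0, ENTRIES_PER_SECTOR * sizeof(uint64_t));
        for (int i = 0; i < ENTRIES_PER_SECTOR && start + i < table_size; i++) {
            out[i] = to_be64(table[start + i]);
        }
    }
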
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 0b6c302ee..820f412ab 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -82,7 +82,7 @@ static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
int qcow2_refcount_init(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_size2, i;
int ret;
@@ -101,7 +101,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file, s->refcount_table_offset,
+ ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
if (ret < 0) {
goto fail;
@@ -116,7 +116,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
void qcow2_refcount_close(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
g_free(s->refcount_table);
}
@@ -214,7 +214,7 @@ static int load_refcount_block(BlockDriverState *bs,
int64_t refcount_block_offset,
void **refcount_block)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
@@ -231,7 +231,7 @@ static int load_refcount_block(BlockDriverState *bs,
int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
uint64_t *refcount)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t refcount_table_index, block_index;
int64_t refcount_block_offset;
int ret;
@@ -274,7 +274,7 @@ int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
* Rounds the refcount table size up to avoid growing the table for each single
* refcount block that is allocated.
*/
-static unsigned int next_refcount_table_size(BDRVQcowState *s,
+static unsigned int next_refcount_table_size(BDRVQcow2State *s,
unsigned int min_size)
{
unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
@@ -290,7 +290,7 @@ static unsigned int next_refcount_table_size(BDRVQcowState *s,
/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
uint64_t offset_b)
{
uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
@@ -308,7 +308,7 @@ static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
static int alloc_refcount_block(BlockDriverState *bs,
int64_t cluster_index, void **refcount_block)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_index;
int ret;
@@ -431,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
if (refcount_table_index < s->refcount_table_size) {
uint64_t data64 = cpu_to_be64(new_block);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
- ret = bdrv_pwrite_sync(bs->file,
+ ret = bdrv_pwrite_sync(bs->file->bs,
s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
&data64, sizeof(data64));
if (ret < 0) {
@@ -535,7 +535,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Write refcount blocks to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
- ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
+ ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
blocks_clusters * s->cluster_size);
g_free(new_blocks);
new_blocks = NULL;
@@ -549,7 +549,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
+ ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
table_size * sizeof(uint64_t));
if (ret < 0) {
goto fail_table;
@@ -560,12 +560,16 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
/* Hook up the new refcount table in the qcow2 header */
- uint8_t data[12];
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ struct QEMU_PACKED {
+ uint64_t d64;
+ uint32_t d32;
+ } data;
+ cpu_to_be64w(&data.d64, table_offset);
+ cpu_to_be32w(&data.d32, table_clusters);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data));
+ ret = bdrv_pwrite_sync(bs->file->bs,
+ offsetof(QCowHeader, refcount_table_offset),
+ &data, sizeof(data));
if (ret < 0) {
goto fail_table;
}
@@ -605,7 +609,7 @@ fail_block:
void qcow2_process_discards(BlockDriverState *bs, int ret)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2DiscardRegion *d, *next;
QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
@@ -613,7 +617,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
/* Discard is optional, ignore the return value */
if (ret >= 0) {
- bdrv_discard(bs->file,
+ bdrv_discard(bs->file->bs,
d->offset >> BDRV_SECTOR_BITS,
d->bytes >> BDRV_SECTOR_BITS);
}
@@ -625,7 +629,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
static void update_refcount_discard(BlockDriverState *bs,
uint64_t offset, uint64_t length)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2DiscardRegion *d, *p, *next;
QTAILQ_FOREACH(d, &s->discards, next) {
@@ -682,7 +686,7 @@ static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
bool decrease,
enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t start, last, cluster_offset;
void *refcount_block = NULL;
int64_t old_table_index = -1;
@@ -793,7 +797,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
uint64_t addend, bool decrease,
enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
@@ -815,7 +819,7 @@ int qcow2_update_cluster_refcount(BlockDriverState *bs,
/* return < 0 if error */
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t i, nb_clusters, refcount;
int ret;
@@ -878,7 +882,7 @@ int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size)
int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
int64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t cluster_index, refcount;
uint64_t i;
int ret;
@@ -916,7 +920,7 @@ int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
contiguous sectors. size must be <= cluster_size */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t offset;
size_t free_in_cluster;
int ret;
@@ -949,11 +953,17 @@ int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
offset = new_cluster;
+ free_in_cluster = s->cluster_size;
+ } else {
+ free_in_cluster += s->cluster_size;
}
}
assert(offset);
ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ offset = 0;
+ }
} while (ret == -EAGAIN);
if (ret < 0) {
return ret;
@@ -992,7 +1002,7 @@ void qcow2_free_clusters(BlockDriverState *bs,
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int nb_clusters, enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
switch (qcow2_get_cluster_type(l2_entry)) {
case QCOW2_CLUSTER_COMPRESSED:
@@ -1036,7 +1046,7 @@ void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
bool l1_allocated = false;
int64_t old_offset, old_l2_offset;
@@ -1062,7 +1072,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
l1_allocated = true;
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
goto fail;
}
@@ -1215,7 +1225,8 @@ fail:
cpu_to_be64s(&l1_table[i]);
}
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
+ l1_table, l1_size2);
for (i = 0; i < l1_size; i++) {
be64_to_cpus(&l1_table[i]);
@@ -1233,7 +1244,7 @@ fail:
/* refcount checking functions */
-static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries)
+static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
{
/* This assertion holds because there is no way we can address more than
* 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because
@@ -1256,7 +1267,7 @@ static size_t refcount_array_byte_size(BDRVQcowState *s, uint64_t entries)
* refcount array buffer will be aligned to a cluster boundary, and the newly
* allocated area will be zeroed.
*/
-static int realloc_refcount_array(BDRVQcowState *s, void **array,
+static int realloc_refcount_array(BDRVQcow2State *s, void **array,
int64_t *size, int64_t new_size)
{
int64_t old_byte_size, new_byte_size;
@@ -1298,7 +1309,7 @@ static int realloc_refcount_array(BDRVQcowState *s, void **array,
/*
* Increases the refcount for a range of clusters in a given refcount table.
* This is used to construct a temporary refcount table out of L1 and L2 tables
- * which can be compared the the refcount table saved in the image.
+ * which can be compared to the refcount table saved in the image.
*
* Modifies the number of errors in res.
*/
@@ -1308,7 +1319,7 @@ static int inc_refcounts(BlockDriverState *bs,
int64_t *refcount_table_size,
int64_t offset, int64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start, last, cluster_offset, k, refcount;
int ret;
@@ -1361,7 +1372,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
int64_t *refcount_table_size, int64_t l2_offset,
int flags)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table, l2_entry;
uint64_t next_contiguous_offset = 0;
int i, l2_size, nb_csectors, ret;
@@ -1370,7 +1381,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
l2_size = s->l2_size * sizeof(uint64_t);
l2_table = g_malloc(l2_size);
- ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
+ ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
res->check_errors++;
@@ -1481,7 +1492,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size,
int flags)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL, l2_offset, l1_size2;
int i, ret;
@@ -1502,7 +1513,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
res->check_errors++;
goto fail;
}
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
res->check_errors++;
@@ -1558,7 +1569,7 @@ fail:
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
int ret;
uint64_t refcount;
@@ -1600,7 +1611,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
}
}
- ret = bdrv_pread(bs->file, l2_offset, l2_table,
+ ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1652,7 +1663,8 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
goto fail;
}
- ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
+ ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
+ s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
strerror(-ret));
@@ -1677,7 +1689,7 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
void **refcount_table, int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i, size;
int ret;
@@ -1707,11 +1719,11 @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
goto resize_fail;
}
- ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+ ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
if (ret < 0) {
goto resize_fail;
}
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
ret = size;
goto resize_fail;
@@ -1780,7 +1792,7 @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
void **refcount_table, int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i;
QCowSnapshot *sn;
int ret;
@@ -1844,7 +1856,7 @@ static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
int64_t *highest_cluster,
void *refcount_table, int64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i;
uint64_t refcount1, refcount2;
int ret;
@@ -1921,7 +1933,7 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
int64_t *imrt_nb_clusters,
int64_t *first_free_cluster)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t cluster = *first_free_cluster, i;
bool first_gap = true;
int contiguous_free_clusters;
@@ -1991,7 +2003,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
void **refcount_table,
int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
int64_t refblock_offset, refblock_start, refblock_index;
uint32_t reftable_size = 0;
@@ -2085,7 +2097,7 @@ write_refblocks:
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
- ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
on_disk_refblock, s->cluster_sectors);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2134,7 +2146,7 @@ write_refblocks:
}
assert(reftable_size < INT_MAX / sizeof(uint64_t));
- ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+ ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
reftable_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2146,8 +2158,8 @@ write_refblocks:
reftable_offset);
cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
size_to_clusters(s, reftable_size * sizeof(uint64_t)));
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
- refcount_table_offset),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
+ refcount_table_offset),
&reftable_offset_and_clusters,
sizeof(reftable_offset_and_clusters));
if (ret < 0) {
@@ -2178,14 +2190,14 @@ fail:
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
BdrvCheckResult pre_compare_res;
int64_t size, highest_cluster, nb_clusters;
void *refcount_table = NULL;
bool rebuild = false;
int ret;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -2315,7 +2327,7 @@ fail:
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int chk = s->overlap_check & ~ign;
int i, j;
@@ -2394,7 +2406,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
+ ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);
return ret;
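
One non-mechanical change above (in alloc_refcount_block()) replaces a hand-built 12-byte array with a packed struct when hooking the new refcount table into the image header, so the 64-bit offset and the 32-bit cluster count are still covered by a single 12-byte pwrite. A standalone sketch of why the packing matters (the GCC/Clang attribute shown in place of QEMU's QEMU_PACKED macro):

    #include <stdint.h>
    #include <stdio.h>

    struct __attribute__((packed)) reftable_update {
        uint64_t offset_be;     /* refcount table offset, big-endian on disk */
        uint32_t clusters_be;   /* refcount table size in clusters, big-endian */
    };

    /* Without packing, tail padding makes sizeof() 16, so writing
     * sizeof(struct ...) bytes would push 4 stray bytes past the two
     * header fields. */
    struct unpacked_update {
        uint64_t offset_be;
        uint32_t clusters_be;
    };

    int main(void)
    {
        printf("packed:   %zu bytes\n", sizeof(struct reftable_update));   /* 12 */
        printf("unpacked: %zu bytes\n", sizeof(struct unpacked_update));   /* typically 16 */
        return 0;
    }
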
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index b6f58c13e..def720164 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -29,7 +29,7 @@
void qcow2_free_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
for(i = 0; i < s->nb_snapshots; i++) {
@@ -43,7 +43,7 @@ void qcow2_free_snapshots(BlockDriverState *bs)
int qcow2_read_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
QCowSnapshot *sn;
@@ -64,7 +64,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
- ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
+ ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
@@ -83,7 +83,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
name_size = be16_to_cpu(h.name_size);
/* Read extra data */
- ret = bdrv_pread(bs->file, offset, &extra,
+ ret = bdrv_pread(bs->file->bs, offset, &extra,
MIN(sizeof(extra), extra_data_size));
if (ret < 0) {
goto fail;
@@ -102,7 +102,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
+ ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
@@ -111,7 +111,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file, offset, sn->name, name_size);
+ ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -136,7 +136,7 @@ fail:
/* add at the end of the file a new list of snapshots */
static int qcow2_write_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
@@ -214,25 +214,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
- ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
+ ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
- ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
+ ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
if (ret < 0) {
goto fail;
}
offset += sizeof(extra);
- ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
+ ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
- ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
+ ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -254,7 +254,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
&header_data, sizeof(header_data));
if (ret < 0) {
goto fail;
@@ -278,7 +278,7 @@ fail:
static void find_new_snapshot_id(BlockDriverState *bs,
char *id_str, int id_str_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
int i;
unsigned long id, id_max = 0;
@@ -296,7 +296,7 @@ static int find_snapshot_by_id_and_name(BlockDriverState *bs,
const char *id,
const char *name)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
if (id && name) {
@@ -338,7 +338,7 @@ static int find_snapshot_by_id_or_name(BlockDriverState *bs,
/* if no id is provided, a new one is constructed */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *new_snapshot_list = NULL;
QCowSnapshot *old_snapshot_list = NULL;
QCowSnapshot sn1, *sn = &sn1;
@@ -396,7 +396,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
}
- ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
+ ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -461,7 +461,7 @@ fail:
/* copy the snapshot 'snapshot_name' into the current disk image */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
int i, snapshot_index;
int cur_l1_bytes, sn_l1_bytes;
@@ -509,7 +509,8 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
- ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+ sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
}
@@ -526,7 +527,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
+ ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
goto fail;
@@ -587,7 +588,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
const char *name,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot sn;
int snapshot_index, ret;
@@ -650,7 +651,7 @@ int qcow2_snapshot_delete(BlockDriverState *bs,
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QEMUSnapshotInfo *sn_tab, *sn_info;
QCowSnapshot *sn;
int i;
@@ -683,7 +684,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
Error **errp)
{
int i, snapshot_index;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
uint64_t *new_l1_table;
int new_l1_bytes;
@@ -706,13 +707,14 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
- new_l1_table = qemu_try_blockalign(bs->file,
+ new_l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(new_l1_bytes, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+ ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+ new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
qemu_vfree(new_l1_table);
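
The snapshot read/write hunks above walk a variable-length on-disk table: each record is a fixed header, extra data, then the id and name strings, and every record starts on an 8-byte boundary (align_offset(offset, 8)). A standalone sketch of stepping over one record (the size parameters stand in for sizeof(QCowSnapshotHeader), the extra_data_size field and the string lengths from the header):

    #include <stdint.h>
    #include <stddef.h>

    /* Align an offset up to an 8-byte boundary, like align_offset(offset, 8). */
    static uint64_t align8(uint64_t offset)
    {
        return (offset + 7) & ~UINT64_C(7);
    }

    /* Offset just past one snapshot record that begins at 'offset'. */
    static uint64_t snapshot_record_end(uint64_t offset,
                                        size_t header_size, size_t extra_size,
                                        size_t id_len, size_t name_len)
    {
        offset = align8(offset);          /* records are 8-byte aligned */
        offset += header_size;
        offset += extra_size;
        offset += id_len;
        offset += name_len;
        return offset;
    }
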
diff --git a/block/qcow2.c b/block/qcow2.c
index 76c331b38..88f56c886 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -85,7 +85,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uint64_t end_offset, void **p_feature_table,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowExtension ext;
uint64_t offset;
int ret;
@@ -104,7 +104,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
printf("attempting to read extended header in offset %lu\n", offset);
#endif
- ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
+ ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
if (ret < 0) {
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
"pread fail from offset %" PRIu64, offset);
@@ -132,7 +132,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
sizeof(bs->backing_format));
return 2;
}
- ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
+ ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
"Could not read format name");
@@ -148,7 +148,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
if (p_feature_table != NULL) {
void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
+ ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
"Could not read table");
@@ -169,7 +169,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uext->len = ext.len;
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
- ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+ ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
"Could not read data");
@@ -187,7 +187,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
static void cleanup_unknown_header_ext(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2UnknownHeaderExtension *uext, *next;
QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
@@ -249,7 +249,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
*/
int qcow2_mark_dirty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t val;
int ret;
@@ -260,12 +260,12 @@ int qcow2_mark_dirty(BlockDriverState *bs)
}
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
- ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
+ ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
&val, sizeof(val));
if (ret < 0) {
return ret;
}
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
return ret;
}
@@ -282,7 +282,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
*/
static int qcow2_mark_clean(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
int ret;
@@ -304,7 +304,7 @@ static int qcow2_mark_clean(BlockDriverState *bs)
*/
int qcow2_mark_corrupt(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
return qcow2_update_header(bs);
@@ -316,7 +316,7 @@ int qcow2_mark_corrupt(BlockDriverState *bs)
*/
int qcow2_mark_consistent(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
int ret = bdrv_flush(bs);
@@ -351,7 +351,7 @@ static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
uint64_t entries, size_t entry_len)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t size;
/* Use signed INT64_MAX as the maximum even for uint64_t header fields,
@@ -467,6 +467,11 @@ static QemuOptsList qcow2_runtime_opts = {
.type = QEMU_OPT_SIZE,
.help = "Maximum refcount block cache size",
},
+ {
+ .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Clean unused cache entries after this time (in seconds)",
+ },
{ /* end of list */ }
},
};
@@ -482,11 +487,54 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
[QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
};
+static void cache_clean_timer_cb(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ BDRVQcow2State *s = bs->opaque;
+ qcow2_cache_clean_unused(bs, s->l2_table_cache);
+ qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+ timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ (int64_t) s->cache_clean_interval * 1000);
+}
+
+static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
+{
+ BDRVQcow2State *s = bs->opaque;
+ if (s->cache_clean_interval > 0) {
+ s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
+ SCALE_MS, cache_clean_timer_cb,
+ bs);
+ timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ (int64_t) s->cache_clean_interval * 1000);
+ }
+}
+
+static void cache_clean_timer_del(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ if (s->cache_clean_timer) {
+ timer_del(s->cache_clean_timer);
+ timer_free(s->cache_clean_timer);
+ s->cache_clean_timer = NULL;
+ }
+}
+
+static void qcow2_detach_aio_context(BlockDriverState *bs)
+{
+ cache_clean_timer_del(bs);
+}
+
+static void qcow2_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ cache_clean_timer_init(bs, new_context);
+}
+
static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
uint64_t *l2_cache_size,
uint64_t *refcount_cache_size, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t combined_cache_size;
bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
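
The hunk above wires qcow2_cache_clean_unused() to a timer that re-arms itself from its own callback every cache_clean_interval seconds, and that is torn down and recreated whenever the BDS moves to another AioContext. As a rough standalone analogy of that periodic re-arm (a plain thread with sleep() standing in for aio_timer_new()/timer_mod(); not the actual QEMU API):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Stand-in for qcow2_cache_clean_unused(bs, cache). */
    static void clean_unused(void)
    {
        puts("dropping unused cache tables");
    }

    struct cleaner {
        unsigned interval;      /* seconds, like cache-clean-interval */
        atomic_bool stop;
    };

    /* Periodic worker: each pass does the cleanup and then waits another
     * interval, which is what the timer callback achieves by re-arming
     * itself with timer_mod(). */
    static void *cleaner_run(void *opaque)
    {
        struct cleaner *c = opaque;

        while (!atomic_load(&c->stop)) {
            sleep(c->interval);
            if (!atomic_load(&c->stop)) {
                clean_unused();
            }
        }
        return NULL;
    }

    int main(void)
    {
        struct cleaner c = { .interval = 1 };
        pthread_t tid;

        pthread_create(&tid, NULL, cleaner_run, &c);
        sleep(3);                       /* let a few passes run */
        atomic_store(&c.stop, true);
        pthread_join(tid, NULL);
        return 0;
    }

Build with -pthread; the real code instead schedules the next run on QEMU_CLOCK_VIRTUAL inside the event loop that owns the BDS.
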
@@ -541,22 +589,246 @@ static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
}
}
+typedef struct Qcow2ReopenState {
+ Qcow2Cache *l2_table_cache;
+ Qcow2Cache *refcount_block_cache;
+ bool use_lazy_refcounts;
+ int overlap_check;
+ bool discard_passthrough[QCOW2_DISCARD_MAX];
+ uint64_t cache_clean_interval;
+} Qcow2ReopenState;
+
+static int qcow2_update_options_prepare(BlockDriverState *bs,
+ Qcow2ReopenState *r,
+ QDict *options, int flags,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ QemuOpts *opts = NULL;
+ const char *opt_overlap_check, *opt_overlap_check_template;
+ int overlap_check_template = 0;
+ uint64_t l2_cache_size, refcount_cache_size;
+ int i;
+ Error *local_err = NULL;
+ int ret;
+
+ opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* get L2 table/refcount block cache size from command line options */
+ read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ l2_cache_size /= s->cluster_size;
+ if (l2_cache_size < MIN_L2_CACHE_SIZE) {
+ l2_cache_size = MIN_L2_CACHE_SIZE;
+ }
+ if (l2_cache_size > INT_MAX) {
+ error_setg(errp, "L2 cache size too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ refcount_cache_size /= s->cluster_size;
+ if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
+ refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
+ }
+ if (refcount_cache_size > INT_MAX) {
+ error_setg(errp, "Refcount cache size too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* alloc new L2 table/refcount block cache, flush old one */
+ if (s->l2_table_cache) {
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
+ goto fail;
+ }
+ }
+
+ if (s->refcount_block_cache) {
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "Failed to flush the refcount block cache");
+ goto fail;
+ }
+ }
+
+ r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
+ r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
+ if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
+ error_setg(errp, "Could not allocate metadata caches");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* New interval for cache cleanup timer */
+ r->cache_clean_interval =
+ qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ s->cache_clean_interval);
+ if (r->cache_clean_interval > UINT_MAX) {
+ error_setg(errp, "Cache clean interval too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* lazy-refcounts; flush if going from enabled to disabled */
+ r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
+ (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+ if (r->use_lazy_refcounts && s->qcow_version < 3) {
+ error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
+ "qemu 1.1 compatibility level");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
+ goto fail;
+ }
+ }
+
+ /* Overlap check options */
+ opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
+ opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
+ if (opt_overlap_check_template && opt_overlap_check &&
+ strcmp(opt_overlap_check_template, opt_overlap_check))
+ {
+ error_setg(errp, "Conflicting values for qcow2 options '"
+ QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
+ "' ('%s')", opt_overlap_check, opt_overlap_check_template);
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (!opt_overlap_check) {
+ opt_overlap_check = opt_overlap_check_template ?: "cached";
+ }
+
+ if (!strcmp(opt_overlap_check, "none")) {
+ overlap_check_template = 0;
+ } else if (!strcmp(opt_overlap_check, "constant")) {
+ overlap_check_template = QCOW2_OL_CONSTANT;
+ } else if (!strcmp(opt_overlap_check, "cached")) {
+ overlap_check_template = QCOW2_OL_CACHED;
+ } else if (!strcmp(opt_overlap_check, "all")) {
+ overlap_check_template = QCOW2_OL_ALL;
+ } else {
+ error_setg(errp, "Unsupported value '%s' for qcow2 option "
+ "'overlap-check'. Allowed are any of the following: "
+ "none, constant, cached, all", opt_overlap_check);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ r->overlap_check = 0;
+ for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
+ /* overlap-check defines a template bitmask, but every flag may be
+ * overwritten through the associated boolean option */
+ r->overlap_check |=
+ qemu_opt_get_bool(opts, overlap_bool_option_names[i],
+ overlap_check_template & (1 << i)) << i;
+ }
+
+ r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
+ r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
+ r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
+ flags & BDRV_O_UNMAP);
+ r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
+ r->discard_passthrough[QCOW2_DISCARD_OTHER] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
+
+ ret = 0;
+fail:
+ qemu_opts_del(opts);
+ opts = NULL;
+ return ret;
+}
+
+static void qcow2_update_options_commit(BlockDriverState *bs,
+ Qcow2ReopenState *r)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int i;
+
+ if (s->l2_table_cache) {
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ }
+ if (s->refcount_block_cache) {
+ qcow2_cache_destroy(bs, s->refcount_block_cache);
+ }
+ s->l2_table_cache = r->l2_table_cache;
+ s->refcount_block_cache = r->refcount_block_cache;
+
+ s->overlap_check = r->overlap_check;
+ s->use_lazy_refcounts = r->use_lazy_refcounts;
+
+ for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
+ s->discard_passthrough[i] = r->discard_passthrough[i];
+ }
+
+ if (s->cache_clean_interval != r->cache_clean_interval) {
+ cache_clean_timer_del(bs);
+ s->cache_clean_interval = r->cache_clean_interval;
+ cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
+ }
+}
+
+static void qcow2_update_options_abort(BlockDriverState *bs,
+ Qcow2ReopenState *r)
+{
+ if (r->l2_table_cache) {
+ qcow2_cache_destroy(bs, r->l2_table_cache);
+ }
+ if (r->refcount_block_cache) {
+ qcow2_cache_destroy(bs, r->refcount_block_cache);
+ }
+}
+
+static int qcow2_update_options(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
+{
+ Qcow2ReopenState r = {};
+ int ret;
+
+ ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
+ if (ret >= 0) {
+ qcow2_update_options_commit(bs, &r);
+ } else {
+ qcow2_update_options_abort(bs, &r);
+ }
+
+ return ret;
+}
+
static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int len, i;
int ret = 0;
QCowHeader header;
- QemuOpts *opts = NULL;
Error *local_err = NULL;
uint64_t ext_end;
uint64_t l1_vm_state_index;
- const char *opt_overlap_check, *opt_overlap_check_template;
- int overlap_check_template = 0;
- uint64_t l2_cache_size, refcount_cache_size;
- ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
+ ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read qcow2 header");
goto fail;
@@ -631,7 +903,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (header.header_length > sizeof(header)) {
s->unknown_header_fields_size = header.header_length - sizeof(header);
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
- ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
+ ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
s->unknown_header_fields_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -784,14 +1056,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->l1_size > 0) {
- s->l1_table = qemu_try_blockalign(bs->file,
+ s->l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(s->l1_size * sizeof(uint64_t), 512));
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate L1 table");
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
+ ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -802,55 +1074,15 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- /* get L2 table/refcount block cache size from command line options */
- opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- l2_cache_size /= s->cluster_size;
- if (l2_cache_size < MIN_L2_CACHE_SIZE) {
- l2_cache_size = MIN_L2_CACHE_SIZE;
- }
- if (l2_cache_size > INT_MAX) {
- error_setg(errp, "L2 cache size too big");
- ret = -EINVAL;
- goto fail;
- }
-
- refcount_cache_size /= s->cluster_size;
- if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
- refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
- }
- if (refcount_cache_size > INT_MAX) {
- error_setg(errp, "Refcount cache size too big");
- ret = -EINVAL;
- goto fail;
- }
-
- /* alloc L2 table/refcount block cache */
- s->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
- s->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
- if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
- error_setg(errp, "Could not allocate metadata caches");
- ret = -ENOMEM;
+ /* Parse driver-specific options */
+ ret = qcow2_update_options(bs, options, flags, errp);
+ if (ret < 0) {
goto fail;
}
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
- s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size + 512);
if (s->cluster_data == NULL) {
error_setg(errp, "Could not allocate temporary cluster buffer");
@@ -887,7 +1119,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- ret = bdrv_pread(bs->file, header.backing_file_offset,
+ ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -932,70 +1164,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- /* Enable lazy_refcounts according to image and command line options */
- s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-
- s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
- s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
- s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
- flags & BDRV_O_UNMAP);
- s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
- s->discard_passthrough[QCOW2_DISCARD_OTHER] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
- opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
- opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
- if (opt_overlap_check_template && opt_overlap_check &&
- strcmp(opt_overlap_check_template, opt_overlap_check))
- {
- error_setg(errp, "Conflicting values for qcow2 options '"
- QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
- "' ('%s')", opt_overlap_check, opt_overlap_check_template);
- ret = -EINVAL;
- goto fail;
- }
- if (!opt_overlap_check) {
- opt_overlap_check = opt_overlap_check_template ?: "cached";
- }
-
- if (!strcmp(opt_overlap_check, "none")) {
- overlap_check_template = 0;
- } else if (!strcmp(opt_overlap_check, "constant")) {
- overlap_check_template = QCOW2_OL_CONSTANT;
- } else if (!strcmp(opt_overlap_check, "cached")) {
- overlap_check_template = QCOW2_OL_CACHED;
- } else if (!strcmp(opt_overlap_check, "all")) {
- overlap_check_template = QCOW2_OL_ALL;
- } else {
- error_setg(errp, "Unsupported value '%s' for qcow2 option "
- "'overlap-check'. Allowed are either of the following: "
- "none, constant, cached, all", opt_overlap_check);
- ret = -EINVAL;
- goto fail;
- }
-
- s->overlap_check = 0;
- for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
- /* overlap-check defines a template bitmask, but every flag may be
- * overwritten through the associated boolean option */
- s->overlap_check |=
- qemu_opt_get_bool(opts, overlap_bool_option_names[i],
- overlap_check_template & (1 << i)) << i;
- }
-
- qemu_opts_del(opts);
- opts = NULL;
-
- if (s->use_lazy_refcounts && s->qcow_version < 3) {
- error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
- "qemu 1.1 compatibility level");
- ret = -EINVAL;
- goto fail;
- }
-
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -1005,7 +1173,6 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
fail:
- qemu_opts_del(opts);
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
@@ -1013,6 +1180,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
+ cache_clean_timer_del(bs);
if (s->l2_table_cache) {
qcow2_cache_destroy(bs, s->l2_table_cache);
}
@@ -1026,14 +1194,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->cluster_sectors;
}
static int qcow2_set_key(BlockDriverState *bs, const char *key)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint8_t keybuf[16];
int len, i;
Error *err = NULL;
@@ -1066,32 +1234,58 @@ static int qcow2_set_key(BlockDriverState *bs, const char *key)
return 0;
}
-/* We have no actual commit/abort logic for qcow2, but we need to write out any
- * unwritten data if we reopen read-only. */
static int qcow2_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
+ Qcow2ReopenState *r;
int ret;
+ r = g_new0(Qcow2ReopenState, 1);
+ state->opaque = r;
+
+ ret = qcow2_update_options_prepare(state->bs, r, state->options,
+ state->flags, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* We need to write out any unwritten data if we reopen read-only. */
if ((state->flags & BDRV_O_RDWR) == 0) {
ret = bdrv_flush(state->bs);
if (ret < 0) {
- return ret;
+ goto fail;
}
ret = qcow2_mark_clean(state->bs);
if (ret < 0) {
- return ret;
+ goto fail;
}
}
return 0;
+
+fail:
+ qcow2_update_options_abort(state->bs, r);
+ g_free(r);
+ return ret;
+}
+
+static void qcow2_reopen_commit(BDRVReopenState *state)
+{
+ qcow2_update_options_commit(state->bs, state->opaque);
+ g_free(state->opaque);
+}
+
+static void qcow2_reopen_abort(BDRVReopenState *state)
+{
+ qcow2_update_options_abort(state->bs, state->opaque);
+ g_free(state->opaque);
}
static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
int64_t status = 0;
@@ -1138,7 +1332,7 @@ int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
int remaining_sectors, QEMUIOVector *qiov)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int index_in_cluster, n1;
int ret;
int cur_nr_sectors; /* number of sectors in current iteration */
@@ -1175,9 +1369,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (bs->backing_hd) {
+ if (bs->backing) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
+ n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
sector_num, cur_nr_sectors);
if (n1 > 0) {
QEMUIOVector local_qiov;
@@ -1188,7 +1382,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
+ ret = bdrv_co_readv(bs->backing->bs, sector_num,
n1, &local_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1235,8 +1429,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
*/
if (!cluster_data) {
cluster_data =
- qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
+ qemu_try_blockalign(bs->file->bs,
+ QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
goto fail;
@@ -1252,7 +1447,7 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
+ ret = bdrv_co_readv(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
cur_nr_sectors, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1300,7 +1495,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
int remaining_sectors,
QEMUIOVector *qiov)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int index_in_cluster;
int ret;
int cur_nr_sectors; /* number of sectors in current iteration */
@@ -1349,7 +1544,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
Error *err = NULL;
assert(s->cipher);
if (!cluster_data) {
- cluster_data = qemu_try_blockalign(bs->file,
+ cluster_data = qemu_try_blockalign(bs->file->bs,
QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size);
if (cluster_data == NULL) {
@@ -1386,7 +1581,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
(cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file,
+ ret = bdrv_co_writev(bs->file->bs,
(cluster_offset >> 9) + index_in_cluster,
cur_nr_sectors, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1446,7 +1641,7 @@ fail:
static void qcow2_close(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
@@ -1471,6 +1666,7 @@ static void qcow2_close(BlockDriverState *bs)
}
}
+ cache_clean_timer_del(bs);
qcow2_cache_destroy(bs, s->l2_table_cache);
qcow2_cache_destroy(bs, s->refcount_block_cache);
@@ -1491,7 +1687,7 @@ static void qcow2_close(BlockDriverState *bs)
static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int flags = s->flags;
QCryptoCipher *cipher = NULL;
QDict *options;
@@ -1508,13 +1704,13 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
qcow2_close(bs);
- bdrv_invalidate_cache(bs->file, &local_err);
+ bdrv_invalidate_cache(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
- memset(s, 0, sizeof(BDRVQcowState));
+ memset(s, 0, sizeof(BDRVQcow2State));
options = qdict_clone_shallow(bs->options);
ret = qcow2_open(bs, options, flags, &local_err);
@@ -1561,7 +1757,7 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
*/
int qcow2_update_header(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowHeader *header;
char *buf;
size_t buflen = s->cluster_size;
@@ -1716,7 +1912,7 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Write the new header */
- ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
+ ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -1730,7 +1926,7 @@ fail:
static int qcow2_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
@@ -1796,7 +1992,8 @@ static int preallocate(BlockDriverState *bs)
if (host_offset != 0) {
uint8_t buf[BDRV_SECTOR_SIZE];
memset(buf, 0, BDRV_SECTOR_SIZE);
- ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
+ ret = bdrv_write(bs->file->bs,
+ (host_offset >> BDRV_SECTOR_BITS) + num - 1,
buf, 1);
if (ret < 0) {
return ret;
@@ -1812,8 +2009,10 @@ static int qcow2_create2(const char *filename, int64_t total_size,
QemuOpts *opts, int version, int refcount_order,
Error **errp)
{
- /* Calculate cluster_bits */
int cluster_bits;
+ QDict *options;
+
+ /* Calculate cluster_bits */
cluster_bits = ctz32(cluster_size);
if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
(1 << cluster_bits) != cluster_size)
@@ -1912,7 +2111,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
+ &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
@@ -1971,9 +2170,11 @@ static int qcow2_create2(const char *filename, int64_t total_size,
* refcount of the cluster that is occupied by the header and the refcount
* table)
*/
- ret = bdrv_open(&bs, filename, NULL, NULL,
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow2"));
+ ret = bdrv_open(&bs, filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- &bdrv_qcow2, &local_err);
+ &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
@@ -2009,7 +2210,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* And if we're supposed to preallocate metadata, do that now */
if (prealloc != PREALLOC_MODE_OFF) {
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = preallocate(bs);
qemu_co_mutex_unlock(&s->lock);
@@ -2023,9 +2224,11 @@ static int qcow2_create2(const char *filename, int64_t total_size,
bs = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
- ret = bdrv_open(&bs, filename, NULL, NULL,
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow2"));
+ ret = bdrv_open(&bs, filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
- &bdrv_qcow2, &local_err);
+ &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto out;
@@ -2142,7 +2345,7 @@ static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
{
int ret;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
/* Emulate misaligned zero writes */
if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
@@ -2162,7 +2365,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
int ret;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
@@ -2173,7 +2376,7 @@ static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t new_l1_size;
int ret;
@@ -2202,7 +2405,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
/* write updated header.size */
offset = cpu_to_be64(offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
&offset, sizeof(uint64_t));
if (ret < 0) {
return ret;
@@ -2217,7 +2420,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
@@ -2226,8 +2429,8 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (nb_sectors == 0) {
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
- cluster_offset = bdrv_getlength(bs->file);
- return bdrv_truncate(bs->file, cluster_offset);
+ cluster_offset = bdrv_getlength(bs->file->bs);
+ return bdrv_truncate(bs->file->bs, cluster_offset);
}
if (nb_sectors != s->cluster_sectors) {
@@ -2294,7 +2497,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
+ ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
if (ret < 0) {
goto fail;
}
@@ -2308,7 +2511,7 @@ fail:
static int make_completely_empty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret, l1_clusters;
int64_t offset;
uint64_t *new_reftable = NULL;
@@ -2343,7 +2546,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* After this call, neither the in-memory nor the on-disk refcount
* information accurately describe the actual references */
- ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
l1_clusters * s->cluster_sectors, 0);
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2357,7 +2560,7 @@ static int make_completely_empty(BlockDriverState *bs)
* overwrite parts of the existing refcount and L1 table, which is not
* an issue because the dirty flag is set, complete data loss is in fact
* desired and partial data loss is consequently fine as well */
- ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
+ ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
(2 + l1_clusters) * s->cluster_size /
BDRV_SECTOR_SIZE, 0);
/* This call (even if it failed overall) may have overwritten on-disk
@@ -2377,7 +2580,7 @@ static int make_completely_empty(BlockDriverState *bs)
cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
+ ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2408,7 +2611,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* Enter the first refblock into the reftable */
rt_entry = cpu_to_be64(2 * s->cluster_size);
- ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
+ ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
&rt_entry, sizeof(rt_entry));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2433,7 +2636,7 @@ static int make_completely_empty(BlockDriverState *bs)
goto fail;
}
- ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+ ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -2456,7 +2659,7 @@ fail:
static int qcow2_make_empty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start_sector;
int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
int l1_clusters, ret = 0;
@@ -2497,7 +2700,7 @@ static int qcow2_make_empty(BlockDriverState *bs)
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
qemu_co_mutex_lock(&s->lock);
@@ -2521,7 +2724,7 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bdi->unallocated_blocks_are_zero = true;
bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3);
bdi->cluster_size = s->cluster_size;
@@ -2531,22 +2734,20 @@ static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
*spec_info = (ImageInfoSpecific){
- .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
- {
- .qcow2 = g_new(ImageInfoSpecificQCow2, 1),
- },
+ .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
+ .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1),
};
if (s->qcow_version == 2) {
- *spec_info->qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
.compat = g_strdup("0.10"),
.refcount_bits = s->refcount_bits,
};
} else if (s->qcow_version == 3) {
- *spec_info->qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
.compat = g_strdup("1.1"),
.lazy_refcounts = s->compatible_features &
QCOW2_COMPAT_LAZY_REFCOUNTS,
@@ -2564,11 +2765,11 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
#if 0
static void dump_refcounts(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t nb_clusters, k, k1, size;
int refcount;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
nb_clusters = size_to_clusters(s, size);
for(k = 0; k < nb_clusters;) {
k1 = k;
@@ -2585,7 +2786,7 @@ static void dump_refcounts(BlockDriverState *bs)
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t total_sectors = bs->total_sectors;
bool zero_beyond_eof = bs->zero_beyond_eof;
int ret;
@@ -2606,7 +2807,7 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bool zero_beyond_eof = bs->zero_beyond_eof;
int ret;
@@ -2625,7 +2826,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
static int qcow2_downgrade(BlockDriverState *bs, int target_version,
BlockDriverAmendStatusCB *status_cb)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int current_version = s->qcow_version;
int ret;
@@ -2689,7 +2890,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
uint64_t new_size = 0;
const char *backing_file = NULL, *backing_format = NULL;
@@ -2836,7 +3037,7 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
int64_t size, const char *message_format, ...)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
const char *node_name;
char *message;
va_list ap;
@@ -2937,11 +3138,13 @@ static QemuOptsList qcow2_create_opts = {
BlockDriver bdrv_qcow2 = {
.format_name = "qcow2",
- .instance_size = sizeof(BDRVQcowState),
+ .instance_size = sizeof(BDRVQcow2State),
.bdrv_probe = qcow2_probe,
.bdrv_open = qcow2_open,
.bdrv_close = qcow2_close,
.bdrv_reopen_prepare = qcow2_reopen_prepare,
+ .bdrv_reopen_commit = qcow2_reopen_commit,
+ .bdrv_reopen_abort = qcow2_reopen_abort,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
@@ -2977,6 +3180,9 @@ BlockDriver bdrv_qcow2 = {
.create_opts = &qcow2_create_opts,
.bdrv_check = qcow2_check,
.bdrv_amend_options = qcow2_amend_options,
+
+ .bdrv_detach_aio_context = qcow2_detach_aio_context,
+ .bdrv_attach_aio_context = qcow2_attach_aio_context,
};
static void bdrv_qcow2_init(void)
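
The largest functional change in the qcow2.c hunks above is that option parsing now follows a prepare/commit/abort scheme shared by open and reopen: qcow2_update_options_prepare() builds the new caches and settings on the side, qcow2_update_options_commit() swaps them into the live BDRVQcow2State, and qcow2_update_options_abort() throws them away on error. The fragment below is a minimal, self-contained sketch of that transactional shape in plain C; it is not part of the patch, and the Config names and fields are invented for illustration.

    #include <stdio.h>

    typedef struct Config {
        const char *overlap_check;
        unsigned    cache_clean_interval;
    } Config;

    /* prepare: validate and stage the new settings without touching 'live' */
    static int config_prepare(Config *staged, const char *overlap, unsigned interval)
    {
        if (interval > 3600) {
            fprintf(stderr, "cache-clean-interval too big\n");
            return -1;
        }
        staged->overlap_check = overlap;
        staged->cache_clean_interval = interval;
        return 0;
    }

    /* commit: the staged settings become the live ones in one step */
    static void config_commit(Config *live, const Config *staged)
    {
        *live = *staged;
    }

    /* abort: drop the staged settings; 'live' was never modified */
    static void config_abort(Config *staged)
    {
        (void)staged;   /* nothing was allocated in this toy version */
    }

    int main(void)
    {
        Config live = { "cached", 0 };
        Config staged = { 0 };

        if (config_prepare(&staged, "all", 600) == 0) {
            config_commit(&live, &staged);
        } else {
            config_abort(&staged);
        }
        printf("overlap-check=%s cache-clean-interval=%u\n",
               live.overlap_check, live.cache_clean_interval);
        return 0;
    }
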
diff --git a/block/qcow2.h b/block/qcow2.h
index 4b5a6afc8..b8c500b9d 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -26,7 +26,7 @@
#define BLOCK_QCOW2_H
#include "crypto/cipher.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2
@@ -96,6 +96,7 @@
#define QCOW2_OPT_CACHE_SIZE "cache-size"
#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
+#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
typedef struct QCowHeader {
uint32_t magic;
@@ -221,7 +222,7 @@ typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
typedef void Qcow2SetRefcountFunc(void *refcount_array,
uint64_t index, uint64_t value);
-typedef struct BDRVQcowState {
+typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
int cluster_sectors;
@@ -239,6 +240,8 @@ typedef struct BDRVQcowState {
Qcow2Cache* l2_table_cache;
Qcow2Cache* refcount_block_cache;
+ QEMUTimer *cache_clean_timer;
+ unsigned cache_clean_interval;
uint8_t *cluster_cache;
uint8_t *cluster_data;
@@ -290,9 +293,7 @@ typedef struct BDRVQcowState {
* override) */
char *image_backing_file;
char *image_backing_format;
-} BDRVQcowState;
-
-struct QCowAIOCB;
+} BDRVQcow2State;
typedef struct Qcow2COWRegion {
/**
@@ -402,28 +403,28 @@ typedef enum QCow2MetadataOverlap {
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
-static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
+static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset)
{
return offset & ~(s->cluster_size - 1);
}
-static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
+static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset)
{
return offset & (s->cluster_size - 1);
}
-static inline uint64_t size_to_clusters(BDRVQcowState *s, uint64_t size)
+static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size)
{
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}
-static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
+static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
{
int shift = s->cluster_bits + s->l2_bits;
return (size + (1ULL << shift) - 1) >> shift;
}
-static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
+static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
{
return (offset >> s->cluster_bits) & (s->l2_size - 1);
}
@@ -434,12 +435,12 @@ static inline int64_t align_offset(int64_t offset, int n)
return offset;
}
-static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
+static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
}
-static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
+static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
{
return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
}
@@ -458,7 +459,7 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
}
/* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s)
{
return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
}
@@ -534,7 +535,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-int qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
uint8_t *out_buf, const uint8_t *in_buf,
int nb_sectors, bool enc, Error **errp);
@@ -581,6 +582,7 @@ int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
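
Most of the qcow2.h hunk is the mechanical BDRVQcowState to BDRVQcow2State rename plus the new cache-clean-interval fields, but it is also where the QCOW2_OL_* constants consumed by the overlap-check loop in qcow2_update_options_prepare() live. As a rough illustration of how that loop combines a named template ("none", "constant", "cached", "all") with per-bit boolean overrides, here is a simplified, self-contained sketch; the enum values and helper are invented for the example and do not match the real QCOW2_OL_* layout.

    #include <stdio.h>
    #include <stdbool.h>

    /* Toy subset standing in for the QCOW2_OL_* bit numbers */
    enum { OL_MAIN_HEADER, OL_ACTIVE_L1, OL_ACTIVE_L2, OL_MAX_BITNR };

    /* Start from the template's default for each bit, then let an explicitly
     * given boolean option override that individual bit. */
    static unsigned expand_overlap_check(unsigned template_mask,
                                         const bool *given, const bool *value)
    {
        unsigned result = 0;

        for (int i = 0; i < OL_MAX_BITNR; i++) {
            bool bit = template_mask & (1u << i);
            if (given[i]) {
                bit = value[i];
            }
            result |= (unsigned)bit << i;
        }
        return result;
    }

    int main(void)
    {
        /* pretend the "cached" template covers the header and the active L1 */
        unsigned cached = (1u << OL_MAIN_HEADER) | (1u << OL_ACTIVE_L1);
        bool given[OL_MAX_BITNR] = { false, false, true };  /* only L2 set by user */
        bool value[OL_MAX_BITNR] = { false, false, true };

        printf("overlap_check=0x%x\n", expand_overlap_check(cached, given, value));
        return 0;
    }
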
diff --git a/block/qed-table.c b/block/qed-table.c
index 513aa872c..f4219b8ac 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -63,7 +63,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
- bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
+ bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
}
@@ -152,7 +152,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
/* Adjust for offset into table */
offset += start * sizeof(uint64_t);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);
diff --git a/block/qed.c b/block/qed.c
index 954ed007c..9b8889503 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -82,7 +82,7 @@ int qed_write_header_sync(BDRVQEDState *s)
int ret;
qed_header_cpu_to_le(&s->header, &le);
- ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
+ ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
if (ret != sizeof(le)) {
return ret;
}
@@ -119,7 +119,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
/* Update header */
qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
- bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
+ bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
write_header_cb->nsectors, qed_write_header_cb,
write_header_cb);
}
@@ -152,7 +152,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
write_header_cb->iov.iov_len = len;
qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
- bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
+ bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
qed_write_header_read_cb, write_header_cb);
}
@@ -354,12 +354,6 @@ static void qed_cancel_need_check_timer(BDRVQEDState *s)
timer_del(s->need_check_timer);
}
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- s->bs = bs;
-}
-
static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -381,6 +375,18 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
+static void bdrv_qed_drain(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ /* Cancel the timer and start the I/O that it would have done when it
+ * fired; that way bdrv_drain() takes care of the ongoing requests
+ * correctly. */
+ qed_cancel_need_check_timer(s);
+ qed_plug_allocating_write_reqs(s);
+ bdrv_aio_flush(s->bs, qed_clear_need_check, s);
+}
+
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -392,7 +398,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
s->bs = bs;
QSIMPLEQ_INIT(&s->allocating_write_reqs);
- ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
+ ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
if (ret < 0) {
return ret;
}
@@ -416,7 +422,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Round down file size to the last cluster */
- file_size = bdrv_getlength(bs->file);
+ file_size = bdrv_getlength(bs->file->bs);
if (file_size < 0) {
return file_size;
}
@@ -452,7 +458,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- ret = qed_read_string(bs->file, s->header.backing_filename_offset,
+ ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
s->header.backing_filename_size, bs->backing_file,
sizeof(bs->backing_file));
if (ret < 0) {
@@ -471,7 +477,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+ !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@@ -480,7 +486,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
/* From here on only known autoclear feature bits are valid */
- bdrv_flush(bs->file);
+ bdrv_flush(bs->file->bs);
}
s->l1_table = qed_alloc_table(s);
@@ -498,7 +504,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
* potentially inconsistent images to be opened read-only. This can
* aid data recovery from an otherwise inconsistent image.
*/
- if (!bdrv_is_read_only(bs->file) &&
+ if (!bdrv_is_read_only(bs->file->bs) &&
!(flags & BDRV_O_INCOMING)) {
BdrvCheckResult result = {0};
@@ -541,7 +547,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
bdrv_qed_detach_aio_context(bs);
/* Ensure writes reach stable storage */
- bdrv_flush(bs->file);
+ bdrv_flush(bs->file->bs);
/* Clean shutdown, no check required on next open */
if (s->header.features & QED_F_NEED_CHECK) {
@@ -583,7 +589,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
&local_err);
if (ret < 0) {
error_propagate(errp, local_err);
@@ -772,8 +778,8 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
/* If there is a backing file, get its length. Treat the absence of a
* backing file like a zero length backing file.
*/
- if (s->bs->backing_hd) {
- int64_t l = bdrv_getlength(s->bs->backing_hd);
+ if (s->bs->backing) {
+ int64_t l = bdrv_getlength(s->bs->backing->bs);
if (l < 0) {
cb(opaque, l);
return;
@@ -802,7 +808,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
@@ -839,7 +845,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
&copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_copy_from_backing_file_cb, copy_cb);
}
@@ -1055,7 +1061,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+ if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
qed_aio_complete(acb, -EIO);
}
}
@@ -1081,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret)
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
next_fn = qed_aio_next_io;
} else {
- if (s->bs->backing_hd) {
+ if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
} else {
next_fn = qed_aio_write_l2_update_cb;
@@ -1089,7 +1095,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
next_fn, acb);
}
@@ -1139,7 +1145,7 @@ static void qed_aio_write_prefill(void *opaque, int ret)
static bool qed_should_set_need_check(BDRVQEDState *s)
{
/* The flush before L2 update path ensures consistency */
- if (s->bs->backing_hd) {
+ if (s->bs->backing) {
return false;
}
@@ -1321,7 +1327,7 @@ static void qed_aio_read_data(void *opaque, int ret,
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
return;
@@ -1443,7 +1449,7 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
struct iovec iov;
/* Refuse if there are untouched backing file sectors */
- if (bs->backing_hd) {
+ if (bs->backing) {
if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
return -ENOTSUP;
}
@@ -1580,7 +1586,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
}
/* Write new header */
- ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
+ ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
g_free(buffer);
if (ret == 0) {
memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1596,7 +1602,7 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
- bdrv_invalidate_cache(bs->file, &local_err);
+ bdrv_invalidate_cache(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -1664,7 +1670,6 @@ static BlockDriver bdrv_qed = {
.supports_backing = true,
.bdrv_probe = bdrv_qed_probe,
- .bdrv_rebind = bdrv_qed_rebind,
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
@@ -1683,6 +1688,7 @@ static BlockDriver bdrv_qed = {
.bdrv_check = bdrv_qed_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
+ .bdrv_drain = bdrv_qed_drain,
};
static void bdrv_qed_init(void)
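
Beyond the bs->file->bs and backing_hd to backing conversions, the qed diff adds a .bdrv_drain callback: instead of leaving the need-check timer pending, draining cancels it and kicks off the flush it would have scheduled, so the generic drain code simply waits for it like any other in-flight request. The snippet below is a toy model of that idea in plain C with no QEMU APIs; all names are made up for illustration.

    #include <stdio.h>
    #include <stdbool.h>

    typedef struct Device {
        bool timer_armed;     /* deferred "need check" work is scheduled */
        int  in_flight;       /* operations the drain loop waits for */
    } Device;

    static void start_need_check(Device *d)
    {
        d->in_flight++;                    /* becomes an ordinary in-flight op */
        printf("need-check flush started\n");
    }

    static void drain(Device *d)
    {
        if (d->timer_armed) {
            d->timer_armed = false;        /* cancel the timer ... */
            start_need_check(d);           /* ... and run its work immediately */
        }
        while (d->in_flight > 0) {         /* generic drain: wait for completion */
            d->in_flight--;                /* (stand-in for polling the AioContext) */
        }
        printf("drained\n");
    }

    int main(void)
    {
        Device d = { .timer_armed = true, .in_flight = 2 };
        drain(&d);
        return 0;
    }
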
diff --git a/block/quorum.c b/block/quorum.c
index 2f6c45f76..e640688eb 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -64,7 +64,7 @@ typedef struct QuorumVotes {
/* the following structure holds the state of one quorum instance */
typedef struct BDRVQuorumState {
- BlockDriverState **bs; /* children BlockDriverStates */
+ BdrvChild **children; /* children BlockDriverStates */
int num_children; /* children count */
int threshold; /* if less than threshold children reads gave the
* same result a quorum error occurs.
@@ -336,7 +336,7 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
+ quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
}
}
}
@@ -369,8 +369,9 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
- acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+ acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+ acb);
}
}
@@ -639,14 +640,15 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+ acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
}
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
- acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+ acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+ &acb->qcrs[i].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
@@ -656,14 +658,15 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
{
BDRVQuorumState *s = acb->common.bs->opaque;
- acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
- acb->qiov->size);
+ acb->qcrs[acb->child_iter].buf =
+ qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
- bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
- &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+ acb->qcrs[acb->child_iter].aiocb =
+ bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+ &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[acb->child_iter]);
return &acb->common;
}
@@ -702,8 +705,8 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
- nb_sectors, &quorum_aio_cb,
+ acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
+ qiov, nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}
@@ -717,12 +720,12 @@ static int64_t quorum_getlength(BlockDriverState *bs)
int i;
/* check that all files have the same length */
- result = bdrv_getlength(s->bs[0]);
+ result = bdrv_getlength(s->children[0]->bs);
if (result < 0) {
return result;
}
for (i = 1; i < s->num_children; i++) {
- int64_t value = bdrv_getlength(s->bs[i]);
+ int64_t value = bdrv_getlength(s->children[i]->bs);
if (value < 0) {
return value;
}
@@ -741,7 +744,7 @@ static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_invalidate_cache(s->bs[i], &local_err);
+ bdrv_invalidate_cache(s->children[i]->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
@@ -762,7 +765,7 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
- result = bdrv_co_flush(s->bs[i]);
+ result = bdrv_co_flush(s->children[i]->bs);
result_value.l = result;
quorum_count_vote(&error_votes, &result_value, i);
}
@@ -782,7 +785,7 @@ static bool quorum_recurse_is_first_non_filter(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
+ bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
candidate);
if (perm) {
return true;
@@ -889,6 +892,12 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
+ /* and validate it against s->num_children */
+ ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+ if (ret < 0) {
+ goto exit;
+ }
+
ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
if (ret < 0) {
error_setg(&local_err, "Please set read-pattern as fifo or quorum");
@@ -897,12 +906,6 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
s->read_pattern = ret;
if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
- /* and validate it against s->num_children */
- ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
- if (ret < 0) {
- goto exit;
- }
-
/* is the driver in blkverify mode */
if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
s->num_children == 2 && s->threshold == 2) {
@@ -922,8 +925,8 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- /* allocate the children BlockDriverState array */
- s->bs = g_new0(BlockDriverState *, s->num_children);
+ /* allocate the children array */
+ s->children = g_new0(BdrvChild *, s->num_children);
opened = g_new0(bool, s->num_children);
for (i = 0; i < s->num_children; i++) {
@@ -931,9 +934,10 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
ret = snprintf(indexstr, 32, "children.%d", i);
assert(ret < 32);
- ret = bdrv_open_image(&s->bs[i], NULL, options, indexstr, bs,
- &child_format, false, &local_err);
- if (ret < 0) {
+ s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
+ &child_format, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
goto close_exit;
}
@@ -949,9 +953,9 @@ close_exit:
if (!opened[i]) {
continue;
}
- bdrv_unref(s->bs[i]);
+ bdrv_unref_child(bs, s->children[i]);
}
- g_free(s->bs);
+ g_free(s->children);
g_free(opened);
exit:
qemu_opts_del(opts);
@@ -968,10 +972,10 @@ static void quorum_close(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_unref(s->bs[i]);
+ bdrv_unref_child(bs, s->children[i]);
}
- g_free(s->bs);
+ g_free(s->children);
}
static void quorum_detach_aio_context(BlockDriverState *bs)
@@ -980,7 +984,7 @@ static void quorum_detach_aio_context(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_detach_aio_context(s->bs[i]);
+ bdrv_detach_aio_context(s->children[i]->bs);
}
}
@@ -991,7 +995,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_attach_aio_context(s->bs[i], new_context);
+ bdrv_attach_aio_context(s->children[i]->bs, new_context);
}
}
@@ -1003,16 +1007,17 @@ static void quorum_refresh_filename(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_refresh_filename(s->bs[i]);
- if (!s->bs[i]->full_open_options) {
+ bdrv_refresh_filename(s->children[i]->bs);
+ if (!s->children[i]->bs->full_open_options) {
return;
}
}
children = qlist_new();
for (i = 0; i < s->num_children; i++) {
- QINCREF(s->bs[i]->full_open_options);
- qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options));
+ QINCREF(s->children[i]->bs->full_open_options);
+ qlist_append_obj(children,
+ QOBJECT(s->children[i]->bs->full_open_options));
}
opts = qdict_new();
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 855febed5..2fff1843c 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,11 +127,6 @@ do { \
#define FTYPE_FILE 0
#define FTYPE_CD 1
-#define FTYPE_FD 2
-
-/* if the FD is not accessed during that time (in ns), we try to
- reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (1000000000)
#define MAX_BLOCKSIZE 4096
@@ -141,13 +136,6 @@ typedef struct BDRVRawState {
int open_flags;
size_t buf_align;
-#if defined(__linux__)
- /* linux floppy specific */
- int64_t fd_open_time;
- int64_t fd_error_time;
- int fd_got_error;
- int fd_media_changed;
-#endif
#ifdef CONFIG_LINUX_AIO
int use_aio;
void *aio_ctx;
@@ -519,7 +507,16 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
"future QEMU versions.\n",
bs->filename);
}
-#endif
+#else
+ if (bdrv_flags & BDRV_O_NATIVE_AIO) {
+ error_printf("WARNING: aio=native was specified for '%s', but "
+ "is not supported in this build. Falling back to "
+ "aio=threads.\n"
+ " This will become an error condition in "
+ "future QEMU versions.\n",
+ bs->filename);
+ }
+#endif /* !defined(CONFIG_LINUX_AIO) */
s->has_discard = true;
s->has_write_zeroes = true;
@@ -626,7 +623,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
}
#endif
- if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+ if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -670,11 +667,17 @@ static int raw_reopen_prepare(BDRVReopenState *state,
/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
if (raw_s->fd == -1) {
- assert(!(raw_s->open_flags & O_CREAT));
- raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
- if (raw_s->fd == -1) {
- error_setg_errno(errp, errno, "Could not reopen file");
- ret = -1;
+ const char *normalized_filename = state->bs->filename;
+ ret = raw_normalize_devicepath(&normalized_filename);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not normalize device path");
+ } else {
+ assert(!(raw_s->open_flags & O_CREAT));
+ raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags);
+ if (raw_s->fd == -1) {
+ error_setg_errno(errp, errno, "Could not reopen file");
+ ret = -1;
+ }
}
}
@@ -780,7 +783,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
BDRVRawState *s = bs->opaque;
struct hd_geometry ioctl_geo = {0};
- uint32_t blksize;
/* If DASD, get its geometry */
if (check_for_dasd(s->fd) < 0) {
@@ -800,12 +802,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
}
geo->heads = ioctl_geo.heads;
geo->sectors = ioctl_geo.sectors;
- if (!probe_physical_blocksize(s->fd, &blksize)) {
- /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */
- geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE)
- / (geo->heads * geo->sectors);
- return 0;
- }
geo->cylinders = ioctl_geo.cylinders;
return 0;
@@ -1253,7 +1249,7 @@ static int aio_worker(void *arg)
break;
}
- g_slice_free(RawPosixAIOData, aiocb);
+ g_free(aiocb);
return ret;
}
@@ -1261,7 +1257,7 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
int type)
{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
acb->bs = bs;
@@ -1286,7 +1282,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type)
{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
acb->bs = bs;
@@ -1642,7 +1638,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
goto out;
}
- fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+ fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (fd < 0) {
result = -errno;
@@ -1973,8 +1969,8 @@ BlockDriver bdrv_file = {
#if defined(__APPLE__) && defined(__MACH__)
static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
-static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
-
+static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
+ CFIndex maxPathSize, int flags);
kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
{
kern_return_t kernResult;
@@ -2001,7 +1997,8 @@ kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
return kernResult;
}
-kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
+kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
+ CFIndex maxPathSize, int flags)
{
io_object_t nextMedia;
kern_return_t kernResult = KERN_FAILURE;
@@ -2014,7 +2011,9 @@ kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex ma
if ( bsdPathAsCFString ) {
size_t devPathLength;
strcpy( bsdPath, _PATH_DEV );
- strcat( bsdPath, "r" );
+ if (flags & BDRV_O_NOCACHE) {
+ strcat(bsdPath, "r");
+ }
devPathLength = strlen( bsdPath );
if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
kernResult = KERN_SUCCESS;
@@ -2126,8 +2125,8 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
int fd;
kernResult = FindEjectableCDMedia( &mediaIterator );
- kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
-
+ kernResult = GetBSDPath(mediaIterator, bsdPath, sizeof(bsdPath),
+ flags);
if ( bsdPath[ 0 ] != '\0' ) {
strcat(bsdPath,"s0");
/* some CDs don't have a partition 0 */
@@ -2172,53 +2171,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
#if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
- present. The current method is to try to open the floppy at every
- I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
- BDRVRawState *s = bs->opaque;
- int last_media_present;
-
- if (s->type != FTYPE_FD)
- return 0;
- last_media_present = (s->fd >= 0);
- if (s->fd >= 0 &&
- (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
- qemu_close(s->fd);
- s->fd = -1;
- DPRINTF("Floppy closed\n");
- }
- if (s->fd < 0) {
- if (s->fd_got_error &&
- (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) {
- DPRINTF("No floppy (open delayed)\n");
- return -EIO;
- }
- s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
- if (s->fd < 0) {
- s->fd_error_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- s->fd_got_error = 1;
- if (last_media_present)
- s->fd_media_changed = 1;
- DPRINTF("No floppy\n");
- return -EIO;
- }
- DPRINTF("Floppy opened\n");
- }
- if (!last_media_present)
- s->fd_media_changed = 1;
- s->fd_open_time = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- s->fd_got_error = 0;
- return 0;
-}
-
-static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- BDRVRawState *s = bs->opaque;
-
- return ioctl(s->fd, req, buf);
-}
static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
@@ -2231,7 +2183,7 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
if (fd_open(bs) < 0)
return NULL;
- acb = g_slice_new(RawPosixAIOData);
+ acb = g_new(RawPosixAIOData, 1);
acb->bs = bs;
acb->aio_type = QEMU_AIO_IOCTL;
acb->aio_fildes = s->fd;
@@ -2241,8 +2193,8 @@ static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
+#endif /* linux */
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -2252,14 +2204,6 @@ static int fd_open(BlockDriverState *bs)
return 0;
return -EIO;
}
-#else /* !linux && !FreeBSD */
-
-static int fd_open(BlockDriverState *bs)
-{
- return 0;
-}
-
-#endif /* !linux && !FreeBSD */
static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
@@ -2303,17 +2247,22 @@ static int hdev_create(const char *filename, QemuOpts *opts,
int64_t total_size = 0;
bool has_prefix;
- /* This function is used by all three protocol block drivers and therefore
- * any of these three prefixes may be given.
+ /* This function is used by both protocol block drivers and therefore either
+ * of these prefixes may be given.
* The return value has to be stored somewhere, otherwise this is an error
* due to -Werror=unused-value. */
has_prefix =
strstart(filename, "host_device:", &filename) ||
- strstart(filename, "host_cdrom:" , &filename) ||
- strstart(filename, "host_floppy:", &filename);
+ strstart(filename, "host_cdrom:" , &filename);
(void)has_prefix;
+ ret = raw_normalize_devicepath(&filename);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not normalize device path");
+ return ret;
+ }
+
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
@@ -2379,160 +2328,10 @@ static BlockDriver bdrv_host_device = {
/* generic scsi device */
#ifdef __linux__
- .bdrv_ioctl = hdev_ioctl,
.bdrv_aio_ioctl = hdev_aio_ioctl,
#endif
};
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
- Error **errp)
-{
- /* The prefix is optional, just as for "file". */
- strstart(filename, "host_floppy:", &filename);
-
- qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
-{
- BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
-
- s->type = FTYPE_FD;
-
- /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
- ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
- if (ret) {
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
- }
-
- /* close fd so that we can reopen it as needed */
- qemu_close(s->fd);
- s->fd = -1;
- s->fd_media_changed = 1;
-
- error_report("Host floppy pass-through is deprecated");
- error_printf("Support for it will be removed in a future release.\n");
- return 0;
-}
-
-static int floppy_probe_device(const char *filename)
-{
- int fd, ret;
- int prio = 0;
- struct floppy_struct fdparam;
- struct stat st;
-
- if (strstart(filename, "/dev/fd", NULL) &&
- !strstart(filename, "/dev/fdset/", NULL) &&
- !strstart(filename, "/dev/fd/", NULL)) {
- prio = 50;
- }
-
- fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- goto out;
- }
- ret = fstat(fd, &st);
- if (ret == -1 || !S_ISBLK(st.st_mode)) {
- goto outc;
- }
-
- /* Attempt to detect via a floppy specific ioctl */
- ret = ioctl(fd, FDGETPRM, &fdparam);
- if (ret >= 0)
- prio = 100;
-
-outc:
- qemu_close(fd);
-out:
- return prio;
-}
-
-
-static int floppy_is_inserted(BlockDriverState *bs)
-{
- return fd_open(bs) >= 0;
-}
-
-static int floppy_media_changed(BlockDriverState *bs)
-{
- BDRVRawState *s = bs->opaque;
- int ret;
-
- /*
- * XXX: we do not have a true media changed indication.
- * It does not work if the floppy is changed without trying to read it.
- */
- fd_open(bs);
- ret = s->fd_media_changed;
- s->fd_media_changed = 0;
- DPRINTF("Floppy changed=%d\n", ret);
- return ret;
-}
-
-static void floppy_eject(BlockDriverState *bs, bool eject_flag)
-{
- BDRVRawState *s = bs->opaque;
- int fd;
-
- if (s->fd >= 0) {
- qemu_close(s->fd);
- s->fd = -1;
- }
- fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
- if (fd >= 0) {
- if (ioctl(fd, FDEJECT, 0) < 0)
- perror("FDEJECT");
- qemu_close(fd);
- }
-}
-
-static BlockDriver bdrv_host_floppy = {
- .format_name = "host_floppy",
- .protocol_name = "host_floppy",
- .instance_size = sizeof(BDRVRawState),
- .bdrv_needs_filename = true,
- .bdrv_probe_device = floppy_probe_device,
- .bdrv_parse_filename = floppy_parse_filename,
- .bdrv_file_open = floppy_open,
- .bdrv_close = raw_close,
- .bdrv_reopen_prepare = raw_reopen_prepare,
- .bdrv_reopen_commit = raw_reopen_commit,
- .bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
- .create_opts = &raw_create_opts,
-
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
- .bdrv_aio_flush = raw_aio_flush,
- .bdrv_refresh_limits = raw_refresh_limits,
- .bdrv_io_plug = raw_aio_plug,
- .bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
- .bdrv_truncate = raw_truncate,
- .bdrv_getlength = raw_getlength,
- .has_variable_length = true,
- .bdrv_get_allocated_file_size
- = raw_get_allocated_file_size,
-
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
- /* removable device support */
- .bdrv_is_inserted = floppy_is_inserted,
- .bdrv_media_changed = floppy_media_changed,
- .bdrv_eject = floppy_eject,
-};
-#endif
-
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static void cdrom_parse_filename(const char *filename, QDict *options,
Error **errp)
@@ -2588,15 +2387,13 @@ out:
return prio;
}
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int ret;
ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
- if (ret == CDS_DISC_OK)
- return 1;
- return 0;
+ return ret == CDS_DISC_OK;
}
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2663,7 +2460,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_lock_medium = cdrom_lock_medium,
/* generic scsi device */
- .bdrv_ioctl = hdev_ioctl,
.bdrv_aio_ioctl = hdev_aio_ioctl,
};
#endif /* __linux__ */
@@ -2722,7 +2518,7 @@ static int cdrom_reopen(BlockDriverState *bs)
return 0;
}
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
{
return raw_getlength(bs) > 0;
}
@@ -2810,7 +2606,6 @@ static void bdrv_file_init(void)
bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
- bdrv_register(&bdrv_host_floppy);
bdrv_register(&bdrv_host_cdrom);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
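
The raw-posix.c changes above remove the host_floppy driver and its fd_open() polling, drop the synchronous .bdrv_ioctl hook, and make the OS X CD-ROM code pick the raw ("r"-prefixed) BSD device node only when the image was opened with BDRV_O_NOCACHE. Below is a minimal, self-contained sketch of that last gating decision; the flag value and helper name are placeholders for this sketch, not the QEMU definitions.

#include <stdio.h>

/* Placeholder values for this sketch only -- not the QEMU definitions. */
#define BDRV_O_NOCACHE 0x20
#define PATH_DEV       "/dev/"

/* Build the BSD device path, choosing the raw ("r"-prefixed) node only
 * when the caller asked to bypass the host page cache. */
static void build_bsd_path(char *out, size_t out_size,
                           const char *bsd_name, int flags)
{
    snprintf(out, out_size, "%s%s%s",
             PATH_DEV,
             (flags & BDRV_O_NOCACHE) ? "r" : "",
             bsd_name);
}

int main(void)
{
    char path[64];

    build_bsd_path(path, sizeof(path), "disk2", 0);
    printf("%s\n", path);                       /* /dev/disk2  */

    build_bsd_path(path, sizeof(path), "disk2", BDRV_O_NOCACHE);
    printf("%s\n", path);                       /* /dev/rdisk2 */
    return 0;
}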
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 68f2338ac..2d0907a82 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -119,9 +119,9 @@ static int aio_worker(void *arg)
case QEMU_AIO_WRITE:
count = handle_aiocb_rw(aiocb);
if (count == aiocb->aio_nbytes) {
- count = 0;
+ ret = 0;
} else {
- count = -EINVAL;
+ ret = -EINVAL;
}
break;
case QEMU_AIO_FLUSH:
@@ -135,7 +135,7 @@ static int aio_worker(void *arg)
break;
}
- g_slice_free(RawWin32AIOData, aiocb);
+ g_free(aiocb);
return ret;
}
@@ -143,7 +143,7 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque, int type)
{
- RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+ RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
ThreadPool *pool;
acb->bs = bs;
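
The raw-win32.c hunks stop returning the transferred byte count as the request status -- a complete transfer now maps to 0 and a short one to -EINVAL -- and swap the g_slice allocator for plain g_new()/g_free(). A small illustrative helper for that status mapping (names invented for this sketch):

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

/* Map the number of bytes actually transferred to an AIO status code:
 * 0 for a complete transfer, -EINVAL for a short one. */
static int rw_status(size_t done, size_t requested)
{
    return (done == requested) ? 0 : -EINVAL;
}

int main(void)
{
    printf("%d %d\n", rw_status(4096, 4096), rw_status(512, 4096));
    return 0;
}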
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index e3d2d0468..915d6fd0e 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -52,7 +52,7 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
+ return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
}
static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
@@ -75,7 +75,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
return 0;
}
- buf = qemu_try_blockalign(bs->file, 512);
+ buf = qemu_try_blockalign(bs->file->bs, 512);
if (!buf) {
ret = -ENOMEM;
goto fail;
@@ -102,7 +102,7 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- ret = bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
fail:
if (qiov == &local_qiov) {
@@ -125,58 +125,48 @@ static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
BdrvRequestFlags flags)
{
- return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
+ return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
}
static int coroutine_fn raw_co_discard(BlockDriverState *bs,
int64_t sector_num, int nb_sectors)
{
- return bdrv_co_discard(bs->file, sector_num, nb_sectors);
+ return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file);
+ return bdrv_getlength(bs->file->bs);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
- return bdrv_get_info(bs->file, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
}
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
- bs->bl = bs->file->bl;
+ bs->bl = bs->file->bs->bl;
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
- return bdrv_truncate(bs->file, offset);
-}
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
- return bdrv_is_inserted(bs->file);
+ return bdrv_truncate(bs->file->bs, offset);
}
static int raw_media_changed(BlockDriverState *bs)
{
- return bdrv_media_changed(bs->file);
+ return bdrv_media_changed(bs->file->bs);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
- bdrv_eject(bs->file, eject_flag);
+ bdrv_eject(bs->file->bs, eject_flag);
}
static void raw_lock_medium(BlockDriverState *bs, bool locked)
{
- bdrv_lock_medium(bs->file, locked);
-}
-
-static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- return bdrv_ioctl(bs->file, req, buf);
+ bdrv_lock_medium(bs->file->bs, locked);
}
static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
@@ -184,12 +174,12 @@ static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
- return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
+ return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
}
static int raw_has_zero_init(BlockDriverState *bs)
{
- return bdrv_has_zero_init(bs->file);
+ return bdrv_has_zero_init(bs->file->bs);
}
static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
@@ -207,7 +197,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
- bs->sg = bs->file->sg;
+ bs->sg = bs->file->bs->sg;
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
@@ -217,7 +207,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
"raw images, write operations on block 0 will be restricted.\n"
" Specify the 'raw' format explicitly to remove the "
"restrictions.\n",
- bs->file->filename);
+ bs->file->bs->filename);
}
return 0;
@@ -237,12 +227,12 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
- return bdrv_probe_blocksizes(bs->file, bsz);
+ return bdrv_probe_blocksizes(bs->file->bs, bsz);
}
static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
- return bdrv_probe_geometry(bs->file, geo);
+ return bdrv_probe_geometry(bs->file->bs, geo);
}
BlockDriver bdrv_raw = {
@@ -264,11 +254,9 @@ BlockDriver bdrv_raw = {
.bdrv_refresh_limits = &raw_refresh_limits,
.bdrv_probe_blocksizes = &raw_probe_blocksizes,
.bdrv_probe_geometry = &raw_probe_geometry,
- .bdrv_is_inserted = &raw_is_inserted,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
- .bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
.create_opts = &raw_create_opts,
.bdrv_has_zero_init = &raw_has_zero_init
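
Every pass-through in raw_bsd.c now reaches the protocol layer through bs->file->bs, because bs->file has become a BdrvChild wrapper rather than the BlockDriverState itself. The sketch below models that extra indirection with heavily cut-down stand-in types; only the field names mirror the diff, so treat it as an illustration rather than the real block-layer structures.

#include <stdint.h>
#include <stdio.h>

/* Cut-down models of the block layer types touched in this patch. */
typedef struct BlockDriverState BlockDriverState;

typedef struct BdrvChild {
    BlockDriverState *bs;      /* the child node itself */
    /* ...role, permissions and list linkage live here in the real type... */
} BdrvChild;

struct BlockDriverState {
    BdrvChild *file;           /* protocol/child node, may be NULL */
    int64_t length;            /* stand-in for the node's size */
};

/* Pass-through length query in the new style: go through the BdrvChild. */
static int64_t raw_getlength_sketch(BlockDriverState *bs)
{
    return bs->file ? bs->file->bs->length : -1;
}

int main(void)
{
    BlockDriverState proto = { .file = NULL, .length = 1 << 20 };
    BdrvChild child = { .bs = &proto };
    BlockDriverState raw = { .file = &child, .length = 0 };

    printf("%lld\n", (long long)raw_getlength_sketch(&raw)); /* 1048576 */
    return 0;
}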
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 9585beb73..d80e4ed18 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -28,7 +28,6 @@
#define SD_OP_READ_OBJ 0x02
#define SD_OP_WRITE_OBJ 0x03
/* 0x04 is used internally by Sheepdog */
-#define SD_OP_DISCARD_OBJ 0x05
#define SD_OP_NEW_VDI 0x11
#define SD_OP_LOCK_VDI 0x12
@@ -318,7 +317,7 @@ enum AIOCBState {
AIOCB_DISCARD_OBJ,
};
-#define AIOCBOverwrapping(x, y) \
+#define AIOCBOverlapping(x, y) \
(!(x->max_affect_data_idx < y->min_affect_data_idx \
|| y->max_affect_data_idx < x->min_affect_data_idx))
@@ -342,6 +341,15 @@ struct SheepdogAIOCB {
uint32_t min_affect_data_idx;
uint32_t max_affect_data_idx;
+ /*
+ * The difference between affect_data_idx and dirty_data_idx:
+ * affect_data_idx represents range of index of all request types.
+ * dirty_data_idx represents range of index updated by COW requests.
+ * dirty_data_idx is used for updating an inode object.
+ */
+ uint32_t min_dirty_data_idx;
+ uint32_t max_dirty_data_idx;
+
QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};
@@ -351,9 +359,6 @@ typedef struct BDRVSheepdogState {
SheepdogInode inode;
- uint32_t min_dirty_data_idx;
- uint32_t max_dirty_data_idx;
-
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
uint32_t cache_flags;
@@ -373,10 +378,15 @@ typedef struct BDRVSheepdogState {
QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
- CoQueue overwrapping_queue;
+ CoQueue overlapping_queue;
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
} BDRVSheepdogState;
+typedef struct BDRVSheepdogReopenState {
+ int fd;
+ int cache_flags;
+} BDRVSheepdogReopenState;
+
static const char * sd_strerror(int err)
{
int i;
@@ -556,6 +566,9 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
+
return acb;
}
@@ -638,14 +651,16 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -670,7 +685,8 @@ static coroutine_fn void do_co_req(void *opaque)
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
- aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
@@ -722,7 +738,8 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
- aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+ NULL, NULL);
close(s->fd);
s->fd = -1;
@@ -819,8 +836,8 @@ static void coroutine_fn aio_read_response(void *opaque)
*/
if (rsp.result == SD_RES_SUCCESS) {
s->inode.data_vdi_id[idx] = s->inode.vdi_id;
- s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
- s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+ acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
+ acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
}
}
break;
@@ -847,10 +864,6 @@ static void coroutine_fn aio_read_response(void *opaque)
rsp.result = SD_RES_SUCCESS;
s->discard_supported = false;
break;
- case SD_RES_SUCCESS:
- idx = data_oid_to_idx(aio_req->oid);
- s->inode.data_vdi_id[idx] = 0;
- break;
default:
break;
}
@@ -929,7 +942,8 @@ static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
return fd;
}
- aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ co_read_response, NULL, s);
return fd;
}
@@ -1165,7 +1179,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
hdr.flags = SD_FLAG_CMD_WRITE | flags;
break;
case AIOCB_DISCARD_OBJ:
- hdr.opcode = SD_OP_DISCARD_OBJ;
+ hdr.opcode = SD_OP_WRITE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE | flags;
+ s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
+ offset = offsetof(SheepdogInode,
+ data_vdi_id[data_oid_to_idx(oid)]);
+ oid = vid_to_vdi_oid(s->inode.vdi_id);
+ wlen = datalen = sizeof(uint32_t);
break;
}
@@ -1184,7 +1204,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
- aio_set_fd_handler(s->aio_context, s->fd,
+ aio_set_fd_handler(s->aio_context, s->fd, false,
co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
@@ -1203,7 +1223,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
out:
socket_set_cork(s->fd, 0);
- aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, s->fd, false,
+ co_read_response, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
@@ -1353,7 +1374,8 @@ static void sd_detach_aio_context(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
- aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+ NULL, NULL);
}
static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1362,7 +1384,8 @@ static void sd_attach_aio_context(BlockDriverState *bs,
BDRVSheepdogState *s = bs->opaque;
s->aio_context = new_context;
- aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(new_context, s->fd, false,
+ co_read_response, NULL, s);
}
/* TODO Convert to fine grained options */
@@ -1466,18 +1489,17 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
}
memcpy(&s->inode, buf, sizeof(s->inode));
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
- qemu_co_queue_init(&s->overwrapping_queue);
+ qemu_co_queue_init(&s->overlapping_queue);
qemu_opts_del(opts);
g_free(buf);
return 0;
out:
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+ false, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1486,6 +1508,70 @@ out:
return ret;
}
+static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
+ Error **errp)
+{
+ BDRVSheepdogState *s = state->bs->opaque;
+ BDRVSheepdogReopenState *re_s;
+ int ret = 0;
+
+ re_s = state->opaque = g_new0(BDRVSheepdogReopenState, 1);
+
+ re_s->cache_flags = SD_FLAG_CMD_CACHE;
+ if (state->flags & BDRV_O_NOCACHE) {
+ re_s->cache_flags = SD_FLAG_CMD_DIRECT;
+ }
+
+ re_s->fd = get_sheep_fd(s, errp);
+ if (re_s->fd < 0) {
+ ret = re_s->fd;
+ return ret;
+ }
+
+ return ret;
+}
+
+static void sd_reopen_commit(BDRVReopenState *state)
+{
+ BDRVSheepdogReopenState *re_s = state->opaque;
+ BDRVSheepdogState *s = state->bs->opaque;
+
+ if (s->fd) {
+ aio_set_fd_handler(s->aio_context, s->fd, false,
+ NULL, NULL, NULL);
+ closesocket(s->fd);
+ }
+
+ s->fd = re_s->fd;
+ s->cache_flags = re_s->cache_flags;
+
+ g_free(state->opaque);
+ state->opaque = NULL;
+
+ return;
+}
+
+static void sd_reopen_abort(BDRVReopenState *state)
+{
+ BDRVSheepdogReopenState *re_s = state->opaque;
+ BDRVSheepdogState *s = state->bs->opaque;
+
+ if (re_s == NULL) {
+ return;
+ }
+
+ if (re_s->fd) {
+ aio_set_fd_handler(s->aio_context, re_s->fd, false,
+ NULL, NULL, NULL);
+ closesocket(re_s->fd);
+ }
+
+ g_free(state->opaque);
+ state->opaque = NULL;
+
+ return;
+}
+
static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
Error **errp)
{
@@ -1554,7 +1640,7 @@ static int sd_prealloc(const char *filename, Error **errp)
int ret;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, errp);
+ errp);
if (ret < 0) {
goto out_with_err_set;
}
@@ -1746,8 +1832,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
}
bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
- errp);
+ ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp);
if (ret < 0) {
goto out;
}
@@ -1861,7 +1946,8 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+ false, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}
@@ -1923,16 +2009,16 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
AIOReq *aio_req;
uint32_t offset, data_len, mn, mx;
- mn = s->min_dirty_data_idx;
- mx = s->max_dirty_data_idx;
+ mn = acb->min_dirty_data_idx;
+ mx = acb->max_dirty_data_idx;
if (mn <= mx) {
/* we need to update the vdi object. */
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
mn * sizeof(s->inode.data_vdi_id[0]);
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
@@ -2141,7 +2227,9 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
- old_oid, done);
+ old_oid,
+ acb->aiocb_type == AIOCB_DISCARD_OBJ ?
+ 0 : done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
@@ -2158,12 +2246,12 @@ out:
return 1;
}
-static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
{
SheepdogAIOCB *cb;
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
- if (AIOCBOverwrapping(aiocb, cb)) {
+ if (AIOCBOverlapping(aiocb, cb)) {
return true;
}
}
@@ -2192,15 +2280,15 @@ static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
acb->aiocb_type = AIOCB_WRITE_UDATA;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2208,7 +2296,7 @@ retry:
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2225,15 +2313,15 @@ static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
acb->aio_done_func = sd_finish_aiocb;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2241,7 +2329,7 @@ retry:
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2577,28 +2665,36 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
{
SheepdogAIOCB *acb;
- QEMUIOVector dummy;
BDRVSheepdogState *s = bs->opaque;
int ret;
+ QEMUIOVector discard_iov;
+ struct iovec iov;
+ uint32_t zero = 0;
if (!s->discard_supported) {
return 0;
}
- acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+ memset(&discard_iov, 0, sizeof(discard_iov));
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &zero;
+ iov.iov_len = sizeof(zero);
+ discard_iov.iov = &iov;
+ discard_iov.niov = 1;
+ acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2606,7 +2702,7 @@ retry:
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2703,6 +2799,9 @@ static BlockDriver bdrv_sheepdog = {
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2736,6 +2835,9 @@ static BlockDriver bdrv_sheepdog_tcp = {
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2769,6 +2871,9 @@ static BlockDriver bdrv_sheepdog_unix = {
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
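
The sheepdog.c changes correct the "overwrapping" spelling to "overlapping", move dirty-index tracking from the device state into each SheepdogAIOCB, re-express discard as a write of a zero vdi_id into the inode object, add reopen prepare/commit/abort callbacks, and pass the new is_external flag to aio_set_fd_handler(). The read, write and discard paths all share one serialization pattern: check for an overlapping in-flight request, wait on a queue, retry, and wake the queue on completion. The standalone sketch below reproduces that control flow with a pthread mutex and condition variable standing in for QEMU's coroutine queue.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative request covering an index range, like the
 * min/max_affect_data_idx pair in SheepdogAIOCB. */
typedef struct {
    unsigned min_idx, max_idx;
    bool in_flight;
} Req;

#define MAX_REQS 16
static Req reqs[MAX_REQS];
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  wake = PTHREAD_COND_INITIALIZER;

static bool overlaps(const Req *a, const Req *b)
{
    /* Same condition as the AIOCBOverlapping() macro in the diff. */
    return !(a->max_idx < b->min_idx || b->max_idx < a->min_idx);
}

static bool any_overlapping(const Req *r)
{
    for (int i = 0; i < MAX_REQS; i++) {
        if (reqs[i].in_flight && &reqs[i] != r && overlaps(&reqs[i], r)) {
            return true;
        }
    }
    return false;
}

/* Check-wait-retry: block until no in-flight request overlaps ours,
 * then mark ourselves in flight. */
static void start_req(Req *r)
{
    pthread_mutex_lock(&lock);
    while (any_overlapping(r)) {
        pthread_cond_wait(&wake, &lock);
    }
    r->in_flight = true;
    pthread_mutex_unlock(&lock);
}

/* On completion, leave the in-flight set and wake every waiter,
 * mirroring qemu_co_queue_restart_all(). */
static void end_req(Req *r)
{
    pthread_mutex_lock(&lock);
    r->in_flight = false;
    pthread_cond_broadcast(&wake);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    reqs[0] = (Req){ .min_idx = 0, .max_idx = 3 };
    start_req(&reqs[0]);
    printf("request 0 running\n");
    end_req(&reqs[0]);
    return 0;
}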
diff --git a/block/snapshot.c b/block/snapshot.c
index 49e143e99..6e9fa8da9 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -149,7 +149,7 @@ int bdrv_can_snapshot(BlockDriverState *bs)
if (!drv->bdrv_snapshot_create) {
if (bs->file != NULL) {
- return bdrv_can_snapshot(bs->file);
+ return bdrv_can_snapshot(bs->file->bs);
}
return 0;
}
@@ -168,7 +168,7 @@ int bdrv_snapshot_create(BlockDriverState *bs,
return drv->bdrv_snapshot_create(bs, sn_info);
}
if (bs->file) {
- return bdrv_snapshot_create(bs->file, sn_info);
+ return bdrv_snapshot_create(bs->file->bs, sn_info);
}
return -ENOTSUP;
}
@@ -188,10 +188,10 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
if (bs->file) {
drv->bdrv_close(bs);
- ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+ ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
if (open_ret < 0) {
- bdrv_unref(bs->file);
+ bdrv_unref(bs->file->bs);
bs->drv = NULL;
return open_ret;
}
@@ -245,7 +245,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
}
if (bs->file) {
- return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
+ return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
}
error_setg(errp, "Block format '%s' used by device '%s' "
"does not support internal snapshot deletion",
@@ -253,9 +253,9 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
return -ENOTSUP;
}
-void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
- const char *id_or_name,
- Error **errp)
+int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+ const char *id_or_name,
+ Error **errp)
{
int ret;
Error *local_err = NULL;
@@ -270,6 +270,7 @@ void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
if (ret < 0) {
error_propagate(errp, local_err);
}
+ return ret;
}
int bdrv_snapshot_list(BlockDriverState *bs,
@@ -283,7 +284,7 @@ int bdrv_snapshot_list(BlockDriverState *bs,
return drv->bdrv_snapshot_list(bs, psn_info);
}
if (bs->file) {
- return bdrv_snapshot_list(bs->file, psn_info);
+ return bdrv_snapshot_list(bs->file->bs, psn_info);
}
return -ENOTSUP;
}
@@ -356,3 +357,130 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
return ret;
}
+
+
+/* Group operations. All block drivers are involved.
+ * These functions will properly handle dataplane (take aio_context_acquire
+ * when appropriate for appropriate block drivers) */
+
+bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
+{
+ bool ok = true;
+ BlockDriverState *bs = NULL;
+
+ while (ok && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_is_inserted(bs) && !bdrv_is_read_only(bs)) {
+ ok = bdrv_can_snapshot(bs);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return ok;
+}
+
+int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
+ Error **err)
+{
+ int ret = 0;
+ BlockDriverState *bs = NULL;
+ QEMUSnapshotInfo sn1, *snapshot = &sn1;
+
+ while (ret == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_can_snapshot(bs) &&
+ bdrv_snapshot_find(bs, snapshot, name) >= 0) {
+ ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return ret;
+}
+
+
+int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
+{
+ int err = 0;
+ BlockDriverState *bs = NULL;
+
+ while (err == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_can_snapshot(bs)) {
+ err = bdrv_snapshot_goto(bs, name);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return err;
+}
+
+int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
+{
+ QEMUSnapshotInfo sn;
+ int err = 0;
+ BlockDriverState *bs = NULL;
+
+ while (err == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bdrv_can_snapshot(bs)) {
+ err = bdrv_snapshot_find(bs, &sn, name);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return err;
+}
+
+int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
+ BlockDriverState *vm_state_bs,
+ uint64_t vm_state_size,
+ BlockDriverState **first_bad_bs)
+{
+ int err = 0;
+ BlockDriverState *bs = NULL;
+
+ while (err == 0 && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ if (bs == vm_state_bs) {
+ sn->vm_state_size = vm_state_size;
+ err = bdrv_snapshot_create(bs, sn);
+ } else if (bdrv_can_snapshot(bs)) {
+ sn->vm_state_size = 0;
+ err = bdrv_snapshot_create(bs, sn);
+ }
+ aio_context_release(ctx);
+ }
+
+ *first_bad_bs = bs;
+ return err;
+}
+
+BlockDriverState *bdrv_all_find_vmstate_bs(void)
+{
+ bool not_found = true;
+ BlockDriverState *bs = NULL;
+
+ while (not_found && (bs = bdrv_next(bs))) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ not_found = !bdrv_can_snapshot(bs);
+ aio_context_release(ctx);
+ }
+ return bs;
+}
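
snapshot.c grows a family of bdrv_all_*() group operations that visit every node via bdrv_next(), take and release the node's AioContext around each call, stop at the first failure, and report the offending node through *first_bad_bs. The following self-contained sketch shows only that loop shape; the node list and context-locking functions are simplified stand-ins, not the QEMU API.

#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-ins: a list of nodes and a per-node context lock. */
typedef struct Node {
    const char *name;
    bool can_snapshot;
    struct Node *next;
} Node;

static void ctx_acquire(Node *n) { (void)n; /* lock this node's context */ }
static void ctx_release(Node *n) { (void)n; /* unlock it again */ }

static Node *node_next(Node *list, Node *cur)
{
    return cur ? cur->next : list;
}

/* Same shape as bdrv_all_can_snapshot(): visit every node under its own
 * context lock and stop at the first one that cannot snapshot. */
static bool all_can_snapshot(Node *list, Node **first_bad)
{
    bool ok = true;
    Node *n = NULL;

    while (ok && (n = node_next(list, n))) {
        ctx_acquire(n);
        ok = n->can_snapshot;
        ctx_release(n);
    }
    *first_bad = n;
    return ok;
}

int main(void)
{
    Node c = { "scratch", false, NULL };
    Node b = { "disk1",   true,  &c };
    Node a = { "disk0",   true,  &b };
    Node *bad = NULL;

    printf("all ok: %d\n", all_can_snapshot(&a, &bad));
    printf("first bad: %s\n", bad ? bad->name : "(none)");
    return 0;
}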
diff --git a/block/ssh.c b/block/ssh.c
index 8d0673903..af025c08a 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -193,7 +193,7 @@ sftp_error_report(BDRVSSHState *s, const char *fs, ...)
static int parse_uri(const char *filename, QDict *options, Error **errp)
{
URI *uri = NULL;
- QueryParams *qp = NULL;
+ QueryParams *qp;
int i;
uri = uri_parse(filename);
@@ -249,9 +249,6 @@ static int parse_uri(const char *filename, QDict *options, Error **errp)
return 0;
err:
- if (qp) {
- query_params_free(qp);
- }
if (uri) {
uri_free(uri);
}
@@ -803,14 +800,15 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
rd_handler, wr_handler);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
- rd_handler, wr_handler, co);
+ false, rd_handler, wr_handler, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+ false, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
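
Besides deleting the dead QueryParams cleanup in parse_uri(), the ssh.c hunks show the interface change threaded through every driver in this import: aio_set_fd_handler() gains a boolean that marks a handler as "external" so it can be suspended while a drained section runs, and all callers here pass false. The toy dispatcher below only illustrates what such a flag buys; the types and names are invented for the sketch and are not the QEMU event-loop API.

#include <stdbool.h>
#include <stdio.h>

typedef void (*io_handler)(void *opaque);

typedef struct {
    int fd;
    bool is_external;          /* the flag added in this release */
    io_handler io_read;
    void *opaque;
} FdHandler;

static bool external_disabled; /* set while a drained section runs */

static void dispatch_readable(FdHandler *h)
{
    if (h->is_external && external_disabled) {
        return;                /* deferred until the drained section ends */
    }
    if (h->io_read) {
        h->io_read(h->opaque);
    }
}

static void on_read(void *opaque)
{
    printf("read event on %s\n", (const char *)opaque);
}

int main(void)
{
    FdHandler h = { .fd = 3, .is_external = false,
                    .io_read = on_read, .opaque = (void *)"ssh socket" };

    dispatch_readable(&h);     /* internal handlers always run */
    external_disabled = true;
    dispatch_readable(&h);     /* still runs: this handler is not external */
    return 0;
}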
diff --git a/block/stream.c b/block/stream.c
index ab0bd057f..25af7eff6 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -16,6 +16,7 @@
#include "block/blockjob.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
enum {
/*
@@ -52,34 +53,6 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
}
-static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
- const char *base_id)
-{
- BlockDriverState *intermediate;
- intermediate = top->backing_hd;
-
- /* Must assign before bdrv_delete() to prevent traversing dangling pointer
- * while we delete backing image instances.
- */
- bdrv_set_backing_hd(top, base);
-
- while (intermediate) {
- BlockDriverState *unused;
-
- /* reached base */
- if (intermediate == base) {
- break;
- }
-
- unused = intermediate;
- intermediate = intermediate->backing_hd;
- bdrv_set_backing_hd(unused, NULL);
- bdrv_unref(unused);
- }
-
- bdrv_refresh_limits(top, NULL);
-}
-
typedef struct {
int ret;
bool reached_end;
@@ -101,7 +74,7 @@ static void stream_complete(BlockJob *job, void *opaque)
}
}
data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
- close_unused_images(job->bs, base, base_id);
+ bdrv_set_backing_hd(job->bs, base);
}
g_free(s->backing_file_str);
@@ -121,7 +94,7 @@ static void coroutine_fn stream_run(void *opaque)
int n = 0;
void *buf;
- if (!bs->backing_hd) {
+ if (!bs->backing) {
block_job_completed(&s->common, 0);
return;
}
@@ -166,7 +139,7 @@ wait:
} else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
- ret = bdrv_is_allocated_above(bs->backing_hd, base,
+ ret = bdrv_is_allocated_above(backing_bs(bs), base,
sector_num, n, &n);
/* Finish early if end of backing file has been reached */
@@ -250,7 +223,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
+ (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
return;
}
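
stream.c picks up the same BdrvChild conversion: bs->backing_hd becomes the bs->backing child, the hand-rolled close_unused_images() is replaced by a single bdrv_set_backing_hd(job->bs, base), and the allocation walk starts from backing_bs(bs). A standalone model of a backing_bs()-style accessor and a backing-chain walk, with simplified types local to this sketch:

#include <stdio.h>

/* Cut-down node/child types, redefined here so the sketch stands alone. */
typedef struct Node Node;
typedef struct { Node *bs; } Child;

struct Node {
    const char *name;
    Child *backing;            /* replaces the old backing_hd pointer */
};

/* Equivalent of the backing_bs() accessor used in stream_run(): follow
 * the backing child if there is one. */
static Node *backing_node(Node *n)
{
    return n->backing ? n->backing->bs : NULL;
}

int main(void)
{
    Node base = { "base.qcow2", NULL };
    Child cb  = { &base };
    Node top  = { "top.qcow2", &cb };

    for (Node *n = &top; n; n = backing_node(n)) {
        printf("%s\n", n->name);   /* top.qcow2, then base.qcow2 */
    }
    return 0;
}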
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 1abc6fcae..13b5baa5d 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -33,8 +33,7 @@
* its own locking.
*
* This locking is however handled internally in this file, so it's
- * mostly transparent to outside users (but see the documentation in
- * throttle_groups_lock()).
+ * transparent to outside users.
*
* The whole ThrottleGroup structure is private and invisible to
* outside users, that only use it through its ThrottleState.
@@ -76,9 +75,9 @@ static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
* created.
*
* @name: the name of the ThrottleGroup
- * @ret: the ThrottleGroup
+ * @ret: the ThrottleState member of the ThrottleGroup
*/
-static ThrottleGroup *throttle_group_incref(const char *name)
+ThrottleState *throttle_group_incref(const char *name)
{
ThrottleGroup *tg = NULL;
ThrottleGroup *iter;
@@ -108,7 +107,7 @@ static ThrottleGroup *throttle_group_incref(const char *name)
qemu_mutex_unlock(&throttle_groups_lock);
- return tg;
+ return &tg->ts;
}
/* Decrease the reference count of a ThrottleGroup.
@@ -116,10 +115,12 @@ static ThrottleGroup *throttle_group_incref(const char *name)
* When the reference count reaches zero the ThrottleGroup is
* destroyed.
*
- * @tg: The ThrottleGroup to unref
+ * @ts: The ThrottleGroup to unref, given by its ThrottleState member
*/
-static void throttle_group_unref(ThrottleGroup *tg)
+void throttle_group_unref(ThrottleState *ts)
{
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+
qemu_mutex_lock(&throttle_groups_lock);
if (--tg->refcount == 0) {
QTAILQ_REMOVE(&throttle_groups, tg, list);
@@ -401,7 +402,8 @@ static void write_timer_cb(void *opaque)
void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
{
int i;
- ThrottleGroup *tg = throttle_group_incref(groupname);
+ ThrottleState *ts = throttle_group_incref(groupname);
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
int clock_type = QEMU_CLOCK_REALTIME;
if (qtest_enabled()) {
@@ -409,7 +411,7 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
clock_type = QEMU_CLOCK_VIRTUAL;
}
- bs->throttle_state = &tg->ts;
+ bs->throttle_state = ts;
qemu_mutex_lock(&tg->lock);
/* If the ThrottleGroup is new set this BlockDriverState as the token */
@@ -435,6 +437,9 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
* list, destroying the timers and setting the throttle_state pointer
* to NULL.
*
+ * The BlockDriverState must not have pending throttled requests, so
+ * the caller has to drain them first.
+ *
* The group will be destroyed if it's empty after this operation.
*
* @bs: the BlockDriverState to remove
@@ -444,6 +449,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
int i;
+ assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0);
+ assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
+ assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
+
qemu_mutex_lock(&tg->lock);
for (i = 0; i < 2; i++) {
if (tg->tokens[i] == bs) {
@@ -461,38 +470,10 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
throttle_timers_destroy(&bs->throttle_timers);
qemu_mutex_unlock(&tg->lock);
- throttle_group_unref(tg);
+ throttle_group_unref(&tg->ts);
bs->throttle_state = NULL;
}
-/* Acquire the lock of this throttling group.
- *
- * You won't normally need to use this. None of the functions from the
- * ThrottleGroup API require you to acquire the lock since all of them
- * deal with it internally.
- *
- * This should only be used in exceptional cases when you want to
- * access the protected fields of a BlockDriverState directly
- * (e.g. bdrv_swap()).
- *
- * @bs: a BlockDriverState that is member of the group
- */
-void throttle_group_lock(BlockDriverState *bs)
-{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
- qemu_mutex_lock(&tg->lock);
-}
-
-/* Release the lock of this throttling group.
- *
- * See the comments in throttle_group_lock().
- */
-void throttle_group_unlock(BlockDriverState *bs)
-{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
- qemu_mutex_unlock(&tg->lock);
-}
-
static void throttle_groups_init(void)
{
qemu_mutex_init(&throttle_groups_lock);
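
throttle-groups.c shrinks its public surface: the group lock/unlock helpers disappear, and throttle_group_incref()/unref() now hand out and accept the ThrottleState embedded in the otherwise private ThrottleGroup, which internal code recovers with container_of(). The self-contained fragment below illustrates that embed-and-recover idiom with invented struct names.

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

/* Public handle: the only type callers see (contents irrelevant here). */
typedef struct { int opaque_field; } State;

/* Private structure that embeds the public handle. */
typedef struct {
    char name[32];
    State ts;                  /* handed out by the incref-style function */
    int refcount;
} Group;

static void group_put(State *ts)
{
    Group *g = container_of(ts, Group, ts);   /* recover the owner */

    if (--g->refcount == 0) {
        printf("destroying group %s\n", g->name);
    }
}

int main(void)
{
    Group g = { .name = "limits0", .refcount = 1 };

    group_put(&g.ts);
    return 0;
}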
diff --git a/block/vdi.c b/block/vdi.c
index 7642ef359..17f435fad 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,7 +53,7 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -399,7 +399,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
logout("\n");
- ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
+ ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
if (ret < 0) {
goto fail;
}
@@ -490,13 +490,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
bmap_size = header.blocks_in_image * sizeof(uint32_t);
bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
- s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+ s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
if (s->bmap == NULL) {
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+ ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+ bmap_size);
if (ret < 0) {
goto fail_free_bmap;
}
@@ -585,7 +586,7 @@ static int vdi_co_read(BlockDriverState *bs,
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
(uint64_t)bmap_entry * s->block_sectors +
sector_in_block;
- ret = bdrv_read(bs->file, offset, buf, n_sectors);
+ ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
}
logout("%u sectors read\n", n_sectors);
@@ -653,7 +654,7 @@ static int vdi_co_write(BlockDriverState *bs,
* acquire the lock and thus the padded cluster is written before
* the other coroutines can write to the affected area. */
qemu_co_mutex_lock(&s->write_lock);
- ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+ ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
qemu_co_mutex_unlock(&s->write_lock);
} else {
uint64_t offset = s->header.offset_data / SECTOR_SIZE +
@@ -669,7 +670,7 @@ static int vdi_co_write(BlockDriverState *bs,
* that that write operation has returned (there may be other writes
* in flight, but they do not concern this very operation). */
qemu_co_mutex_unlock(&s->write_lock);
- ret = bdrv_write(bs->file, offset, buf, n_sectors);
+ ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
}
nb_sectors -= n_sectors;
@@ -694,7 +695,7 @@ static int vdi_co_write(BlockDriverState *bs,
assert(VDI_IS_ALLOCATED(bmap_first));
*header = s->header;
vdi_header_to_le(header);
- ret = bdrv_write(bs->file, 0, block, 1);
+ ret = bdrv_write(bs->file->bs, 0, block, 1);
g_free(block);
block = NULL;
@@ -712,7 +713,7 @@ static int vdi_co_write(BlockDriverState *bs,
base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
- ret = bdrv_write(bs->file, offset, base, n_sectors);
+ ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
}
return ret;
@@ -764,7 +765,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
+ &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
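
Apart from the bs->file->bs conversion, the vdi.c context preserves an ordering rule worth noting: when a write allocates a new, padded cluster, the write lock is held across the bdrv_write() of that cluster, whereas in-place writes drop the lock before issuing the I/O. A compressed sketch of that take-the-lock-only-for-allocation shape, with a plain mutex standing in for the coroutine mutex:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t write_lock = PTHREAD_MUTEX_INITIALIZER;

static void do_write(const char *what) { printf("writing %s\n", what); }

/* Allocation of a new, padded cluster must complete under the lock so a
 * concurrent writer to the same cluster sees the padding first; plain
 * overwrites only need the lock while the block map is consulted. */
static void write_cluster(bool newly_allocated)
{
    if (newly_allocated) {
        pthread_mutex_lock(&write_lock);
        do_write("padded cluster");
        pthread_mutex_unlock(&write_lock);
    } else {
        pthread_mutex_lock(&write_lock);
        /* ...look up / update the in-memory block map... */
        pthread_mutex_unlock(&write_lock);
        do_write("data in place");
    }
}

int main(void)
{
    write_cluster(true);
    write_cluster(false);
    return 0;
}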
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 47fec63c6..47ae4b135 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -81,7 +81,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
offset = log->offset + read;
- ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
+ ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
if (ret < 0) {
goto exit;
}
@@ -141,7 +141,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
}
offset = log->offset + read;
- ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+ ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -191,7 +191,8 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
/* full */
break;
}
- ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+ ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
+ VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -353,7 +354,7 @@ static int vhdx_log_read_desc(BlockDriverState *bs, BDRVVHDXState *s,
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- desc_entries = qemu_try_blockalign(bs->file,
+ desc_entries = qemu_try_blockalign(bs->file->bs,
desc_sectors * VHDX_LOG_SECTOR_SIZE);
if (desc_entries == NULL) {
ret = -ENOMEM;
@@ -462,7 +463,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
/* count is only > 1 if we are writing zeroes */
for (i = 0; i < count; i++) {
- ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
+ ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -509,7 +510,7 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
/* if the log shows a FlushedFileOffset larger than our current file
* size, then that means the file has been truncated / corrupted, and
 * we must refuse to open it / use it */
- if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+ if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
ret = -EINVAL;
goto exit;
}
@@ -539,12 +540,12 @@ static int vhdx_log_flush(BlockDriverState *bs, BDRVVHDXState *s,
goto exit;
}
}
- if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+ if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
new_file_size = desc_entries->hdr.last_file_offset;
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
- bdrv_truncate(bs->file, new_file_size);
+ bdrv_truncate(bs->file->bs, new_file_size);
}
}
qemu_vfree(desc_entries);
@@ -908,8 +909,8 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
.sequence_number = s->log.sequence,
.descriptor_count = sectors,
.reserved = 0,
- .flushed_file_offset = bdrv_getlength(bs->file),
- .last_file_offset = bdrv_getlength(bs->file),
+ .flushed_file_offset = bdrv_getlength(bs->file->bs),
+ .last_file_offset = bdrv_getlength(bs->file->bs),
};
new_hdr.log_guid = header->log_guid;
@@ -940,7 +941,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
if (i == 0 && leading_length) {
/* partial sector at the front of the buffer */
- ret = bdrv_pread(bs->file, file_offset, merged_sector,
+ ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -950,7 +951,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
sector_write = merged_sector;
} else if (i == sectors - 1 && trailing_length) {
/* partial sector at the end of the buffer */
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
file_offset,
merged_sector + trailing_length,
VHDX_LOG_SECTOR_SIZE - trailing_length);
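
In vhdx_log_flush() the replayed log may point past the current end of the image, in which case the file is grown to the logged last_file_offset rounded up to the next 1 MB boundary via ((size >> 20) + 1) << 20. A tiny standalone check of that rounding arithmetic:

#include <inttypes.h>
#include <stdio.h>

/* Round a size up to the next 1 MB boundary, exactly as the log-flush
 * hunk does: only adjust when the value is not already aligned. */
static uint64_t round_up_1mb(uint64_t size)
{
    if (size % (1024 * 1024)) {
        size = ((size >> 20) + 1) << 20;
    }
    return size;
}

int main(void)
{
    printf("%" PRIu64 "\n", round_up_1mb(1));        /* 1048576 */
    printf("%" PRIu64 "\n", round_up_1mb(1048576));  /* 1048576 */
    printf("%" PRIu64 "\n", round_up_1mb(1048577));  /* 2097152 */
    return 0;
}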
diff --git a/block/vhdx.c b/block/vhdx.c
index 0776de717..2fe9a5e0c 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -375,7 +375,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
inactive_header->log_guid = *log_guid;
}
- ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
+ ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
if (ret < 0) {
goto exit;
}
@@ -427,7 +427,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
- ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
+ VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -443,7 +444,8 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
}
- ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
+ VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -516,7 +518,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
* whole block */
buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
- ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
+ ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
goto fail;
@@ -629,7 +631,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
- ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
+ ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
VHDX_METADATA_TABLE_MAX_SIZE);
if (ret < 0) {
goto exit;
@@ -732,7 +734,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
goto exit;
}
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.file_parameters_entry.offset
+ s->metadata_rt.file_offset,
&s->params,
@@ -767,7 +769,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
/* determine virtual disk size, logical sector size,
* and phys sector size */
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.virtual_disk_size_entry.offset
+ s->metadata_rt.file_offset,
&s->virtual_disk_size,
@@ -775,7 +777,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.logical_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->logical_sector_size,
@@ -783,7 +785,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
- ret = bdrv_pread(bs->file,
+ ret = bdrv_pread(bs->file->bs,
s->metadata_entries.phys_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->physical_sector_size,
@@ -906,7 +908,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->regions);
/* validate the file signature */
- ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
+ ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
@@ -959,13 +961,13 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
}
/* s->bat is freed in vhdx_close() */
- s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+ s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length);
if (s->bat == NULL) {
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
+ ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
goto fail;
}
@@ -1118,7 +1120,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
case PAYLOAD_BLOCK_FULLY_PRESENT:
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
+ ret = bdrv_co_readv(bs->file->bs,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sinfo.sectors_avail, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1156,12 +1158,12 @@ exit:
static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
uint64_t *new_offset)
{
- *new_offset = bdrv_getlength(bs->file);
+ *new_offset = bdrv_getlength(bs->file->bs);
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
- return bdrv_truncate(bs->file, *new_offset + s->block_size);
+ return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
}
/*
@@ -1260,7 +1262,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
/* Queue another write of zero buffers if the underlying file
* does not zero-fill on file extension */
- if (bdrv_has_zero_init(bs->file) == 0) {
+ if (bdrv_has_zero_init(bs->file->bs) == 0) {
use_zero_buffers = true;
/* zero fill the front, if any */
@@ -1327,7 +1329,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
}
/* block exists, so we can just overwrite it */
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file,
+ ret = bdrv_co_writev(bs->file->bs,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sectors_to_write, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1454,7 +1456,7 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
uint32_t offset = 0;
void *buffer = NULL;
void *entry_buffer;
- VHDXMetadataTableHeader *md_table;;
+ VHDXMetadataTableHeader *md_table;
VHDXMetadataTableEntry *md_table_entry;
/* Metadata entries */
@@ -1842,7 +1844,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
bs = NULL;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
+ &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
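
vhdx.c's allocator follows the same convention: vhdx_allocate_block() rounds the current file length up to a 1 MB multiple (block addresses are in units of 1 MB per the spec comment) and truncates the file to that offset plus one block. The sketch below reproduces the shape of that allocation with local stand-ins for ROUND_UP and bdrv_truncate(); it is an illustration, not the driver code.

#include <inttypes.h>
#include <stdio.h>

#define MiB (1024 * 1024)

/* Generic round-up to a multiple, like QEMU's ROUND_UP macro
 * (names here are local to this sketch). */
static uint64_t round_up(uint64_t v, uint64_t align)
{
    return ((v + align - 1) / align) * align;
}

/* Shape of vhdx_allocate_block(): place the new block at the 1 MB-aligned
 * end of the file, then extend the file by one block size. */
static uint64_t allocate_block(uint64_t *file_len, uint64_t block_size)
{
    uint64_t new_offset = round_up(*file_len, MiB);

    *file_len = new_offset + block_size;   /* stands in for bdrv_truncate() */
    return new_offset;
}

int main(void)
{
    uint64_t len = 3 * MiB + 4096;         /* unaligned current file size */
    uint64_t off = allocate_block(&len, 2 * MiB);

    printf("block at %" PRIu64 ", new length %" PRIu64 "\n", off, len);
    /* block at 4194304, new length 6291456 */
    return 0;
}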
diff --git a/block/vmdk.c b/block/vmdk.c
index fbaab67c8..e46271a80 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -87,7 +87,7 @@ typedef struct {
#define L2_CACHE_SIZE 16
typedef struct VmdkExtent {
- BlockDriverState *file;
+ BdrvChild *file;
bool flat;
bool compressed;
bool has_marker;
@@ -222,7 +222,7 @@ static void vmdk_free_extents(BlockDriverState *bs)
g_free(e->l1_backup_table);
g_free(e->type);
if (e->file != bs->file) {
- bdrv_unref(e->file);
+ bdrv_unref_child(bs, e->file);
}
}
g_free(s->extents);
@@ -248,7 +248,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
BDRVVmdkState *s = bs->opaque;
int ret;
- ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return 0;
}
@@ -278,7 +278,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
BDRVVmdkState *s = bs->opaque;
int ret;
- ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return ret;
}
@@ -297,7 +297,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
pstrcat(desc, sizeof(desc), tmp_desc);
}
- ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return ret;
}
@@ -308,10 +308,11 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
- BlockDriverState *p_bs = bs->backing_hd;
uint32_t cur_pcid;
- if (!s->cid_checked && p_bs) {
+ if (!s->cid_checked && bs->backing) {
+ BlockDriverState *p_bs = bs->backing->bs;
+
cur_pcid = vmdk_read_cid(p_bs, 0);
if (s->parent_cid != cur_pcid) {
/* CID not valid */
@@ -340,7 +341,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
int ret;
desc[DESC_SIZE] = '\0';
- ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
return ret;
}
@@ -367,7 +368,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
/* Create and append extent to the extent array. Return the added VmdkExtent
* address. return NULL if allocation failed. */
static int vmdk_add_extent(BlockDriverState *bs,
- BlockDriverState *file, bool flat, int64_t sectors,
+ BdrvChild *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
int l2_size, uint64_t cluster_sectors,
@@ -392,7 +393,7 @@ static int vmdk_add_extent(BlockDriverState *bs,
return -EFBIG;
}
- nb_sectors = bdrv_nb_sectors(file);
+ nb_sectors = bdrv_nb_sectors(file->bs);
if (nb_sectors < 0) {
return nb_sectors;
}
@@ -439,14 +440,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
return -ENOMEM;
}
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
extent->l1_table_offset,
extent->l1_table,
l1_size);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read l1 table from extent '%s'",
- extent->file->filename);
+ extent->file->bs->filename);
goto fail_l1;
}
for (i = 0; i < extent->l1_size; i++) {
@@ -459,14 +460,14 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
ret = -ENOMEM;
goto fail_l1;
}
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
extent->l1_backup_table_offset,
extent->l1_backup_table,
l1_size);
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read l1 backup table from extent '%s'",
- extent->file->filename);
+ extent->file->bs->filename);
goto fail_l1b;
}
for (i = 0; i < extent->l1_size; i++) {
@@ -485,7 +486,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
}
static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
- BlockDriverState *file,
+ BdrvChild *file,
int flags, Error **errp)
{
int ret;
@@ -493,11 +494,11 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
VMDK3Header header;
VmdkExtent *extent;
- ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+ ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
- file->filename);
+ file->bs->filename);
return ret;
}
ret = vmdk_add_extent(bs, file, false,
@@ -559,7 +560,7 @@ static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
}
static int vmdk_open_vmdk4(BlockDriverState *bs,
- BlockDriverState *file,
+ BdrvChild *file,
int flags, QDict *options, Error **errp)
{
int ret;
@@ -569,18 +570,19 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
int64_t l1_backup_offset = 0;
+ bool compressed;
- ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
+ ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
- file->filename);
+ file->bs->filename);
return -EINVAL;
}
if (header.capacity == 0) {
uint64_t desc_offset = le64_to_cpu(header.desc_offset);
if (desc_offset) {
- char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
+ char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
if (!buf) {
return -EINVAL;
}
@@ -620,8 +622,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
} QEMU_PACKED eos_marker;
} QEMU_PACKED footer;
- ret = bdrv_pread(file,
- bs->file->total_sectors * 512 - 1536,
+ ret = bdrv_pread(file->bs,
+ bs->file->bs->total_sectors * 512 - 1536,
&footer, sizeof(footer));
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to read footer");
@@ -643,6 +645,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
header = footer.header;
}
+ compressed =
+ le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
if (le32_to_cpu(header.version) > 3) {
char buf[64];
snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
@@ -650,7 +654,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bdrv_get_device_or_node_name(bs), "vmdk", buf);
return -ENOTSUP;
- } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
+ } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
+ !compressed) {
/* VMware KB 2064959 explains that version 3 added support for
* persistent changed block tracking (CBT), and backup software can
* read it as version=1 if it doesn't care about the changed area
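
The new `compressed` flag relaxes the version check: an uncompressed version 3 image (which may carry CBT data) is still refused for read-write access, while compressed/streamOptimized images are now allowed, since the driver itself writes version 3 for those (see the create hunk further down). A standalone sketch of the gate, with the flag and return values chosen purely for illustration:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VMDK4_COMPRESSION_DEFLATE 1
    #define RDWR_REQUESTED            0x1   /* stands in for BDRV_O_RDWR */

    /* Returns 0 if the header version is acceptable for the requested mode. */
    static int check_vmdk4_version(uint32_t version, uint16_t compress_algorithm,
                                   int flags)
    {
        bool compressed = compress_algorithm == VMDK4_COMPRESSION_DEFLATE;

        if (version > 3) {
            return -1;          /* unknown format feature */
        }
        if (version == 3 && (flags & RDWR_REQUESTED) && !compressed) {
            return -2;          /* must be opened read-only (CBT images) */
        }
        return 0;               /* versions 1-2, read-only 3, or compressed 3 */
    }

    int main(void)
    {
        printf("v3 compressed rw : %d\n",
               check_vmdk4_version(3, VMDK4_COMPRESSION_DEFLATE, RDWR_REQUESTED));
        printf("v3 plain rw      : %d\n",
               check_vmdk4_version(3, 0, RDWR_REQUESTED));
        return 0;
    }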
@@ -675,7 +680,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
+ if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@@ -739,8 +744,7 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
}
/* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs,
- BlockDriverState *file, int flags,
+static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
char *buf, QDict *options, Error **errp)
{
uint32_t magic;
@@ -773,10 +777,11 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
int64_t sectors = 0;
int64_t flat_offset;
char *extent_path;
- BlockDriverState *extent_file;
+ BdrvChild *extent_file;
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent;
char extent_opt_prefix[32];
+ Error *local_err = NULL;
while (*p) {
/* parse extent line in one of below formats:
@@ -819,22 +824,22 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
!desc_file_path[0])
{
error_setg(errp, "Cannot use relative extent paths with VMDK "
- "descriptor file '%s'", bs->file->filename);
+ "descriptor file '%s'", bs->file->bs->filename);
return -EINVAL;
}
extent_path = g_malloc0(PATH_MAX);
path_combine(extent_path, PATH_MAX, desc_file_path, fname);
- extent_file = NULL;
ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
assert(ret < 32);
- ret = bdrv_open_image(&extent_file, extent_path, options,
- extent_opt_prefix, bs, &child_file, false, errp);
+ extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
+ bs, &child_file, false, &local_err);
g_free(extent_path);
- if (ret) {
- return ret;
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
}
/* save to extents array */
@@ -844,13 +849,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return ret;
}
extent->flat_start_offset = flat_offset << 9;
} else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
/* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
- char *buf = vmdk_read_desc(extent_file, 0, errp);
+ char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
if (!buf) {
ret = -EINVAL;
} else {
@@ -859,13 +864,13 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
}
g_free(buf);
if (ret) {
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return ret;
}
extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return -ENOTSUP;
}
extent->type = g_strdup(type);
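
bdrv_open_image() returned an errno and filled in a BlockDriverState **; its replacement bdrv_open_child() hands back the BdrvChild directly and reports failure only through an Error object, which is why this hunk adds the local_err/error_propagate() pair and maps any failure to -EINVAL. A standalone sketch of that calling convention; the Error helpers below are reduced stand-ins, not the QEMU ones:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Error { const char *msg; } Error;

    static void error_set_sketch(Error **errp, const char *msg)
    {
        Error *err = malloc(sizeof(*err));
        err->msg = msg;
        *errp = err;
    }

    static void error_propagate_sketch(Error **dst, Error *src)
    {
        if (src) {
            *dst = src;          /* hand the error object to the caller */
        }
    }

    /* Stand-in for bdrv_open_child(): NULL return plus *errp set on failure. */
    static void *open_child_sketch(const char *path, Error **errp)
    {
        (void)path;
        error_set_sketch(errp, "could not open extent");
        return NULL;
    }

    static int parse_extent_sketch(const char *path, Error **errp)
    {
        Error *local_err = NULL;
        void *child = open_child_sketch(path, &local_err);

        if (local_err) {
            error_propagate_sketch(errp, local_err);
            return -EINVAL;      /* the caller sees one well-defined errno */
        }
        (void)child;
        return 0;
    }

    int main(void)
    {
        Error *err = NULL;
        int ret = parse_extent_sketch("ext1.vmdk", &err);

        printf("ret=%d err=%s\n", ret, err ? err->msg : "(none)");
        return 0;
    }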
@@ -905,7 +910,8 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
}
s->create_type = g_strdup(ct);
s->desc_offset = 0;
- ret = vmdk_parse_extents(buf, bs, bs->file->exact_filename, options, errp);
+ ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options,
+ errp);
exit:
return ret;
}
@@ -918,7 +924,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
- buf = vmdk_read_desc(bs->file, 0, errp);
+ buf = vmdk_read_desc(bs->file->bs, 0, errp);
if (!buf) {
return -EINVAL;
}
@@ -927,7 +933,8 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
switch (magic) {
case VMDK3_MAGIC:
case VMDK4_MAGIC:
- ret = vmdk_open_sparse(bs, bs->file, flags, buf, options, errp);
+ ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
+ errp);
s->desc_offset = 0x200;
break;
default:
@@ -1004,7 +1011,7 @@ static int get_whole_cluster(BlockDriverState *bs,
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
whole_grain = qemu_blockalign(bs, cluster_bytes);
- if (!bs->backing_hd) {
+ if (!bs->backing) {
memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
@@ -1013,22 +1020,22 @@ static int get_whole_cluster(BlockDriverState *bs,
assert(skip_end_sector <= extent->cluster_sectors);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
- if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+ if (bs->backing && !vmdk_is_cid_valid(bs)) {
ret = VMDK_ERROR;
goto exit;
}
/* Read backing data before skip range */
if (skip_start_sector > 0) {
- if (bs->backing_hd) {
- ret = bdrv_read(bs->backing_hd, sector_num,
+ if (bs->backing) {
+ ret = bdrv_read(bs->backing->bs, sector_num,
whole_grain, skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
+ ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
skip_start_sector);
if (ret < 0) {
ret = VMDK_ERROR;
@@ -1037,8 +1044,8 @@ static int get_whole_cluster(BlockDriverState *bs,
}
/* Read backing data after skip range */
if (skip_end_sector < extent->cluster_sectors) {
- if (bs->backing_hd) {
- ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
+ if (bs->backing) {
+ ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
@@ -1046,7 +1053,7 @@ static int get_whole_cluster(BlockDriverState *bs,
goto exit;
}
}
- ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
+ ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
extent->cluster_sectors - skip_end_sector);
if (ret < 0) {
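
get_whole_cluster() performs the copy-on-write step for a grain written for the first time: the bytes around the range being written are read from the backing image (or zero-filled when there is no backing image) and flushed into the newly allocated cluster, so the unwritten parts of the grain stay consistent. A toy, in-memory sketch of that fill logic, at byte granularity instead of sectors and with no real I/O:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define CLUSTER 16   /* toy cluster size in bytes (sectors in the driver) */

    /* Populate a freshly allocated grain: everything outside the range being
     * written ([skip_start, skip_end)) comes from the backing image, or is
     * zeroed when there is no backing image. */
    static void fill_whole_grain(uint8_t *grain, const uint8_t *backing,
                                 size_t skip_start, size_t skip_end)
    {
        if (backing) {
            memcpy(grain, backing, skip_start);
            memcpy(grain + skip_end, backing + skip_end, CLUSTER - skip_end);
        } else {
            memset(grain, 0, skip_start);
            memset(grain + skip_end, 0, CLUSTER - skip_end);
        }
        /* [skip_start, skip_end) is left alone: the caller's write lands there */
    }

    int main(void)
    {
        uint8_t backing[CLUSTER];
        uint8_t grain[CLUSTER];

        memset(backing, 'b', sizeof(backing));
        memset(grain, '?', sizeof(grain));
        fill_whole_grain(grain, backing, 4, 12);

        fwrite(grain, 1, sizeof(grain), stdout);
        putchar('\n');
        return 0;
    }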
@@ -1066,7 +1073,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
offset = cpu_to_le32(offset);
/* update L2 table */
if (bdrv_pwrite_sync(
- extent->file,
+ extent->file->bs,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1076,7 +1083,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
if (bdrv_pwrite_sync(
- extent->file,
+ extent->file->bs,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1166,7 +1173,7 @@ static int get_cluster_offset(BlockDriverState *bs,
}
l2_table = extent->l2_cache + (min_index * extent->l2_size);
if (bdrv_pread(
- extent->file,
+ extent->file->bs,
(int64_t)l2_offset * 512,
l2_table,
extent->l2_size * sizeof(uint32_t)
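
The surrounding function resolves a guest offset through the extent's two-level table: the L1 entry names an L2 table (kept in extent->l2_cache), and the L2 entry names the data cluster. A generic sketch of that index arithmetic, using illustrative sizes rather than the exact VMDK field names:

    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SIZE      512u
    #define CLUSTER_SECTORS  128u    /* 64 KiB grains, typical for VMDK */
    #define L2_SIZE          512u    /* entries per L2 table, illustrative */

    /* Split a guest byte offset into (L1 index, L2 index, offset in cluster). */
    static void split_offset(uint64_t offset, uint64_t *l1_index,
                             uint64_t *l2_index, uint64_t *in_cluster)
    {
        uint64_t cluster = offset / (CLUSTER_SECTORS * SECTOR_SIZE);

        *in_cluster = offset % (CLUSTER_SECTORS * SECTOR_SIZE);
        *l2_index = cluster % L2_SIZE;
        *l1_index = cluster / L2_SIZE;
    }

    int main(void)
    {
        uint64_t l1, l2, in_cluster;

        split_offset(123456789, &l1, &l2, &in_cluster);
        printf("l1=%llu l2=%llu in_cluster=%llu\n",
               (unsigned long long)l1, (unsigned long long)l2,
               (unsigned long long)in_cluster);
        return 0;
    }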
@@ -1320,12 +1327,16 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
write_len = buf_len + sizeof(VmdkGrainMarker);
}
write_offset = cluster_offset + offset_in_cluster,
- ret = bdrv_pwrite(extent->file, write_offset, write_buf, write_len);
+ ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
- extent->next_cluster_sector = MAX(extent->next_cluster_sector,
- write_end_sector);
+ if (extent->compressed) {
+ extent->next_cluster_sector = write_end_sector;
+ } else {
+ extent->next_cluster_sector = MAX(extent->next_cluster_sector,
+ write_end_sector);
+ }
if (ret != write_len) {
ret = ret < 0 ? ret : -EIO;
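
Compressed grains are written as variable-length markers and streamOptimized images are laid out sequentially, so after the write the allocation pointer should follow the actual end of the data rather than keep a whole-cluster reservation that would leave gaps; plain sparse extents keep the old "never move backwards" behaviour. A minimal sketch of the two policies the hunk distinguishes (the rationale for the compressed case is inferred from the surrounding allocation code, so treat it as an illustration):

    #include <stdint.h>
    #include <stdio.h>

    /* Pick the next free sector after a write ending at write_end_sector:
     *  - plain sparse extents never move the high-water mark backwards;
     *  - compressed extents track the exact end of the last grain marker,
     *    which may be short of the cluster-sized reservation. */
    static int64_t next_free_sector(int64_t current, int64_t write_end_sector,
                                    int compressed)
    {
        if (compressed) {
            return write_end_sector;
        }
        return current > write_end_sector ? current : write_end_sector;
    }

    int main(void)
    {
        printf("sparse    : %lld\n", (long long)next_free_sector(128, 10, 0));
        printf("compressed: %lld\n", (long long)next_free_sector(128, 10, 1));
        return 0;
    }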
@@ -1351,7 +1362,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
if (!extent->compressed) {
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
cluster_offset + offset_in_cluster,
buf, nb_sectors * 512);
if (ret == nb_sectors * 512) {
@@ -1365,7 +1376,7 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
buf_bytes = cluster_bytes * 2;
cluster_buf = g_malloc(buf_bytes);
uncomp_buf = g_malloc(cluster_bytes);
- ret = bdrv_pread(extent->file,
+ ret = bdrv_pread(extent->file->bs,
cluster_offset,
cluster_buf, buf_bytes);
if (ret < 0) {
@@ -1427,11 +1438,11 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
}
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
- if (bs->backing_hd && ret != VMDK_ZEROED) {
+ if (bs->backing && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
return -EINVAL;
}
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+ ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
if (ret < 0) {
return ret;
}
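
On a read miss (grain not allocated and not a zeroed grain) the data comes from the backing file, but only after vmdk_is_cid_valid() has confirmed that the parent CID recorded in the overlay's descriptor still matches the backing image's current CID, i.e. the parent has not been modified behind the overlay's back. A standalone sketch of that consistency check:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* A VMDK overlay records the content ID (CID) its parent had when the
     * overlay was created; if the parent's current CID differs, the parent
     * was modified and data read through it can no longer be trusted. */
    static bool cid_is_valid(uint32_t recorded_parent_cid,
                             uint32_t current_parent_cid)
    {
        return recorded_parent_cid == current_parent_cid;
    }

    int main(void)
    {
        printf("unchanged parent: %d\n", cid_is_valid(0xdeadbeef, 0xdeadbeef));
        printf("modified parent : %d\n", cid_is_valid(0xdeadbeef, 0x12345678));
        return 0;
    }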
@@ -1632,7 +1643,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
assert(bs == NULL);
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
+ &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
@@ -1647,7 +1658,13 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
magic = cpu_to_be32(VMDK4_MAGIC);
memset(&header, 0, sizeof(header));
- header.version = zeroed_grain ? 2 : 1;
+ if (compress) {
+ header.version = 3;
+ } else if (zeroed_grain) {
+ header.version = 2;
+ } else {
+ header.version = 1;
+ }
header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
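
The create path now emits version 3 headers for compressed images, matching the relaxed open-time check above; zeroed-grain support still bumps the version to 2, and everything else stays at 1. A standalone sketch of that selection, with a qemu-img invocation in the comment showing how the compressed path is typically reached (the subformat option name is the vmdk driver's, the file name and size are placeholders):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t vmdk4_header_version(int compress, int zeroed_grain)
    {
        if (compress) {
            return 3;   /* compressed (streamOptimized) images use version 3 */
        }
        if (zeroed_grain) {
            return 2;   /* zeroed-grain markers require version 2 */
        }
        return 1;
    }

    int main(void)
    {
        /* e.g. created with:
         *   qemu-img create -f vmdk -o subformat=streamOptimized disk.vmdk 1G */
        printf("compressed: v%" PRIu32 "\n", vmdk4_header_version(1, 0));
        printf("zeroed    : v%" PRIu32 "\n", vmdk4_header_version(0, 1));
        printf("plain     : v%" PRIu32 "\n", vmdk4_header_version(0, 0));
        return 0;
    }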
@@ -1905,8 +1922,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
ret = -ENOENT;
goto exit;
}
- ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, NULL,
- errp);
+ ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp);
g_free(full_backing);
if (ret != 0) {
goto exit;
@@ -1977,7 +1993,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
}
assert(new_bs == NULL);
ret = bdrv_open(&new_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
@@ -2032,7 +2048,7 @@ static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
int ret = 0;
for (i = 0; i < s->num_extents; i++) {
- err = bdrv_co_flush(s->extents[i].file);
+ err = bdrv_co_flush(s->extents[i].file->bs);
if (err < 0) {
ret = err;
}
@@ -2047,7 +2063,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
int64_t r;
BDRVVmdkState *s = bs->opaque;
- ret = bdrv_get_allocated_file_size(bs->file);
+ ret = bdrv_get_allocated_file_size(bs->file->bs);
if (ret < 0) {
return ret;
}
@@ -2055,7 +2071,7 @@ static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
if (s->extents[i].file == bs->file) {
continue;
}
- r = bdrv_get_allocated_file_size(s->extents[i].file);
+ r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
if (r < 0) {
return r;
}
@@ -2073,7 +2089,7 @@ static int vmdk_has_zero_init(BlockDriverState *bs)
* return 0. */
for (i = 0; i < s->num_extents; i++) {
if (s->extents[i].flat) {
- if (!bdrv_has_zero_init(s->extents[i].file)) {
+ if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
return 0;
}
}
@@ -2086,7 +2102,7 @@ static ImageInfo *vmdk_get_extent_info(VmdkExtent *extent)
ImageInfo *info = g_new0(ImageInfo, 1);
*info = (ImageInfo){
- .filename = g_strdup(extent->file->filename),
+ .filename = g_strdup(extent->file->bs->filename),
.format = g_strdup(extent->type),
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
.compressed = extent->compressed,
@@ -2132,7 +2148,9 @@ static int vmdk_check(BlockDriverState *bs, BdrvCheckResult *result,
PRId64 "\n", sector_num);
break;
}
- if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
+ if (ret == VMDK_OK &&
+ cluster_offset >= bdrv_getlength(extent->file->bs))
+ {
fprintf(stderr,
"ERROR: cluster offset for sector %"
PRId64 " points after EOF\n", sector_num);
@@ -2153,19 +2171,19 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
ImageInfoList **next;
*spec_info = (ImageInfoSpecific){
- .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK,
+ .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
{
.vmdk = g_new0(ImageInfoSpecificVmdk, 1),
},
};
- *spec_info->vmdk = (ImageInfoSpecificVmdk) {
+ *spec_info->u.vmdk = (ImageInfoSpecificVmdk) {
.create_type = g_strdup(s->create_type),
.cid = s->cid,
.parent_cid = s->parent_cid,
};
- next = &spec_info->vmdk->extents;
+ next = &spec_info->u.vmdk->extents;
for (i = 0; i < s->num_extents; i++) {
*next = g_new0(ImageInfoList, 1);
(*next)->value = vmdk_get_extent_info(&s->extents[i]);
@@ -2208,7 +2226,7 @@ static void vmdk_detach_aio_context(BlockDriverState *bs)
int i;
for (i = 0; i < s->num_extents; i++) {
- bdrv_detach_aio_context(s->extents[i].file);
+ bdrv_detach_aio_context(s->extents[i].file->bs);
}
}
@@ -2219,7 +2237,7 @@ static void vmdk_attach_aio_context(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_extents; i++) {
- bdrv_attach_aio_context(s->extents[i].file, new_context);
+ bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
}
}
diff --git a/block/vpc.c b/block/vpc.c
index 3e385d9fb..299d37309 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -172,14 +172,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
- ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
+ ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
}
footer = (VHDFooter *) s->footer_buf;
if (strncmp(footer->creator, "conectix", 8)) {
- int64_t offset = bdrv_getlength(bs->file);
+ int64_t offset = bdrv_getlength(bs->file->bs);
if (offset < 0) {
ret = offset;
goto fail;
@@ -189,7 +189,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
/* If a fixed disk, the footer is found only at the end of the file */
- ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
+ ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -232,7 +232,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
if (disk_type == VHD_DYNAMIC) {
- ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
+ ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -280,7 +280,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
pagetable_size = (uint64_t) s->max_table_entries * 4;
- s->pagetable = qemu_try_blockalign(bs->file, pagetable_size);
+ s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
if (s->pagetable == NULL) {
ret = -ENOMEM;
goto fail;
@@ -288,7 +288,8 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
- ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable, pagetable_size);
+ ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
+ pagetable_size);
if (ret < 0) {
goto fail;
}
@@ -308,7 +309,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
+ if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
error_setg(errp, "block-vpc: free_data_block_offset points after "
"the end of file. The image has been truncated.");
ret = -EINVAL;
@@ -383,7 +384,7 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
s->last_bitmap_offset = bitmap_offset;
memset(bitmap, 0xff, s->bitmap_size);
- bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
+ bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
}
return block_offset;
@@ -401,7 +402,7 @@ static int rewrite_footer(BlockDriverState* bs)
BDRVVPCState *s = bs->opaque;
int64_t offset = s->free_data_block_offset;
- ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
+ ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
if (ret < 0)
return ret;
@@ -436,7 +437,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
// Initialize the block's bitmap
memset(bitmap, 0xff, s->bitmap_size);
- ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
+ ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
s->bitmap_size);
if (ret < 0) {
return ret;
@@ -451,7 +452,7 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
// Write BAT entry to disk
bat_offset = s->bat_offset + (4 * index);
bat_value = cpu_to_be32(s->pagetable[index]);
- ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
+ ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
if (ret < 0)
goto fail;
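
alloc_block() persists a new block in two steps: the block's sector bitmap is written first (all ones), then the block allocation table (BAT) entry for that block is updated on disk, 4 bytes per entry, stored big-endian. A standalone sketch of the BAT arithmetic; the helper names and offsets are illustrative, not the driver's:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Each BAT entry is a 32-bit big-endian sector number (0xffffffff means
     * "not allocated").  The entry for block `index` lives at a fixed offset
     * from the start of the BAT. */
    static uint64_t bat_entry_offset(uint64_t bat_offset, uint32_t index)
    {
        return bat_offset + 4ull * index;
    }

    /* Byte swap for a little-endian host; QEMU's cpu_to_be32() handles both
     * endiannesses. */
    static uint32_t to_be32_sketch(uint32_t v)
    {
        return ((v & 0x000000ffu) << 24) | ((v & 0x0000ff00u) << 8) |
               ((v & 0x00ff0000u) >> 8)  | ((v & 0xff000000u) >> 24);
    }

    int main(void)
    {
        uint64_t off = bat_entry_offset(0x600, 10);     /* illustrative BAT base */
        uint32_t on_disk = to_be32_sketch(0x1234);      /* new block's sector */

        printf("entry offset 0x%" PRIx64 ", on-disk value 0x%08" PRIx32 "\n",
               off, on_disk);
        return 0;
    }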
@@ -485,7 +486,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_read(bs->file, sector_num, buf, nb_sectors);
+ return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
}
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 0);
@@ -499,7 +500,7 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
if (offset == -1) {
memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
} else {
- ret = bdrv_pread(bs->file, offset, buf,
+ ret = bdrv_pread(bs->file->bs, offset, buf,
sectors * BDRV_SECTOR_SIZE);
if (ret != sectors * BDRV_SECTOR_SIZE) {
return -1;
@@ -534,7 +535,7 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+ return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
}
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 1);
@@ -551,7 +552,8 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
return -1;
}
- ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
+ ret = bdrv_pwrite(bs->file->bs, offset, buf,
+ sectors * BDRV_SECTOR_SIZE);
if (ret != sectors * BDRV_SECTOR_SIZE) {
return -1;
}
@@ -794,7 +796,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
goto out;
}
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
+ &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
@@ -878,7 +880,7 @@ static int vpc_has_zero_init(BlockDriverState *bs)
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_has_zero_init(bs->file);
+ return bdrv_has_zero_init(bs->file->bs);
} else {
return 1;
}
diff --git a/block/vvfat.c b/block/vvfat.c
index 206869712..b184eca6f 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -985,12 +985,6 @@ static BDRVVVFATState *vvv = NULL;
static int enable_write_target(BDRVVVFATState *s, Error **errp);
static int is_consistent(BDRVVVFATState *s);
-static void vvfat_rebind(BlockDriverState *bs)
-{
- BDRVVVFATState *s = bs->opaque;
- s->bs = bs;
-}
-
static QemuOptsList runtime_opts = {
.name = "vvfat",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -2923,9 +2917,12 @@ static BlockDriver vvfat_write_target = {
static int enable_write_target(BDRVVVFATState *s, Error **errp)
{
BlockDriver *bdrv_qcow = NULL;
+ BlockDriverState *backing;
QemuOpts *opts = NULL;
int ret;
int size = sector2cluster(s, s->sector_count);
+ QDict *options;
+
s->used_clusters = calloc(size, 1);
array_init(&(s->commits), sizeof(commit_t));
@@ -2956,9 +2953,11 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
}
s->qcow = NULL;
- ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow"));
+ ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- bdrv_qcow, errp);
+ errp);
if (ret < 0) {
goto err;
}
@@ -2967,10 +2966,13 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
unlink(s->qcow_filename);
#endif
- bdrv_set_backing_hd(s->bs, bdrv_new());
- s->bs->backing_hd->drv = &vvfat_write_target;
- s->bs->backing_hd->opaque = g_new(void *, 1);
- *(void**)s->bs->backing_hd->opaque = s;
+ backing = bdrv_new();
+ bdrv_set_backing_hd(s->bs, backing);
+ bdrv_unref(backing);
+
+ s->bs->backing->bs->drv = &vvfat_write_target;
+ s->bs->backing->bs->opaque = g_new(void *, 1);
+ *(void**)s->bs->backing->bs->opaque = s;
return 0;
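
The rewritten enable_write_target() no longer pokes bs->backing_hd directly: it creates the write target with bdrv_new(), installs it with bdrv_set_backing_hd() (which takes its own reference, as the immediate bdrv_unref() in this hunk implies), and then drops the local reference, leaving the backing child as the only owner. A toy reference-counting sketch of why that extra bdrv_unref() is needed; the functions are stand-ins, not QEMU's:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct Node { int refcnt; } Node;

    static Node *node_new(void)          /* bdrv_new(): returns with refcnt 1 */
    {
        Node *n = calloc(1, sizeof(*n));
        n->refcnt = 1;
        return n;
    }

    static void node_ref(Node *n)   { n->refcnt++; }

    static void node_unref(Node *n)
    {
        if (--n->refcnt == 0) {
            free(n);
        }
    }

    typedef struct Parent { Node *backing; } Parent;

    /* bdrv_set_backing_hd()-style: the parent takes its own reference. */
    static void set_backing(Parent *p, Node *n)
    {
        node_ref(n);
        p->backing = n;
    }

    int main(void)
    {
        Parent p = { 0 };
        Node *backing = node_new();       /* refcnt == 1 (local reference)   */

        set_backing(&p, backing);         /* refcnt == 2                     */
        node_unref(backing);              /* drop local ref; parent keeps it */

        printf("backing refcnt: %d\n", p.backing->refcnt);   /* prints 1 */
        node_unref(p.backing);            /* teardown */
        return 0;
    }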
@@ -3004,7 +3006,6 @@ static BlockDriver bdrv_vvfat = {
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
.bdrv_close = vvfat_close,
- .bdrv_rebind = vvfat_rebind,
.bdrv_read = vvfat_co_read,
.bdrv_write = vvfat_co_write,
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 64e86827b..bbf2f01c1 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -174,7 +174,7 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
- aio_set_event_notifier(old_context, &aio->e, NULL);
+ aio_set_event_notifier(old_context, &aio->e, false, NULL);
aio->is_aio_context_attached = false;
}
@@ -182,7 +182,8 @@ void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
AioContext *new_context)
{
aio->is_aio_context_attached = true;
- aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+ aio_set_event_notifier(new_context, &aio->e, false,
+ win32_aio_completion_cb);
}
QEMUWin32AIOState *win32_aio_init(void)
diff --git a/block/write-threshold.c b/block/write-threshold.c
index a53c1f5e6..0fe38917c 100644
--- a/block/write-threshold.c
+++ b/block/write-threshold.c
@@ -11,7 +11,7 @@
*/
#include "block/block_int.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
#include "block/write-threshold.h"
#include "qemu/notify.h"
#include "qapi-event.h"