summary | refs | log | tree | commit | diff
path: root/block
diff options
context:
space:
mode:
author hyokeun <hyokeun.jeon@samsung.com> 2016-09-06 14:09:22 +0900
committer hyokeun <hyokeun.jeon@samsung.com> 2016-09-06 14:09:22 +0900
commit bd54c25035217800f3b1d39f6472d599cd602d5a (patch)
tree 299417fe96f546225439ff92b27ac3e55909a970 /block
parent 186efde2677c31fb40d154a81a5f3731eab52414 (diff)
download qemu-bd54c25035217800f3b1d39f6472d599cd602d5a.tar.gz
qemu-bd54c25035217800f3b1d39f6472d599cd602d5a.tar.bz2
qemu-bd54c25035217800f3b1d39f6472d599cd602d5a.zip
Imported Upstream version 2.7.0 upstream/2.7.0
Diffstat (limited to 'block')
-rw-r--r-- block/Makefile.objs 3
-rw-r--r-- block/archipelago.c 4
-rw-r--r-- block/backup.c 139
-rw-r--r-- block/blkdebug.c 44
-rwxr-xr-x block/blkreplay.c 34
-rw-r--r-- block/blkverify.c 27
-rw-r--r-- block/block-backend.c 461
-rw-r--r-- block/bochs.c 65
-rw-r--r-- block/cloop.c 54
-rw-r--r-- block/commit.c 218
-rw-r--r-- block/crypto.c 97
-rw-r--r-- block/curl.c 12
-rw-r--r-- block/dirty-bitmap.c 6
-rw-r--r-- block/dmg.c 69
-rw-r--r-- block/gluster.c 848
-rw-r--r-- block/io.c 1634
-rw-r--r-- block/iscsi.c 402
-rw-r--r-- block/linux-aio.c 179
-rw-r--r-- block/mirror.c 428
-rw-r--r-- block/nbd-client.c 124
-rw-r--r-- block/nbd-client.h 13
-rw-r--r-- block/nbd.c 222
-rw-r--r-- block/nfs.c 57
-rw-r--r-- block/null.c 20
-rw-r--r-- block/parallels.c 31
-rw-r--r-- block/qapi.c 15
-rw-r--r-- block/qcow.c 107
-rw-r--r-- block/qcow2-cache.c 20
-rw-r--r-- block/qcow2-cluster.c 188
-rw-r--r-- block/qcow2-refcount.c 65
-rw-r--r-- block/qcow2-snapshot.c 27
-rw-r--r-- block/qcow2.c 426
-rw-r--r-- block/qcow2.h 19
-rw-r--r-- block/qed-check.c 3
-rw-r--r-- block/qed-table.c 5
-rw-r--r-- block/qed.c 77
-rw-r--r-- block/quorum.c 100
-rw-r--r-- block/raw-aio.h 64
-rw-r--r-- block/raw-posix.c 328
-rw-r--r-- block/raw-win32.c 31
-rw-r--r-- block/raw_bsd.c 83
-rw-r--r-- block/rbd.c 71
-rw-r--r-- block/sheepdog.c 59
-rw-r--r-- block/snapshot.c 62
-rw-r--r-- block/ssh.c 81
-rw-r--r-- block/stream.c 53
-rw-r--r-- block/throttle-groups.c 252
-rw-r--r-- block/trace-events 116
-rw-r--r-- block/vdi.c 140
-rw-r--r-- block/vhdx-endian.c 1
-rw-r--r-- block/vhdx-log.c 13
-rw-r--r-- block/vhdx.c 92
-rw-r--r-- block/vmdk.c 458
-rw-r--r-- block/vpc.c 192
-rw-r--r-- block/vvfat.c 143
-rw-r--r-- block/win32-aio.c 2
56 files changed, 4940 insertions, 3544 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 44a541622..2593a2f8a 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -9,7 +9,7 @@ block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o io.o
+block-obj-y += null.o mirror.o commit.o io.o
block-obj-y += throttle-groups.o
block-obj-y += nbd.o nbd-client.o sheepdog.o
@@ -26,7 +26,6 @@ block-obj-y += write-threshold.o
block-obj-y += crypto.o
common-obj-y += stream.o
-common-obj-y += commit.o
common-obj-y += backup.o
iscsi.o-cflags := $(LIBISCSI_CFLAGS)
diff --git a/block/archipelago.c b/block/archipelago.c
index b9f5e69d4..37b8aca78 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -974,11 +974,9 @@ err_exit2:
static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
{
- int64_t ret;
BDRVArchipelagoState *s = bs->opaque;
- ret = archipelago_volume_info(s);
- return ret;
+ return archipelago_volume_info(s);
}
static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
diff --git a/block/backup.c b/block/backup.c
index 491fd1406..2c0532314 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -36,7 +36,7 @@ typedef struct CowRequest {
typedef struct BackupBlockJob {
BlockJob common;
- BlockDriverState *target;
+ BlockBackend *target;
/* bitmap for sync=incremental */
BdrvDirtyBitmap *sync_bitmap;
MirrorSyncMode sync_mode;
@@ -47,6 +47,7 @@ typedef struct BackupBlockJob {
uint64_t sectors_read;
unsigned long *done_bitmap;
int64_t cluster_size;
+ NotifierWithReturn before_write;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
@@ -93,12 +94,12 @@ static void cow_request_end(CowRequest *req)
qemu_co_queue_restart_all(&req->wait_queue);
}
-static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+static int coroutine_fn backup_do_cow(BackupBlockJob *job,
int64_t sector_num, int nb_sectors,
bool *error_is_read,
bool is_write_notifier)
{
- BackupBlockJob *job = (BackupBlockJob *)bs->job;
+ BlockBackend *blk = job->common.blk;
CowRequest cow_request;
struct iovec iov;
QEMUIOVector bounce_qiov;
@@ -131,20 +132,15 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
start * sectors_per_cluster);
if (!bounce_buffer) {
- bounce_buffer = qemu_blockalign(bs, job->cluster_size);
+ bounce_buffer = blk_blockalign(blk, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- if (is_write_notifier) {
- ret = bdrv_co_readv_no_serialising(bs,
- start * sectors_per_cluster,
- n, &bounce_qiov);
- } else {
- ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
- &bounce_qiov);
- }
+ ret = blk_co_preadv(blk, start * job->cluster_size,
+ bounce_qiov.size, &bounce_qiov,
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
@@ -154,13 +150,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
}
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
- ret = bdrv_co_write_zeroes(job->target,
- start * sectors_per_cluster,
- n, BDRV_REQ_MAY_UNMAP);
+ ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
+ bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
- ret = bdrv_co_writev(job->target,
- start * sectors_per_cluster, n,
- &bounce_qiov);
+ ret = blk_co_pwritev(job->target, start * job->cluster_size,
+ bounce_qiov.size, &bounce_qiov, 0);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
@@ -197,14 +191,16 @@ static int coroutine_fn backup_before_write_notify(
NotifierWithReturn *notifier,
void *opaque)
{
+ BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
BdrvTrackedRequest *req = opaque;
int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
+ assert(req->bs == blk_bs(job->common.blk));
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- return backup_do_cow(req->bs, sector_num, nb_sectors, NULL, true);
+ return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -218,19 +214,10 @@ static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
-static void backup_iostatus_reset(BlockJob *job)
-{
- BackupBlockJob *s = container_of(job, BackupBlockJob, common);
-
- if (s->target->blk) {
- blk_iostatus_reset(s->target->blk);
- }
-}
-
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
BdrvDirtyBitmap *bm;
- BlockDriverState *bs = job->common.bs;
+ BlockDriverState *bs = blk_bs(job->common.blk);
if (ret < 0 || block_job_is_cancelled(&job->common)) {
/* Merge the successor back into the parent, delete nothing. */
@@ -259,24 +246,31 @@ static void backup_abort(BlockJob *job)
}
}
+static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+
+ blk_set_aio_context(s->target, aio_context);
+}
+
static const BlockJobDriver backup_job_driver = {
- .instance_size = sizeof(BackupBlockJob),
- .job_type = BLOCK_JOB_TYPE_BACKUP,
- .set_speed = backup_set_speed,
- .iostatus_reset = backup_iostatus_reset,
- .commit = backup_commit,
- .abort = backup_abort,
+ .instance_size = sizeof(BackupBlockJob),
+ .job_type = BLOCK_JOB_TYPE_BACKUP,
+ .set_speed = backup_set_speed,
+ .commit = backup_commit,
+ .abort = backup_abort,
+ .attached_aio_context = backup_attached_aio_context,
};
static BlockErrorAction backup_error_action(BackupBlockJob *job,
bool read, int error)
{
if (read) {
- return block_job_error_action(&job->common, job->common.bs,
- job->on_source_error, true, error);
+ return block_job_error_action(&job->common, job->on_source_error,
+ true, error);
} else {
- return block_job_error_action(&job->common, job->target,
- job->on_target_error, false, error);
+ return block_job_error_action(&job->common, job->on_target_error,
+ false, error);
}
}
@@ -289,7 +283,7 @@ static void backup_complete(BlockJob *job, void *opaque)
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
BackupCompleteData *data = opaque;
- bdrv_unref(s->target);
+ blk_unref(s->target);
block_job_completed(job, data->ret);
g_free(data);
@@ -331,7 +325,6 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
int64_t end;
int64_t last_cluster = -1;
int64_t sectors_per_cluster = cluster_size_sectors(job);
- BlockDriverState *bs = job->common.bs;
HBitmapIter hbi;
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
@@ -353,7 +346,7 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
if (yield_and_check(job)) {
return ret;
}
- ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+ ret = backup_do_cow(job, cluster * sectors_per_cluster,
sectors_per_cluster, &error_is_read,
false);
if ((ret < 0) &&
@@ -386,12 +379,8 @@ static void coroutine_fn backup_run(void *opaque)
{
BackupBlockJob *job = opaque;
BackupCompleteData *data;
- BlockDriverState *bs = job->common.bs;
- BlockDriverState *target = job->target;
- BlockdevOnError on_target_error = job->on_target_error;
- NotifierWithReturn before_write = {
- .notify = backup_before_write_notify,
- };
+ BlockDriverState *bs = blk_bs(job->common.blk);
+ BlockBackend *target = job->target;
int64_t start, end;
int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;
@@ -404,20 +393,14 @@ static void coroutine_fn backup_run(void *opaque)
job->done_bitmap = bitmap_new(end);
- if (target->blk) {
- blk_set_on_error(target->blk, on_target_error, on_target_error);
- blk_iostatus_enable(target->blk);
- }
-
- bdrv_add_before_write_notifier(bs, &before_write);
+ job->before_write.notify = backup_before_write_notify;
+ bdrv_add_before_write_notifier(bs, &job->before_write);
if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
while (!block_job_is_cancelled(&job->common)) {
/* Yield until the job is cancelled. We just let our before_write
* notify callback service CoW requests. */
- job->common.busy = false;
- qemu_coroutine_yield();
- job->common.busy = true;
+ block_job_yield(&job->common);
}
} else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
ret = backup_run_incremental(job);
@@ -461,7 +444,7 @@ static void coroutine_fn backup_run(void *opaque)
}
}
/* FULL sync mode we copy the whole drive. */
- ret = backup_do_cow(bs, start * sectors_per_cluster,
+ ret = backup_do_cow(job, start * sectors_per_cluster,
sectors_per_cluster, &error_is_read, false);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
@@ -477,26 +460,23 @@ static void coroutine_fn backup_run(void *opaque)
}
}
- notifier_with_return_remove(&before_write);
+ notifier_with_return_remove(&job->before_write);
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
g_free(job->done_bitmap);
- if (target->blk) {
- blk_iostatus_disable(target->blk);
- }
- bdrv_op_unblock_all(target, job->common.blocker);
+ bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
data = g_malloc(sizeof(*data));
data->ret = ret;
block_job_defer_to_main_loop(&job->common, backup_complete, data);
}
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, MirrorSyncMode sync_mode,
- BdrvDirtyBitmap *sync_bitmap,
+void backup_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
@@ -504,24 +484,17 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
{
int64_t len;
BlockDriverInfo bdi;
+ BackupBlockJob *job = NULL;
int ret;
assert(bs);
assert(target);
- assert(cb);
if (bs == target) {
error_setg(errp, "Source and target cannot be the same");
return;
}
- if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
- on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
- return;
- }
-
if (!bdrv_is_inserted(bs)) {
error_setg(errp, "Device is not inserted: %s",
bdrv_get_device_name(bs));
@@ -568,15 +541,17 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
goto error;
}
- BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
- cb, opaque, errp);
+ job = block_job_create(job_id, &backup_job_driver, bs, speed,
+ cb, opaque, errp);
if (!job) {
goto error;
}
+ job->target = blk_new();
+ blk_insert_bs(job->target, target);
+
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
- job->target = target;
job->sync_mode = sync_mode;
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
@@ -584,7 +559,7 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
/* If there is no backing file on the target, we cannot rely on COW if our
* backup cluster size is smaller than the target cluster size. Even for
* targets with a backing file, try to avoid COW if possible. */
- ret = bdrv_get_info(job->target, &bdi);
+ ret = bdrv_get_info(target, &bdi);
if (ret < 0 && !target->backing) {
error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, "
@@ -601,13 +576,17 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
bdrv_op_block_all(target, job->common.blocker);
job->common.len = len;
- job->common.co = qemu_coroutine_create(backup_run);
+ job->common.co = qemu_coroutine_create(backup_run, job);
block_job_txn_add_job(txn, &job->common);
- qemu_coroutine_enter(job->common.co, job);
+ qemu_coroutine_enter(job->common.co);
return;
error:
if (sync_bitmap) {
bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
}
+ if (job) {
+ blk_unref(job->target);
+ block_job_unref(&job->common);
+ }
}
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 20d25bda6..d5db16681 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -37,6 +37,10 @@
typedef struct BDRVBlkdebugState {
int state;
int new_state;
+ int align;
+
+ /* For blkdebug_refresh_filename() */
+ char *config_file;
QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
@@ -350,7 +354,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
- const char *config;
uint64_t align;
int ret;
@@ -363,8 +366,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Read rules from config file or command line options */
- config = qemu_opt_get(opts, "config");
- ret = read_config(s, config, options, errp);
+ s->config_file = g_strdup(qemu_opt_get(opts, "config"));
+ ret = read_config(s, s->config_file, options, errp);
if (ret) {
goto out;
}
@@ -382,10 +385,10 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Set request alignment */
- align = qemu_opt_get_size(opts, "align", bs->request_alignment);
- if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
- bs->request_alignment = align;
- } else {
+ align = qemu_opt_get_size(opts, "align", 0);
+ if (align < INT_MAX && is_power_of_2(align)) {
+ s->align = align;
+ } else if (align) {
error_setg(errp, "Invalid alignment");
ret = -EINVAL;
goto fail_unref;
@@ -397,6 +400,9 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
fail_unref:
bdrv_unref_child(bs, bs->file);
out:
+ if (ret < 0) {
+ g_free(s->config_file);
+ }
qemu_opts_del(opts);
return ret;
}
@@ -456,7 +462,7 @@ static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
+ return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
cb, opaque);
}
@@ -479,7 +485,7 @@ static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+ return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
cb, opaque);
}
@@ -514,6 +520,8 @@ static void blkdebug_close(BlockDriverState *bs)
remove_rule(rule);
}
}
+
+ g_free(s->config_file);
}
static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
@@ -620,7 +628,7 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
if (!strcmp(r->tag, tag)) {
- qemu_coroutine_enter(r->co, NULL);
+ qemu_coroutine_enter(r->co);
return 0;
}
}
@@ -646,7 +654,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
}
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
if (!strcmp(r->tag, tag)) {
- qemu_coroutine_enter(r->co, NULL);
+ qemu_coroutine_enter(r->co);
ret = 0;
}
}
@@ -678,6 +686,7 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
+ BDRVBlkdebugState *s = bs->opaque;
QDict *opts;
const QDictEntry *e;
bool force_json = false;
@@ -699,8 +708,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "blkdebug:%s:%s",
- qdict_get_try_str(options, "config") ?: "",
+ "blkdebug:%s:%s", s->config_file ?: "",
bs->file->bs->exact_filename);
}
@@ -720,6 +728,15 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
bs->full_open_options = opts;
}
+static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BDRVBlkdebugState *s = bs->opaque;
+
+ if (s->align) {
+ bs->bl.request_alignment = s->align;
+ }
+}
+
static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp)
{
@@ -738,6 +755,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
+ .bdrv_refresh_limits = blkdebug_refresh_limits,
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
diff --git a/block/blkreplay.c b/block/blkreplay.c
index 42f1813af..30f9d5ff6 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -65,7 +65,7 @@ static int64_t blkreplay_getlength(BlockDriverState *bs)
static void blkreplay_bh_cb(void *opaque)
{
Request *req = opaque;
- qemu_coroutine_enter(req->co, NULL);
+ qemu_coroutine_enter(req->co);
qemu_bh_delete(req->bh);
g_free(req);
}
@@ -81,44 +81,44 @@ static void block_request_create(uint64_t reqid, BlockDriverState *bs,
replay_block_event(req->bh, reqid);
}
-static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
uint64_t reqid = request_id++;
- int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+ int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
return ret;
}
-static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
{
uint64_t reqid = request_id++;
- int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+ int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
return ret;
}
-static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
uint64_t reqid = request_id++;
- int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+ int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
return ret;
}
-static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
{
uint64_t reqid = request_id++;
- int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+ int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
block_request_create(reqid, bs, qemu_coroutine_self());
qemu_coroutine_yield();
@@ -144,11 +144,11 @@ static BlockDriver bdrv_blkreplay = {
.bdrv_close = blkreplay_close,
.bdrv_getlength = blkreplay_getlength,
- .bdrv_co_readv = blkreplay_co_readv,
- .bdrv_co_writev = blkreplay_co_writev,
+ .bdrv_co_preadv = blkreplay_co_preadv,
+ .bdrv_co_pwritev = blkreplay_co_pwritev,
- .bdrv_co_write_zeroes = blkreplay_co_write_zeroes,
- .bdrv_co_discard = blkreplay_co_discard,
+ .bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = blkreplay_co_pdiscard,
.bdrv_co_flush = blkreplay_co_flush,
};
diff --git a/block/blkverify.c b/block/blkverify.c
index 9414b7a84..da62d7596 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -247,9 +247,9 @@ static BlockAIOCB *blkverify_aio_readv(BlockDriverState *bs,
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
- bdrv_aio_readv(s->test_file->bs, sector_num, qiov, nb_sectors,
+ bdrv_aio_readv(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
- bdrv_aio_readv(bs->file->bs, sector_num, &acb->raw_qiov, nb_sectors,
+ bdrv_aio_readv(bs->file, sector_num, &acb->raw_qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -262,9 +262,9 @@ static BlockAIOCB *blkverify_aio_writev(BlockDriverState *bs,
BlkverifyAIOCB *acb = blkverify_aio_get(bs, true, sector_num, qiov,
nb_sectors, cb, opaque);
- bdrv_aio_writev(s->test_file->bs, sector_num, qiov, nb_sectors,
+ bdrv_aio_writev(s->test_file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
- bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
+ bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
blkverify_aio_cb, acb);
return &acb->common;
}
@@ -293,22 +293,6 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
}
-/* Propagate AioContext changes to ->test_file */
-static void blkverify_detach_aio_context(BlockDriverState *bs)
-{
- BDRVBlkverifyState *s = bs->opaque;
-
- bdrv_detach_aio_context(s->test_file->bs);
-}
-
-static void blkverify_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVBlkverifyState *s = bs->opaque;
-
- bdrv_attach_aio_context(s->test_file->bs, new_context);
-}
-
static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkverifyState *s = bs->opaque;
@@ -356,9 +340,6 @@ static BlockDriver bdrv_blkverify = {
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
- .bdrv_attach_aio_context = blkverify_attach_aio_context,
- .bdrv_detach_aio_context = blkverify_detach_aio_context,
-
.is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
};
diff --git a/block/block-backend.c b/block/block-backend.c
index 16c9d5e0f..effa03892 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1,7 +1,7 @@
/*
* QEMU Block backends
*
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
@@ -19,6 +19,7 @@
#include "sysemu/sysemu.h"
#include "qapi-event.h"
#include "qemu/id.h"
+#include "trace.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
@@ -34,6 +35,7 @@ struct BlockBackend {
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+ BlockBackendPublic public;
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
@@ -74,6 +76,7 @@ static const AIOCBInfo block_backend_aiocb_info = {
};
static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
/* All BlockBackends */
static QTAILQ_HEAD(, BlockBackend) block_backends =
@@ -90,9 +93,26 @@ static void blk_root_inherit_options(int *child_flags, QDict *child_options,
/* We're not supposed to call this function for root nodes */
abort();
}
+static void blk_root_drained_begin(BdrvChild *child);
+static void blk_root_drained_end(BdrvChild *child);
+
+static void blk_root_change_media(BdrvChild *child, bool load);
+static void blk_root_resize(BdrvChild *child);
+
+static const char *blk_root_get_name(BdrvChild *child)
+{
+ return blk_name(child->opaque);
+}
static const BdrvChildRole child_root = {
- .inherit_options = blk_root_inherit_options,
+ .inherit_options = blk_root_inherit_options,
+
+ .change_media = blk_root_change_media,
+ .resize = blk_root_resize,
+ .get_name = blk_root_get_name,
+
+ .drained_begin = blk_root_drained_begin,
+ .drained_end = blk_root_drained_end,
};
/*
@@ -100,40 +120,26 @@ static const BdrvChildRole child_root = {
* Store an error through @errp on failure, unless it's null.
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(Error **errp)
+BlockBackend *blk_new(void)
{
BlockBackend *blk;
blk = g_new0(BlockBackend, 1);
blk->refcnt = 1;
- notifier_list_init(&blk->remove_bs_notifiers);
- notifier_list_init(&blk->insert_bs_notifiers);
- QTAILQ_INSERT_TAIL(&block_backends, blk, link);
- return blk;
-}
+ blk_set_enable_write_cache(blk, true);
-/*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
- */
-BlockBackend *blk_new_with_bs(Error **errp)
-{
- BlockBackend *blk;
- BlockDriverState *bs;
+ qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+ qemu_co_queue_init(&blk->public.throttled_reqs[1]);
- blk = blk_new(errp);
- if (!blk) {
- return NULL;
- }
+ notifier_list_init(&blk->remove_bs_notifiers);
+ notifier_list_init(&blk->insert_bs_notifiers);
- bs = bdrv_new_root();
- blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
+ QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
/*
- * Calls blk_new_with_bs() and then calls bdrv_open() on the BlockDriverState.
+ * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
*
* Just as with bdrv_open(), after having called this function the reference to
* @options belongs to the block layer (even on failure).
@@ -148,21 +154,16 @@ BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp)
{
BlockBackend *blk;
- int ret;
-
- blk = blk_new_with_bs(errp);
- if (!blk) {
- QDECREF(options);
- return NULL;
- }
+ BlockDriverState *bs;
- ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
- if (ret < 0) {
+ blk = blk_new();
+ bs = bdrv_open(filename, reference, options, flags, errp);
+ if (!bs) {
blk_unref(blk);
return NULL;
}
- blk_set_enable_write_cache(blk, true);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
return blk;
}
@@ -177,10 +178,6 @@ static void blk_delete(BlockBackend *blk)
}
assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
- if (blk->root_state.throttle_state) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
@@ -267,28 +264,45 @@ BlockBackend *blk_next(BlockBackend *blk)
: QTAILQ_FIRST(&monitor_block_backends);
}
-/*
- * Iterates over all BlockDriverStates which are attached to a BlockBackend.
- * This function is for use by bdrv_next().
- *
- * @bs must be NULL or a BDS that is attached to a BB.
- */
-BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BlockDriverState *bdrv_next(BdrvNextIterator *it)
{
- BlockBackend *blk;
+ BlockDriverState *bs;
- if (bs) {
- assert(bs->blk);
- blk = bs->blk;
- } else {
- blk = NULL;
+ /* First, return all root nodes of BlockBackends. In order to avoid
+ * returning a BDS twice when multiple BBs refer to it, we only return it
+ * if the BB is the first one in the parent list of the BDS. */
+ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+ do {
+ it->blk = blk_all_next(it->blk);
+ bs = it->blk ? blk_bs(it->blk) : NULL;
+ } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
+
+ if (bs) {
+ return bs;
+ }
+ it->phase = BDRV_NEXT_MONITOR_OWNED;
}
+ /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+ * BDSes that are attached to a BlockBackend here; they have been handled
+ * by the above block already */
do {
- blk = blk_all_next(blk);
- } while (blk && !blk->root);
+ it->bs = bdrv_next_monitor_owned(it->bs);
+ bs = it->bs;
+ } while (bs && bdrv_has_blk(bs));
+
+ return bs;
+}
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+ *it = (BdrvNextIterator) {
+ .phase = BDRV_NEXT_BACKEND_ROOTS,
+ };
- return blk ? blk->root->bs : NULL;
+ return bdrv_next(it);
}
/*
@@ -375,6 +389,26 @@ BlockDriverState *blk_bs(BlockBackend *blk)
return blk->root ? blk->root->bs : NULL;
}
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+{
+ BdrvChild *child;
+ QLIST_FOREACH(child, &bs->parents, next_parent) {
+ if (child->role == &child_root) {
+ return child->opaque;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+ return bdrv_first_blk(bs) != NULL;
+}
+
/*
* Return @blk's DriveInfo if any, else null.
*/
@@ -411,17 +445,33 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
}
/*
+ * Returns a pointer to the publicly accessible fields of @blk.
+ */
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
+{
+ return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+ return container_of(public, BlockBackend, public);
+}
+
+/*
* Disassociates the currently associated BlockDriverState from @blk.
*/
void blk_remove_bs(BlockBackend *blk)
{
- assert(blk->root->bs->blk == blk);
-
notifier_list_notify(&blk->remove_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
blk_update_root_state(blk);
- blk->root->bs->blk = NULL;
bdrv_root_unref_child(blk->root);
blk->root = NULL;
}
@@ -431,12 +481,14 @@ void blk_remove_bs(BlockBackend *blk)
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
- assert(!blk->root && !bs->blk);
bdrv_ref(bs);
- blk->root = bdrv_root_attach_child(bs, "root", &child_root);
- bs->blk = blk;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
notifier_list_notify(&blk->insert_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(
+ &blk->public.throttle_timers, bdrv_get_aio_context(bs));
+ }
}
/*
@@ -525,6 +577,11 @@ void blk_dev_change_media_cb(BlockBackend *blk, bool load)
}
}
+static void blk_root_change_media(BdrvChild *child, bool load)
+{
+ blk_dev_change_media_cb(child->opaque, load);
+}
+
/*
* Does @blk's attached device model have removable media?
* %true if no device model is attached.
@@ -579,8 +636,10 @@ bool blk_dev_is_medium_locked(BlockBackend *blk)
/*
* Notify @blk's attached device model of a backend size change.
*/
-void blk_dev_resize_cb(BlockBackend *blk)
+static void blk_root_resize(BdrvChild *child)
{
+ BlockBackend *blk = child->opaque;
+
if (blk->dev_ops && blk->dev_ops->resize_cb) {
blk->dev_ops->resize_cb(blk->dev_opaque);
}
@@ -683,34 +742,50 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num,
nb_sectors * BDRV_SECTOR_SIZE);
}
-static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_byte_request(blk, offset, bytes);
+ int ret;
+
+ trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags);
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, false);
+ }
+
+ return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
}
-static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
- unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
int ret;
+ trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+
ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, true);
+ }
+
if (!blk->enable_write_cache) {
flags |= BDRV_REQ_FUA;
}
- return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
+ return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
}
typedef struct BlkRwCo {
@@ -761,8 +836,8 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
.ret = NOT_DONE,
};
- co = qemu_coroutine_create(co_entry);
- qemu_coroutine_enter(co, &rwco);
+ co = qemu_coroutine_create(co_entry, &rwco);
+ qemu_coroutine_enter(co);
aio_context = blk_get_aio_context(blk);
while (rwco.ret == NOT_DONE) {
@@ -772,55 +847,32 @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
return rwco.ret;
}
-static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors, CoroutineEntry co_entry,
- BdrvRequestFlags flags)
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int count)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return -EINVAL;
- }
-
- return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
- nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
-}
-
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
- return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
-}
-
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
-{
- BlockDriverState *bs = blk_bs(blk);
- bool enabled;
int ret;
- ret = blk_check_request(blk, sector_num, nb_sectors);
+ ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = blk_read(blk, sector_num, buf, nb_sectors);
- bs->io_limits_enabled = enabled;
+ blk_root_drained_begin(blk->root);
+ ret = blk_pread(blk, offset, buf, count);
+ blk_root_drained_end(blk->root);
return ret;
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
- blk_write_entry, 0);
+ return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
}
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
{
- return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
- flags | BDRV_REQ_ZERO_WRITE);
+ return bdrv_make_zero(blk->root, flags);
}
static void error_callback_bh(void *opaque)
@@ -898,8 +950,8 @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
acb->bh = NULL;
acb->has_returned = false;
- co = qemu_coroutine_create(co_entry);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(co_entry, acb);
+ qemu_coroutine_enter(co);
acb->has_returned = true;
if (acb->rwco.ret != NOT_DONE) {
@@ -932,18 +984,12 @@ static void blk_aio_write_entry(void *opaque)
blk_aio_complete(acb);
}
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, NULL,
- blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
- cb, opaque);
+ return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
@@ -955,9 +1001,11 @@ int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
return count;
}
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+ BdrvRequestFlags flags)
{
- int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
+ int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+ flags);
if (ret < 0) {
return ret;
}
@@ -991,30 +1039,20 @@ int64_t blk_nb_sectors(BlockBackend *blk)
return bdrv_nb_sectors(blk_bs(blk));
}
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
- blk_aio_read_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_read_entry, flags, cb, opaque);
}
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
- }
-
- assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
- return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
- blk_aio_write_entry, 0, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_write_entry, flags, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
@@ -1027,16 +1065,16 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk,
return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
-BlockAIOCB *blk_aio_discard(BlockBackend *blk,
- int64_t sector_num, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
+ int64_t offset, int count,
+ BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return blk_abort_aio_request(blk, cb, opaque, ret);
}
- return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
+ return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -1049,20 +1087,6 @@ void blk_aio_cancel_async(BlockAIOCB *acb)
bdrv_aio_cancel_async(acb);
}
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
- int i, ret;
-
- for (i = 0; i < num_reqs; i++) {
- ret = blk_check_request(blk, reqs[i].sector, reqs[i].nb_sectors);
- if (ret < 0) {
- return ret;
- }
- }
-
- return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
-}
-
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
if (!blk_is_available(blk)) {
@@ -1082,14 +1106,14 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}
-int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
+int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
- return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
+ return bdrv_co_pdiscard(blk_bs(blk), offset, count);
}
int blk_co_flush(BlockBackend *blk)
@@ -1149,6 +1173,7 @@ BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
return BLOCK_ERROR_ACTION_REPORT;
case BLOCKDEV_ON_ERROR_IGNORE:
return BLOCK_ERROR_ACTION_IGNORE;
+ case BLOCKDEV_ON_ERROR_AUTO:
default:
abort();
}
@@ -1284,15 +1309,16 @@ int blk_get_flags(BlockBackend *blk)
}
}
-int blk_get_max_transfer_length(BlockBackend *blk)
+/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
+uint32_t blk_get_max_transfer(BlockBackend *blk)
{
BlockDriverState *bs = blk_bs(blk);
+ uint32_t max = 0;
if (bs) {
- return bs->bl.max_transfer_length;
- } else {
- return 0;
+ max = bs->bl.max_transfer;
}
+ return MIN_NON_ZERO(max, INT_MAX);
}
int blk_get_max_iov(BlockBackend *blk)
@@ -1375,7 +1401,14 @@ void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
BlockDriverState *bs = blk_bs(blk);
if (bs) {
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
bdrv_set_aio_context(bs, new_context);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+ new_context);
+ }
}
}
@@ -1444,15 +1477,10 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
-int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
- return -EINVAL;
- }
-
- return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, NULL,
+ return blk_co_pwritev(blk, offset, count, NULL,
flags | BDRV_REQ_ZERO_WRITE);
}
@@ -1476,14 +1504,14 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return bdrv_truncate(blk_bs(blk), offset);
}
-int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret = blk_check_byte_request(blk, offset, count);
if (ret < 0) {
return ret;
}
- return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
+ return bdrv_pdiscard(blk_bs(blk), offset, count);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
@@ -1545,19 +1573,6 @@ void blk_update_root_state(BlockBackend *blk)
blk->root_state.open_flags = blk->root->bs->open_flags;
blk->root_state.read_only = blk->root->bs->read_only;
blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
-
- if (blk->root_state.throttle_group) {
- g_free(blk->root_state.throttle_group);
- throttle_group_unref(blk->root_state.throttle_state);
- }
- if (blk->root->bs->throttle_state) {
- const char *name = throttle_group_get_name(blk->root->bs);
- blk->root_state.throttle_group = g_strdup(name);
- blk->root_state.throttle_state = throttle_group_incref(name);
- } else {
- blk->root_state.throttle_group = NULL;
- blk->root_state.throttle_state = NULL;
- }
}
/*
@@ -1568,9 +1583,6 @@ void blk_update_root_state(BlockBackend *blk)
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
{
bs->detect_zeroes = blk->root_state.detect_zeroes;
- if (blk->root_state.throttle_group) {
- bdrv_io_limits_enable(bs, blk->root_state.throttle_group);
- }
}
/*
@@ -1633,3 +1645,62 @@ int blk_flush_all(void)
return result;
}
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+ throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+ assert(blk->public.throttle_state);
+ bdrv_drained_begin(blk_bs(blk));
+ throttle_group_unregister_blk(blk);
+ bdrv_drained_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+ assert(!blk->public.throttle_state);
+ throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+ /* this BB is not part of any group */
+ if (!blk->public.throttle_state) {
+ return;
+ }
+
+ /* this BB is already part of the same group as the one we want */
+ if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+ return;
+ }
+
+ /* need to change the group this BB belongs to */
+ blk_io_limits_disable(blk);
+ blk_io_limits_enable(blk, group);
+}
+
+static void blk_root_drained_begin(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ /* Note that blk->root may not be accessible here yet if we are just
+ * attaching to a BlockDriverState that is drained. Use child instead. */
+
+ if (blk->public.io_limits_disabled++ == 0) {
+ throttle_group_restart_blk(blk);
+ }
+}
+
+static void blk_root_drained_end(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ assert(blk->public.io_limits_disabled);
+ --blk->public.io_limits_disabled;
+}
diff --git a/block/bochs.c b/block/bochs.c
index af8b7abdf..8c9652ebe 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -27,6 +27,7 @@
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
/**************************************************************/
@@ -103,9 +104,9 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
struct bochs_header bochs;
int ret;
- bs->read_only = 1; // no write support yet
+ bs->read_only = true; /* no write support yet */
- ret = bdrv_pread(bs->file->bs, 0, &bochs, sizeof(bochs));
+ ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
if (ret < 0) {
return ret;
}
@@ -139,7 +140,7 @@ static int bochs_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file->bs, le32_to_cpu(bochs.header), s->catalog_bitmap,
+ ret = bdrv_pread(bs->file, le32_to_cpu(bochs.header), s->catalog_bitmap,
s->catalog_size * 4);
if (ret < 0) {
goto fail;
@@ -187,6 +188,11 @@ fail:
return ret;
}
+static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
@@ -208,7 +214,7 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
- ret = bdrv_pread(bs->file->bs, bitmap_offset + (extent_offset / 8),
+ ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
&bitmap_entry, 1);
if (ret < 0) {
return ret;
@@ -221,38 +227,52 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
}
-static int bochs_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
+ BDRVBochsState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ uint64_t bytes_done = 0;
+ QEMUIOVector local_qiov;
int ret;
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_co_mutex_lock(&s->lock);
+
while (nb_sectors > 0) {
int64_t block_offset = seek_to_sector(bs, sector_num);
if (block_offset < 0) {
- return block_offset;
- } else if (block_offset > 0) {
- ret = bdrv_pread(bs->file->bs, block_offset, buf, 512);
+ ret = block_offset;
+ goto fail;
+ }
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, 512);
+
+ if (block_offset > 0) {
+ ret = bdrv_co_preadv(bs->file, block_offset, 512,
+ &local_qiov, 0);
if (ret < 0) {
- return ret;
+ goto fail;
}
} else {
- memset(buf, 0, 512);
+ qemu_iovec_memset(&local_qiov, 0, 0, 512);
}
nb_sectors--;
sector_num++;
- buf += 512;
+ bytes_done += 512;
}
- return 0;
-}
-static coroutine_fn int bochs_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVBochsState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = bochs_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
@@ -267,7 +287,8 @@ static BlockDriver bdrv_bochs = {
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
- .bdrv_read = bochs_co_read,
+ .bdrv_refresh_limits = bochs_refresh_limits,
+ .bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,
};
diff --git a/block/cloop.c b/block/cloop.c
index a84f14019..7b75f7ef7 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -26,6 +26,7 @@
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include <zlib.h>
/* Maximum compressed block size */
@@ -65,10 +66,10 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
- bs->read_only = 1;
+ bs->read_only = true;
/* read header */
- ret = bdrv_pread(bs->file->bs, 128, &s->block_size, 4);
+ ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
if (ret < 0) {
return ret;
}
@@ -94,7 +95,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- ret = bdrv_pread(bs->file->bs, 128 + 4, &s->n_blocks, 4);
+ ret = bdrv_pread(bs->file, 128 + 4, &s->n_blocks, 4);
if (ret < 0) {
return ret;
}
@@ -125,7 +126,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file->bs, 128 + 4 + 4, s->offsets, offsets_size);
+ ret = bdrv_pread(bs->file, 128 + 4 + 4, s->offsets, offsets_size);
if (ret < 0) {
goto fail;
}
@@ -197,6 +198,11 @@ fail:
return ret;
}
+static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static inline int cloop_read_block(BlockDriverState *bs, int block_num)
{
BDRVCloopState *s = bs->opaque;
@@ -205,7 +211,7 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
int ret;
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
- ret = bdrv_pread(bs->file->bs, s->offsets[block_num],
+ ret = bdrv_pread(bs->file, s->offsets[block_num],
s->compressed_block, bytes);
if (ret != bytes) {
return -1;
@@ -229,33 +235,38 @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
return 0;
}
-static int cloop_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVCloopState *s = bs->opaque;
- int i;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int ret, i;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_co_mutex_lock(&s->lock);
for (i = 0; i < nb_sectors; i++) {
+ void *data;
uint32_t sector_offset_in_block =
((sector_num + i) % s->sectors_per_block),
block_num = (sector_num + i) / s->sectors_per_block;
if (cloop_read_block(bs, block_num) != 0) {
- return -1;
+ ret = -EIO;
+ goto fail;
}
- memcpy(buf + i * 512,
- s->uncompressed_block + sector_offset_in_block * 512, 512);
+
+ data = s->uncompressed_block + sector_offset_in_block * 512;
+ qemu_iovec_from_buf(qiov, i * 512, data, 512);
}
- return 0;
-}
-static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVCloopState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = cloop_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
@@ -273,7 +284,8 @@ static BlockDriver bdrv_cloop = {
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
- .bdrv_read = cloop_co_read,
+ .bdrv_refresh_limits = cloop_refresh_limits,
+ .bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,
};
diff --git a/block/commit.c b/block/commit.c
index cba0e8c1e..553e18da5 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -36,28 +36,36 @@ typedef struct CommitBlockJob {
BlockJob common;
RateLimit limit;
BlockDriverState *active;
- BlockDriverState *top;
- BlockDriverState *base;
+ BlockBackend *top;
+ BlockBackend *base;
BlockdevOnError on_error;
int base_flags;
int orig_overlay_flags;
char *backing_file_str;
} CommitBlockJob;
-static int coroutine_fn commit_populate(BlockDriverState *bs,
- BlockDriverState *base,
+static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
int64_t sector_num, int nb_sectors,
void *buf)
{
int ret = 0;
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- if (ret) {
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
+ qiov.size, &qiov, 0);
+ if (ret < 0) {
return ret;
}
- ret = bdrv_write(base, sector_num, buf, nb_sectors);
- if (ret) {
+ ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
+ qiov.size, &qiov, 0);
+ if (ret < 0) {
return ret;
}
@@ -73,8 +81,8 @@ static void commit_complete(BlockJob *job, void *opaque)
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
CommitCompleteData *data = opaque;
BlockDriverState *active = s->active;
- BlockDriverState *top = s->top;
- BlockDriverState *base = s->base;
+ BlockDriverState *top = blk_bs(s->top);
+ BlockDriverState *base = blk_bs(s->base);
BlockDriverState *overlay_bs;
int ret = data->ret;
@@ -94,6 +102,8 @@ static void commit_complete(BlockJob *job, void *opaque)
bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
}
g_free(s->backing_file_str);
+ blk_unref(s->top);
+ blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
}
@@ -102,42 +112,39 @@ static void coroutine_fn commit_run(void *opaque)
{
CommitBlockJob *s = opaque;
CommitCompleteData *data;
- BlockDriverState *top = s->top;
- BlockDriverState *base = s->base;
int64_t sector_num, end;
+ uint64_t delay_ns = 0;
int ret = 0;
int n = 0;
void *buf = NULL;
int bytes_written = 0;
int64_t base_len;
- ret = s->common.len = bdrv_getlength(top);
+ ret = s->common.len = blk_getlength(s->top);
if (s->common.len < 0) {
goto out;
}
- ret = base_len = bdrv_getlength(base);
+ ret = base_len = blk_getlength(s->base);
if (base_len < 0) {
goto out;
}
if (base_len < s->common.len) {
- ret = bdrv_truncate(base, s->common.len);
+ ret = blk_truncate(s->base, s->common.len);
if (ret) {
goto out;
}
}
end = s->common.len >> BDRV_SECTOR_BITS;
- buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+ buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
for (sector_num = 0; sector_num < end; sector_num += n) {
- uint64_t delay_ns = 0;
bool copy;
-wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -146,25 +153,20 @@ wait:
break;
}
/* Copy if allocated above the base */
- ret = bdrv_is_allocated_above(top, base, sector_num,
+ ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
+ sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
- if (s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, n);
- if (delay_ns > 0) {
- goto wait;
- }
- }
- ret = commit_populate(top, base, sector_num, n, buf);
+ ret = commit_populate(s->top, s->base, sector_num, n, buf);
bytes_written += n * BDRV_SECTOR_SIZE;
}
if (ret < 0) {
- if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
- s->on_error == BLOCKDEV_ON_ERROR_REPORT||
- (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
+ BlockErrorAction action =
+ block_job_error_action(&s->common, false, s->on_error, -ret);
+ if (action == BLOCK_ERROR_ACTION_REPORT) {
goto out;
} else {
n = 0;
@@ -173,6 +175,10 @@ wait:
}
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
+
+ if (copy && s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, n);
+ }
}
ret = 0;
@@ -202,8 +208,8 @@ static const BlockJobDriver commit_job_driver = {
.set_speed = commit_set_speed,
};
-void commit_start(BlockDriverState *bs, BlockDriverState *base,
- BlockDriverState *top, int64_t speed,
+void commit_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, BlockCompletionFunc *cb,
void *opaque, const char *backing_file_str, Error **errp)
{
@@ -214,13 +220,6 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *overlay_bs;
Error *local_err = NULL;
- if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
- on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, "Invalid parameter combination");
- return;
- }
-
assert(top != bs);
if (top == base) {
error_setg(errp, "Invalid files for merge: top and base are the same");
@@ -234,6 +233,12 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
return;
}
+ s = block_job_create(job_id, &commit_job_driver, bs, speed,
+ cb, opaque, errp);
+ if (!s) {
+ return;
+ }
+
orig_base_flags = bdrv_get_flags(base);
orig_overlay_flags = bdrv_get_flags(overlay_bs);
@@ -250,18 +255,18 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
bdrv_reopen_multiple(reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
+ block_job_unref(&s->common);
return;
}
}
- s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
- if (!s) {
- return;
- }
+ s->base = blk_new();
+ blk_insert_bs(s->base, base);
+
+ s->top = blk_new();
+ blk_insert_bs(s->top, top);
- s->base = base;
- s->top = top;
s->active = bs;
s->base_flags = orig_base_flags;
@@ -270,8 +275,129 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
- s->common.co = qemu_coroutine_create(commit_run);
+ s->common.co = qemu_coroutine_create(commit_run, s);
trace_commit_start(bs, base, top, s, s->common.co, opaque);
- qemu_coroutine_enter(s->common.co, s);
+ qemu_coroutine_enter(s->common.co);
+}
+
+
+#define COMMIT_BUF_SECTORS 2048
+
+/* commit COW file into the raw image */
+int bdrv_commit(BlockDriverState *bs)
+{
+ BlockBackend *src, *backing;
+ BlockDriver *drv = bs->drv;
+ int64_t sector, total_sectors, length, backing_length;
+ int n, ro, open_flags;
+ int ret = 0;
+ uint8_t *buf = NULL;
+
+ if (!drv)
+ return -ENOMEDIUM;
+
+ if (!bs->backing) {
+ return -ENOTSUP;
+ }
+
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
+ bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
+ return -EBUSY;
+ }
+
+ ro = bs->backing->bs->read_only;
+ open_flags = bs->backing->bs->open_flags;
+
+ if (ro) {
+ if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
+ return -EACCES;
+ }
+ }
+
+ src = blk_new();
+ blk_insert_bs(src, bs);
+
+ backing = blk_new();
+ blk_insert_bs(backing, bs->backing->bs);
+
+ length = blk_getlength(src);
+ if (length < 0) {
+ ret = length;
+ goto ro_cleanup;
+ }
+
+ backing_length = blk_getlength(backing);
+ if (backing_length < 0) {
+ ret = backing_length;
+ goto ro_cleanup;
+ }
+
+ /* If our top snapshot is larger than the backing file image,
+ * grow the backing file image if possible. If not possible,
+ * we must return an error */
+ if (length > backing_length) {
+ ret = blk_truncate(backing, length);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ }
+
+ total_sectors = length >> BDRV_SECTOR_BITS;
+
+ /* blk_try_blockalign() for src will choose an alignment that works for
+ * backing as well, so no need to compare the alignment manually. */
+ buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+ if (buf == NULL) {
+ ret = -ENOMEM;
+ goto ro_cleanup;
+ }
+
+ for (sector = 0; sector < total_sectors; sector += n) {
+ ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ if (ret) {
+ ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
+ n * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+
+ ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
+ n * BDRV_SECTOR_SIZE, 0);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ }
+ }
+
+ if (drv->bdrv_make_empty) {
+ ret = drv->bdrv_make_empty(bs);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ blk_flush(src);
+ }
+
+ /*
+ * Make sure all data we wrote to the backing device is actually
+ * stable on disk.
+ */
+ blk_flush(backing);
+
+ ret = 0;
+ro_cleanup:
+ qemu_vfree(buf);
+
+ blk_unref(src);
+ blk_unref(backing);
+
+ if (ro) {
+ /* ignoring error return here */
+ bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
+ }
+
+ return ret;
}
diff --git a/block/crypto.c b/block/crypto.c
index 1903e84fb..7f61e1268 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -64,7 +64,7 @@ static ssize_t block_crypto_read_func(QCryptoBlock *block,
BlockDriverState *bs = opaque;
ssize_t ret;
- ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
+ ret = bdrv_pread(bs->file, offset, buf, buflen);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read encryption header");
return ret;
@@ -91,7 +91,7 @@ static ssize_t block_crypto_write_func(QCryptoBlock *block,
struct BlockCryptoCreateData *data = opaque;
ssize_t ret;
- ret = blk_pwrite(data->blk, offset, buf, buflen);
+ ret = blk_pwrite(data->blk, offset, buf, buflen, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write encryption header");
return ret;
@@ -193,18 +193,16 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
QemuOpts *opts,
Error **errp)
{
- OptsVisitor *ov;
+ Visitor *v;
QCryptoBlockOpenOptions *ret = NULL;
Error *local_err = NULL;
- Error *end_err = NULL;
ret = g_new0(QCryptoBlockOpenOptions, 1);
ret->format = format;
- ov = opts_visitor_new(opts);
+ v = opts_visitor_new(opts);
- visit_start_struct(opts_get_visitor(ov),
- NULL, NULL, 0, &local_err);
+ visit_start_struct(v, NULL, NULL, 0, &local_err);
if (local_err) {
goto out;
}
@@ -212,16 +210,18 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
switch (format) {
case Q_CRYPTO_BLOCK_FORMAT_LUKS:
visit_type_QCryptoBlockOptionsLUKS_members(
- opts_get_visitor(ov), &ret->u.luks, &local_err);
+ v, &ret->u.luks, &local_err);
break;
default:
error_setg(&local_err, "Unsupported block format %d", format);
break;
}
+ if (!local_err) {
+ visit_check_struct(v, &local_err);
+ }
- visit_end_struct(opts_get_visitor(ov), &end_err);
- error_propagate(&local_err, end_err);
+ visit_end_struct(v, NULL);
out:
if (local_err) {
@@ -229,7 +229,7 @@ block_crypto_open_opts_init(QCryptoBlockFormat format,
qapi_free_QCryptoBlockOpenOptions(ret);
ret = NULL;
}
- opts_visitor_cleanup(ov);
+ visit_free(v);
return ret;
}
@@ -239,18 +239,16 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
QemuOpts *opts,
Error **errp)
{
- OptsVisitor *ov;
+ Visitor *v;
QCryptoBlockCreateOptions *ret = NULL;
Error *local_err = NULL;
- Error *end_err = NULL;
ret = g_new0(QCryptoBlockCreateOptions, 1);
ret->format = format;
- ov = opts_visitor_new(opts);
+ v = opts_visitor_new(opts);
- visit_start_struct(opts_get_visitor(ov),
- NULL, NULL, 0, &local_err);
+ visit_start_struct(v, NULL, NULL, 0, &local_err);
if (local_err) {
goto out;
}
@@ -258,16 +256,18 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
switch (format) {
case Q_CRYPTO_BLOCK_FORMAT_LUKS:
visit_type_QCryptoBlockCreateOptionsLUKS_members(
- opts_get_visitor(ov), &ret->u.luks, &local_err);
+ v, &ret->u.luks, &local_err);
break;
default:
error_setg(&local_err, "Unsupported block format %d", format);
break;
}
+ if (!local_err) {
+ visit_check_struct(v, &local_err);
+ }
- visit_end_struct(opts_get_visitor(ov), &end_err);
- error_propagate(&local_err, end_err);
+ visit_end_struct(v, NULL);
out:
if (local_err) {
@@ -275,7 +275,7 @@ block_crypto_create_opts_init(QCryptoBlockFormat format,
qapi_free_QCryptoBlockCreateOptions(ret);
ret = NULL;
}
- opts_visitor_cleanup(ov);
+ visit_free(v);
return ret;
}
@@ -320,8 +320,8 @@ static int block_crypto_open_generic(QCryptoBlockFormat format,
goto cleanup;
}
- bs->encrypted = 1;
- bs->valid_key = 1;
+ bs->encrypted = true;
+ bs->valid_key = true;
ret = 0;
cleanup:
@@ -426,7 +426,7 @@ block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
- ret = bdrv_co_readv(bs->file->bs,
+ ret = bdrv_co_readv(bs->file,
payload_offset + sector_num,
cur_nr_sectors, &hd_qiov);
if (ret < 0) {
@@ -505,7 +505,7 @@ block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
- ret = bdrv_co_writev(bs->file->bs,
+ ret = bdrv_co_writev(bs->file,
payload_offset + sector_num,
cur_nr_sectors, &hd_qiov);
if (ret < 0) {
@@ -563,6 +563,53 @@ static int block_crypto_create_luks(const char *filename,
filename, opts, errp);
}
+/*
+ * Fill in @bdi for a LUKS volume.
+ *
+ * The cluster size is copied from the info reported for the underlying
+ * file node; both zero-related capabilities are reported as false.
+ *
+ * Returns 0 on success.  If bdrv_get_info() on the file node returns a
+ * non-zero value, that value is returned and @bdi is left untouched.
+ */
+static int block_crypto_get_info_luks(BlockDriverState *bs,
+                                      BlockDriverInfo *bdi)
+{
+    BlockDriverInfo file_info;
+    int rc = bdrv_get_info(bs->file->bs, &file_info);
+
+    if (rc == 0) {
+        bdi->cluster_size = file_info.cluster_size;
+        bdi->unallocated_blocks_are_zero = false;
+        bdi->can_write_zeroes_with_unmap = false;
+    }
+
+    return rc;
+}
+
+/*
+ * Build the format-specific image info for a LUKS volume.
+ *
+ * Returns a newly allocated ImageInfoSpecific of kind LUKS, or NULL when
+ * the crypto layer provides no info or the info is not for the LUKS format.
+ */
+static ImageInfoSpecific *
+block_crypto_get_specific_info_luks(BlockDriverState *bs)
+{
+    BlockCrypto *crypto = bs->opaque;
+    ImageInfoSpecific *result = NULL;
+    QCryptoBlockInfo *block_info;
+
+    block_info = qcrypto_block_get_info(crypto->block, NULL);
+    if (!block_info) {
+        return NULL;
+    }
+
+    if (block_info->format == Q_CRYPTO_BLOCK_FORMAT_LUKS) {
+        result = g_new(ImageInfoSpecific, 1);
+        result->type = IMAGE_INFO_SPECIFIC_KIND_LUKS;
+        result->u.luks.data = g_new(QCryptoBlockInfoLUKS, 1);
+
+        /*
+         * Move (shallow-copy) the LUKS payload into the result, then zero
+         * the source so that freeing block_info below does not also free
+         * the pointers the result now owns.
+         */
+        *result->u.luks.data = block_info->u.luks;
+        memset(&block_info->u.luks, 0, sizeof(block_info->u.luks));
+    }
+
+    qapi_free_QCryptoBlockInfo(block_info);
+
+    return result;
+}
+
BlockDriver bdrv_crypto_luks = {
.format_name = "luks",
.instance_size = sizeof(BlockCrypto),
@@ -576,6 +623,8 @@ BlockDriver bdrv_crypto_luks = {
.bdrv_co_readv = block_crypto_co_readv,
.bdrv_co_writev = block_crypto_co_writev,
.bdrv_getlength = block_crypto_getlength,
+ .bdrv_get_info = block_crypto_get_info_luks,
+ .bdrv_get_specific_info = block_crypto_get_specific_info_luks,
};
static void block_crypto_init(void)
diff --git a/block/curl.c b/block/curl.c
index 5a8f8b623..426fb4d67 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -36,10 +36,16 @@
// #define DEBUG_VERBOSE
#ifdef DEBUG_CURL
-#define DPRINTF(fmt, ...) do { printf(fmt, ## __VA_ARGS__); } while (0)
+#define DEBUG_CURL_PRINT 1
#else
-#define DPRINTF(fmt, ...) do { } while (0)
+#define DEBUG_CURL_PRINT 0
#endif
+/*
+ * Debug trace macro: writes to stderr only when DEBUG_CURL_PRINT is non-zero.
+ * The guard is an ordinary if () rather than a preprocessor conditional, so
+ * the fprintf() call is compiled even when debugging is switched off.
+ */
+#define DPRINTF(fmt, ...) \
+ do { \
+ if (DEBUG_CURL_PRINT) { \
+ fprintf(stderr, fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
#if LIBCURL_VERSION_NUM >= 0x071000
/* The multi interface timer callback was introduced in 7.16.0 */
@@ -163,7 +169,7 @@ static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
state->sock_fd = fd;
s = state->s;
- DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
+ DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
switch (action) {
case CURL_POLL_IN:
aio_set_fd_handler(s->aio_context, fd, false,
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 4902ca557..f2bfdcfde 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -326,14 +326,14 @@ void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
}
void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors)
+ int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
}
void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
- int64_t cur_sector, int nr_sectors)
+ int64_t cur_sector, int64_t nr_sectors)
{
assert(bdrv_dirty_bitmap_enabled(bitmap));
hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
@@ -361,7 +361,7 @@ void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
}
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
- int nr_sectors)
+ int64_t nr_sectors)
{
BdrvDirtyBitmap *bitmap;
QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
diff --git a/block/dmg.c b/block/dmg.c
index a496eb7c9..b0ed89baa 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -32,7 +32,6 @@
#ifdef CONFIG_BZIP2
#include <bzlib.h>
#endif
-#include <glib.h>
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -87,7 +86,7 @@ static int read_uint64(BlockDriverState *bs, int64_t offset, uint64_t *result)
uint64_t buffer;
int ret;
- ret = bdrv_pread(bs->file->bs, offset, &buffer, 8);
+ ret = bdrv_pread(bs->file, offset, &buffer, 8);
if (ret < 0) {
return ret;
}
@@ -101,7 +100,7 @@ static int read_uint32(BlockDriverState *bs, int64_t offset, uint32_t *result)
uint32_t buffer;
int ret;
- ret = bdrv_pread(bs->file->bs, offset, &buffer, 4);
+ ret = bdrv_pread(bs->file, offset, &buffer, 4);
if (ret < 0) {
return ret;
}
@@ -154,8 +153,9 @@ static void update_max_chunk_size(BDRVDMGState *s, uint32_t chunk,
}
}
-static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
+static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp)
{
+ BlockDriverState *file_bs = file->bs;
int64_t length;
int64_t offset = 0;
uint8_t buffer[515];
@@ -179,7 +179,7 @@ static int64_t dmg_find_koly_offset(BlockDriverState *file_bs, Error **errp)
offset = length - 511 - 512;
}
length = length < 515 ? length : 515;
- ret = bdrv_pread(file_bs, offset, buffer, length);
+ ret = bdrv_pread(file, offset, buffer, length);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed while reading UDIF trailer");
return ret;
@@ -356,7 +356,7 @@ static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
offset += 4;
buffer = g_realloc(buffer, count);
- ret = bdrv_pread(bs->file->bs, offset, buffer, count);
+ ret = bdrv_pread(bs->file, offset, buffer, count);
if (ret < 0) {
goto fail;
}
@@ -393,7 +393,7 @@ static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
buffer = g_malloc(info_length + 1);
buffer[info_length] = '\0';
- ret = bdrv_pread(bs->file->bs, info_begin, buffer, info_length);
+ ret = bdrv_pread(bs->file, info_begin, buffer, info_length);
if (ret != info_length) {
ret = -EINVAL;
goto fail;
@@ -439,7 +439,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
int64_t offset;
int ret;
- bs->read_only = 1;
+ bs->read_only = true;
+
s->n_chunks = 0;
s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
/* used by dmg_read_mish_block to keep track of the current I/O position */
@@ -448,7 +449,7 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
ds.max_sectors_per_chunk = 1;
/* locate the UDIF trailer */
- offset = dmg_find_koly_offset(bs->file->bs, errp);
+ offset = dmg_find_koly_offset(bs->file, errp);
if (offset < 0) {
ret = offset;
goto fail;
@@ -546,6 +547,11 @@ fail:
return ret;
}
+/*
+ * Set the driver's request alignment to one sector (BDRV_SECTOR_SIZE).
+ * @errp is not used by this implementation.
+ */
+static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static inline int is_sector_in_chunk(BDRVDMGState* s,
uint32_t chunk_num, uint64_t sector_num)
{
@@ -594,7 +600,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
- ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -618,7 +624,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
case 0x80000006: /* bzip2 compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
- ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
s->compressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -643,7 +649,7 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
break;
#endif /* CONFIG_BZIP2 */
case 1: /* copy */
- ret = bdrv_pread(bs->file->bs, s->offsets[chunk],
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
if (ret != s->lengths[chunk]) {
return -1;
@@ -659,38 +665,42 @@ static inline int dmg_read_chunk(BlockDriverState *bs, uint64_t sector_num)
return 0;
}
-static int dmg_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVDMGState *s = bs->opaque;
- int i;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int ret, i;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_co_mutex_lock(&s->lock);
for (i = 0; i < nb_sectors; i++) {
uint32_t sector_offset_in_chunk;
+ void *data;
+
if (dmg_read_chunk(bs, sector_num + i) != 0) {
- return -1;
+ ret = -EIO;
+ goto fail;
}
/* Special case: current chunk is all zeroes. Do not perform a memcpy as
* s->uncompressed_chunk may be too small to cover the large all-zeroes
* section. dmg_read_chunk is called to find s->current_chunk */
if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
- memset(buf + i * 512, 0, 512);
+ qemu_iovec_memset(qiov, i * 512, 0, 512);
continue;
}
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
- memcpy(buf + i * 512,
- s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
+ data = s->uncompressed_chunk + sector_offset_in_chunk * 512;
+ qemu_iovec_from_buf(qiov, i * 512, data, 512);
}
- return 0;
-}
-static coroutine_fn int dmg_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVDMGState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = dmg_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -715,7 +725,8 @@ static BlockDriver bdrv_dmg = {
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
- .bdrv_read = dmg_co_read,
+ .bdrv_refresh_limits = dmg_refresh_limits,
+ .bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};
diff --git a/block/gluster.c b/block/gluster.c
index a8aaacf64..01b479fbb 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -11,7 +11,27 @@
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
#include "qemu/uri.h"
+#include "qemu/error-report.h"
+
+#define GLUSTER_OPT_FILENAME "filename"
+#define GLUSTER_OPT_VOLUME "volume"
+#define GLUSTER_OPT_PATH "path"
+#define GLUSTER_OPT_TYPE "type"
+#define GLUSTER_OPT_SERVER_PATTERN "server."
+#define GLUSTER_OPT_HOST "host"
+#define GLUSTER_OPT_PORT "port"
+#define GLUSTER_OPT_TO "to"
+#define GLUSTER_OPT_IPV4 "ipv4"
+#define GLUSTER_OPT_IPV6 "ipv6"
+#define GLUSTER_OPT_SOCKET "socket"
+#define GLUSTER_OPT_DEBUG "debug"
+#define GLUSTER_DEFAULT_PORT 24007
+#define GLUSTER_DEBUG_DEFAULT 4
+#define GLUSTER_DEBUG_MAX 9
+
+#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
typedef struct GlusterAIOCB {
int64_t size;
@@ -24,28 +44,145 @@ typedef struct GlusterAIOCB {
typedef struct BDRVGlusterState {
struct glfs *glfs;
struct glfs_fd *fd;
+ bool supports_seek_data;
+ int debug_level;
} BDRVGlusterState;
-typedef struct GlusterConf {
- char *server;
- int port;
- char *volname;
- char *image;
- char *transport;
-} GlusterConf;
+/*
+ * Per-reopen state stored in BDRVReopenState.opaque: the gluster connection
+ * and the file handle opened by qemu_gluster_reopen_prepare().
+ */
+typedef struct BDRVGlusterReopenState {
+ struct glfs *glfs;
+ struct glfs_fd *fd;
+} BDRVGlusterReopenState;
-static void qemu_gluster_gconf_free(GlusterConf *gconf)
-{
- if (gconf) {
- g_free(gconf->server);
- g_free(gconf->volname);
- g_free(gconf->image);
- g_free(gconf->transport);
- g_free(gconf);
+
+static QemuOptsList qemu_gluster_create_opts = {
+ .name = "qemu-gluster-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = QEMU_OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, full)"
+ },
+ {
+ .name = GLUSTER_OPT_DEBUG,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Gluster log level, valid range is 0-9",
+ },
+ { /* end of list */ }
}
-}
+};
-static int parse_volume_options(GlusterConf *gconf, char *path)
+/*
+ * Option list named "gluster": the image URL ("filename") and the gluster
+ * log level ("debug").
+ */
+static QemuOptsList runtime_opts = {
+ .name = "gluster",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_FILENAME,
+ .type = QEMU_OPT_STRING,
+ .help = "URL to the gluster image",
+ },
+ {
+ .name = GLUSTER_OPT_DEBUG,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Gluster log level, valid range is 0-9",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Top-level keys absorbed by qemu_gluster_parse_json(): "volume" and "path"
+ * (both treated as mandatory there) plus the optional "debug" log level.
+ */
+static QemuOptsList runtime_json_opts = {
+ .name = "gluster_json",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_VOLUME,
+ .type = QEMU_OPT_STRING,
+ .help = "name of gluster volume where VM image resides",
+ },
+ {
+ .name = GLUSTER_OPT_PATH,
+ .type = QEMU_OPT_STRING,
+ .help = "absolute path to image file in gluster volume",
+ },
+ {
+ .name = GLUSTER_OPT_DEBUG,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Gluster log level, valid range is 0-9",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Used by qemu_gluster_parse_json() to read only the "type" key of one
+ * "server.N." entry, before the transport-specific option list is applied.
+ */
+static QemuOptsList runtime_type_opts = {
+ .name = "gluster_type",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_TYPE,
+ .type = QEMU_OPT_STRING,
+ .help = "tcp|unix",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Keys accepted for a "server.N." entry whose type is unix; absorbed by
+ * qemu_gluster_parse_json().
+ */
+static QemuOptsList runtime_unix_opts = {
+    .name = "gluster_unix",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_SOCKET,
+            .type = QEMU_OPT_STRING,
+            /* help text previously ended in a stray ')' */
+            .help = "socket file path",
+        },
+        { /* end of list */ }
+    },
+};
+
+/*
+ * Keys accepted for a "server.N." entry whose type is tcp; absorbed by
+ * qemu_gluster_parse_json().  "to", "ipv4" and "ipv6" are listed only so
+ * that the parser can detect them and reject them as unsupported.
+ *
+ * Key names use the GLUSTER_OPT_* macros so they cannot drift from the
+ * names the parser looks up.
+ */
+static QemuOptsList runtime_tcp_opts = {
+    .name = "gluster_tcp",
+    .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+    .desc = {
+        {
+            .name = GLUSTER_OPT_TYPE,
+            .type = QEMU_OPT_STRING,
+            .help = "tcp|unix",
+        },
+        {
+            .name = GLUSTER_OPT_HOST,
+            .type = QEMU_OPT_STRING,
+            .help = "host address (hostname/ipv4/ipv6 addresses)",
+        },
+        {
+            .name = GLUSTER_OPT_PORT,
+            .type = QEMU_OPT_NUMBER,
+            .help = "port number on which glusterd is listening (default 24007)",
+        },
+        {
+            .name = GLUSTER_OPT_TO,
+            .type = QEMU_OPT_NUMBER,
+            .help = "max port number, not supported by gluster",
+        },
+        {
+            .name = GLUSTER_OPT_IPV4,
+            .type = QEMU_OPT_BOOL,
+            .help = "ipv4 bool value, not supported by gluster",
+        },
+        {
+            .name = GLUSTER_OPT_IPV6,
+            .type = QEMU_OPT_BOOL,
+            .help = "ipv6 bool value, not supported by gluster",
+        },
+        { /* end of list */ }
+    },
+};
+
+static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
{
char *p, *q;
@@ -59,31 +196,29 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
if (*p == '\0') {
return -EINVAL;
}
- gconf->volname = g_strndup(q, p - q);
+ gconf->volume = g_strndup(q, p - q);
- /* image */
+ /* path */
p += strspn(p, "/");
if (*p == '\0') {
return -EINVAL;
}
- gconf->image = g_strdup(p);
+ gconf->path = g_strdup(p);
return 0;
}
/*
- * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...]
*
* 'gluster' is the protocol.
*
* 'transport' specifies the transport type used to connect to gluster
* management daemon (glusterd). Valid transport types are
- * tcp, unix and rdma. If a transport type isn't specified, then tcp
- * type is assumed.
+ * tcp or unix. If a transport type isn't specified, then tcp type is assumed.
*
- * 'server' specifies the server where the volume file specification for
- * the given volume resides. This can be either hostname, ipv4 address
- * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specified.
+ * 'host' specifies the host where the volume file specification for
+ * the given volume resides. This can be either hostname or ipv4 address.
+ * If transport type is 'unix', then 'host' field should not be specified.
* The 'socket' field needs to be populated with the path to unix domain
* socket.
*
@@ -92,23 +227,22 @@ static int parse_volume_options(GlusterConf *gconf, char *path)
* default port. If the transport type is unix, then 'port' should not be
* specified.
*
- * 'volname' is the name of the gluster volume which contains the VM image.
+ * 'volume' is the name of the gluster volume which contains the VM image.
*
- * 'image' is the path to the actual VM image that resides on gluster volume.
+ * 'path' is the path to the actual VM image that resides on gluster volume.
*
* Examples:
*
* file=gluster://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
- * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
* file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
- * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
*/
-static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
+ const char *filename)
{
+ GlusterServer *gsconf;
URI *uri;
QueryParams *qp = NULL;
bool is_unix = false;
@@ -119,16 +253,21 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
return -EINVAL;
}
+ gconf->server = g_new0(GlusterServerList, 1);
+ gconf->server->value = gsconf = g_new0(GlusterServer, 1);
+
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
- gconf->transport = g_strdup("tcp");
+ gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
- gconf->transport = g_strdup("tcp");
+ gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+unix")) {
- gconf->transport = g_strdup("unix");
+ gsconf->type = GLUSTER_TRANSPORT_UNIX;
is_unix = true;
} else if (!strcmp(uri->scheme, "gluster+rdma")) {
- gconf->transport = g_strdup("rdma");
+ gsconf->type = GLUSTER_TRANSPORT_TCP;
+ error_report("Warning: rdma feature is not supported, falling "
+ "back to tcp");
} else {
ret = -EINVAL;
goto out;
@@ -154,10 +293,14 @@ static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
ret = -EINVAL;
goto out;
}
- gconf->server = g_strdup(qp->p[0].value);
+ gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
} else {
- gconf->server = g_strdup(uri->server ? uri->server : "localhost");
- gconf->port = uri->port;
+ gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
+ if (uri->port) {
+ gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
+ } else {
+ gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+ }
}
out:
@@ -168,52 +311,62 @@ out:
return ret;
}
-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
- Error **errp)
+static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
+ Error **errp)
{
- struct glfs *glfs = NULL;
+ struct glfs *glfs;
int ret;
int old_errno;
+ GlusterServerList *server;
- ret = qemu_gluster_parseuri(gconf, filename);
- if (ret < 0) {
- error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
- "volname/image[?socket=...]");
- errno = -ret;
- goto out;
- }
-
- glfs = glfs_new(gconf->volname);
+ glfs = glfs_new(gconf->volume);
if (!glfs) {
goto out;
}
- ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
- gconf->port);
- if (ret < 0) {
- goto out;
+ for (server = gconf->server; server; server = server->next) {
+ if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
+ ret = glfs_set_volfile_server(glfs,
+ GlusterTransport_lookup[server->value->type],
+ server->value->u.q_unix.path, 0);
+ } else {
+ ret = glfs_set_volfile_server(glfs,
+ GlusterTransport_lookup[server->value->type],
+ server->value->u.tcp.host,
+ atoi(server->value->u.tcp.port));
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
}
- /*
- * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
- * GlusterFS makes GF_LOG_* macros available to libgfapi users.
- */
- ret = glfs_set_logging(glfs, "-", 4);
+ ret = glfs_set_logging(glfs, "-", gconf->debug_level);
if (ret < 0) {
goto out;
}
ret = glfs_init(glfs);
if (ret) {
- error_setg_errno(errp, errno,
- "Gluster connection failed for server=%s port=%d "
- "volume=%s image=%s transport=%s", gconf->server,
- gconf->port, gconf->volname, gconf->image,
- gconf->transport);
+ error_setg(errp, "Gluster connection for volume %s, path %s failed"
+ " to connect", gconf->volume, gconf->path);
+ for (server = gconf->server; server; server = server->next) {
+ if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
+ error_append_hint(errp, "hint: failed on socket %s ",
+ server->value->u.q_unix.path);
+ } else {
+ error_append_hint(errp, "hint: failed on host %s and port %s ",
+ server->value->u.tcp.host,
+ server->value->u.tcp.port);
+ }
+ }
+
+ error_append_hint(errp, "Please refer to gluster logs for more info\n");
/* glfs_init sometimes doesn't set errno although docs suggest that */
- if (errno == 0)
+ if (errno == 0) {
errno = EINVAL;
+ }
goto out;
}
@@ -228,13 +381,233 @@ out:
return NULL;
}
+/*
+ * Map a transport name onto its GlusterTransport enum value by comparing
+ * @opt against each entry of GlusterTransport_lookup.
+ *
+ * Returns the matching index, or GLUSTER_TRANSPORT__MAX when @opt is NULL
+ * or matches no entry.
+ */
+static int qapi_enum_parse(const char *opt)
+{
+    int idx = GLUSTER_TRANSPORT__MAX;
+
+    if (opt) {
+        for (idx = 0; idx < GLUSTER_TRANSPORT__MAX; idx++) {
+            if (strcmp(opt, GlusterTransport_lookup[idx]) == 0) {
+                break;
+            }
+        }
+    }
+
+    return idx;
+}
+
+/*
+ * Convert the json formatted command line into qapi.
+ *
+ * Reads "volume" and "path" from @options, then walks the "server.N."
+ * sub-dictionaries and appends one GlusterServer per entry to
+ * @gconf->server, in order.
+ *
+ * Returns 0 on success.  On failure an error is stored in @errp, errno is
+ * set to EINVAL and -EINVAL is returned.  Anything already linked into
+ * @gconf at that point stays there and is released together with @gconf by
+ * the caller; everything still owned locally is freed here.
+ */
+static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
+                                  QDict *options, Error **errp)
+{
+    QemuOpts *opts;
+    GlusterServer *gsconf = NULL;
+    GlusterServerList *curr = NULL;
+    QDict *backing_options = NULL;
+    Error *local_err = NULL;
+    char *str = NULL;
+    const char *ptr;
+    /* signed, so that a negative result is caught by the "< 1" check */
+    int num_servers;
+    int type;
+    int i;
+
+    /* create opts info from runtime_json_opts list */
+    opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        goto out;
+    }
+
+    num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
+    if (num_servers < 1) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
+        goto out;
+    }
+
+    ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
+    if (!ptr) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
+        goto out;
+    }
+    gconf->volume = g_strdup(ptr);
+
+    ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
+    if (!ptr) {
+        error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
+        goto out;
+    }
+    gconf->path = g_strdup(ptr);
+    qemu_opts_del(opts);
+
+    for (i = 0; i < num_servers; i++) {
+        str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
+        qdict_extract_subqdict(options, &backing_options, str);
+
+        /* create opts info from runtime_type_opts list */
+        opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
+        qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+        if (local_err) {
+            goto out;
+        }
+
+        /*
+         * Validate the transport type before allocating the server entry,
+         * so that an entry freed on the error path never carries an
+         * out-of-range union discriminator.
+         */
+        ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
+        if (!ptr) {
+            error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
+            error_append_hint(&local_err, GERR_INDEX_HINT, i);
+            goto out;
+        }
+        type = qapi_enum_parse(ptr);
+        if (type == GLUSTER_TRANSPORT__MAX) {
+            error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
+                       GLUSTER_OPT_TYPE, "tcp or unix");
+            error_append_hint(&local_err, GERR_INDEX_HINT, i);
+            goto out;
+        }
+        qemu_opts_del(opts);
+
+        gsconf = g_new0(GlusterServer, 1);
+        gsconf->type = type;
+
+        if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
+            /* create opts info from runtime_tcp_opts list */
+            opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+            if (local_err) {
+                goto out;
+            }
+
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_HOST);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.tcp.host = g_strdup(ptr);
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_PORT);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.tcp.port = g_strdup(ptr);
+
+            /* defend for unsupported fields in InetSocketAddress,
+             * i.e. @ipv4, @ipv6 and @to
+             */
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
+            if (ptr) {
+                gsconf->u.tcp.has_to = true;
+            }
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
+            if (ptr) {
+                gsconf->u.tcp.has_ipv4 = true;
+            }
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
+            if (ptr) {
+                gsconf->u.tcp.has_ipv6 = true;
+            }
+            if (gsconf->u.tcp.has_to) {
+                error_setg(&local_err, "Parameter 'to' not supported");
+                goto out;
+            }
+            if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
+                error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
+                goto out;
+            }
+            qemu_opts_del(opts);
+        } else {
+            /* create opts info from runtime_unix_opts list */
+            opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
+            qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+            if (local_err) {
+                goto out;
+            }
+
+            ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
+            if (!ptr) {
+                error_setg(&local_err, QERR_MISSING_PARAMETER,
+                           GLUSTER_OPT_SOCKET);
+                error_append_hint(&local_err, GERR_INDEX_HINT, i);
+                goto out;
+            }
+            gsconf->u.q_unix.path = g_strdup(ptr);
+            qemu_opts_del(opts);
+        }
+
+        if (gconf->server == NULL) {
+            gconf->server = g_new0(GlusterServerList, 1);
+            gconf->server->value = gsconf;
+            curr = gconf->server;
+        } else {
+            curr->next = g_new0(GlusterServerList, 1);
+            curr->next->value = gsconf;
+            curr = curr->next;
+        }
+        /* the list owns the entry now; keep the error path from freeing it */
+        gsconf = NULL;
+
+        /*
+         * Drop the per-server sub-dictionary itself.  Deleting a key from
+         * it, as was done before, leaked the dictionary on every pass.
+         */
+        QDECREF(backing_options);
+        backing_options = NULL;
+        g_free(str);
+        str = NULL;
+    }
+
+    return 0;
+
+out:
+    error_propagate(errp, local_err);
+    qapi_free_GlusterServer(gsconf);
+    qemu_opts_del(opts);
+    g_free(str);
+    QDECREF(backing_options);
+    errno = EINVAL;
+    return -errno;
+}
+
+/*
+ * Populate @gconf and connect to gluster.
+ *
+ * When @filename is non-NULL it is parsed as a gluster URI; otherwise the
+ * configuration is taken from the @options dictionary.  The filled-in
+ * @gconf is then handed to qemu_gluster_glfs_init().
+ *
+ * Returns the result of qemu_gluster_glfs_init(), or NULL if parsing
+ * failed, in which case @errp is set and errno holds the positive error
+ * code.
+ */
+static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
+                                      const char *filename,
+                                      QDict *options, Error **errp)
+{
+    int ret;
+
+    if (filename) {
+        ret = qemu_gluster_parse_uri(gconf, filename);
+        if (ret < 0) {
+            error_setg(errp, "invalid URI");
+            error_append_hint(errp, "Usage: file=gluster[+transport]://"
+                                    "[host[:port]]/volume/path[?socket=...]\n");
+            errno = -ret;
+            return NULL;
+        }
+    } else {
+        ret = qemu_gluster_parse_json(gconf, options, errp);
+        if (ret < 0) {
+            /*
+             * The per-server transport key is "type"; the hint used to
+             * show "transport" for server.1, which the parser rejects.
+             */
+            error_append_hint(errp, "Usage: "
+                              "-drive driver=qcow2,file.driver=gluster,"
+                              "file.volume=testvol,file.path=/path/a.qcow2"
+                              "[,file.debug=9],file.server.0.type=tcp,"
+                              "file.server.0.host=1.2.3.4,"
+                              "file.server.0.port=24007,"
+                              "file.server.1.type=unix,"
+                              "file.server.1.socket=/var/run/glusterd.socket ..."
+                              "\n");
+            errno = -ret;
+            return NULL;
+        }
+    }
+
+    return qemu_gluster_glfs_init(gconf, errp);
+}
+
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_bh_delete(acb->bh);
acb->bh = NULL;
- qemu_coroutine_enter(acb->coroutine, NULL);
+ qemu_coroutine_enter(acb->coroutine);
}
/*
@@ -256,20 +629,6 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
qemu_bh_schedule(acb->bh);
}
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
- .name = "gluster",
- .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
- .desc = {
- {
- .name = "filename",
- .type = QEMU_OPT_STRING,
- .help = "URL to the gluster image",
- },
- { /* end of list */ }
- },
-};
-
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -287,13 +646,35 @@ static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
}
}
+/*
+ * Do SEEK_DATA/HOLE to detect if it is functional. Older broken versions of
+ * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is used.
+ * - Corrected versions return -1 and set errno to EINVAL.
+ * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and set
+ *   errno to ENXIO when SEEK_DATA is called with a position of EOF.
+ *
+ * Returns true only when SEEK_DATA at EOF fails with ENXIO.  A NULL @fd
+ * (for example the result of a failed glfs_open()) yields false without
+ * touching gfapi.
+ */
+static bool qemu_gluster_test_seek(struct glfs_fd *fd)
+{
+    off_t ret, eof;
+
+    if (!fd) {
+        /* nothing to probe; never hand a NULL handle to glfs_lseek() */
+        return false;
+    }
+
+    eof = glfs_lseek(fd, 0, SEEK_END);
+    if (eof < 0) {
+        /* this should never occur */
+        return false;
+    }
+
+    /* this should always fail with ENXIO if SEEK_DATA is supported */
+    ret = glfs_lseek(fd, eof, SEEK_DATA);
+    return (ret < 0) && (errno == ENXIO);
+}
+
static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
int bdrv_flags, Error **errp)
{
BDRVGlusterState *s = bs->opaque;
int open_flags = 0;
int ret = 0;
- GlusterConf *gconf = g_new0(GlusterConf, 1);
+ BlockdevOptionsGluster *gconf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -306,9 +687,20 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
goto out;
}
- filename = qemu_opt_get(opts, "filename");
+ filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME);
- s->glfs = qemu_gluster_init(gconf, filename, errp);
+ s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
+ GLUSTER_DEBUG_DEFAULT);
+ if (s->debug_level < 0) {
+ s->debug_level = 0;
+ } else if (s->debug_level > GLUSTER_DEBUG_MAX) {
+ s->debug_level = GLUSTER_DEBUG_MAX;
+ }
+
+ gconf = g_new0(BlockdevOptionsGluster, 1);
+ gconf->debug_level = s->debug_level;
+ gconf->has_debug_level = true;
+ s->glfs = qemu_gluster_init(gconf, filename, options, errp);
if (!s->glfs) {
ret = -errno;
goto out;
@@ -333,14 +725,16 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
- s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+ s->fd = glfs_open(s->glfs, gconf->path, open_flags);
if (!s->fd) {
ret = -errno;
}
+ s->supports_seek_data = qemu_gluster_test_seek(s->fd);
+
out:
qemu_opts_del(opts);
- qemu_gluster_gconf_free(gconf);
+ qapi_free_BlockdevOptionsGluster(gconf);
if (!ret) {
return ret;
}
@@ -353,31 +747,29 @@ out:
return ret;
}
-typedef struct BDRVGlusterReopenState {
- struct glfs *glfs;
- struct glfs_fd *fd;
-} BDRVGlusterReopenState;
-
-
static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
int ret = 0;
+ BDRVGlusterState *s;
BDRVGlusterReopenState *reop_s;
- GlusterConf *gconf = NULL;
+ BlockdevOptionsGluster *gconf;
int open_flags = 0;
assert(state != NULL);
assert(state->bs != NULL);
+ s = state->bs->opaque;
+
state->opaque = g_new0(BDRVGlusterReopenState, 1);
reop_s = state->opaque;
qemu_gluster_parse_flags(state->flags, &open_flags);
- gconf = g_new0(GlusterConf, 1);
-
- reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
+ gconf = g_new0(BlockdevOptionsGluster, 1);
+ gconf->debug_level = s->debug_level;
+ gconf->has_debug_level = true;
+ reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
if (reop_s->glfs == NULL) {
ret = -errno;
goto exit;
@@ -393,7 +785,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
}
#endif
- reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
+ reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
ret = -errno;
@@ -402,7 +794,7 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
exit:
/* state->opaque will be freed in either the _abort or _commit */
- qemu_gluster_gconf_free(gconf);
+ qapi_free_BlockdevOptionsGluster(gconf);
return ret;
}
@@ -454,14 +846,14 @@ static void qemu_gluster_reopen_abort(BDRVReopenState *state)
}
#ifdef CONFIG_GLUSTERFS_ZEROFILL
-static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset,
+ int size,
+ BdrvRequestFlags flags)
{
int ret;
GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- off_t size = nb_sectors * BDRV_SECTOR_SIZE;
- off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb.size = size;
acb.ret = 0;
@@ -483,7 +875,7 @@ static inline bool gluster_supports_zerofill(void)
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
- int64_t size)
+ int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
@@ -495,7 +887,7 @@ static inline bool gluster_supports_zerofill(void)
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
- int64_t size)
+ int64_t size)
{
return 0;
}
@@ -504,15 +896,25 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
static int qemu_gluster_create(const char *filename,
QemuOpts *opts, Error **errp)
{
+ BlockdevOptionsGluster *gconf;
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
char *tmp = NULL;
- GlusterConf *gconf = g_new0(GlusterConf, 1);
- glfs = qemu_gluster_init(gconf, filename, errp);
+ gconf = g_new0(BlockdevOptionsGluster, 1);
+ gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
+ GLUSTER_DEBUG_DEFAULT);
+ if (gconf->debug_level < 0) {
+ gconf->debug_level = 0;
+ } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
+ gconf->debug_level = GLUSTER_DEBUG_MAX;
+ }
+ gconf->has_debug_level = true;
+
+ glfs = qemu_gluster_init(gconf, filename, NULL, errp);
if (!glfs) {
ret = -errno;
goto out;
@@ -524,19 +926,17 @@ static int qemu_gluster_create(const char *filename,
tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
if (!tmp || !strcmp(tmp, "off")) {
prealloc = 0;
- } else if (!strcmp(tmp, "full") &&
- gluster_supports_zerofill()) {
+ } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
- " or GlusterFS doesn't support zerofill API",
- tmp);
+ " or GlusterFS doesn't support zerofill API", tmp);
ret = -EINVAL;
goto out;
}
- fd = glfs_creat(glfs, gconf->image,
- O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+ fd = glfs_creat(glfs, gconf->path,
+ O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
if (!fd) {
ret = -errno;
} else {
@@ -554,7 +954,7 @@ static int qemu_gluster_create(const char *filename,
}
out:
g_free(tmp);
- qemu_gluster_gconf_free(gconf);
+ qapi_free_BlockdevOptionsGluster(gconf);
if (glfs) {
glfs_fini(glfs);
}
@@ -562,7 +962,8 @@ out:
}
static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int write)
{
int ret;
GlusterAIOCB acb;
@@ -577,10 +978,10 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- gluster_finish_aiocb, &acb);
+ gluster_finish_aiocb, &acb);
} else {
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- gluster_finish_aiocb, &acb);
+ gluster_finish_aiocb, &acb);
}
if (ret < 0) {
@@ -605,13 +1006,17 @@ static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t sector_num,
+ int nb_sectors,
+ QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
}
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t sector_num,
+ int nb_sectors,
+ QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
@@ -672,14 +1077,12 @@ error:
}
#ifdef CONFIG_GLUSTERFS_DISCARD
-static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int size)
{
int ret;
GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- size_t size = nb_sectors * BDRV_SECTOR_SIZE;
- off_t offset = sector_num * BDRV_SECTOR_SIZE;
acb.size = 0;
acb.ret = 0;
@@ -729,29 +1132,164 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
return 0;
}
-static QemuOptsList qemu_gluster_create_opts = {
- .name = "qemu-gluster-create-opts",
- .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
- .desc = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = QEMU_OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = QEMU_OPT_STRING,
- .help = "Preallocation mode (allowed values: off, full)"
- },
- { /* end of list */ }
+/*
+ * Find allocation range in @bs around offset @start.
+ * May change underlying file descriptor's file offset.
+ * If @start is not in a hole, store @start in @data, and the
+ * beginning of the next hole in @hole, and return 0.
+ * If @start is in a non-trailing hole, store @start in @hole and the
+ * beginning of the next non-hole in @data, and return 0.
+ * If @start is in a trailing hole or beyond EOF, return -ENXIO.
+ * If we can't find out, return a negative errno other than -ENXIO.
+ *
+ * (Shamefully copied from raw-posix.c, with only minuscule adaptations.)
+ */
+static int find_allocation(BlockDriverState *bs, off_t start,
+ off_t *data, off_t *hole)
+{
+ BDRVGlusterState *s = bs->opaque;
+ off_t offs;
+
+ if (!s->supports_seek_data) {
+ return -ENOTSUP;
}
-};
+
+ /*
+ * SEEK_DATA cases:
+ * D1. offs == start: start is in data
+ * D2. offs > start: start is in a hole, next data at offs
+ * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
+ * or start is beyond EOF
+ * If the latter happens, the file has been truncated behind
+ * our back since we opened it. All bets are off then.
+ * Treating like a trailing hole is simplest.
+ * D4. offs < 0, errno != ENXIO: we learned nothing
+ */
+ offs = glfs_lseek(s->fd, start, SEEK_DATA);
+ if (offs < 0) {
+ return -errno; /* D3 or D4 */
+ }
+ assert(offs >= start);
+
+ if (offs > start) {
+ /* D2: in hole, next data at offs */
+ *hole = start;
+ *data = offs;
+ return 0;
+ }
+
+ /* D1: in data, end not yet known */
+
+ /*
+ * SEEK_HOLE cases:
+ * H1. offs == start: start is in a hole
+ * If this happens here, a hole has been dug behind our back
+ * since the previous lseek().
+ * H2. offs > start: either start is in data, next hole at offs,
+ * or start is in trailing hole, EOF at offs
+ * Linux treats trailing holes like any other hole: offs ==
+ * start. Solaris seeks to EOF instead: offs > start (blech).
+ * If that happens here, a hole has been dug behind our back
+ * since the previous lseek().
+ * H3. offs < 0, errno = ENXIO: start is beyond EOF
+ * If this happens, the file has been truncated behind our
+ * back since we opened it. Treat it like a trailing hole.
+ * H4. offs < 0, errno != ENXIO: we learned nothing
+ * Pretend we know nothing at all, i.e. "forget" about D1.
+ */
+ offs = glfs_lseek(s->fd, start, SEEK_HOLE);
+ if (offs < 0) {
+ return -errno; /* D1 and (H3 or H4) */
+ }
+ assert(offs >= start);
+
+ if (offs > start) {
+ /*
+ * D1 and H2: either in data, next hole at offs, or it was in
+ * data but is now in a trailing hole. In the latter case,
+ * all bets are off. Treating it as if there was data all
+ * the way to EOF is safe, so simply do that.
+ */
+ *data = start;
+ *hole = offs;
+ return 0;
+ }
+
+ /* D1 and H1 */
+ return -EBUSY;
+}
+
+/*
+ * Returns the allocation status of the specified sectors.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ *
+ * (Based on raw_co_get_block_status() from raw-posix.c.)
+ */
+static int64_t coroutine_fn qemu_gluster_co_get_block_status(
+ BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+ BlockDriverState **file)
+{
+ BDRVGlusterState *s = bs->opaque;
+ off_t start, data = 0, hole = 0;
+ int64_t total_size;
+ int ret = -EINVAL;
+
+ if (!s->fd) {
+ return ret;
+ }
+
+ start = sector_num * BDRV_SECTOR_SIZE;
+ total_size = bdrv_getlength(bs);
+ if (total_size < 0) {
+ return total_size;
+ } else if (start >= total_size) {
+ *pnum = 0;
+ return 0;
+ } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
+ nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
+ }
+
+ ret = find_allocation(bs, start, &data, &hole);
+ if (ret == -ENXIO) {
+ /* Trailing hole */
+ *pnum = nb_sectors;
+ ret = BDRV_BLOCK_ZERO;
+ } else if (ret < 0) {
+ /* No info available, so pretend there are no holes */
+ *pnum = nb_sectors;
+ ret = BDRV_BLOCK_DATA;
+ } else if (data == start) {
+ /* On a data extent, compute sectors to the end of the extent,
+ * possibly including a partial sector at EOF. */
+ *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
+ ret = BDRV_BLOCK_DATA;
+ } else {
+ /* On a hole, compute sectors to the beginning of the next extent. */
+ assert(hole == start);
+ *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
+ ret = BDRV_BLOCK_ZERO;
+ }
+
+ *file = bs;
+
+ return ret | BDRV_BLOCK_OFFSET_VALID | start;
+}
+
static BlockDriver bdrv_gluster = {
.format_name = "gluster",
.protocol_name = "gluster",
.instance_size = sizeof(BDRVGlusterState),
- .bdrv_needs_filename = true,
+ .bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -766,11 +1304,12 @@ static BlockDriver bdrv_gluster = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -778,7 +1317,7 @@ static BlockDriver bdrv_gluster_tcp = {
.format_name = "gluster",
.protocol_name = "gluster+tcp",
.instance_size = sizeof(BDRVGlusterState),
- .bdrv_needs_filename = true,
+ .bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -793,11 +1332,12 @@ static BlockDriver bdrv_gluster_tcp = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -820,14 +1360,21 @@ static BlockDriver bdrv_gluster_unix = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
+/* rdma is deprecated (actually never supported for volfile fetch).
+ * Let's maintain it for the protocol compatibility, to make sure things
+ * won't break immediately. For now, gluster+rdma will fall back to gluster+tcp
+ * protocol with a warning.
+ * TODO: remove gluster+rdma interface support
+ */
static BlockDriver bdrv_gluster_rdma = {
.format_name = "gluster",
.protocol_name = "gluster+rdma",
@@ -847,11 +1394,12 @@ static BlockDriver bdrv_gluster_rdma = {
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
diff --git a/block/io.c b/block/io.c
index a7dbf85b1..420944d80 100644
--- a/block/io.c
+++ b/block/io.c
@@ -27,118 +27,54 @@
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
-#include "block/throttle-groups.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BdrvRequestFlags flags,
- BlockCompletionFunc *cb,
- void *opaque,
- bool is_write);
+static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
+ int64_t offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags);
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- ThrottleConfig *cfg)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
{
- int i;
-
- throttle_group_config(bs, cfg);
-
- for (i = 0; i < 2; i++) {
- qemu_co_enter_next(&bs->throttled_reqs[i]);
- }
-}
-
-/* this function drain all the throttled IOs */
-static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
-{
- bool drained = false;
- bool enabled = bs->io_limits_enabled;
- int i;
-
- bs->io_limits_enabled = false;
+ BdrvChild *c;
- for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
- drained = true;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_begin) {
+ c->role->drained_begin(c);
}
}
-
- bs->io_limits_enabled = enabled;
-
- return drained;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- bs->io_limits_enabled = false;
- bdrv_start_throttled_reqs(bs);
- throttle_group_unregister_bs(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
- assert(!bs->io_limits_enabled);
- throttle_group_register_bs(bs, group);
- bs->io_limits_enabled = true;
}
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
{
- /* this bs is not part of any group */
- if (!bs->throttle_state) {
- return;
- }
+ BdrvChild *c;
- /* this bs is a part of the same group than the one we want */
- if (!g_strcmp0(throttle_group_get_name(bs), group)) {
- return;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_end) {
+ c->role->drained_end(c);
+ }
}
-
- /* need to change the group this bs belong to */
- bdrv_io_limits_disable(bs);
- bdrv_io_limits_enable(bs, group);
}
-void bdrv_setup_io_funcs(BlockDriver *bdrv)
+static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
- /* Block drivers without coroutine functions need emulation */
- if (!bdrv->bdrv_co_readv) {
- bdrv->bdrv_co_readv = bdrv_co_readv_em;
- bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
- /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
- * the block driver lacks aio we need to emulate that too.
- */
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- }
- }
+ dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
+ dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+ dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
+ src->opt_mem_alignment);
+ dst->min_mem_alignment = MAX(dst->min_mem_alignment,
+ src->min_mem_alignment);
+ dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -152,6 +88,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
return;
}
+ /* Default alignment based on whether driver has byte interface */
+ bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;
+
/* Take some limits from the children as a default */
if (bs->file) {
bdrv_refresh_limits(bs->file->bs, &local_err);
@@ -159,11 +98,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
error_propagate(errp, local_err);
return;
}
- bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
- bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
- bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
- bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
- bs->bl.max_iov = bs->file->bs->bl.max_iov;
+ bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
} else {
bs->bl.min_mem_alignment = 512;
bs->bl.opt_mem_alignment = getpagesize();
@@ -178,21 +113,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
error_propagate(errp, local_err);
return;
}
- bs->bl.opt_transfer_length =
- MAX(bs->bl.opt_transfer_length,
- bs->backing->bs->bl.opt_transfer_length);
- bs->bl.max_transfer_length =
- MIN_NON_ZERO(bs->bl.max_transfer_length,
- bs->backing->bs->bl.max_transfer_length);
- bs->bl.opt_mem_alignment =
- MAX(bs->bl.opt_mem_alignment,
- bs->backing->bs->bl.opt_mem_alignment);
- bs->bl.min_mem_alignment =
- MAX(bs->bl.min_mem_alignment,
- bs->backing->bs->bl.min_mem_alignment);
- bs->bl.max_iov =
- MIN(bs->bl.max_iov,
- bs->backing->bs->bl.max_iov);
+ bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
}
/* Then let the driver override it */
@@ -225,12 +146,6 @@ bool bdrv_requests_pending(BlockDriverState *bs)
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
- if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
- return true;
- }
- if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
- return true;
- }
QLIST_FOREACH(child, &bs->children, next) {
if (bdrv_requests_pending(child->bs)) {
@@ -260,18 +175,29 @@ typedef struct {
bool done;
} BdrvCoDrainData;
+static void bdrv_drain_poll(BlockDriverState *bs)
+{
+ bool busy = true;
+
+ while (busy) {
+ /* Keep iterating */
+ busy = bdrv_requests_pending(bs);
+ busy |= aio_poll(bdrv_get_aio_context(bs), busy);
+ }
+}
+
static void bdrv_co_drain_bh_cb(void *opaque)
{
BdrvCoDrainData *data = opaque;
Coroutine *co = data->co;
qemu_bh_delete(data->bh);
- bdrv_drain(data->bs);
+ bdrv_drain_poll(data->bs);
data->done = true;
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
}
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
{
BdrvCoDrainData data;
@@ -294,6 +220,34 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
assert(data.done);
}
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+ if (!bs->quiesce_counter++) {
+ aio_disable_external(bdrv_get_aio_context(bs));
+ bdrv_parent_drained_begin(bs);
+ }
+
+ bdrv_io_unplugged_begin(bs);
+ bdrv_drain_recurse(bs);
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(bs);
+ } else {
+ bdrv_drain_poll(bs);
+ }
+ bdrv_io_unplugged_end(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+ assert(bs->quiesce_counter > 0);
+ if (--bs->quiesce_counter > 0) {
+ return;
+ }
+
+ bdrv_parent_drained_end(bs);
+ aio_enable_external(bdrv_get_aio_context(bs));
+}
+
/*
* Wait for pending requests to complete on a single BlockDriverState subtree,
* and suspend block driver's internal I/O until next request arrives.
@@ -305,21 +259,17 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
* not depend on events in other AioContexts. In that case, use
* bdrv_drain_all() instead.
*/
-void bdrv_drain(BlockDriverState *bs)
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
- bool busy = true;
+ assert(qemu_in_coroutine());
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
+}
- bdrv_drain_recurse(bs);
- if (qemu_in_coroutine()) {
- bdrv_co_drain(bs);
- return;
- }
- while (busy) {
- /* Keep iterating */
- bdrv_flush_io_queue(bs);
- busy = bdrv_requests_pending(bs);
- busy |= aio_poll(bdrv_get_aio_context(bs), busy);
- }
+void bdrv_drain(BlockDriverState *bs)
+{
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
}
/*
@@ -332,16 +282,25 @@ void bdrv_drain_all(void)
{
/* Always run first iteration so any pending completion BHs run */
bool busy = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
+ BlockJob *job = NULL;
GSList *aio_ctxs = NULL, *ctx;
- while ((bs = bdrv_next(bs))) {
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_pause(job);
+ aio_context_release(aio_context);
+ }
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_pause(bs->job);
- }
+ bdrv_parent_drained_begin(bs);
+ bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
aio_context_release(aio_context);
@@ -361,12 +320,10 @@ void bdrv_drain_all(void)
for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
AioContext *aio_context = ctx->data;
- bs = NULL;
aio_context_acquire(aio_context);
- while ((bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
if (aio_context == bdrv_get_aio_context(bs)) {
- bdrv_flush_io_queue(bs);
if (bdrv_requests_pending(bs)) {
busy = true;
aio_poll(aio_context, busy);
@@ -378,17 +335,24 @@ void bdrv_drain_all(void)
}
}
- bs = NULL;
- while ((bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_resume(bs->job);
- }
+ bdrv_io_unplugged_end(bs);
+ bdrv_parent_drained_end(bs);
aio_context_release(aio_context);
}
g_slist_free(aio_ctxs);
+
+ job = NULL;
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_resume(job);
+ aio_context_release(aio_context);
+ }
}
/**
@@ -447,12 +411,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
}
/**
- * Round a region to cluster boundaries
+ * Round a region to cluster boundaries (sector-based)
*/
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
@@ -467,6 +431,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
}
}
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_offset = offset;
+ *cluster_bytes = bytes;
+ } else {
+ int64_t c = bdi.cluster_size;
+ *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+ *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ }
+}
+
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
@@ -474,7 +458,7 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
ret = bdrv_get_info(bs, &bdi);
if (ret < 0 || bdi.cluster_size == 0) {
- return bs->request_alignment;
+ return bs->bl.request_alignment;
} else {
return bdi.cluster_size;
}
@@ -568,7 +552,7 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
}
typedef struct RwCo {
- BlockDriverState *bs;
+ BdrvChild *child;
int64_t offset;
QEMUIOVector *qiov;
bool is_write;
@@ -581,26 +565,26 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
RwCo *rwco = opaque;
if (!rwco->is_write) {
- rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
+ rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
} else {
- rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
+ rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
}
}
/*
* Process a vectored synchronous request using coroutines
*/
-static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
QEMUIOVector *qiov, bool is_write,
BdrvRequestFlags flags)
{
Coroutine *co;
RwCo rwco = {
- .bs = bs,
+ .child = child,
.offset = offset,
.qiov = qiov,
.is_write = is_write,
@@ -608,25 +592,14 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
.flags = flags,
};
- /**
- * In sync call context, when the vcpu is blocked, this throttling timer
- * will not fire; so the I/O throttling function has to be disabled here
- * if it has been enabled.
- */
- if (bs->io_limits_enabled) {
- fprintf(stderr, "Disabling I/O throttling on '%s' due "
- "to synchronous I/O.\n", bdrv_get_device_name(bs));
- bdrv_io_limits_disable(bs);
- }
-
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
} else {
- AioContext *aio_context = bdrv_get_aio_context(bs);
+ AioContext *aio_context = bdrv_get_aio_context(child->bs);
- co = qemu_coroutine_create(bdrv_rw_co_entry);
- qemu_coroutine_enter(co, &rwco);
+ co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
+ qemu_coroutine_enter(co);
while (rwco.ret == NOT_DONE) {
aio_poll(aio_context, true);
}
@@ -637,7 +610,7 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
/*
* Process a synchronous request using coroutines
*/
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
QEMUIOVector qiov;
@@ -651,15 +624,15 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
}
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+ return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
&qiov, is_write, flags);
}
/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+int bdrv_read(BdrvChild *child, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
+ return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
}
/* Return < 0 if error. Important errors are:
@@ -668,30 +641,39 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
-EINVAL Invalid sector number or nb_sectors
-EACCES Trying to write a read-only device
*/
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+int bdrv_write(BdrvChild *child, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+ return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
- BDRV_REQ_ZERO_WRITE | flags);
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = NULL,
+ .iov_len = count,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_prwv_co(child, offset, &qiov, true,
+ BDRV_REQ_ZERO_WRITE | flags);
}
/*
- * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
* The operation is sped up by checking the block status and only writing
* zeroes to the device if they currently do not return zeroes. Optional
- * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
+ * BDRV_REQ_FUA).
*
* Returns < 0 on error, 0 on success. For error codes see bdrv_write().
*/
-int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+ BlockDriverState *bs = child->bs;
BlockDriverState *file;
int n;
@@ -715,7 +697,8 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
sector_num += n;
continue;
}
- ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+ ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, flags);
if (ret < 0) {
error_report("error writing zeroes at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
@@ -725,33 +708,39 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
}
}
-int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
+int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
+{
+ int ret;
+
+ ret = bdrv_prwv_co(child, offset, qiov, false, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return qiov->size;
+}
+
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
QEMUIOVector qiov;
struct iovec iov = {
.iov_base = (void *)buf,
.iov_len = bytes,
};
- int ret;
if (bytes < 0) {
return -EINVAL;
}
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
- if (ret < 0) {
- return ret;
- }
-
- return bytes;
+ return bdrv_preadv(child, offset, &qiov);
}
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
int ret;
- ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+ ret = bdrv_prwv_co(child, offset, qiov, true, 0);
if (ret < 0) {
return ret;
}
@@ -759,8 +748,7 @@ int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
return qiov->size;
}
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int bytes)
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
QEMUIOVector qiov;
struct iovec iov = {
@@ -773,7 +761,7 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
}
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_pwritev(bs, offset, &qiov);
+ return bdrv_pwritev(child, offset, &qiov);
}
/*
@@ -782,17 +770,17 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
*
* Returns 0 on success, -errno in error cases.
*/
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count)
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int count)
{
int ret;
- ret = bdrv_pwrite(bs, offset, buf, count);
+ ret = bdrv_pwrite(child, offset, buf, count);
if (ret < 0) {
return ret;
}
- ret = bdrv_flush(bs);
+ ret = bdrv_flush(child->bs);
if (ret < 0) {
return ret;
}
@@ -800,8 +788,117 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return 0;
}
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine);
+}
+
+static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+
+ assert(!(flags & ~BDRV_REQ_MASK));
+
+ if (drv->bdrv_co_preadv) {
+ return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
+ }
+
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_readv) {
+ return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield();
+ return co.ret;
+ }
+ }
+}
+
+static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+ int ret;
+
+ assert(!(flags & ~BDRV_REQ_MASK));
+
+ if (drv->bdrv_co_pwritev) {
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ goto emulate_flags;
+ }
+
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_writev_flags) {
+ ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ } else if (drv->bdrv_co_writev) {
+ assert(!bs->supported_write_flags);
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield();
+ ret = co.ret;
+ }
+ }
+
+emulate_flags:
+ if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+ ret = bdrv_co_flush(bs);
+ }
+
+ return ret;
+}
+
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
@@ -813,21 +910,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
struct iovec iov;
QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
+ int64_t cluster_offset;
+ unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
+ trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+ cluster_offset, cluster_bytes);
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_len = cluster_bytes;
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
if (bounce_buffer == NULL) {
ret = -ENOMEM;
@@ -836,22 +932,24 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
+ &bounce_qiov, 0);
if (ret < 0) {
goto err;
}
- if (drv->bdrv_co_write_zeroes &&
+ if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors, 0);
+ /* FIXME: Should we (perhaps conditionally) be setting
+ * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+ * that still correctly reads as zero? */
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
*/
- ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
+ &bounce_qiov, 0);
}
if (ret < 0) {
@@ -862,9 +960,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
goto err;
}
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
+ skip_bytes = offset - cluster_offset;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
err:
qemu_vfree(bounce_buffer);
@@ -873,23 +970,31 @@ err:
/*
* Forwards an already correctly aligned request to the BlockDriver. This
- * handles copy on read and zeroing after EOF; any other features must be
- * implemented by the caller.
+ * handles copy on read, zeroing after EOF, and fragmentation of large
+ * reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
- BlockDriver *drv = bs->drv;
- int ret;
-
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t total_bytes, max_bytes;
+ int ret = 0;
+ uint64_t bytes_remaining = bytes;
+ int max_transfer;
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
+ align);
+
+ /* TODO: We would need a per-BDS .supported_read_flags and
+ * potential fallback support, if we ever implement any read flags
+ * to pass through to drivers. For now, there aren't any
+ * passthrough flags. */
+ assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -906,76 +1011,77 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ unsigned int nb_sectors = end_sector - start_sector;
int pnum;
- ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
goto out;
}
}
- /* Forward the request to the BlockDriver */
- if (!bs->zero_beyond_eof) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
- } else {
- /* Read zeros after EOF */
- int64_t total_sectors, max_nb_sectors;
+ /* Forward the request to the BlockDriver, possibly fragmenting it */
+ total_bytes = bdrv_getlength(bs);
+ if (total_bytes < 0) {
+ ret = total_bytes;
+ goto out;
+ }
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- ret = total_sectors;
- goto out;
- }
+ max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+ if (bytes <= max_bytes && bytes <= max_transfer) {
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
+ goto out;
+ }
+
+ while (bytes_remaining) {
+ int num;
- max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
- align >> BDRV_SECTOR_BITS);
- if (nb_sectors < max_nb_sectors) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
- } else if (max_nb_sectors > 0) {
+ if (max_bytes) {
QEMUIOVector local_qiov;
+ num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
+ assert(num);
qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, 0,
- max_nb_sectors * BDRV_SECTOR_SIZE);
-
- ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
- &local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
+ ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
+ num, &local_qiov, 0);
+ max_bytes -= num;
qemu_iovec_destroy(&local_qiov);
} else {
- ret = 0;
+ num = bytes_remaining;
+ ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
+ bytes_remaining);
}
-
- /* Reading beyond end of file is supposed to produce zeroes */
- if (ret == 0 && total_sectors < sector_num + nb_sectors) {
- uint64_t offset = MAX(0, total_sectors - sector_num);
- uint64_t bytes = (sector_num + nb_sectors - offset) *
- BDRV_SECTOR_SIZE;
- qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+ if (ret < 0) {
+ goto out;
}
+ bytes_remaining -= num;
}
out:
- return ret;
+ return ret < 0 ? ret : 0;
}
/*
* Handle a read request in coroutine context
*/
-int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->bl.request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -996,11 +1102,6 @@ int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
flags |= BDRV_REQ_COPY_ON_READ;
}
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- throttle_group_co_io_limits_intercept(bs, bytes, false);
- }
-
/* Align read if necessary by padding qiov */
if (offset & (align - 1)) {
head_buf = qemu_blockalign(bs, align);
@@ -1041,7 +1142,7 @@ int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
return ret;
}
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1049,67 +1150,56 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
return -EINVAL;
}
- return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+ return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
- trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+ trace_bdrv_co_readv(child->bs, sector_num, nb_sectors);
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+ return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
}
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
+/* Maximum buffer for write zeroes fallback, in bytes */
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
- BDRV_REQ_NO_SERIALISING);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
- BDRV_REQ_COPY_ON_READ);
-}
-
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
-
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
struct iovec iov = {0};
int ret = 0;
+ bool need_flush = false;
+ int head = 0;
+ int tail = 0;
+
+ int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
+ int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
+ bs->bl.request_alignment);
- int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
- BDRV_REQUEST_MAX_SECTORS);
+ assert(alignment % bs->bl.request_alignment == 0);
+ head = offset % alignment;
+ tail = (offset + count) % alignment;
+ max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
+ assert(max_write_zeroes >= bs->bl.request_alignment);
- while (nb_sectors > 0 && !ret) {
- int num = nb_sectors;
+ while (count > 0 && !ret) {
+ int num = count;
/* Align request. Block drivers can expect the "bulk" of the request
- * to be aligned.
+ * to be aligned, and that unaligned requests do not cross cluster
+ * boundaries.
*/
- if (bs->bl.write_zeroes_alignment
- && num > bs->bl.write_zeroes_alignment) {
- if (sector_num % bs->bl.write_zeroes_alignment != 0) {
- /* Make a small request up to the first aligned sector. */
- num = bs->bl.write_zeroes_alignment;
- num -= sector_num % bs->bl.write_zeroes_alignment;
- } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
- /* Shorten the request to the last aligned sector. num cannot
- * underflow because num > bs->bl.write_zeroes_alignment.
- */
- num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
- }
+ if (head) {
+ /* Make a small request up to the first aligned sector. */
+ num = MIN(count, alignment - head);
+ head = 0;
+ } else if (tail && num > alignment) {
+ /* Shorten the request to the last aligned sector. */
+ num -= tail;
}
/* limit request size */
@@ -1119,64 +1209,90 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
ret = -ENOTSUP;
/* First try the efficient write zeroes operation */
- if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+ if (drv->bdrv_co_pwrite_zeroes) {
+ ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
+ flags & bs->supported_zero_flags);
+ if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
+ need_flush = true;
+ }
+ } else {
+ assert(!bs->supported_zero_flags);
}
if (ret == -ENOTSUP) {
/* Fall back to bounce buffer if write zeroes is unsupported */
- int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
MAX_WRITE_ZEROES_BOUNCE_BUFFER);
- num = MIN(num, max_xfer_len);
- iov.iov_len = num * BDRV_SECTOR_SIZE;
+ BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
+
+ if ((flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* No need for bdrv_driver_pwritev() to do a fallback
+ * flush on each chunk; use just one at the end */
+ write_flags &= ~BDRV_REQ_FUA;
+ need_flush = true;
+ }
+ num = MIN(num, max_transfer);
+ iov.iov_len = num;
if (iov.iov_base == NULL) {
- iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ iov.iov_base = qemu_try_blockalign(bs, num);
if (iov.iov_base == NULL) {
ret = -ENOMEM;
goto fail;
}
- memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
+ memset(iov.iov_base, 0, num);
}
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+ ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
/* Keep bounce buffer around if it is big enough for all
* future requests.
*/
- if (num < max_xfer_len) {
+ if (num < max_transfer) {
qemu_vfree(iov.iov_base);
iov.iov_base = NULL;
}
}
- sector_num += num;
- nb_sectors -= num;
+ offset += num;
+ count -= num;
}
fail:
+ if (ret == 0 && need_flush) {
+ ret = bdrv_co_flush(bs);
+ }
qemu_vfree(iov.iov_base);
return ret;
}
/*
- * Forwards an already correctly aligned write request to the BlockDriver.
+ * Forwards an already correctly aligned write request to the BlockDriver,
+ * after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, int flags)
+ int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriver *drv = bs->drv;
bool waited;
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ uint64_t bytes_remaining = bytes;
+ int max_transfer;
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ assert(!(flags & ~BDRV_REQ_MASK));
+ max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
+ align);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
@@ -1186,7 +1302,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
- !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
+ !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
qemu_iovec_is_zero(qiov)) {
flags |= BDRV_REQ_ZERO_WRITE;
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
@@ -1198,32 +1314,48 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
- } else if (drv->bdrv_co_writev_flags) {
+ ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
+ } else if (bytes <= max_transfer) {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
- flags);
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
} else {
- assert(drv->supported_write_flags == 0);
bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+ while (bytes_remaining) {
+ int num = MIN(bytes_remaining, max_transfer);
+ QEMUIOVector local_qiov;
+ int local_flags = flags;
+
+ assert(num);
+ if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* If FUA is going to be emulated by flush, we only
+ * need to flush on the last iteration */
+ local_flags &= ~BDRV_REQ_FUA;
+ }
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
- if (ret == 0 && (flags & BDRV_REQ_FUA) &&
- !(drv->supported_write_flags & BDRV_REQ_FUA))
- {
- ret = bdrv_co_flush(bs);
+ ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
+ num, &local_qiov, local_flags);
+ qemu_iovec_destroy(&local_qiov);
+ if (ret < 0) {
+ break;
+ }
+ bytes_remaining -= num;
+ }
}
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ ++bs->write_gen;
+ bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
if (ret >= 0) {
- bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+ bs->total_sectors = MAX(bs->total_sectors, end_sector);
+ ret = 0;
}
return ret;
@@ -1238,7 +1370,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->bl.request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
@@ -1271,7 +1403,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
- &local_qiov,
+ align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
goto fail;
@@ -1284,7 +1416,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
- ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes,
+ ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1308,7 +1440,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
- ret = bdrv_aligned_pwritev(bs, req, offset, align,
+ ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1320,13 +1452,13 @@ fail:
/*
* Handle a write request in coroutine context
*/
-int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ BlockDriverState *bs = child->bs;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->bl.request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -1346,11 +1478,6 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
return ret;
}
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- throttle_group_co_io_limits_intercept(bs, bytes, true);
- }
-
/*
* Align write if necessary by performing a read-modify-write cycle.
* Pad qiov with the read parts and be sure to have a tracked request not
@@ -1392,6 +1519,14 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
bytes += offset & (align - 1);
offset = offset & ~(align - 1);
+
+ /* We have read the tail already if the request is smaller
+ * than one aligned block.
+ */
+ if (bytes < align) {
+ qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
+ bytes = align;
+ }
}
if ((offset + bytes) & (align - 1)) {
@@ -1431,7 +1566,7 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
bytes = ROUND_UP(bytes, align);
}
- ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+ ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
@@ -1447,7 +1582,7 @@ out:
return ret;
}
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1455,30 +1590,29 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
return -EINVAL;
}
- return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+ return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+ trace_bdrv_co_writev(child->bs, sector_num, nb_sectors);
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+ return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0);
}
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BdrvRequestFlags flags)
+int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
+ trace_bdrv_co_pwrite_zeroes(child->bs, offset, count, flags);
- if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
flags &= ~BDRV_REQ_MAY_UNMAP;
}
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE | flags);
+ return bdrv_co_pwritev(child, offset, count, NULL,
+ BDRV_REQ_ZERO_WRITE | flags);
}
typedef struct BdrvCoGetBlockStatusData {
@@ -1663,8 +1797,9 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
- co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
- qemu_coroutine_enter(co, &data);
+ co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
+ &data);
+ qemu_coroutine_enter(co);
while (!data.done) {
aio_poll(aio_context, true);
}
@@ -1766,273 +1901,134 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = size,
- };
+typedef struct BdrvVmstateCo {
+ BlockDriverState *bs;
+ QEMUIOVector *qiov;
+ int64_t pos;
+ bool is_read;
+ int ret;
+} BdrvVmstateCo;
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_writev_vmstate(bs, &qiov, pos);
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+static int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
- } else if (drv->bdrv_save_vmstate) {
- return drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (drv->bdrv_load_vmstate) {
+ return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
+ : drv->bdrv_save_vmstate(bs, qiov, pos);
} else if (bs->file) {
- return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
+ return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
}
return -ENOTSUP;
}
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (drv->bdrv_load_vmstate)
- return drv->bdrv_load_vmstate(bs, buf, pos, size);
- if (bs->file)
- return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
- return -ENOTSUP;
-}
-
-/**************************************************************/
-/* async I/Os */
-
-BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
- cb, opaque, false);
-}
-
-BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
- trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
- cb, opaque, true);
+ BdrvVmstateCo *co = opaque;
+ co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
}
-BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
+static inline int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
{
- trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
- BDRV_REQ_ZERO_WRITE | flags,
- cb, opaque, true);
-}
-
-
-typedef struct MultiwriteCB {
- int error;
- int num_requests;
- int num_callbacks;
- struct {
- BlockCompletionFunc *cb;
- void *opaque;
- QEMUIOVector *free_qiov;
- } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
- int i;
+ if (qemu_in_coroutine()) {
+ return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+ } else {
+ BdrvVmstateCo data = {
+ .bs = bs,
+ .qiov = qiov,
+ .pos = pos,
+ .is_read = is_read,
+ .ret = -EINPROGRESS,
+ };
+ Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
- for (i = 0; i < mcb->num_callbacks; i++) {
- mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
- if (mcb->callbacks[i].free_qiov) {
- qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(bs), true);
}
- g_free(mcb->callbacks[i].free_qiov);
+ return data.ret;
}
}
-static void multiwrite_cb(void *opaque, int ret)
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size)
{
- MultiwriteCB *mcb = opaque;
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = size,
+ };
+ int ret;
- trace_multiwrite_cb(mcb, ret);
+ qemu_iovec_init_external(&qiov, &iov, 1);
- if (ret < 0 && !mcb->error) {
- mcb->error = ret;
+ ret = bdrv_writev_vmstate(bs, &qiov, pos);
+ if (ret < 0) {
+ return ret;
}
- mcb->num_requests--;
- if (mcb->num_requests == 0) {
- multiwrite_user_cb(mcb);
- g_free(mcb);
- }
+ return size;
}
-static int multiwrite_req_compare(const void *a, const void *b)
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- const BlockRequest *req1 = a, *req2 = b;
-
- /*
- * Note that we can't simply subtract req2->sector from req1->sector
- * here as that could overflow the return value.
- */
- if (req1->sector > req2->sector) {
- return 1;
- } else if (req1->sector < req2->sector) {
- return -1;
- } else {
- return 0;
- }
+ return bdrv_rw_vmstate(bs, qiov, pos, false);
}
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs, MultiwriteCB *mcb)
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
{
- int i, outidx;
-
- // Sort requests by start sector
- qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
- // Check if adjacent requests touch the same clusters. If so, combine them,
- // filling up gaps with zero sectors.
- outidx = 0;
- for (i = 1; i < num_reqs; i++) {
- int merge = 0;
- int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
- // Handle exactly sequential writes and overlapping writes.
- if (reqs[i].sector <= oldreq_last) {
- merge = 1;
- }
-
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
- bs->bl.max_iov) {
- merge = 0;
- }
-
- if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
- reqs[i].nb_sectors > bs->bl.max_transfer_length) {
- merge = 0;
- }
-
- if (merge) {
- size_t size;
- QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
- qemu_iovec_init(qiov,
- reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
- // Add the first request to the merged one. If the requests are
- // overlapping, drop the last sectors of the first request.
- size = (reqs[i].sector - reqs[outidx].sector) << 9;
- qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
- // We should need to add any zeros between the two requests
- assert (reqs[i].sector <= oldreq_last);
-
- // Add the second request
- qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
- // Add tail of first request, if necessary
- if (qiov->size < reqs[outidx].qiov->size) {
- qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
- reqs[outidx].qiov->size - qiov->size);
- }
-
- reqs[outidx].nb_sectors = qiov->size >> 9;
- reqs[outidx].qiov = qiov;
-
- mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
- } else {
- outidx++;
- reqs[outidx].sector = reqs[i].sector;
- reqs[outidx].nb_sectors = reqs[i].nb_sectors;
- reqs[outidx].qiov = reqs[i].qiov;
- }
- }
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ int ret;
- if (bs->blk) {
- block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
- num_reqs - outidx - 1);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ ret = bdrv_readv_vmstate(bs, &qiov, pos);
+ if (ret < 0) {
+ return ret;
}
- return outidx + 1;
+ return size;
}
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- MultiwriteCB *mcb;
- int i;
-
- /* don't submit writes if we don't have a medium */
- if (bs->drv == NULL) {
- for (i = 0; i < num_reqs; i++) {
- reqs[i].error = -ENOMEDIUM;
- }
- return -1;
- }
-
- if (num_reqs == 0) {
- return 0;
- }
-
- // Create MultiwriteCB structure
- mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
- mcb->num_requests = 0;
- mcb->num_callbacks = num_reqs;
+ return bdrv_rw_vmstate(bs, qiov, pos, true);
+}
- for (i = 0; i < num_reqs; i++) {
- mcb->callbacks[i].cb = reqs[i].cb;
- mcb->callbacks[i].opaque = reqs[i].opaque;
- }
+/**************************************************************/
+/* async I/Os */
- // Check for mergable requests
- num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
- trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+ assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
+ return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
+ 0, cb, opaque, false);
+}
- /* Run the aio requests. */
- mcb->num_requests = num_reqs;
- for (i = 0; i < num_reqs; i++) {
- bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
- reqs[i].nb_sectors, reqs[i].flags,
- multiwrite_cb, mcb,
- true);
- }
+BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
- return 0;
+ assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
+ return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
+ 0, cb, opaque, true);
}
void bdrv_aio_cancel(BlockAIOCB *acb)
@@ -2064,82 +2060,30 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* async block device emulation */
-typedef struct BlockAIOCBSync {
- BlockAIOCB common;
- QEMUBH *bh;
- int ret;
- /* vector translation state */
- QEMUIOVector *qiov;
- uint8_t *bounce;
- int is_write;
-} BlockAIOCBSync;
-
-static const AIOCBInfo bdrv_em_aiocb_info = {
- .aiocb_size = sizeof(BlockAIOCBSync),
-};
-
-static void bdrv_aio_bh_cb(void *opaque)
-{
- BlockAIOCBSync *acb = opaque;
-
- if (!acb->is_write && acb->ret >= 0) {
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
- }
- qemu_vfree(acb->bounce);
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- BlockAIOCBSync *acb;
-
- acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
- acb->is_write = is_write;
- acb->qiov = qiov;
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
-
- if (acb->bounce == NULL) {
- acb->ret = -ENOMEM;
- } else if (is_write) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
- } else {
- acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
- }
-
- qemu_bh_schedule(acb->bh);
-
- return &acb->common;
-}
-
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-}
+typedef struct BlockRequest {
+ union {
+ /* Used during read, write, trim */
+ struct {
+ int64_t offset;
+ int bytes;
+ int flags;
+ QEMUIOVector *qiov;
+ };
+ /* Used during ioctl */
+ struct {
+ int req;
+ void *buf;
+ };
+ };
+ BlockCompletionFunc *cb;
+ void *opaque;
+ int error;
+} BlockRequest;
typedef struct BlockAIOCBCoroutine {
BlockAIOCB common;
+ BdrvChild *child;
BlockRequest req;
bool is_write;
bool need_bh;
@@ -2183,42 +2127,40 @@ static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
BlockAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
if (!acb->is_write) {
- acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+ acb->req.error = bdrv_co_preadv(acb->child, acb->req.offset,
+ acb->req.qiov->size, acb->req.qiov, acb->req.flags);
} else {
- acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+ acb->req.error = bdrv_co_pwritev(acb->child, acb->req.offset,
+ acb->req.qiov->size, acb->req.qiov, acb->req.flags);
}
bdrv_co_complete(acb);
}
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BdrvRequestFlags flags,
- BlockCompletionFunc *cb,
- void *opaque,
- bool is_write)
+static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
+ int64_t offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ bool is_write)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
+ acb->child = child;
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
+ acb->req.offset = offset;
acb->req.qiov = qiov;
acb->req.flags = flags;
acb->is_write = is_write;
- co = qemu_coroutine_create(bdrv_co_do_rw);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(bdrv_co_do_rw, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
@@ -2245,38 +2187,37 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
- co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque)
{
BlockAIOCBCoroutine *acb = opaque;
BlockDriverState *bs = acb->common.bs;
- acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+ acb->req.error = bdrv_co_pdiscard(bs, acb->req.offset, acb->req.bytes);
bdrv_co_complete(acb);
}
-BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count,
+ BlockCompletionFunc *cb, void *opaque)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
- trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+ trace_bdrv_aio_pdiscard(bs, offset, count, opaque);
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
- co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
- qemu_coroutine_enter(co, acb);
+ acb->req.offset = offset;
+ acb->req.bytes = count;
+ co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
@@ -2314,62 +2255,15 @@ void qemu_aio_unref(void *p)
/**************************************************************/
/* Coroutine block device emulation */
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine;
+typedef struct FlushCo {
+ BlockDriverState *bs;
int ret;
-} CoroutineIOCompletion;
-
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
- CoroutineIOCompletion *co = opaque;
-
- co->ret = ret;
- qemu_coroutine_enter(co->coroutine, NULL);
-}
-
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *iov,
- bool is_write)
-{
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
- BlockAIOCB *acb;
-
- if (is_write) {
- acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- } else {
- acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- }
-
- trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
- if (!acb) {
- return -EIO;
- }
- qemu_coroutine_yield();
-
- return co.ret;
-}
-
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
-}
+} FlushCo;
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
-}
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
- RwCo *rwco = opaque;
+ FlushCo *rwco = opaque;
rwco->ret = bdrv_co_flush(rwco->bs);
}
@@ -2386,6 +2280,15 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+ int current_gen = bs->write_gen;
+
+ /* Wait until any previous flushes are completed */
+ while (bs->active_flush_req != NULL) {
+ qemu_co_queue_wait(&bs->flush_queue);
+ }
+
+ bs->active_flush_req = &req;
+
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
ret = bs->drv->bdrv_co_flush(bs);
@@ -2406,6 +2309,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto flush_parent;
}
+ /* Check if we really need to flush anything */
+ if (bs->flushed_gen == current_gen) {
+ goto flush_parent;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
if (bs->drv->bdrv_co_flush_to_disk) {
ret = bs->drv->bdrv_co_flush_to_disk(bs);
@@ -2436,6 +2344,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
*/
ret = 0;
}
+
if (ret < 0) {
goto out;
}
@@ -2446,6 +2355,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
flush_parent:
ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
+ /* Notify any pending flushes that we have completed */
+ bs->flushed_gen = current_gen;
+ bs->active_flush_req = NULL;
+ /* Return value is ignored - it's ok if wait queue is empty */
+ qemu_co_queue_next(&bs->flush_queue);
+
tracked_request_end(&req);
return ret;
}
@@ -2453,51 +2368,52 @@ out:
int bdrv_flush(BlockDriverState *bs)
{
Coroutine *co;
- RwCo rwco = {
+ FlushCo flush_co = {
.bs = bs,
.ret = NOT_DONE,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
- bdrv_flush_co_entry(&rwco);
+ bdrv_flush_co_entry(&flush_co);
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
- co = qemu_coroutine_create(bdrv_flush_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
+ co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
+ qemu_coroutine_enter(co);
+ while (flush_co.ret == NOT_DONE) {
aio_poll(aio_context, true);
}
}
- return rwco.ret;
+ return flush_co.ret;
}
typedef struct DiscardCo {
BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
+ int64_t offset;
+ int count;
int ret;
} DiscardCo;
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
DiscardCo *rwco = opaque;
- rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+ rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count);
}
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
+ int count)
{
BdrvTrackedRequest req;
- int max_discard, ret;
+ int max_pdiscard, ret;
+ int head, align;
if (!bs->drv) {
return -ENOMEDIUM;
}
- ret = bdrv_check_request(bs, sector_num, nb_sectors);
+ ret = bdrv_check_byte_request(bs, offset, count);
if (ret < 0) {
return ret;
} else if (bs->read_only) {
@@ -2510,44 +2426,49 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
- if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
+ if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
return 0;
}
- tracked_request_begin(&req, bs, sector_num, nb_sectors,
- BDRV_TRACKED_DISCARD);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ /* Discard is advisory, so ignore any unaligned head or tail */
+ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
+ assert(align % bs->bl.request_alignment == 0);
+ head = offset % align;
+ if (head) {
+ head = MIN(count, align - head);
+ count -= head;
+ offset += head;
+ }
+ count = QEMU_ALIGN_DOWN(count, align);
+ if (!count) {
+ return 0;
+ }
- max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
- while (nb_sectors > 0) {
- int ret;
- int num = nb_sectors;
-
- /* align request */
- if (bs->bl.discard_alignment &&
- num >= bs->bl.discard_alignment &&
- sector_num % bs->bl.discard_alignment) {
- if (num > bs->bl.discard_alignment) {
- num = bs->bl.discard_alignment;
- }
- num -= sector_num % bs->bl.discard_alignment;
- }
+ tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
- /* limit request size */
- if (num > max_discard) {
- num = max_discard;
- }
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+ if (ret < 0) {
+ goto out;
+ }
- if (bs->drv->bdrv_co_discard) {
- ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+ max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
+ align);
+ assert(max_pdiscard);
+
+ while (count > 0) {
+ int ret;
+ int num = MIN(count, max_pdiscard);
+
+ if (bs->drv->bdrv_co_pdiscard) {
+ ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
} else {
BlockAIOCB *acb;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
- acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
- bdrv_co_io_em_complete, &co);
+ acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
+ bdrv_co_io_em_complete, &co);
if (acb == NULL) {
ret = -EIO;
goto out;
@@ -2560,33 +2481,36 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
goto out;
}
- sector_num += num;
- nb_sectors -= num;
+ offset += num;
+ count -= num;
}
ret = 0;
out:
+ ++bs->write_gen;
+ bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
+ req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
return ret;
}
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
Coroutine *co;
DiscardCo rwco = {
.bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
+ .offset = offset,
+ .count = count,
.ret = NOT_DONE,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
- bdrv_discard_co_entry(&rwco);
+ bdrv_pdiscard_co_entry(&rwco);
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
- co = qemu_coroutine_create(bdrv_discard_co_entry);
- qemu_coroutine_enter(co, &rwco);
+ co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
+ qemu_coroutine_enter(co);
while (rwco.ret == NOT_DONE) {
aio_poll(aio_context, true);
}
@@ -2595,19 +2519,6 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
return rwco.ret;
}
-typedef struct {
- CoroutineIOCompletion *co;
- QEMUBH *bh;
-} BdrvIoctlCompletionData;
-
-static void bdrv_ioctl_bh_cb(void *opaque)
-{
- BdrvIoctlCompletionData *data = opaque;
-
- bdrv_co_io_em_complete(data->co, -ENOTSUP);
- qemu_bh_delete(data->bh);
-}
-
static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
@@ -2625,11 +2536,8 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
if (!acb) {
- BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
- data->bh = aio_bh_new(bdrv_get_aio_context(bs),
- bdrv_ioctl_bh_cb, data);
- data->co = &co;
- qemu_bh_schedule(data->bh);
+ co.ret = -ENOTSUP;
+ goto out;
}
qemu_coroutine_yield();
out:
@@ -2664,9 +2572,9 @@ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
/* Fast-path if already in coroutine context */
bdrv_co_ioctl_entry(&data);
} else {
- Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
+ Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry, &data);
- qemu_coroutine_enter(co, &data);
+ qemu_coroutine_enter(co);
while (data.ret == -EINPROGRESS) {
aio_poll(bdrv_get_aio_context(bs), true);
}
@@ -2694,8 +2602,8 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
acb->req.error = -EINPROGRESS;
acb->req.req = req;
acb->req.buf = buf;
- co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
@@ -2763,48 +2671,66 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
void bdrv_io_plug(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_plug) {
- drv->bdrv_io_plug(bs);
- } else if (bs->file) {
- bdrv_io_plug(bs->file->bs);
+ BdrvChild *child;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_plug(child->bs);
+ }
+
+ if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ }
}
}
void bdrv_io_unplug(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_unplug) {
- drv->bdrv_io_unplug(bs);
- } else if (bs->file) {
- bdrv_io_unplug(bs->file->bs);
+ BdrvChild *child;
+
+ assert(bs->io_plugged);
+ if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ }
}
-}
-void bdrv_flush_io_queue(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_flush_io_queue) {
- drv->bdrv_flush_io_queue(bs);
- } else if (bs->file) {
- bdrv_flush_io_queue(bs->file->bs);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplug(child->bs);
}
- bdrv_start_throttled_reqs(bs);
}
-void bdrv_drained_begin(BlockDriverState *bs)
+void bdrv_io_unplugged_begin(BlockDriverState *bs)
{
- if (!bs->quiesce_counter++) {
- aio_disable_external(bdrv_get_aio_context(bs));
+ BdrvChild *child;
+
+ if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ }
+ }
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplugged_begin(child->bs);
}
- bdrv_drain(bs);
}
-void bdrv_drained_end(BlockDriverState *bs)
+void bdrv_io_unplugged_end(BlockDriverState *bs)
{
- assert(bs->quiesce_counter > 0);
- if (--bs->quiesce_counter > 0) {
- return;
+ BdrvChild *child;
+
+ assert(bs->io_plug_disabled);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplugged_end(child->bs);
+ }
+
+ if (--bs->io_plug_disabled == 0 && bs->io_plugged > 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ }
}
- aio_enable_external(bdrv_get_aio_context(bs));
}
diff --git a/block/iscsi.c b/block/iscsi.c
index 302baf84c..95ce9e139 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2,7 +2,7 @@
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -46,7 +46,6 @@
#ifdef __linux__
#include <scsi/sg.h>
-#include <block/scsi.h>
#endif
typedef struct IscsiLun {
@@ -62,7 +61,23 @@ typedef struct IscsiLun {
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
- unsigned long *allocationmap;
+ /* The allocmap tracks which clusters (pages) on the iSCSI target are
+ * allocated and which are not. In case a target returns zeros for
+ * unallocated pages (iscsilun->lprz) we can directly return zeros instead
+ * of reading zeros over the wire if a read request falls within an
+ * unallocated block. As there are 3 possible states we need 2 bitmaps to
+ * track. allocmap_valid keeps track if QEMU's information about a page is
+ * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
+ * valid information about a page the corresponding allocmap entry should be
+ * switched to unallocated as well to force a new lookup of the allocation
+ * status as lookups are generally skipped if a page is suspect to be
+ * allocated. If a iSCSI target is opened with cache.direct = on the
+ * allocmap_valid does not exist turning all cached information invalid so
+ * that a fresh lookup is made for any page even if allocmap entry returns
+ * it's unallocated. */
+ unsigned long *allocmap;
+ unsigned long *allocmap_valid;
+ long allocmap_size;
int cluster_sectors;
bool use_16_for_rw;
bool write_protected;
@@ -153,7 +168,7 @@ static void iscsi_co_generic_bh_cb(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_bh_delete(iTask->bh);
- qemu_coroutine_enter(iTask->co, NULL);
+ qemu_coroutine_enter(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -161,7 +176,7 @@ static void iscsi_retry_timer_expired(void *opaque)
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
- qemu_coroutine_enter(iTask->co, NULL);
+ qemu_coroutine_enter(iTask->co);
}
}
@@ -401,53 +416,159 @@ static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
-static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
- IscsiLun *iscsilun)
+static bool is_byte_request_lun_aligned(int64_t offset, int count,
+ IscsiLun *iscsilun)
{
- if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
- (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
- error_report("iSCSI misaligned request: "
- "iscsilun->block_size %u, sector_num %" PRIi64
- ", nb_sectors %d",
- iscsilun->block_size, sector_num, nb_sectors);
- return 0;
- }
- return 1;
+ if (offset % iscsilun->block_size || count % iscsilun->block_size) {
+ error_report("iSCSI misaligned request: "
+ "iscsilun->block_size %u, offset %" PRIi64
+ ", count %d",
+ iscsilun->block_size, offset, count);
+ return false;
+ }
+ return true;
}
-static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
+static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
+ IscsiLun *iscsilun)
{
- return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
- iscsilun),
- iscsilun->cluster_sectors));
+ assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
+ return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS,
+ iscsilun);
}
-static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+static void iscsi_allocmap_free(IscsiLun *iscsilun)
{
- if (iscsilun->allocationmap == NULL) {
- return;
+ g_free(iscsilun->allocmap);
+ g_free(iscsilun->allocmap_valid);
+ iscsilun->allocmap = NULL;
+ iscsilun->allocmap_valid = NULL;
+}
+
+
+static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
+{
+ iscsi_allocmap_free(iscsilun);
+
+ iscsilun->allocmap_size =
+ DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
+ iscsilun->cluster_sectors);
+
+ iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
+ if (!iscsilun->allocmap) {
+ return -ENOMEM;
+ }
+
+ if (open_flags & BDRV_O_NOCACHE) {
+ /* in case that cache.direct = on all allocmap entries are
+ * treated as invalid to force a relookup of the block
+ * status on every read request */
+ return 0;
}
- bitmap_set(iscsilun->allocationmap,
- sector_num / iscsilun->cluster_sectors,
- DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
+
+ iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
+ if (!iscsilun->allocmap_valid) {
+ /* if we are under memory pressure free the allocmap as well */
+ iscsi_allocmap_free(iscsilun);
+ return -ENOMEM;
+ }
+
+ return 0;
}
-static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+static void
+iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors, bool allocated, bool valid)
{
- int64_t cluster_num, nb_clusters;
- if (iscsilun->allocationmap == NULL) {
+ int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
+
+ if (iscsilun->allocmap == NULL) {
return;
}
- cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
- nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
- - cluster_num;
- if (nb_clusters > 0) {
- bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
+ /* expand to entirely contain all affected clusters */
+ cl_num_expanded = sector_num / iscsilun->cluster_sectors;
+ nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
+ iscsilun->cluster_sectors) - cl_num_expanded;
+ /* shrink to touch only completely contained clusters */
+ cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
+ nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
+ - cl_num_shrunk;
+ if (allocated) {
+ bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
+ } else {
+ bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
+ }
+
+ if (iscsilun->allocmap_valid == NULL) {
+ return;
+ }
+ if (valid) {
+ bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
+ } else {
+ bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
+ nb_cls_expanded);
+ }
+}
+
+static void
+iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, true, true);
+}
+
+static void
+iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ /* Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
+ * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
+ */
+ iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, true);
+}
+
+static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, false, false);
+}
+
+static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
+{
+ if (iscsilun->allocmap) {
+ bitmap_zero(iscsilun->allocmap, iscsilun->allocmap_size);
+ }
+ if (iscsilun->allocmap_valid) {
+ bitmap_zero(iscsilun->allocmap_valid, iscsilun->allocmap_size);
}
}
+static inline bool
+iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ unsigned long size;
+ if (iscsilun->allocmap == NULL) {
+ return true;
+ }
+ size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+ return !(find_next_bit(iscsilun->allocmap, size,
+ sector_num / iscsilun->cluster_sectors) == size);
+}
+
+static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
+ int64_t sector_num, int nb_sectors)
+{
+ unsigned long size;
+ if (iscsilun->allocmap_valid == NULL) {
+ return false;
+ }
+ size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+ return (find_next_zero_bit(iscsilun->allocmap_valid, size,
+ sector_num / iscsilun->cluster_sectors) == size);
+}
+
static int coroutine_fn
iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
QEMUIOVector *iov, int flags)
@@ -456,23 +577,23 @@ iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
- bool fua;
+ bool fua = flags & BDRV_REQ_FUA;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ if (fua) {
+ assert(iscsilun->dpofua);
+ }
+ if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
- if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
- error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
- "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
- return -EINVAL;
+ if (bs->bl.max_transfer) {
+ assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA);
if (iscsilun->use_16_for_rw) {
iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
@@ -505,34 +626,17 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
+ iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
return iTask.err_code;
}
- iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
return 0;
}
-static int coroutine_fn
-iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0);
-}
-static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
- int64_t sector_num, int nb_sectors)
-{
- unsigned long size;
- if (iscsilun->allocationmap == NULL) {
- return true;
- }
- size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
- return !(find_next_bit(iscsilun->allocationmap, size,
- sector_num / iscsilun->cluster_sectors) == size);
-}
-
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum,
@@ -546,7 +650,7 @@ static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
iscsi_co_init_iscsitask(iscsilun, &iTask);
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
ret = -EINVAL;
goto out;
}
@@ -616,9 +720,9 @@ retry:
}
if (ret & BDRV_BLOCK_ZERO) {
- iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
+ iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
} else {
- iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
+ iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
}
if (*pnum > nb_sectors) {
@@ -643,26 +747,40 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
uint64_t lba;
uint32_t num_sectors;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
- if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
- error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
- "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
- return -EINVAL;
+ if (bs->bl.max_transfer) {
+ assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
- if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
- !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
- int64_t ret;
+ /* if cache.direct is off and we have a valid entry in our allocation map
+ * we can skip checking the block status and directly return zeroes if
+ * the request falls within an unallocated area */
+ if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+ qemu_iovec_memset(iov, 0, 0x00, iov->size);
+ return 0;
+ }
+
+ if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
+ !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int pnum;
BlockDriverState *file;
- ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file);
+ /* check the block status from the beginning of the cluster
+ * containing the start sector */
+ int64_t ret = iscsi_co_get_block_status(bs,
+ sector_num - sector_num % iscsilun->cluster_sectors,
+ BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
if (ret < 0) {
return ret;
}
- if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
+ /* if the whole request falls into an unallocated area we can avoid
+ * reading and directly return zeroes instead */
+ if (ret & BDRV_BLOCK_ZERO &&
+ pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
qemu_iovec_memset(iov, 0, 0x00, iov->size);
return 0;
}
@@ -766,6 +884,7 @@ iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
acb->ioh->driver_status = 0;
acb->ioh->host_status = 0;
acb->ioh->resid = 0;
+ acb->ioh->status = status;
#define SG_ERR_DRIVER_SENSE 0x08
@@ -837,6 +956,13 @@ static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return &acb->common;
}
+ if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
+ error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
+ acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
+ qemu_aio_unref(acb);
+ return NULL;
+ }
+
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
@@ -916,29 +1042,26 @@ iscsi_getlength(BlockDriverState *bs)
}
static int
-coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- return -EINVAL;
- }
+ assert(is_byte_request_lun_aligned(offset, count, iscsilun));
if (!iscsilun->lbp.lbpu) {
/* UNMAP is not supported by the target */
return 0;
}
- list.lba = sector_qemu2lun(sector_num, iscsilun);
- list.num = sector_qemu2lun(nb_sectors, iscsilun);
+ list.lba = offset / iscsilun->block_size;
+ list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
- iscsi_co_generic_cb, &iTask) == NULL) {
+ iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
}
@@ -968,14 +1091,15 @@ retry:
return iTask.err_code;
}
- iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
return 0;
}
static int
-coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
@@ -983,8 +1107,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
uint32_t nb_blocks;
bool use_16_for_ws = iscsilun->use_16_for_rw;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- return -EINVAL;
+ if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
+ return -ENOTSUP;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -1005,8 +1129,8 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
return -ENOTSUP;
}
- lba = sector_qemu2lun(sector_num, iscsilun);
- nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+ lba = offset / iscsilun->block_size;
+ nb_blocks = count / iscsilun->block_size;
if (iscsilun->zeroblock == NULL) {
iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
@@ -1058,13 +1182,17 @@ retry:
}
if (iTask.status != SCSI_STATUS_GOOD) {
+ iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
return iTask.err_code;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
- iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
} else {
- iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
}
return 0;
@@ -1555,6 +1683,10 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
task = NULL;
iscsi_modesense_sync(iscsilun);
+ if (iscsilun->dpofua) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
/* Check the write protect flag of the LUN if we want to write */
if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
@@ -1571,14 +1703,13 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
goto out;
}
bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
- bs->request_alignment = iscsilun->block_size;
/* We don't have any emulation for devices other than disks and CD-ROMs, so
* this must be sg ioctl compatible. We force it to be sg, otherwise qemu
* will try to read from the device to guess the image format.
*/
if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
- bs->sg = 1;
+ bs->sg = true;
}
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1634,10 +1765,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
iscsilun->block_size) >> BDRV_SECTOR_BITS;
if (iscsilun->lbprz) {
- iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
- if (iscsilun->allocationmap == NULL) {
- ret = -ENOMEM;
- }
+ ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
}
}
@@ -1674,48 +1802,54 @@ static void iscsi_close(BlockDriverState *bs)
}
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
- g_free(iscsilun->allocationmap);
+ iscsi_allocmap_free(iscsilun);
memset(iscsilun, 0, sizeof(IscsiLun));
}
-static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
- return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
-}
-
static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* We don't actually refresh here, but just return data queried in
* iscsi_open(): iscsi targets don't change their limits. */
IscsiLun *iscsilun = bs->opaque;
- uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+ uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+
+ bs->bl.request_alignment = iscsilun->block_size;
if (iscsilun->bl.max_xfer_len) {
max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
}
- bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
+ if (max_xfer_len * iscsilun->block_size < INT_MAX) {
+ bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
+ }
if (iscsilun->lbp.lbpu) {
- if (iscsilun->bl.max_unmap < 0xffffffff) {
- bs->bl.max_discard =
- sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
+ if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) {
+ bs->bl.max_pdiscard =
+ iscsilun->bl.max_unmap * iscsilun->block_size;
}
- bs->bl.discard_alignment =
- sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+ bs->bl.pdiscard_alignment =
+ iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
+ } else {
+ bs->bl.pdiscard_alignment = iscsilun->block_size;
}
- if (iscsilun->bl.max_ws_len < 0xffffffff) {
- bs->bl.max_write_zeroes =
- sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
+ if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
+ bs->bl.max_pwrite_zeroes =
+ iscsilun->bl.max_ws_len * iscsilun->block_size;
}
if (iscsilun->lbp.lbpws) {
- bs->bl.write_zeroes_alignment =
- sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+ bs->bl.pwrite_zeroes_alignment =
+ iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
+ } else {
+ bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
+ }
+ if (iscsilun->bl.opt_xfer_len &&
+ iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) {
+ bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
+ iscsilun->block_size);
}
- bs->bl.opt_transfer_length =
- sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
}
/* Note that this will not re-establish a connection with an iSCSI target - it
@@ -1732,6 +1866,16 @@ static int iscsi_reopen_prepare(BDRVReopenState *state,
return 0;
}
+static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
+{
+ IscsiLun *iscsilun = reopen_state->bs->opaque;
+
+ /* the cache.direct status might have changed */
+ if (iscsilun->allocmap != NULL) {
+ iscsi_allocmap_init(iscsilun, reopen_state->flags);
+ }
+}
+
static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
{
IscsiLun *iscsilun = bs->opaque;
@@ -1751,9 +1895,8 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL;
}
- if (iscsilun->allocationmap != NULL) {
- g_free(iscsilun->allocationmap);
- iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
+ if (iscsilun->allocmap != NULL) {
+ iscsi_allocmap_init(iscsilun, bs->open_flags);
}
return 0;
@@ -1813,6 +1956,13 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
+static void iscsi_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ iscsi_allocmap_invalidate(iscsilun);
+}
+
static QemuOptsList iscsi_create_opts = {
.name = "iscsi-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -1836,7 +1986,9 @@ static BlockDriver bdrv_iscsi = {
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
- .bdrv_reopen_prepare = iscsi_reopen_prepare,
+ .bdrv_reopen_prepare = iscsi_reopen_prepare,
+ .bdrv_reopen_commit = iscsi_reopen_commit,
+ .bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -1844,12 +1996,10 @@ static BlockDriver bdrv_iscsi = {
.bdrv_refresh_limits = iscsi_refresh_limits,
.bdrv_co_get_block_status = iscsi_co_get_block_status,
- .bdrv_co_discard = iscsi_co_discard,
- .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
+ .bdrv_co_pdiscard = iscsi_co_pdiscard,
+ .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
- .bdrv_co_writev = iscsi_co_writev,
.bdrv_co_writev_flags = iscsi_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 805757e02..e906abebb 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -11,8 +11,10 @@
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
+#include "block/block.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
#include <libaio.h>
@@ -26,11 +28,10 @@
*/
#define MAX_EVENTS 128
-#define MAX_QUEUED_IO 128
-
struct qemu_laiocb {
BlockAIOCB common;
- struct qemu_laio_state *ctx;
+ Coroutine *co;
+ LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
size_t nbytes;
@@ -41,12 +42,15 @@ struct qemu_laiocb {
typedef struct {
int plugged;
- unsigned int n;
+ unsigned int in_queue;
+ unsigned int in_flight;
bool blocked;
QSIMPLEQ_HEAD(, qemu_laiocb) pending;
} LaioQueue;
-struct qemu_laio_state {
+struct LinuxAioState {
+ AioContext *aio_context;
+
io_context_t ctx;
EventNotifier e;
@@ -60,7 +64,7 @@ struct qemu_laio_state {
int event_max;
};
-static void ioq_submit(struct qemu_laio_state *s);
+static void ioq_submit(LinuxAioState *s);
static inline ssize_t io_event_ret(struct io_event *ev)
{
@@ -70,8 +74,7 @@ static inline ssize_t io_event_ret(struct io_event *ev)
/*
* Completes an AIO request (calls the callback and frees the ACB).
*/
-static void qemu_laio_process_completion(struct qemu_laio_state *s,
- struct qemu_laiocb *laiocb)
+static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
{
int ret;
@@ -85,13 +88,18 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
qemu_iovec_memset(laiocb->qiov, ret, 0,
laiocb->qiov->size - ret);
} else {
- ret = -EINVAL;
+ ret = -ENOSPC;
}
}
}
- laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_unref(laiocb);
+ laiocb->ret = ret;
+ if (laiocb->co) {
+ qemu_coroutine_enter(laiocb->co);
+ } else {
+ laiocb->common.cb(laiocb->common.opaque, ret);
+ qemu_aio_unref(laiocb);
+ }
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -99,7 +107,7 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
*
* The function is somewhat tricky because it supports nested event loops, for
* example when a request callback invokes aio_poll(). In order to do this,
- * the completion events array and index are kept in qemu_laio_state. The BH
+ * the completion events array and index are kept in LinuxAioState. The BH
* reschedules itself as long as there are completions pending so it will
* either be called again in a nested event loop or will be called after all
* events have been completed. When there are no events left to complete, the
@@ -107,7 +115,7 @@ static void qemu_laio_process_completion(struct qemu_laio_state *s,
*/
static void qemu_laio_completion_bh(void *opaque)
{
- struct qemu_laio_state *s = opaque;
+ LinuxAioState *s = opaque;
/* Fetch more completion events when empty */
if (s->event_idx == s->event_max) {
@@ -122,6 +130,7 @@ static void qemu_laio_completion_bh(void *opaque)
s->event_max = 0;
return; /* no more events */
}
+ s->io_q.in_flight -= s->event_max;
}
/* Reschedule so nested event loops see currently pending completions */
@@ -136,20 +145,22 @@ static void qemu_laio_completion_bh(void *opaque)
laiocb->ret = io_event_ret(&s->events[s->event_idx]);
s->event_idx++;
- qemu_laio_process_completion(s, laiocb);
+ qemu_laio_process_completion(laiocb);
}
if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
+
+ qemu_bh_cancel(s->completion_bh);
}
static void qemu_laio_completion_cb(EventNotifier *e)
{
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+ LinuxAioState *s = container_of(e, LinuxAioState, e);
if (event_notifier_test_and_clear(&s->e)) {
- qemu_bh_schedule(s->completion_bh);
+ qemu_laio_completion_bh(s);
}
}
@@ -181,22 +192,26 @@ static void ioq_init(LaioQueue *io_q)
{
QSIMPLEQ_INIT(&io_q->pending);
io_q->plugged = 0;
- io_q->n = 0;
+ io_q->in_queue = 0;
+ io_q->in_flight = 0;
io_q->blocked = false;
}
-static void ioq_submit(struct qemu_laio_state *s)
+static void ioq_submit(LinuxAioState *s)
{
int ret, len;
struct qemu_laiocb *aiocb;
- struct iocb *iocbs[MAX_QUEUED_IO];
+ struct iocb *iocbs[MAX_EVENTS];
QSIMPLEQ_HEAD(, qemu_laiocb) completed;
do {
+ if (s->io_q.in_flight >= MAX_EVENTS) {
+ break;
+ }
len = 0;
QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
iocbs[len++] = &aiocb->iocb;
- if (len == MAX_QUEUED_IO) {
+ if (s->io_q.in_flight + len >= MAX_EVENTS) {
break;
}
}
@@ -206,55 +221,43 @@ static void ioq_submit(struct qemu_laio_state *s)
break;
}
if (ret < 0) {
- abort();
+ /* Fail the first request, retry the rest */
+ aiocb = QSIMPLEQ_FIRST(&s->io_q.pending);
+ QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
+ s->io_q.in_queue--;
+ aiocb->ret = ret;
+ qemu_laio_process_completion(aiocb);
+ continue;
}
- s->io_q.n -= ret;
+ s->io_q.in_flight += ret;
+ s->io_q.in_queue -= ret;
aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
} while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
- s->io_q.blocked = (s->io_q.n > 0);
+ s->io_q.blocked = (s->io_q.in_queue > 0);
}
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
+void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
{
- struct qemu_laio_state *s = aio_ctx;
-
s->io_q.plugged++;
}
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
{
- struct qemu_laio_state *s = aio_ctx;
-
- assert(s->io_q.plugged > 0 || !unplug);
-
- if (unplug && --s->io_q.plugged > 0) {
- return;
- }
-
- if (!s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+ assert(s->io_q.plugged);
+ if (--s->io_q.plugged == 0 &&
+ !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
}
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+ int type)
{
- struct qemu_laio_state *s = aio_ctx;
- struct qemu_laiocb *laiocb;
- struct iocb *iocbs;
- off_t offset = sector_num * 512;
-
- laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
- laiocb->nbytes = nb_sectors * 512;
- laiocb->ctx = s;
- laiocb->ret = -EINPROGRESS;
- laiocb->is_read = (type == QEMU_AIO_READ);
- laiocb->qiov = qiov;
-
- iocbs = &laiocb->iocb;
+ LinuxAioState *s = laiocb->ctx;
+ struct iocb *iocbs = &laiocb->iocb;
+ QEMUIOVector *qiov = laiocb->qiov;
switch (type) {
case QEMU_AIO_WRITE:
@@ -267,43 +270,83 @@ BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type);
- goto out_free_aiocb;
+ return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
- s->io_q.n++;
+ s->io_q.in_queue++;
if (!s->io_q.blocked &&
- (!s->io_q.plugged || s->io_q.n >= MAX_QUEUED_IO)) {
+ (!s->io_q.plugged ||
+ s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
ioq_submit(s);
}
- return &laiocb->common;
-out_free_aiocb:
- qemu_aio_unref(laiocb);
- return NULL;
+ return 0;
}
-void laio_detach_aio_context(void *s_, AioContext *old_context)
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ uint64_t offset, QEMUIOVector *qiov, int type)
{
- struct qemu_laio_state *s = s_;
+ int ret;
+ struct qemu_laiocb laiocb = {
+ .co = qemu_coroutine_self(),
+ .nbytes = qiov->size,
+ .ctx = s,
+ .is_read = (type == QEMU_AIO_READ),
+ .qiov = qiov,
+ };
+
+ ret = laio_do_submit(fd, &laiocb, offset, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+ return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque, int type)
+{
+ struct qemu_laiocb *laiocb;
+ off_t offset = sector_num * BDRV_SECTOR_SIZE;
+ int ret;
+
+ laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+ laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+ laiocb->ctx = s;
+ laiocb->ret = -EINPROGRESS;
+ laiocb->is_read = (type == QEMU_AIO_READ);
+ laiocb->qiov = qiov;
+
+ ret = laio_do_submit(fd, laiocb, offset, type);
+ if (ret < 0) {
+ qemu_aio_unref(laiocb);
+ return NULL;
+ }
+ return &laiocb->common;
+}
+
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
+{
aio_set_event_notifier(old_context, &s->e, false, NULL);
qemu_bh_delete(s->completion_bh);
}
-void laio_attach_aio_context(void *s_, AioContext *new_context)
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
- struct qemu_laio_state *s = s_;
-
+ s->aio_context = new_context;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
aio_set_event_notifier(new_context, &s->e, false,
qemu_laio_completion_cb);
}
-void *laio_init(void)
+LinuxAioState *laio_init(void)
{
- struct qemu_laio_state *s;
+ LinuxAioState *s;
s = g_malloc0(sizeof(*s));
if (event_notifier_init(&s->e, false) < 0) {
@@ -325,10 +368,8 @@ out_free_state:
return NULL;
}
-void laio_cleanup(void *s_)
+void laio_cleanup(LinuxAioState *s)
{
- struct qemu_laio_state *s = s_;
-
event_notifier_cleanup(&s->e);
if (io_destroy(s->ctx) != 0) {
diff --git a/block/mirror.c b/block/mirror.c
index 039f48125..e0b3f4180 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -20,11 +20,12 @@
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
-#include "qemu/error-report.h"
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
-#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
+#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
+#define DEFAULT_MIRROR_BUF_SIZE \
+ (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
/* The mirroring buffer is a list of granularity-sized chunks.
* Free chunks are organized in a list.
@@ -36,7 +37,7 @@ typedef struct MirrorBuffer {
typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
- BlockDriverState *target;
+ BlockBackend *target;
BlockDriverState *base;
/* The name of the graph node to replace */
char *replaces;
@@ -45,6 +46,7 @@ typedef struct MirrorBlockJob {
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
bool is_none_mode;
+ BlockMirrorBackingMode backing_mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
@@ -58,9 +60,10 @@ typedef struct MirrorBlockJob {
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
int buf_free_count;
+ uint64_t last_pause_ns;
unsigned long *in_flight_bitmap;
int in_flight;
- int sectors_in_flight;
+ int64_t sectors_in_flight;
int ret;
bool unmap;
bool waiting_for_io;
@@ -80,11 +83,11 @@ static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
{
s->synced = false;
if (read) {
- return block_job_error_action(&s->common, s->common.bs,
- s->on_source_error, true, error);
+ return block_job_error_action(&s->common, s->on_source_error,
+ true, error);
} else {
- return block_job_error_action(&s->common, s->target,
- s->on_target_error, false, error);
+ return block_job_error_action(&s->common, s->on_target_error,
+ false, error);
}
}
@@ -121,7 +124,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
g_free(op);
if (s->waiting_for_io) {
- qemu_coroutine_enter(s->common.co, NULL);
+ qemu_coroutine_enter(s->common.co);
}
}
@@ -157,8 +160,8 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_iteration_done(op, ret);
return;
}
- bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
- mirror_write_complete, op);
+ blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+ 0, mirror_write_complete, op);
}
static inline void mirror_clip_sectors(MirrorBlockJob *s,
@@ -186,8 +189,9 @@ static int mirror_cow_align(MirrorBlockJob *s,
need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
s->cow_bitmap);
if (need_cow) {
- bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
- &align_sector_num, &align_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
+ *nb_sectors, &align_sector_num,
+ &align_nb_sectors);
}
if (align_nb_sectors > max_sectors) {
@@ -217,23 +221,29 @@ static inline void mirror_wait_for_io(MirrorBlockJob *s)
}
/* Submit async read while handling COW.
- * Returns: nb_sectors if no alignment is necessary, or
+ * Returns: The number of sectors copied after and including sector_num,
+ * excluding any sectors copied prior to sector_num due to alignment.
+ * This will be nb_sectors if no alignment is necessary, or
* (new_end - sector_num) if tail is rounded up or down due to
* alignment or buffer limit.
*/
static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
int nb_sectors)
{
- BlockDriverState *source = s->common.bs;
+ BlockBackend *source = s->common.blk;
int sectors_per_chunk, nb_chunks;
- int ret = nb_sectors;
+ int ret;
MirrorOp *op;
+ int max_sectors;
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ max_sectors = sectors_per_chunk * s->max_iov;
/* We can only handle as much as buf_size at a time. */
nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+ nb_sectors = MIN(max_sectors, nb_sectors);
assert(nb_sectors);
+ ret = nb_sectors;
if (s->cow_bitmap) {
ret += mirror_cow_align(s, &sector_num, &nb_sectors);
@@ -274,7 +284,7 @@ static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
- bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+ blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0,
mirror_read_complete, op);
return ret;
}
@@ -296,10 +306,12 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
s->in_flight++;
s->sectors_in_flight += nb_sectors;
if (is_discard) {
- bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+ blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
+ op->nb_sectors << BDRV_SECTOR_BITS,
mirror_write_complete, op);
} else {
- bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
+ blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
+ op->nb_sectors * BDRV_SECTOR_SIZE,
s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
mirror_write_complete, op);
}
@@ -307,13 +319,16 @@ static void mirror_do_zero_or_discard(MirrorBlockJob *s,
static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
{
- BlockDriverState *source = s->common.bs;
+ BlockDriverState *source = blk_bs(s->common.blk);
int64_t sector_num, first_chunk;
uint64_t delay_ns = 0;
/* At least the first dirty chunk is mirrored in one iteration. */
int nb_chunks = 1;
int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
+ int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
+ MAX_IO_SECTORS);
sector_num = hbitmap_iter_next(&s->hbi);
if (sector_num < 0) {
@@ -325,10 +340,12 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
first_chunk = sector_num / sectors_per_chunk;
while (test_bit(first_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, first_chunk, s->in_flight);
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
mirror_wait_for_io(s);
}
+ block_job_pause_point(&s->common);
+
/* Find the number of consective dirty chunks following the first dirty
* one, and wait for in flight requests in them. */
while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
@@ -362,7 +379,7 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
while (nb_chunks > 0 && sector_num < end) {
int ret;
- int io_sectors;
+ int io_sectors, io_sectors_acct;
BlockDriverState *file;
enum MirrorMethod {
MIRROR_METHOD_COPY,
@@ -375,7 +392,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
nb_chunks * sectors_per_chunk,
&io_sectors, &file);
if (ret < 0) {
- io_sectors = nb_chunks * sectors_per_chunk;
+ io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
+ } else if (ret & BDRV_BLOCK_DATA) {
+ io_sectors = MIN(io_sectors, max_io_sectors);
}
io_sectors -= io_sectors % sectors_per_chunk;
@@ -384,8 +403,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
} else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
int64_t target_sector_num;
int target_nb_sectors;
- bdrv_round_to_clusters(s->target, sector_num, io_sectors,
- &target_sector_num, &target_nb_sectors);
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
+ io_sectors, &target_sector_num,
+ &target_nb_sectors);
if (target_sector_num == sector_num &&
target_nb_sectors == io_sectors) {
mirror_method = ret & BDRV_BLOCK_ZERO ?
@@ -394,16 +414,30 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
}
}
+ while (s->in_flight >= MAX_IN_FLIGHT) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ mirror_wait_for_io(s);
+ }
+
+ if (s->ret < 0) {
+ return 0;
+ }
+
mirror_clip_sectors(s, sector_num, &io_sectors);
switch (mirror_method) {
case MIRROR_METHOD_COPY:
io_sectors = mirror_do_read(s, sector_num, io_sectors);
+ io_sectors_acct = io_sectors;
break;
case MIRROR_METHOD_ZERO:
- mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
- break;
case MIRROR_METHOD_DISCARD:
- mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+ mirror_do_zero_or_discard(s, sector_num, io_sectors,
+ mirror_method == MIRROR_METHOD_DISCARD);
+ if (write_zeroes_ok) {
+ io_sectors_acct = 0;
+ } else {
+ io_sectors_acct = io_sectors;
+ }
break;
default:
abort();
@@ -411,7 +445,9 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
assert(io_sectors);
sector_num += io_sectors;
nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
- delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
+ if (s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
+ }
}
return delay_ns;
}
@@ -449,7 +485,8 @@ static void mirror_exit(BlockJob *job, void *opaque)
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
- BlockDriverState *src = s->common.bs;
+ BlockDriverState *src = blk_bs(s->common.blk);
+ BlockDriverState *target_bs = blk_bs(s->target);
/* Make sure that the source BDS doesn't go away before we called
* block_job_completed(). */
@@ -461,26 +498,25 @@ static void mirror_exit(BlockJob *job, void *opaque)
}
if (s->should_complete && data->ret == 0) {
- BlockDriverState *to_replace = s->common.bs;
+ BlockDriverState *to_replace = src;
if (s->to_replace) {
to_replace = s->to_replace;
}
- /* This was checked in mirror_start_job(), but meanwhile one of the
- * nodes could have been newly attached to a BlockBackend. */
- if (to_replace->blk && s->target->blk) {
- error_report("block job: Can't create node with two BlockBackends");
- data->ret = -EINVAL;
- goto out;
+ if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
+ bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
}
- if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
- bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
- }
- bdrv_replace_in_backing_chain(to_replace, s->target);
- }
+ /* The mirror job has no requests in flight any more, but we need to
+ * drain potential other users of the BDS before changing the graph. */
+ bdrv_drained_begin(target_bs);
+ bdrv_replace_in_backing_chain(to_replace, target_bs);
+ bdrv_drained_end(target_bs);
-out:
+ /* We just changed the BDS the job BB refers to */
+ blk_remove_bs(job->blk);
+ blk_insert_bs(job->blk, src);
+ }
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
error_free(s->replace_blocker);
@@ -490,29 +526,102 @@ out:
aio_context_release(replace_aio_context);
}
g_free(s->replaces);
- bdrv_op_unblock_all(s->target, s->common.blocker);
- bdrv_unref(s->target);
+ bdrv_op_unblock_all(target_bs, s->common.blocker);
+ blk_unref(s->target);
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
- if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
- aio_enable_external(iohandler_get_aio_context());
- }
bdrv_unref(src);
}
+static void mirror_throttle(MirrorBlockJob *s)
+{
+ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+ if (now - s->last_pause_ns > SLICE_TIME) {
+ s->last_pause_ns = now;
+ block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
+ } else {
+ block_job_pause_point(&s->common);
+ }
+}
+
+static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
+{
+ int64_t sector_num, end;
+ BlockDriverState *base = s->base;
+ BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *target_bs = blk_bs(s->target);
+ int ret, n;
+
+ end = s->bdev_length / BDRV_SECTOR_SIZE;
+
+ if (base == NULL && !bdrv_has_zero_init(target_bs)) {
+ if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+ return 0;
+ }
+
+ for (sector_num = 0; sector_num < end; ) {
+ int nb_sectors = MIN(end - sector_num,
+ QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+
+ mirror_throttle(s);
+
+ if (block_job_is_cancelled(&s->common)) {
+ return 0;
+ }
+
+ if (s->in_flight >= MAX_IN_FLIGHT) {
+ trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
+ mirror_wait_for_io(s);
+ continue;
+ }
+
+ mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
+ sector_num += nb_sectors;
+ }
+
+ mirror_drain(s);
+ }
+
+ /* First part, loop on the sectors and initialize the dirty bitmap. */
+ for (sector_num = 0; sector_num < end; ) {
+ /* Just to make sure we are not exceeding int limit. */
+ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
+ end - sector_num);
+
+ mirror_throttle(s);
+
+ if (block_job_is_cancelled(&s->common)) {
+ return 0;
+ }
+
+ ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(n > 0);
+ if (ret == 1) {
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+ }
+ sector_num += n;
+ }
+ return 0;
+}
+
static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
- BlockDriverState *bs = s->common.bs;
- int64_t sector_num, end, length;
- uint64_t last_pause_ns;
+ BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *target_bs = blk_bs(s->target);
+ int64_t length;
BlockDriverInfo bdi;
char backing_filename[2]; /* we only need 2 characters because we are only
checking for a NULL string */
int ret = 0;
- int n;
int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
@@ -541,20 +650,19 @@ static void coroutine_fn mirror_run(void *opaque)
* the destination do COW. Instead, we copy sectors around the
* dirty data if needed. We need a bitmap to do that.
*/
- bdrv_get_backing_filename(s->target, backing_filename,
+ bdrv_get_backing_filename(target_bs, backing_filename,
sizeof(backing_filename));
- if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
+ if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
target_cluster_size = bdi.cluster_size;
}
- if (backing_filename[0] && !s->target->backing
+ if (backing_filename[0] && !target_bs->backing
&& s->granularity < target_cluster_size) {
s->buf_size = MAX(s->buf_size, target_cluster_size);
s->cow_bitmap = bitmap_new(length);
}
s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
- s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
+ s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
- end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
if (s->buf == NULL) {
ret = -ENOMEM;
@@ -563,45 +671,18 @@ static void coroutine_fn mirror_run(void *opaque)
mirror_free_init(s);
- last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (!s->is_none_mode) {
- /* First part, loop on the sectors and initialize the dirty bitmap. */
- BlockDriverState *base = s->base;
- bool mark_all_dirty = s->base == NULL && !bdrv_has_zero_init(s->target);
-
- for (sector_num = 0; sector_num < end; ) {
- /* Just to make sure we are not exceeding int limit. */
- int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
- end - sector_num);
- int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
- if (now - last_pause_ns > SLICE_TIME) {
- last_pause_ns = now;
- block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
- }
-
- if (block_job_is_cancelled(&s->common)) {
- goto immediate_exit;
- }
-
- ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
-
- if (ret < 0) {
- goto immediate_exit;
- }
-
- assert(n > 0);
- if (ret == 1 || mark_all_dirty) {
- bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
- }
- sector_num += n;
+ ret = mirror_dirty_init(s);
+ if (ret < 0 || block_job_is_cancelled(&s->common)) {
+ goto immediate_exit;
}
}
bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
for (;;) {
uint64_t delay_ns = 0;
- int64_t cnt;
+ int64_t cnt, delta;
bool should_complete;
if (s->ret < 0) {
@@ -609,6 +690,8 @@ static void coroutine_fn mirror_run(void *opaque)
goto immediate_exit;
}
+ block_job_pause_point(&s->common);
+
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
/* s->common.offset contains the number of bytes already processed so
* far, cnt is the number of dirty sectors remaining and
@@ -622,9 +705,10 @@ static void coroutine_fn mirror_run(void *opaque)
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
- if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
+ delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
+ if (delta < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
- if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+ if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
mirror_wait_for_io(s);
@@ -637,7 +721,7 @@ static void coroutine_fn mirror_run(void *opaque)
should_complete = false;
if (s->in_flight == 0 && cnt == 0) {
trace_mirror_before_flush(s);
- ret = bdrv_flush(s->target);
+ ret = blk_flush(s->target);
if (ret < 0) {
if (mirror_error_action(s, false, -ret) ==
BLOCK_ERROR_ACTION_REPORT) {
@@ -692,7 +776,7 @@ static void coroutine_fn mirror_run(void *opaque)
s->common.cancelled = false;
break;
}
- last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
immediate_exit:
@@ -710,21 +794,12 @@ immediate_exit:
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
- if (s->target->blk) {
- blk_iostatus_disable(s->target->blk);
- }
data = g_malloc(sizeof(*data));
data->ret = ret;
/* Before we switch to target in mirror_exit, make sure data doesn't
* change. */
- bdrv_drained_begin(s->common.bs);
- if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
- /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
- * above bdrv_drained_end isn't enough to quiesce it. This is ugly, we
- * need a block layer API change to achieve this. */
- aio_disable_external(iohandler_get_aio_context());
- }
+ bdrv_drained_begin(bs);
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
@@ -739,32 +814,31 @@ static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
-static void mirror_iostatus_reset(BlockJob *job)
-{
- MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
- if (s->target->blk) {
- blk_iostatus_reset(s->target->blk);
- }
-}
-
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- Error *local_err = NULL;
- int ret;
+ BlockDriverState *src, *target;
+
+ src = blk_bs(job->blk);
+ target = blk_bs(s->target);
- ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
- return;
- }
if (!s->synced) {
- error_setg(errp, QERR_BLOCK_JOB_NOT_READY, job->id);
+ error_setg(errp, "The active block job '%s' cannot be completed",
+ job->id);
return;
}
- /* check the target bs is not blocked and block all operations on it */
+ if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
+ int ret;
+
+ assert(!target->backing);
+ ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+ if (ret < 0) {
+ return;
+ }
+ }
+
+ /* block all operations on to_replace bs */
if (s->replaces) {
AioContext *replace_aio_context;
@@ -785,31 +859,57 @@ static void mirror_complete(BlockJob *job, Error **errp)
aio_context_release(replace_aio_context);
}
+ if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+ BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ if (backing_bs(target) != backing) {
+ bdrv_set_backing_hd(target, backing);
+ }
+ }
+
s->should_complete = true;
block_job_enter(&s->common);
}
+/* There is no matching mirror_resume() because mirror_run() will begin
+ * iterating again when the job is resumed.
+ */
+static void coroutine_fn mirror_pause(BlockJob *job)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+ mirror_drain(s);
+}
+
+static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+ blk_set_aio_context(s->target, new_context);
+}
+
static const BlockJobDriver mirror_job_driver = {
- .instance_size = sizeof(MirrorBlockJob),
- .job_type = BLOCK_JOB_TYPE_MIRROR,
- .set_speed = mirror_set_speed,
- .iostatus_reset= mirror_iostatus_reset,
- .complete = mirror_complete,
+ .instance_size = sizeof(MirrorBlockJob),
+ .job_type = BLOCK_JOB_TYPE_MIRROR,
+ .set_speed = mirror_set_speed,
+ .complete = mirror_complete,
+ .pause = mirror_pause,
+ .attached_aio_context = mirror_attached_aio_context,
};
static const BlockJobDriver commit_active_job_driver = {
- .instance_size = sizeof(MirrorBlockJob),
- .job_type = BLOCK_JOB_TYPE_COMMIT,
- .set_speed = mirror_set_speed,
- .iostatus_reset
- = mirror_iostatus_reset,
- .complete = mirror_complete,
+ .instance_size = sizeof(MirrorBlockJob),
+ .job_type = BLOCK_JOB_TYPE_COMMIT,
+ .set_speed = mirror_set_speed,
+ .complete = mirror_complete,
+ .pause = mirror_pause,
+ .attached_aio_context = mirror_attached_aio_context,
};
-static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
- const char *replaces,
+static void mirror_start_job(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity,
int64_t buf_size,
+ BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
@@ -819,7 +919,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
bool is_none_mode, BlockDriverState *base)
{
MirrorBlockJob *s;
- BlockDriverState *replaced_bs;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -827,13 +926,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
assert ((granularity & (granularity - 1)) == 0);
- if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
- on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, QERR_INVALID_PARAMETER, "on-source-error");
- return;
- }
-
if (buf_size < 0) {
error_setg(errp, "Invalid parameter 'buf-size'");
return;
@@ -843,31 +935,19 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
- /* We can't support this case as long as the block layer can't handle
- * multiple BlockBackends per BlockDriverState. */
- if (replaces) {
- replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
- if (replaced_bs == NULL) {
- return;
- }
- } else {
- replaced_bs = bs;
- }
- if (replaced_bs->blk && target->blk) {
- error_setg(errp, "Can't create node with two BlockBackends");
- return;
- }
-
- s = block_job_create(driver, bs, speed, cb, opaque, errp);
+ s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp);
if (!s) {
return;
}
+ s->target = blk_new();
+ blk_insert_bs(s->target, target);
+
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
- s->target = target;
s->is_none_mode = is_none_mode;
+ s->backing_mode = backing_mode;
s->base = base;
s->granularity = granularity;
s->buf_size = ROUND_UP(buf_size, granularity);
@@ -876,25 +956,23 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
g_free(s->replaces);
+ blk_unref(s->target);
block_job_unref(&s->common);
return;
}
- bdrv_op_block_all(s->target, s->common.blocker);
+ bdrv_op_block_all(target, s->common.blocker);
- if (s->target->blk) {
- blk_set_on_error(s->target->blk, on_target_error, on_target_error);
- blk_iostatus_enable(s->target->blk);
- }
- s->common.co = qemu_coroutine_create(mirror_run);
+ s->common.co = qemu_coroutine_create(mirror_run, s);
trace_mirror_start(bs, s, s->common.co, opaque);
- qemu_coroutine_enter(s->common.co, s);
+ qemu_coroutine_enter(s->common.co);
}
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- const char *replaces,
+void mirror_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
BlockCompletionFunc *cb,
@@ -909,14 +987,14 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
}
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
- mirror_start_job(bs, target, replaces,
- speed, granularity, buf_size,
+ mirror_start_job(job_id, bs, target, replaces,
+ speed, granularity, buf_size, backing_mode,
on_source_error, on_target_error, unmap, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
-void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
- int64_t speed,
+void commit_active_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, int64_t speed,
BlockdevOnError on_error,
BlockCompletionFunc *cb,
void *opaque, Error **errp)
@@ -957,8 +1035,8 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
}
}
- bdrv_ref(base);
- mirror_start_job(bs, base, NULL, speed, 0, 0,
+ mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
+ MIRROR_LEAVE_BACKING_CHAIN,
on_error, on_error, false, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (local_err) {
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 878e879ac..2cf3237ef 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -38,7 +38,7 @@ static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
- qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+ qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
}
@@ -99,7 +99,7 @@ static void nbd_reply_ready(void *opaque)
}
if (s->recv_coroutine[i]) {
- qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+ qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
@@ -111,12 +111,12 @@ static void nbd_restart_write(void *opaque)
{
BlockDriverState *bs = opaque;
- qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
+ qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
}
static int nbd_co_send_request(BlockDriverState *bs,
struct nbd_request *request,
- QEMUIOVector *qiov, int offset)
+ QEMUIOVector *qiov)
{
NbdClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
@@ -149,8 +149,8 @@ static int nbd_co_send_request(BlockDriverState *bs,
qio_channel_set_cork(s->ioc, true);
rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
- ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
- offset, request->len, 0);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+ false);
if (ret != request->len) {
rc = -EIO;
}
@@ -167,8 +167,9 @@ static int nbd_co_send_request(BlockDriverState *bs,
}
static void nbd_co_receive_reply(NbdClientSession *s,
- struct nbd_request *request, struct nbd_reply *reply,
- QEMUIOVector *qiov, int offset)
+ struct nbd_request *request,
+ struct nbd_reply *reply,
+ QEMUIOVector *qiov)
{
int ret;
@@ -181,8 +182,8 @@ static void nbd_co_receive_reply(NbdClientSession *s,
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
- ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
- offset, request->len, 1);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+ true);
if (ret != request->len) {
reply->error = EIO;
}
@@ -217,97 +218,62 @@ static void nbd_coroutine_end(NbdClientSession *s,
}
}
-static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov,
- int offset)
+int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = { .type = NBD_CMD_READ };
+ struct nbd_request request = {
+ .type = NBD_CMD_READ,
+ .from = offset,
+ .len = bytes,
+ };
struct nbd_reply reply;
ssize_t ret;
- request.from = sector_num * 512;
- request.len = nb_sectors * 512;
+ assert(bytes <= NBD_MAX_BUFFER_SIZE);
+ assert(!flags);
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(bs, &request, NULL, 0);
+ ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, qiov, offset);
+ nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
return -reply.error;
-
}
-static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov,
- int offset, int *flags)
+int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = { .type = NBD_CMD_WRITE };
+ struct nbd_request request = {
+ .type = NBD_CMD_WRITE,
+ .from = offset,
+ .len = bytes,
+ };
struct nbd_reply reply;
ssize_t ret;
- if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) {
- *flags &= ~BDRV_REQ_FUA;
+ if (flags & BDRV_REQ_FUA) {
+ assert(client->nbdflags & NBD_FLAG_SEND_FUA);
request.type |= NBD_CMD_FLAG_FUA;
}
- request.from = sector_num * 512;
- request.len = nb_sectors * 512;
+ assert(bytes <= NBD_MAX_BUFFER_SIZE);
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(bs, &request, qiov, offset);
+ ret = nbd_co_send_request(bs, &request, qiov);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+ nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
-/* qemu-nbd has a limit of slightly less than 1M per request. Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- int offset = 0;
- int ret;
- while (nb_sectors > NBD_MAX_SECTORS) {
- ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
- if (ret < 0) {
- return ret;
- }
- offset += NBD_MAX_SECTORS * 512;
- sector_num += NBD_MAX_SECTORS;
- nb_sectors -= NBD_MAX_SECTORS;
- }
- return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int *flags)
-{
- int offset = 0;
- int ret;
- while (nb_sectors > NBD_MAX_SECTORS) {
- ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
- flags);
- if (ret < 0) {
- return ret;
- }
- offset += NBD_MAX_SECTORS * 512;
- sector_num += NBD_MAX_SECTORS;
- nb_sectors -= NBD_MAX_SECTORS;
- }
- return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
-}
-
int nbd_client_co_flush(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
@@ -323,36 +289,37 @@ int nbd_client_co_flush(BlockDriverState *bs)
request.len = 0;
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(bs, &request, NULL, 0);
+ ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+ nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
NbdClientSession *client = nbd_get_client_session(bs);
- struct nbd_request request = { .type = NBD_CMD_TRIM };
+ struct nbd_request request = {
+ .type = NBD_CMD_TRIM,
+ .from = offset,
+ .len = count,
+ };
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
- request.from = sector_num * 512;
- request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(bs, &request, NULL, 0);
+ ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+ nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
@@ -414,6 +381,9 @@ int nbd_client_init(BlockDriverState *bs,
logout("Failed to negotiate with the NBD server\n");
return ret;
}
+ if (client->nbdflags & NBD_FLAG_SEND_FUA) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
diff --git a/block/nbd-client.h b/block/nbd-client.h
index bc7aec079..044aca453 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -20,7 +20,7 @@
typedef struct NbdClientSession {
QIOChannelSocket *sioc; /* The master data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
- uint32_t nbdflags;
+ uint16_t nbdflags;
off_t size;
CoMutex send_mutex;
@@ -44,13 +44,12 @@ int nbd_client_init(BlockDriverState *bs,
Error **errp);
void nbd_client_close(BlockDriverState *bs);
-int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors);
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
int nbd_client_co_flush(BlockDriverState *bs);
-int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int *flags);
-int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
+int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags);
void nbd_client_detach_aio_context(BlockDriverState *bs);
void nbd_client_attach_aio_context(BlockDriverState *bs,
diff --git a/block/nbd.c b/block/nbd.c
index f7ea3b360..6bc06d619 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -42,6 +42,9 @@
typedef struct BDRVNBDState {
NbdClientSession client;
+
+ /* For nbd_refresh_filename() */
+ char *path, *host, *port, *export, *tlscredsid;
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -188,13 +191,15 @@ out:
g_free(file);
}
-static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
- Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
{
SocketAddress *saddr;
- if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
- if (qdict_haskey(options, "path")) {
+ s->path = g_strdup(qemu_opt_get(opts, "path"));
+ s->host = g_strdup(qemu_opt_get(opts, "host"));
+
+ if (!s->path == !s->host) {
+ if (s->path) {
error_setg(errp, "path and host may not be used at the same time.");
} else {
error_setg(errp, "one of path and host must be specified.");
@@ -204,32 +209,28 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
saddr = g_new0(SocketAddress, 1);
- if (qdict_haskey(options, "path")) {
+ if (s->path) {
UnixSocketAddress *q_unix;
saddr->type = SOCKET_ADDRESS_KIND_UNIX;
q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
- q_unix->path = g_strdup(qdict_get_str(options, "path"));
- qdict_del(options, "path");
+ q_unix->path = g_strdup(s->path);
} else {
InetSocketAddress *inet;
+
+ s->port = g_strdup(qemu_opt_get(opts, "port"));
+
saddr->type = SOCKET_ADDRESS_KIND_INET;
inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
- inet->host = g_strdup(qdict_get_str(options, "host"));
- if (!qdict_get_try_str(options, "port")) {
+ inet->host = g_strdup(s->host);
+ inet->port = g_strdup(s->port);
+ if (!inet->port) {
inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
- } else {
- inet->port = g_strdup(qdict_get_str(options, "port"));
}
- qdict_del(options, "host");
- qdict_del(options, "port");
}
s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
- *export = g_strdup(qdict_get_try_str(options, "export"));
- if (*export) {
- qdict_del(options, "export");
- }
+ s->export = g_strdup(qemu_opt_get(opts, "export"));
return saddr;
}
@@ -292,28 +293,66 @@ static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
}
+static QemuOptsList nbd_runtime_opts = {
+ .name = "nbd",
+ .head = QTAILQ_HEAD_INITIALIZER(nbd_runtime_opts.head),
+ .desc = {
+ {
+ .name = "host",
+ .type = QEMU_OPT_STRING,
+ .help = "TCP host to connect to",
+ },
+ {
+ .name = "port",
+ .type = QEMU_OPT_STRING,
+ .help = "TCP port to connect to",
+ },
+ {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ .help = "Unix socket path to connect to",
+ },
+ {
+ .name = "export",
+ .type = QEMU_OPT_STRING,
+ .help = "Name of the NBD export to open",
+ },
+ {
+ .name = "tls-creds",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the TLS credentials to use",
+ },
+ },
+};
+
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVNBDState *s = bs->opaque;
- char *export = NULL;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
QIOChannelSocket *sioc = NULL;
- SocketAddress *saddr;
- const char *tlscredsid;
+ SocketAddress *saddr = NULL;
QCryptoTLSCreds *tlscreds = NULL;
const char *hostname = NULL;
int ret = -EINVAL;
+ opts = qemu_opts_create(&nbd_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto error;
+ }
+
/* Pop the config into our state object. Exit if invalid. */
- saddr = nbd_config(s, options, &export, errp);
+ saddr = nbd_config(s, opts, errp);
if (!saddr) {
goto error;
}
- tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
- if (tlscredsid) {
- qdict_del(options, "tls-creds");
- tlscreds = nbd_get_tls_creds(tlscredsid, errp);
+ s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
+ if (s->tlscredsid) {
+ tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
if (!tlscreds) {
goto error;
}
@@ -335,7 +374,7 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
}
/* NBD handshake */
- ret = nbd_client_init(bs, sioc, export,
+ ret = nbd_client_init(bs, sioc, s->export,
tlscreds, hostname, errp);
error:
if (sioc) {
@@ -344,42 +383,18 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
if (tlscreds) {
object_unref(OBJECT(tlscreds));
}
- qapi_free_SocketAddress(saddr);
- g_free(export);
- return ret;
-}
-
-static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
-}
-
-static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov, int flags)
-{
- int ret;
-
- ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags);
if (ret < 0) {
- return ret;
- }
-
- /* The flag wasn't sent to the server, so we need to emulate it with an
- * explicit flush */
- if (flags & BDRV_REQ_FUA) {
- ret = nbd_client_co_flush(bs);
+ g_free(s->path);
+ g_free(s->host);
+ g_free(s->port);
+ g_free(s->export);
+ g_free(s->tlscredsid);
}
-
+ qapi_free_SocketAddress(saddr);
+ qemu_opts_del(opts);
return ret;
}
-static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
-}
-
static int nbd_co_flush(BlockDriverState *bs)
{
return nbd_client_co_flush(bs);
@@ -387,19 +402,21 @@ static int nbd_co_flush(BlockDriverState *bs)
static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
- bs->bl.max_discard = UINT32_MAX >> BDRV_SECTOR_BITS;
- bs->bl.max_transfer_length = UINT32_MAX >> BDRV_SECTOR_BITS;
-}
-
-static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
-{
- return nbd_client_co_discard(bs, sector_num, nb_sectors);
+ bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+ bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}
static void nbd_close(BlockDriverState *bs)
{
+ BDRVNBDState *s = bs->opaque;
+
nbd_client_close(bs);
+
+ g_free(s->path);
+ g_free(s->host);
+ g_free(s->port);
+ g_free(s->export);
+ g_free(s->tlscredsid);
}
static int64_t nbd_getlength(BlockDriverState *bs)
@@ -422,48 +439,45 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
{
+ BDRVNBDState *s = bs->opaque;
QDict *opts = qdict_new();
- const char *path = qdict_get_try_str(options, "path");
- const char *host = qdict_get_try_str(options, "host");
- const char *port = qdict_get_try_str(options, "port");
- const char *export = qdict_get_try_str(options, "export");
- const char *tlscreds = qdict_get_try_str(options, "tls-creds");
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
- if (path && export) {
+ if (s->path && s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd+unix:///%s?socket=%s", export, path);
- } else if (path && !export) {
+ "nbd+unix:///%s?socket=%s", s->export, s->path);
+ } else if (s->path && !s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd+unix://?socket=%s", path);
- } else if (!path && export && port) {
+ "nbd+unix://?socket=%s", s->path);
+ } else if (!s->path && s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s:%s/%s", host, port, export);
- } else if (!path && export && !port) {
+ "nbd://%s:%s/%s", s->host, s->port, s->export);
+ } else if (!s->path && s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s/%s", host, export);
- } else if (!path && !export && port) {
+ "nbd://%s/%s", s->host, s->export);
+ } else if (!s->path && !s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s:%s", host, port);
- } else if (!path && !export && !port) {
+ "nbd://%s:%s", s->host, s->port);
+ } else if (!s->path && !s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s", host);
+ "nbd://%s", s->host);
}
- if (path) {
- qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
- } else if (port) {
- qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
- qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
+ if (s->path) {
+ qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
+ } else if (s->port) {
+ qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
+ qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
} else {
- qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
+ qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
}
- if (export) {
- qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
+ if (s->export) {
+ qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
}
- if (tlscreds) {
- qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
+ if (s->tlscredsid) {
+ qdict_put_obj(opts, "tls-creds",
+ QOBJECT(qstring_from_str(s->tlscredsid)));
}
bs->full_open_options = opts;
@@ -475,13 +489,11 @@ static BlockDriver bdrv_nbd = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_co_writev_flags = nbd_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
+ .bdrv_co_preadv = nbd_client_co_preadv,
+ .bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
+ .bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
@@ -495,13 +507,11 @@ static BlockDriver bdrv_nbd_tcp = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_co_writev_flags = nbd_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
+ .bdrv_co_preadv = nbd_client_co_preadv,
+ .bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
+ .bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
@@ -515,13 +525,11 @@ static BlockDriver bdrv_nbd_unix = {
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_co_writev_flags = nbd_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
+ .bdrv_co_preadv = nbd_client_co_preadv,
+ .bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
+ .bdrv_co_pdiscard = nbd_client_co_pdiscard,
.bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
diff --git a/block/nfs.c b/block/nfs.c
index 9f51cc3f1..8602a4421 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -1,7 +1,7 @@
/*
* QEMU Block driver for native access to files on NFS shares
*
- * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2014-2016 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -38,6 +38,7 @@
#include <nfsc/libnfs.h>
#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
+#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
#define QEMU_NFS_MAX_DEBUG_LEVEL 2
typedef struct NFSClient {
@@ -47,6 +48,7 @@ typedef struct NFSClient {
bool has_zero_init;
AioContext *aio_context;
blkcnt_t st_blocks;
+ bool cache_used;
} NFSClient;
typedef struct NFSRPC {
@@ -102,7 +104,7 @@ static void nfs_co_generic_bh_cb(void *opaque)
NFSRPC *task = opaque;
task->complete = 1;
qemu_bh_delete(task->bh);
- qemu_coroutine_enter(task->co, NULL);
+ qemu_coroutine_enter(task->co);
}
static void
@@ -278,7 +280,7 @@ static void nfs_file_close(BlockDriverState *bs)
}
static int64_t nfs_client_open(NFSClient *client, const char *filename,
- int flags, Error **errp)
+ int flags, Error **errp, int open_flags)
{
int ret = -EINVAL, i;
struct stat st;
@@ -330,12 +332,38 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
nfs_set_tcp_syncnt(client->context, val);
#ifdef LIBNFS_FEATURE_READAHEAD
} else if (!strcmp(qp->p[i].name, "readahead")) {
+ if (open_flags & BDRV_O_NOCACHE) {
+ error_setg(errp, "Cannot enable NFS readahead "
+ "if cache.direct = on");
+ goto fail;
+ }
if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
error_report("NFS Warning: Truncating NFS readahead"
" size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
val = QEMU_NFS_MAX_READAHEAD_SIZE;
}
nfs_set_readahead(client->context, val);
+#ifdef LIBNFS_FEATURE_PAGECACHE
+ nfs_set_pagecache_ttl(client->context, 0);
+#endif
+ client->cache_used = true;
+#endif
+#ifdef LIBNFS_FEATURE_PAGECACHE
+ nfs_set_pagecache_ttl(client->context, 0);
+ } else if (!strcmp(qp->p[i].name, "pagecache")) {
+ if (open_flags & BDRV_O_NOCACHE) {
+ error_setg(errp, "Cannot enable NFS pagecache "
+ "if cache.direct = on");
+ goto fail;
+ }
+ if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) {
+ error_report("NFS Warning: Truncating NFS pagecache"
+ " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
+ val = QEMU_NFS_MAX_PAGECACHE_SIZE;
+ }
+ nfs_set_pagecache(client->context, val);
+ nfs_set_pagecache_ttl(client->context, 0);
+ client->cache_used = true;
#endif
#ifdef LIBNFS_FEATURE_DEBUG
} else if (!strcmp(qp->p[i].name, "debug")) {
@@ -418,7 +446,7 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
}
ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
(flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
- errp);
+ errp, bs->open_flags);
if (ret < 0) {
goto out;
}
@@ -454,7 +482,7 @@ static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
- ret = nfs_client_open(client, url, O_CREAT, errp);
+ ret = nfs_client_open(client, url, O_CREAT, errp, 0);
if (ret < 0) {
goto out;
}
@@ -516,6 +544,12 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
return -EACCES;
}
+ if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) {
+ error_setg(errp, "Cannot disable cache if libnfs readahead or"
+ " pagecache is enabled");
+ return -EINVAL;
+ }
+
/* Update cache for read-only reopens */
if (!(state->flags & BDRV_O_RDWR)) {
ret = nfs_fstat(client->context, client->fh, &st);
@@ -530,6 +564,15 @@ static int nfs_reopen_prepare(BDRVReopenState *state,
return 0;
}
+#ifdef LIBNFS_FEATURE_PAGECACHE
+static void nfs_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
+{
+ NFSClient *client = bs->opaque;
+ nfs_pagecache_invalidate(client->context, client->fh);
+}
+#endif
+
static BlockDriver bdrv_nfs = {
.format_name = "nfs",
.protocol_name = "nfs",
@@ -553,6 +596,10 @@ static BlockDriver bdrv_nfs = {
.bdrv_detach_aio_context = nfs_detach_aio_context,
.bdrv_attach_aio_context = nfs_attach_aio_context,
+
+#ifdef LIBNFS_FEATURE_PAGECACHE
+ .bdrv_invalidate_cache = nfs_invalidate_cache,
+#endif
};
static void nfs_block_init(void)
diff --git a/block/null.c b/block/null.c
index 396500bab..b511010ba 100644
--- a/block/null.c
+++ b/block/null.c
@@ -12,6 +12,8 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
#include "block/block_int.h"
#define NULL_OPT_LATENCY "latency-ns"
@@ -223,6 +225,20 @@ static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
}
}
+static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+ QINCREF(opts);
+ qdict_del(opts, "filename");
+
+ if (!qdict_size(opts)) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s://",
+ bs->drv->format_name);
+ }
+
+ qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
+ bs->full_open_options = opts;
+}
+
static BlockDriver bdrv_null_co = {
.format_name = "null-co",
.protocol_name = "null-co",
@@ -238,6 +254,8 @@ static BlockDriver bdrv_null_co = {
.bdrv_reopen_prepare = null_reopen_prepare,
.bdrv_co_get_block_status = null_co_get_block_status,
+
+ .bdrv_refresh_filename = null_refresh_filename,
};
static BlockDriver bdrv_null_aio = {
@@ -255,6 +273,8 @@ static BlockDriver bdrv_null_aio = {
.bdrv_reopen_prepare = null_reopen_prepare,
.bdrv_co_get_block_status = null_co_get_block_status,
+
+ .bdrv_refresh_filename = null_refresh_filename,
};
static void bdrv_null_init(void)
diff --git a/block/parallels.c b/block/parallels.c
index 324ed43ac..2ccefa7d8 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -33,6 +33,7 @@
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "qemu/bitmap.h"
#include "qapi/util.h"
@@ -42,6 +43,7 @@
#define HEADER_MAGIC2 "WithouFreSpacExt"
#define HEADER_VERSION 2
#define HEADER_INUSE_MAGIC (0x746F6E59)
+#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
@@ -203,13 +205,15 @@ static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
return -EINVAL;
}
- to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx;
+ to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
space = to_allocate * s->tracks;
if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
int ret;
space += s->prealloc_size;
if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
- ret = bdrv_write_zeroes(bs->file->bs, s->data_end, space, 0);
+ ret = bdrv_pwrite_zeroes(bs->file,
+ s->data_end << BDRV_SECTOR_BITS,
+ space << BDRV_SECTOR_BITS, 0);
} else {
ret = bdrv_truncate(bs->file->bs,
(s->data_end + space) << BDRV_SECTOR_BITS);
@@ -247,7 +251,7 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
if (off + to_write > s->header_size) {
to_write = s->header_size - off;
}
- ret = bdrv_pwrite(bs->file->bs, off, (uint8_t *)s->header + off,
+ ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
to_write);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
@@ -308,7 +312,7 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
- ret = bdrv_co_writev(bs->file->bs, position, n, &hd_qiov);
+ ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -348,7 +352,7 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
qemu_iovec_reset(&hd_qiov);
qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
- ret = bdrv_co_readv(bs->file->bs, position, n, &hd_qiov);
+ ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
if (ret < 0) {
break;
}
@@ -429,7 +433,7 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
}
if (flush_bat) {
- ret = bdrv_pwrite_sync(bs->file->bs, 0, s->header, s->header_size);
+ ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
if (ret < 0) {
res->check_errors++;
return ret;
@@ -472,6 +476,10 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
BDRV_SECTOR_SIZE);
cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
+ if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
+ error_propagate(errp, local_err);
+ return -E2BIG;
+ }
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -512,11 +520,12 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memset(tmp, 0, sizeof(tmp));
memcpy(tmp, &header, sizeof(header));
- ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0);
if (ret < 0) {
goto exit;
}
- ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
+ ret = blk_pwrite_zeroes(file, BDRV_SECTOR_SIZE,
+ (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
if (ret < 0) {
goto exit;
}
@@ -559,7 +568,7 @@ static int parallels_update_header(BlockDriverState *bs)
if (size > s->header_size) {
size = s->header_size;
}
- return bdrv_pwrite_sync(bs->file->bs, 0, s->header, size);
+ return bdrv_pwrite_sync(bs->file, 0, s->header, size);
}
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
@@ -572,7 +581,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
char *buf;
- ret = bdrv_pread(bs->file->bs, 0, &ph, sizeof(ph));
+ ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
if (ret < 0) {
goto fail;
}
@@ -627,7 +636,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->header_size = size;
}
- ret = bdrv_pread(bs->file->bs, 0, s->header, s->header_size);
+ ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
if (ret < 0) {
goto fail;
}
diff --git a/block/qapi.c b/block/qapi.c
index c5f6ba643..6f947e3e6 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -67,10 +67,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
info->detect_zeroes = bs->detect_zeroes;
- if (bs->throttle_state) {
+ if (blk && blk_get_public(blk)->throttle_state) {
ThrottleConfig cfg;
- throttle_group_get_config(bs, &cfg);
+ throttle_group_get_config(blk, &cfg);
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
@@ -118,7 +118,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->iops_size = cfg.op_size;
info->has_group = true;
- info->group = g_strdup(throttle_group_get_name(bs));
+ info->group = g_strdup(throttle_group_get_name(blk));
}
info->write_threshold = bdrv_write_threshold_get(bs);
@@ -690,16 +690,15 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
ImageInfoSpecific *info_spec)
{
- QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
+ Visitor *v = qmp_output_visitor_new(&obj);
- visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
- &error_abort);
- obj = qmp_output_get_qobject(ov);
+ visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
+ visit_complete(v, &obj);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
- qmp_output_visitor_cleanup(ov);
+ visit_free(v);
}
void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
diff --git a/block/qcow.c b/block/qcow.c
index 60ddb12ec..6f9b2e2d2 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -28,6 +28,7 @@
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
#include "crypto/cipher.h"
@@ -104,7 +105,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
int ret;
QCowHeader header;
- ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
goto fail;
}
@@ -161,13 +162,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
if (s->crypt_method_header) {
if (bdrv_uses_whitelist() &&
s->crypt_method_header == QCOW_CRYPT_AES) {
- error_report("qcow built-in AES encryption is deprecated");
- error_printf("Support for it will be removed in a future release.\n"
- "You can use 'qemu-img convert' to switch to an\n"
- "unencrypted qcow image, or a LUKS raw image.\n");
+ error_setg(errp,
+ "Use of AES-CBC encrypted qcow images is no longer "
+ "supported in system emulators");
+ error_append_hint(errp,
+ "You can use 'qemu-img convert' to convert your "
+ "image to an alternative supported format, such "
+ "as unencrypted qcow, or raw with the LUKS "
+ "format instead.\n");
+ ret = -ENOSYS;
+ goto fail;
}
- bs->encrypted = 1;
+ bs->encrypted = true;
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
@@ -201,7 +208,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
+ ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -232,7 +239,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
+ ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
goto fail;
@@ -383,7 +390,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update the L1 entry */
s->l1_table[l1_index] = l2_offset;
tmp = cpu_to_be64(l2_offset);
- if (bdrv_pwrite_sync(bs->file->bs,
+ if (bdrv_pwrite_sync(bs->file,
s->l1_table_offset + l1_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
@@ -413,11 +420,11 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
l2_table = s->l2_cache + (min_index << s->l2_bits);
if (new_l2_table) {
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
- if (bdrv_pwrite_sync(bs->file->bs, l2_offset, l2_table,
+ if (bdrv_pwrite_sync(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
- if (bdrv_pread(bs->file->bs, l2_offset, l2_table,
+ if (bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
@@ -443,7 +450,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
- if (bdrv_pwrite(bs->file->bs, cluster_offset, s->cluster_cache,
+ if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
s->cluster_size) !=
s->cluster_size)
return -1;
@@ -473,7 +480,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
errno = EIO;
return -1;
}
- if (bdrv_pwrite(bs->file->bs,
+ if (bdrv_pwrite(bs->file,
cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
@@ -488,7 +495,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
/* update L2 table */
tmp = cpu_to_be64(cluster_offset);
l2_table[l2_index] = tmp;
- if (bdrv_pwrite_sync(bs->file->bs, l2_offset + l2_index * sizeof(tmp),
+ if (bdrv_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
&tmp, sizeof(tmp)) < 0)
return 0;
}
@@ -558,7 +565,7 @@ static int decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
if (s->cluster_cache_offset != coffset) {
csize = cluster_offset >> (63 - s->cluster_bits);
csize &= (s->cluster_size - 1);
- ret = bdrv_pread(bs->file->bs, coffset, s->cluster_data, csize);
+ ret = bdrv_pread(bs->file, coffset, s->cluster_data, csize);
if (ret != csize)
return -1;
if (decompress_buffer(s->cluster_cache, s->cluster_size,
@@ -612,8 +619,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing->bs, sector_num,
- n, &hd_qiov);
+ ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
@@ -637,7 +643,7 @@ static coroutine_fn int qcow_co_readv(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file->bs,
+ ret = bdrv_co_readv(bs->file,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -739,7 +745,7 @@ static coroutine_fn int qcow_co_writev(BlockDriverState *bs, int64_t sector_num,
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file->bs,
+ ret = bdrv_co_writev(bs->file,
(cluster_offset >> 9) + index_in_cluster,
n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -853,24 +859,24 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* write all the data */
- ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
+ ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
ret = blk_pwrite(qcow_blk, sizeof(header),
- backing_file, backing_filename_len);
+ backing_file, backing_filename_len, 0);
if (ret != backing_filename_len) {
goto exit;
}
}
tmp = g_malloc0(BDRV_SECTOR_SIZE);
- for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
- BDRV_SECTOR_SIZE); i++) {
- ret = blk_pwrite(qcow_blk, header_size +
- BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ for (i = 0; i < DIV_ROUND_UP(sizeof(uint64_t) * l1_size, BDRV_SECTOR_SIZE);
+ i++) {
+ ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+ tmp, BDRV_SECTOR_SIZE, 0);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
goto exit;
@@ -893,7 +899,7 @@ static int qcow_make_empty(BlockDriverState *bs)
int ret;
memset(s->l1_table, 0, l1_length);
- if (bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, s->l1_table,
+ if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
@@ -907,6 +913,49 @@ static int qcow_make_empty(BlockDriverState *bs)
return 0;
}
+typedef struct QcowWriteCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ const uint8_t *buf;
+ int nb_sectors;
+ int ret;
+} QcowWriteCo;
+
+static void qcow_write_co_entry(void *opaque)
+{
+ QcowWriteCo *co = opaque;
+ QEMUIOVector qiov;
+
+ struct iovec iov = (struct iovec) {
+ .iov_base = (uint8_t*) co->buf,
+ .iov_len = co->nb_sectors * BDRV_SECTOR_SIZE,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ co->ret = qcow_co_writev(co->bs, co->sector_num, co->nb_sectors, &qiov);
+}
+
+/* Wrapper for non-coroutine contexts */
+static int qcow_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ Coroutine *co;
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ QcowWriteCo data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS,
+ };
+ co = qemu_coroutine_create(qcow_write_co_entry, &data);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
@@ -934,7 +983,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
return ret;
}
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+ out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -963,7 +1012,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ ret = qcow_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
@@ -976,7 +1025,7 @@ static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
}
cluster_offset &= s->cluster_offset_mask;
- ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
if (ret < 0) {
goto fail;
}
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 0fe8edae4..6eaefeddc 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -24,11 +24,6 @@
/* Needed for CONFIG_MADVISE */
#include "qemu/osdep.h"
-
-#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
-#include <sys/mman.h>
-#endif
-
#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
@@ -215,7 +210,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
- ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
+ ret = bdrv_pwrite(bs->file, c->entries[i].offset,
qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
if (ret < 0) {
return ret;
@@ -226,7 +221,7 @@ static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
return 0;
}
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
{
BDRVQcow2State *s = bs->opaque;
int result = 0;
@@ -242,8 +237,15 @@ int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
}
}
+ return result;
+}
+
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int result = qcow2_cache_write(bs, c);
+
if (result == 0) {
- ret = bdrv_flush(bs->file->bs);
+ int ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
result = ret;
}
@@ -355,7 +357,7 @@ static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
- ret = bdrv_pread(bs->file->bs, offset,
+ ret = bdrv_pread(bs->file, offset,
qcow2_cache_get_table_addr(bs, c, i),
s->cluster_size);
if (ret < 0) {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 31ecc1030..f94183529 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -29,6 +29,7 @@
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
+#include "qemu/bswap.h"
#include "trace.h"
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
@@ -64,7 +65,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
}
}
- if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
+ QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
+ if (new_l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
return -EFBIG;
}
@@ -107,7 +109,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+ ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
new_l1_table, new_l1_size2);
if (ret < 0)
goto fail;
@@ -116,9 +118,9 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
/* set new table */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
- cpu_to_be32w((uint32_t*)data, new_l1_size);
+ stl_be_p(data, new_l1_size);
stq_be_p(data + 4, new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
data, sizeof(data));
if (ret < 0) {
goto fail;
@@ -153,11 +155,9 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
uint64_t **l2_table)
{
BDRVQcow2State *s = bs->opaque;
- int ret;
-
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
- return ret;
+ return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+ (void **)l2_table);
}
/*
@@ -186,7 +186,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file->bs,
+ ret = bdrv_pwrite_sync(bs->file,
s->l1_table_offset + 8 * l1_start_index,
buf, sizeof(buf));
if (ret < 0) {
@@ -389,22 +389,18 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
return 0;
}
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
- uint64_t start_sect,
- uint64_t cluster_offset,
- int n_start, int n_end)
+static int coroutine_fn do_perform_cow(BlockDriverState *bs,
+ uint64_t src_cluster_offset,
+ uint64_t cluster_offset,
+ int offset_in_cluster,
+ int bytes)
{
BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
- int n, ret;
-
- n = n_end - n_start;
- if (n <= 0) {
- return 0;
- }
+ int ret;
- iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_len = bytes;
iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
if (iov.iov_base == NULL) {
return -ENOMEM;
@@ -423,17 +419,21 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
*/
- ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+ ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
+ bytes, &qiov, 0);
if (ret < 0) {
goto out;
}
if (bs->encrypted) {
Error *err = NULL;
+ int64_t sector = (cluster_offset + offset_in_cluster)
+ >> BDRV_SECTOR_BITS;
assert(s->cipher);
- if (qcow2_encrypt_sectors(s, start_sect + n_start,
- iov.iov_base, iov.iov_base, n,
- true, &err) < 0) {
+ assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
+ assert((bytes & ~BDRV_SECTOR_MASK) == 0);
+ if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
+ bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
ret = -EIO;
error_free(err);
goto out;
@@ -441,14 +441,14 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
}
ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
+ cluster_offset + offset_in_cluster, bytes);
if (ret < 0) {
goto out;
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
- &qiov);
+ ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
+ bytes, &qiov, 0);
if (ret < 0) {
goto out;
}
@@ -463,47 +463,43 @@ out:
/*
* get_cluster_offset
*
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
+ * For a given offset of the virtual disk, find the cluster type and offset in
+ * the qcow2 file. The offset is stored in *cluster_offset.
*
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
+ * On entry, *bytes is the maximum number of contiguous bytes starting at
+ * offset that we are interested in.
*
- * on exit, *num is the number of contiguous sectors we can read.
+ * On exit, *bytes is the number of bytes starting at offset that have the same
+ * cluster type and (if applicable) are stored contiguously in the image file.
+ * Compressed clusters are always returned one by one.
*
* Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
* cases.
*/
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset)
+ unsigned int *bytes, uint64_t *cluster_offset)
{
BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
- unsigned int index_in_cluster, nb_clusters;
- uint64_t nb_available, nb_needed;
+ unsigned int offset_in_cluster;
+ uint64_t bytes_available, bytes_needed, nb_clusters;
int ret;
- index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
- nb_needed = *num + index_in_cluster;
+ offset_in_cluster = offset_into_cluster(s, offset);
+ bytes_needed = (uint64_t) *bytes + offset_in_cluster;
l1_bits = s->l2_bits + s->cluster_bits;
- /* compute how many bytes there are between the offset and
- * the end of the l1 entry
- */
-
- nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
-
- /* compute the number of available sectors */
+ /* compute how many bytes there are between the start of the cluster
+ * containing offset and the end of the l1 entry */
+ bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
+ + offset_in_cluster;
- nb_available = (nb_available >> 9) + index_in_cluster;
-
- if (nb_needed > nb_available) {
- nb_needed = nb_available;
+ if (bytes_needed > bytes_available) {
+ bytes_needed = bytes_available;
}
- assert(nb_needed <= INT_MAX);
*cluster_offset = 0;
@@ -540,8 +536,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
- /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
- nb_clusters = size_to_clusters(s, nb_needed << 9);
+ nb_clusters = size_to_clusters(s, bytes_needed);
+ /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
+ * integers; the minimum cluster size is 512, so this assertion is always
+ * true */
+ assert(nb_clusters <= INT_MAX);
ret = qcow2_get_cluster_type(*cluster_offset);
switch (ret) {
@@ -588,13 +587,18 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- nb_available = (c * s->cluster_sectors);
+ bytes_available = (int64_t)c * s->cluster_size;
out:
- if (nb_available > nb_needed)
- nb_available = nb_needed;
+ if (bytes_available > bytes_needed) {
+ bytes_available = bytes_needed;
+ }
- *num = nb_available - index_in_cluster;
+ /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
+ * subtracting offset_in_cluster will therefore definitely yield something
+ * not exceeding UINT_MAX */
+ assert(bytes_available - offset_in_cluster <= UINT_MAX);
+ *bytes = bytes_available - offset_in_cluster;
return ret;
@@ -740,14 +744,12 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
BDRVQcow2State *s = bs->opaque;
int ret;
- if (r->nb_sectors == 0) {
+ if (r->nb_bytes == 0) {
return 0;
}
qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
- r->offset / BDRV_SECTOR_SIZE,
- r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+ ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
@@ -809,13 +811,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
assert(l2_index + m->nb_clusters <= s->l2_size);
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
- * each write allocates separate cluster and writes data concurrently.
- * The first one to complete updates l2 table with pointer to its
- * cluster the second one has to do RMW (which is done above by
- * copy_sectors()), update l2 table with its cluster pointer and free
- * old cluster. This is what this loop does */
- if(l2_table[l2_index + i] != 0)
+ * each write allocates separate cluster and writes data concurrently.
+ * The first one to complete updates l2 table with pointer to its
+ * cluster the second one has to do RMW (which is done above by
+ * perform_cow()), update l2 table with its cluster pointer and free
+ * old cluster. This is what this loop does */
+ if (l2_table[l2_index + i] != 0) {
old_cluster[j++] = l2_table[l2_index + i];
+ }
l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
@@ -1197,25 +1200,20 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
/*
* Save info needed for meta data update.
*
- * requested_sectors: Number of sectors from the start of the first
+ * requested_bytes: Number of bytes from the start of the first
* newly allocated cluster to the end of the (possibly shortened
* before) write request.
*
- * avail_sectors: Number of sectors from the start of the first
+ * avail_bytes: Number of bytes from the start of the first
* newly allocated to the end of the last newly allocated cluster.
*
- * nb_sectors: The number of sectors from the start of the first
+ * nb_bytes: The number of bytes from the start of the first
* newly allocated cluster to the end of the area that the write
* request actually writes to (excluding COW at the end)
*/
- int requested_sectors =
- (*bytes + offset_into_cluster(s, guest_offset))
- >> BDRV_SECTOR_BITS;
- int avail_sectors = nb_clusters
- << (s->cluster_bits - BDRV_SECTOR_BITS);
- int alloc_n_start = offset_into_cluster(s, guest_offset)
- >> BDRV_SECTOR_BITS;
- int nb_sectors = MIN(requested_sectors, avail_sectors);
+ uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
+ int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits);
+ int nb_bytes = MIN(requested_bytes, avail_bytes);
QCowL2Meta *old_m = *m;
*m = g_malloc0(sizeof(**m));
@@ -1226,23 +1224,21 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
.alloc_offset = alloc_cluster_offset,
.offset = start_of_cluster(s, guest_offset),
.nb_clusters = nb_clusters,
- .nb_available = nb_sectors,
.cow_start = {
.offset = 0,
- .nb_sectors = alloc_n_start,
+ .nb_bytes = offset_into_cluster(s, guest_offset),
},
.cow_end = {
- .offset = nb_sectors * BDRV_SECTOR_SIZE,
- .nb_sectors = avail_sectors - nb_sectors,
+ .offset = nb_bytes,
+ .nb_bytes = avail_bytes - nb_bytes,
},
};
qemu_co_queue_init(&(*m)->dependent_requests);
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
*host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
- *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- - offset_into_cluster(s, guest_offset));
+ *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
assert(*bytes != 0);
return 1;
@@ -1274,7 +1270,8 @@ fail:
* Return 0 on success and -errno in error cases
*/
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *host_offset, QCowL2Meta **m)
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m)
{
BDRVQcow2State *s = bs->opaque;
uint64_t start, remaining;
@@ -1282,13 +1279,11 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
uint64_t cur_bytes;
int ret;
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
-
- assert((offset & ~BDRV_SECTOR_MASK) == 0);
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);
again:
start = offset;
- remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
+ remaining = *bytes;
cluster_offset = 0;
*host_offset = 0;
cur_bytes = 0;
@@ -1374,8 +1369,8 @@ again:
}
}
- *num -= remaining >> BDRV_SECTOR_BITS;
- assert(*num > 0);
+ *bytes -= remaining;
+ assert(*bytes > 0);
assert(*host_offset != 0);
return 0;
@@ -1420,7 +1415,7 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+ ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
nb_csectors);
if (ret < 0) {
return ret;
@@ -1689,7 +1684,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
(void **)&l2_table);
} else {
/* load inactive L2 tables from disk */
- ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
(void *)l2_table, s->cluster_sectors);
}
if (ret < 0) {
@@ -1764,8 +1759,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
- s->cluster_sectors, 0);
+ ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1797,7 +1791,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
goto fail;
}
- ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
(void *)l2_table, s->cluster_sectors);
if (ret < 0) {
goto fail;
@@ -1867,12 +1861,12 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
}
for (i = 0; i < s->nb_snapshots; i++) {
- int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
- BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
+ int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size *
+ sizeof(uint64_t), BDRV_SECTOR_SIZE);
l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
- ret = bdrv_read(bs->file->bs,
+ ret = bdrv_read(bs->file,
s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
(void *)l1_table, l1_sectors);
if (ret < 0) {
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index ca6094ff5..cbfb3fe06 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -28,6 +28,7 @@
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/range.h"
+#include "qemu/bswap.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -103,7 +104,7 @@ int qcow2_refcount_init(BlockDriverState *bs)
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file->bs, s->refcount_table_offset,
+ ret = bdrv_pread(bs->file, s->refcount_table_offset,
s->refcount_table, refcount_table_size2);
if (ret < 0) {
goto fail;
@@ -217,13 +218,10 @@ static int load_refcount_block(BlockDriverState *bs,
void **refcount_block)
{
BDRVQcow2State *s = bs->opaque;
- int ret;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- refcount_block);
-
- return ret;
+ return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+ refcount_block);
}
/*
@@ -433,7 +431,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
if (refcount_table_index < s->refcount_table_size) {
uint64_t data64 = cpu_to_be64(new_block);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
- ret = bdrv_pwrite_sync(bs->file->bs,
+ ret = bdrv_pwrite_sync(bs->file,
s->refcount_table_offset + refcount_table_index * sizeof(uint64_t),
&data64, sizeof(data64));
if (ret < 0) {
@@ -489,14 +487,12 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint64_t table_clusters =
size_to_clusters(s, table_size * sizeof(uint64_t));
blocks_clusters = 1 +
- ((table_clusters + s->refcount_block_size - 1)
- / s->refcount_block_size);
+ DIV_ROUND_UP(table_clusters, s->refcount_block_size);
uint64_t meta_clusters = table_clusters + blocks_clusters;
last_table_size = table_size;
table_size = next_refcount_table_size(s, blocks_used +
- ((meta_clusters + s->refcount_block_size - 1)
- / s->refcount_block_size));
+ DIV_ROUND_UP(meta_clusters, s->refcount_block_size));
} while (last_table_size != table_size);
@@ -537,7 +533,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
/* Write refcount blocks to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS);
- ret = bdrv_pwrite_sync(bs->file->bs, meta_offset, new_blocks,
+ ret = bdrv_pwrite_sync(bs->file, meta_offset, new_blocks,
blocks_clusters * s->cluster_size);
g_free(new_blocks);
new_blocks = NULL;
@@ -551,7 +547,7 @@ static int alloc_refcount_block(BlockDriverState *bs,
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file->bs, table_offset, new_table,
+ ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
table_size * sizeof(uint64_t));
if (ret < 0) {
goto fail_table;
@@ -566,10 +562,10 @@ static int alloc_refcount_block(BlockDriverState *bs,
uint64_t d64;
uint32_t d32;
} data;
- cpu_to_be64w(&data.d64, table_offset);
- cpu_to_be32w(&data.d32, table_clusters);
+ data.d64 = cpu_to_be64(table_offset);
+ data.d32 = cpu_to_be32(table_clusters);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file->bs,
+ ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
&data, sizeof(data));
if (ret < 0) {
@@ -619,9 +615,7 @@ void qcow2_process_discards(BlockDriverState *bs, int ret)
/* Discard is optional, ignore the return value */
if (ret >= 0) {
- bdrv_discard(bs->file->bs,
- d->offset >> BDRV_SECTOR_BITS,
- d->bytes >> BDRV_SECTOR_BITS);
+ bdrv_pdiscard(bs->file->bs, d->offset, d->bytes);
}
g_free(d);
@@ -1074,7 +1068,7 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
}
l1_allocated = true;
- ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
goto fail;
}
@@ -1227,7 +1221,7 @@ fail:
cpu_to_be64s(&l1_table[i]);
}
- ret = bdrv_pwrite_sync(bs->file->bs, l1_table_offset,
+ ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
l1_table, l1_size2);
for (i = 0; i < l1_size; i++) {
@@ -1386,7 +1380,7 @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
l2_size = s->l2_size * sizeof(uint64_t);
l2_table = g_malloc(l2_size);
- ret = bdrv_pread(bs->file->bs, l2_offset, l2_table, l2_size);
+ ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
res->check_errors++;
@@ -1518,7 +1512,7 @@ static int check_refcounts_l1(BlockDriverState *bs,
res->check_errors++;
goto fail;
}
- ret = bdrv_pread(bs->file->bs, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
res->check_errors++;
@@ -1616,7 +1610,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
}
}
- ret = bdrv_pread(bs->file->bs, l2_offset, l2_table,
+ ret = bdrv_pread(bs->file, l2_offset, l2_table,
s->l2_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
@@ -1668,7 +1662,7 @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
goto fail;
}
- ret = bdrv_pwrite(bs->file->bs, l2_offset, l2_table,
+ ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
@@ -2102,7 +2096,7 @@ write_refblocks:
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
- ret = bdrv_write(bs->file->bs, refblock_offset / BDRV_SECTOR_SIZE,
+ ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
on_disk_refblock, s->cluster_sectors);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
@@ -2151,7 +2145,7 @@ write_refblocks:
}
assert(reftable_size < INT_MAX / sizeof(uint64_t));
- ret = bdrv_pwrite(bs->file->bs, reftable_offset, on_disk_reftable,
+ ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
reftable_size * sizeof(uint64_t));
if (ret < 0) {
fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
@@ -2159,12 +2153,11 @@ write_refblocks:
}
/* Enter new reftable into the image header */
- cpu_to_be64w(&reftable_offset_and_clusters.reftable_offset,
- reftable_offset);
- cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
- size_to_clusters(s, reftable_size * sizeof(uint64_t)));
- ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader,
- refcount_table_offset),
+ reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
+ reftable_offset_and_clusters.reftable_clusters =
+ cpu_to_be32(size_to_clusters(s, reftable_size * sizeof(uint64_t)));
+ ret = bdrv_pwrite_sync(bs->file,
+ offsetof(QCowHeader, refcount_table_offset),
&reftable_offset_and_clusters,
sizeof(reftable_offset_and_clusters));
if (ret < 0) {
@@ -2411,7 +2404,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file->bs, l1_ofs, l1, l1_sz2);
+ ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
if (ret < 0) {
g_free(l1);
return ret;
@@ -2564,7 +2557,7 @@ static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
return ret;
}
- ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size);
+ ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to write refblock");
return ret;
@@ -2834,7 +2827,7 @@ int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
cpu_to_be64s(&new_reftable[i]);
}
- ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable,
+ ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
new_reftable_size * sizeof(uint64_t));
for (i = 0; i < new_reftable_size; i++) {
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 5f4a17e47..032424322 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -26,6 +26,7 @@
#include "qapi/error.h"
#include "block/block_int.h"
#include "block/qcow2.h"
+#include "qemu/bswap.h"
#include "qemu/error-report.h"
#include "qemu/cutils.h"
@@ -66,7 +67,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
for(i = 0; i < s->nb_snapshots; i++) {
/* Read statically sized part of the snapshot header */
offset = align_offset(offset, 8);
- ret = bdrv_pread(bs->file->bs, offset, &h, sizeof(h));
+ ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
@@ -85,7 +86,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
name_size = be16_to_cpu(h.name_size);
/* Read extra data */
- ret = bdrv_pread(bs->file->bs, offset, &extra,
+ ret = bdrv_pread(bs->file, offset, &extra,
MIN(sizeof(extra), extra_data_size));
if (ret < 0) {
goto fail;
@@ -104,7 +105,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot ID */
sn->id_str = g_malloc(id_str_size + 1);
- ret = bdrv_pread(bs->file->bs, offset, sn->id_str, id_str_size);
+ ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
@@ -113,7 +114,7 @@ int qcow2_read_snapshots(BlockDriverState *bs)
/* Read snapshot name */
sn->name = g_malloc(name_size + 1);
- ret = bdrv_pread(bs->file->bs, offset, sn->name, name_size);
+ ret = bdrv_pread(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -216,25 +217,25 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
h.name_size = cpu_to_be16(name_size);
offset = align_offset(offset, 8);
- ret = bdrv_pwrite(bs->file->bs, offset, &h, sizeof(h));
+ ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
if (ret < 0) {
goto fail;
}
offset += sizeof(h);
- ret = bdrv_pwrite(bs->file->bs, offset, &extra, sizeof(extra));
+ ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
if (ret < 0) {
goto fail;
}
offset += sizeof(extra);
- ret = bdrv_pwrite(bs->file->bs, offset, sn->id_str, id_str_size);
+ ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
if (ret < 0) {
goto fail;
}
offset += id_str_size;
- ret = bdrv_pwrite(bs->file->bs, offset, sn->name, name_size);
+ ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
if (ret < 0) {
goto fail;
}
@@ -256,7 +257,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
- ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, nb_snapshots),
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
&header_data, sizeof(header_data));
if (ret < 0) {
goto fail;
@@ -398,7 +399,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
goto fail;
}
- ret = bdrv_pwrite(bs->file->bs, sn->l1_table_offset, l1_table,
+ ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
goto fail;
@@ -511,7 +512,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
- ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+ ret = bdrv_pread(bs->file, sn->l1_table_offset,
sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
@@ -529,7 +530,7 @@ int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
goto fail;
}
- ret = bdrv_pwrite_sync(bs->file->bs, s->l1_table_offset, sn_l1_table,
+ ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
cur_l1_bytes);
if (ret < 0) {
goto fail;
@@ -715,7 +716,7 @@ int qcow2_snapshot_load_tmp(BlockDriverState *bs,
return -ENOMEM;
}
- ret = bdrv_pread(bs->file->bs, sn->l1_table_offset,
+ ret = bdrv_pread(bs->file, sn->l1_table_offset,
new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
diff --git a/block/qcow2.c b/block/qcow2.c
index 470734be9..91ef4dfef 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -36,6 +36,7 @@
#include "trace.h"
#include "qemu/option_int.h"
#include "qemu/cutils.h"
+#include "qemu/bswap.h"
/*
Differences with QCOW:
@@ -106,7 +107,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
printf("attempting to read extended header in offset %lu\n", offset);
#endif
- ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
+ ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
if (ret < 0) {
error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
"pread fail from offset %" PRIu64, offset);
@@ -134,7 +135,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
sizeof(bs->backing_format));
return 2;
}
- ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
+ ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
"Could not read format name");
@@ -150,7 +151,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
case QCOW2_EXT_MAGIC_FEATURE_TABLE:
if (p_feature_table != NULL) {
void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
- ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
+ ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
"Could not read table");
@@ -171,7 +172,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uext->len = ext.len;
QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
- ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
+ ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
if (ret < 0) {
error_setg_errno(errp, -ret, "ERROR: unknown extension: "
"Could not read data");
@@ -248,7 +249,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
}
val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
- ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
+ ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
&val, sizeof(val));
if (ret < 0) {
return ret;
@@ -816,7 +817,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
uint64_t ext_end;
uint64_t l1_vm_state_index;
- ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
+ ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read qcow2 header");
goto fail;
@@ -891,7 +892,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (header.header_length > sizeof(header)) {
s->unknown_header_fields_size = header.header_length - sizeof(header);
s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
- ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
+ ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
s->unknown_header_fields_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -967,13 +968,19 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
if (s->crypt_method_header) {
if (bdrv_uses_whitelist() &&
s->crypt_method_header == QCOW_CRYPT_AES) {
- error_report("qcow2 built-in AES encryption is deprecated");
- error_printf("Support for it will be removed in a future release.\n"
- "You can use 'qemu-img convert' to switch to an\n"
- "unencrypted qcow2 image, or a LUKS raw image.\n");
+ error_setg(errp,
+ "Use of AES-CBC encrypted qcow2 images is no longer "
+ "supported in system emulators");
+ error_append_hint(errp,
+ "You can use 'qemu-img convert' to convert your "
+ "image to an alternative supported format, such "
+ "as unencrypted qcow2, or raw with the LUKS "
+ "format instead.\n");
+ ret = -ENOSYS;
+ goto fail;
}
- bs->encrypted = 1;
+ bs->encrypted = true;
}
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1059,7 +1066,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
+ ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
s->l1_size * sizeof(uint64_t));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -1115,7 +1122,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto fail;
}
- ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
+ ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->backing_file, len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -1192,7 +1199,11 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcow2State *s = bs->opaque;
- bs->bl.write_zeroes_alignment = s->cluster_sectors;
+ if (bs->encrypted) {
+ /* Encryption works on a sector granularity */
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+ }
+ bs->bl.pwrite_zeroes_alignment = s->cluster_size;
}
static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -1330,16 +1341,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
+ unsigned int bytes;
int64_t status = 0;
- *pnum = nb_sectors;
+ bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+ &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
+ *pnum = bytes >> BDRV_SECTOR_BITS;
+
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
!s->cipher) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1357,28 +1372,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
/* handle reading after the end of the backing file */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
+ int64_t offset, int bytes)
{
+ uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
int n1;
- if ((sector_num + nb_sectors) <= bs->total_sectors)
- return nb_sectors;
- if (sector_num >= bs->total_sectors)
+
+ if ((offset + bytes) <= bs_size) {
+ return bytes;
+ }
+
+ if (offset >= bs_size) {
n1 = 0;
- else
- n1 = bs->total_sectors - sector_num;
+ } else {
+ n1 = bs_size - offset;
+ }
- qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+ qemu_iovec_memset(qiov, n1, 0, bytes - n1);
return n1;
}
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
BDRVQcow2State *s = bs->opaque;
- int index_in_cluster, n1;
+ int offset_in_cluster, n1;
int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
@@ -1388,26 +1409,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
qemu_co_mutex_lock(&s->lock);
- while (remaining_sectors != 0) {
+ while (bytes != 0) {
/* prepare next request */
- cur_nr_sectors = remaining_sectors;
+ cur_bytes = MIN(bytes, INT_MAX);
if (s->cipher) {
- cur_nr_sectors = MIN(cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
- ret = qcow2_get_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
if (ret < 0) {
goto fail;
}
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ offset_in_cluster = offset_into_cluster(s, offset);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
@@ -1415,18 +1434,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
if (bs->backing) {
/* read from the base image */
n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
- sector_num, cur_nr_sectors);
+ offset, cur_bytes);
if (n1 > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, hd_qiov.niov);
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
- n1 * BDRV_SECTOR_SIZE);
+ qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing->bs, sector_num,
- n1, &local_qiov);
+ ret = bdrv_co_preadv(bs->backing, offset, n1,
+ &local_qiov, 0);
qemu_co_mutex_lock(&s->lock);
qemu_iovec_destroy(&local_qiov);
@@ -1437,12 +1455,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
} else {
/* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
}
break;
case QCOW2_CLUSTER_ZERO:
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
break;
case QCOW2_CLUSTER_COMPRESSED:
@@ -1453,8 +1471,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + index_in_cluster * 512,
- 512 * cur_nr_sectors);
+ s->cluster_cache + offset_in_cluster,
+ cur_bytes);
break;
case QCOW2_CLUSTER_NORMAL:
@@ -1481,34 +1499,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
}
}
- assert(cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- 512 * cur_nr_sectors);
+ qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file->bs,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
+ ret = bdrv_co_preadv(bs->file,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
if (bs->encrypted) {
assert(s->cipher);
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
Error *err = NULL;
- if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, false,
- &err) < 0) {
+ if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+ cluster_data, cluster_data,
+ cur_bytes >> BDRV_SECTOR_BITS,
+ false, &err) < 0) {
error_free(err);
ret = -EIO;
goto fail;
}
- qemu_iovec_from_buf(qiov, bytes_done,
- cluster_data, 512 * cur_nr_sectors);
+ qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
}
break;
@@ -1518,9 +1536,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
goto fail;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
}
ret = 0;
@@ -1533,23 +1551,21 @@ fail:
return ret;
}
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
BDRVQcow2State *s = bs->opaque;
- int index_in_cluster;
+ int offset_in_cluster;
int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset;
QEMUIOVector hd_qiov;
uint64_t bytes_done = 0;
uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
- trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
- remaining_sectors);
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
qemu_iovec_init(&hd_qiov, qiov->niov);
@@ -1557,22 +1573,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
qemu_co_mutex_lock(&s->lock);
- while (remaining_sectors != 0) {
+ while (bytes != 0) {
l2meta = NULL;
trace_qcow2_writev_start_part(qemu_coroutine_self());
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- cur_nr_sectors = remaining_sectors;
- if (bs->encrypted &&
- cur_nr_sectors >
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
- cur_nr_sectors =
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
+ offset_in_cluster = offset_into_cluster(s, offset);
+ cur_bytes = MIN(bytes, INT_MAX);
+ if (bs->encrypted) {
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+ - offset_in_cluster);
}
- ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset, &l2meta);
+ ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
+ &cluster_offset, &l2meta);
if (ret < 0) {
goto fail;
}
@@ -1580,8 +1595,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
assert((cluster_offset & 511) == 0);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
if (bs->encrypted) {
Error *err = NULL;
@@ -1600,8 +1614,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
- if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors,
+ if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+ cluster_data, cluster_data,
+ cur_bytes >>BDRV_SECTOR_BITS,
true, &err) < 0) {
error_free(err);
ret = -EIO;
@@ -1609,13 +1624,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
}
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- cur_nr_sectors * 512);
+ qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
- cur_nr_sectors * BDRV_SECTOR_SIZE);
+ cluster_offset + offset_in_cluster, cur_bytes);
if (ret < 0) {
goto fail;
}
@@ -1623,10 +1636,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
- (cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file->bs,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
+ cluster_offset + offset_in_cluster);
+ ret = bdrv_co_pwritev(bs->file,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
@@ -1652,10 +1665,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
l2meta = next;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
@@ -1757,13 +1770,6 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
qcow2_close(bs);
- bdrv_invalidate_cache(bs->file->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- bs->drv = NULL;
- return;
- }
-
memset(s, 0, sizeof(BDRVQcow2State));
options = qdict_clone_shallow(bs->options);
@@ -1970,7 +1976,7 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Write the new header */
- ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
+ ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -2004,19 +2010,19 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
static int preallocate(BlockDriverState *bs)
{
- uint64_t nb_sectors;
+ uint64_t bytes;
uint64_t offset;
uint64_t host_offset = 0;
- int num;
+ unsigned int cur_bytes;
int ret;
QCowL2Meta *meta;
- nb_sectors = bdrv_nb_sectors(bs);
+ bytes = bdrv_getlength(bs);
offset = 0;
- while (nb_sectors) {
- num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
- ret = qcow2_alloc_cluster_offset(bs, offset, &num,
+ while (bytes) {
+ cur_bytes = MIN(bytes, INT_MAX);
+ ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&host_offset, &meta);
if (ret < 0) {
return ret;
@@ -2042,8 +2048,8 @@ static int preallocate(BlockDriverState *bs)
/* TODO Preallocate data if requested */
- nb_sectors -= num;
- offset += num << BDRV_SECTOR_BITS;
+ bytes -= cur_bytes;
+ offset += cur_bytes;
}
/*
@@ -2052,11 +2058,9 @@ static int preallocate(BlockDriverState *bs)
* EOF). Extend the image to the last allocated sector.
*/
if (host_offset != 0) {
- uint8_t buf[BDRV_SECTOR_SIZE];
- memset(buf, 0, BDRV_SECTOR_SIZE);
- ret = bdrv_write(bs->file->bs,
- (host_offset >> BDRV_SECTOR_BITS) + num - 1,
- buf, 1);
+ uint8_t data = 0;
+ ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
+ &data, 1);
if (ret < 0) {
return ret;
}
@@ -2207,7 +2211,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
- ret = blk_pwrite(blk, 0, header, cluster_size);
+ ret = blk_pwrite(blk, 0, header, cluster_size, 0);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2217,7 +2221,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
- ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
+ ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
g_free(refcount_table);
if (ret < 0) {
@@ -2400,9 +2404,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
cluster_size, prealloc, opts, version, refcount_order,
&local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
finish:
g_free(backing_file);
@@ -2411,35 +2413,81 @@ finish:
return ret;
}
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+
+static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
+ uint32_t count)
+{
+ int nr;
+ BlockDriverState *file;
+ int64_t res;
+
+ if (!count) {
+ return true;
+ }
+ res = bdrv_get_block_status_above(bs, NULL, start, count,
+ &nr, &file);
+ return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count;
+}
+
+static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
int ret;
BDRVQcow2State *s = bs->opaque;
- /* Emulate misaligned zero writes */
- if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
- return -ENOTSUP;
+ uint32_t head = offset % s->cluster_size;
+ uint32_t tail = (offset + count) % s->cluster_size;
+
+ trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
+
+ if (head || tail) {
+ int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
+ uint64_t off;
+ unsigned int nr;
+
+ assert(head + count <= s->cluster_size);
+
+ /* check whether remainder of cluster already reads as zero */
+ if (!(is_zero_sectors(bs, cl_start,
+ DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) &&
+ is_zero_sectors(bs, (offset + count) >> BDRV_SECTOR_BITS,
+ DIV_ROUND_UP(-tail & (s->cluster_size - 1),
+ BDRV_SECTOR_SIZE)))) {
+ return -ENOTSUP;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+ /* Re-check under the lock: a write may have raced the test above */
+ offset = cl_start << BDRV_SECTOR_BITS;
+ count = s->cluster_size;
+ nr = s->cluster_size;
+ ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
+ if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
+ qemu_co_mutex_unlock(&s->lock);
+ return -ENOTSUP;
+ }
+ } else {
+ qemu_co_mutex_lock(&s->lock);
}
+ trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
+
/* Whatever is left can use real zero clusters */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
+ ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
{
int ret;
BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors, QCOW2_DISCARD_REQUEST, false);
+ ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
+ QCOW2_DISCARD_REQUEST, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -2475,7 +2523,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
/* write updated header.size */
offset = cpu_to_be64(offset);
- ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
&offset, sizeof(uint64_t));
if (ret < 0) {
return ret;
@@ -2485,6 +2533,51 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
return 0;
}
+typedef struct Qcow2WriteCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ const uint8_t *buf;
+ int nb_sectors;
+ int ret;
+} Qcow2WriteCo;
+
+static void qcow2_write_co_entry(void *opaque)
+{
+ Qcow2WriteCo *co = opaque;
+ QEMUIOVector qiov;
+ uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+ uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+ struct iovec iov = (struct iovec) {
+ .iov_base = (uint8_t*) co->buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ co->ret = qcow2_co_pwritev(co->bs, offset, bytes, &qiov, 0);
+}
+
+/* Wrapper for non-coroutine contexts */
+static int qcow2_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ Coroutine *co;
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ Qcow2WriteCo data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS,
+ };
+ co = qemu_coroutine_create(qcow2_write_co_entry, &data);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
@@ -2519,7 +2612,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
return ret;
}
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+ out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -2548,7 +2641,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ ret = qcow2_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
@@ -2567,7 +2660,7 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
- ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
+ ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
if (ret < 0) {
goto fail;
}
@@ -2616,8 +2709,8 @@ static int make_completely_empty(BlockDriverState *bs)
/* After this call, neither the in-memory nor the on-disk refcount
* information accurately describe the actual references */
- ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
- l1_clusters * s->cluster_sectors, 0);
+ ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
+ l1_clusters * s->cluster_size, 0);
if (ret < 0) {
goto fail_broken_refcounts;
}
@@ -2630,9 +2723,8 @@ static int make_completely_empty(BlockDriverState *bs)
* overwrite parts of the existing refcount and L1 table, which is not
* an issue because the dirty flag is set, complete data loss is in fact
* desired and partial data loss is consequently fine as well */
- ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
- (2 + l1_clusters) * s->cluster_size /
- BDRV_SECTOR_SIZE, 0);
+ ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
+ (2 + l1_clusters) * s->cluster_size, 0);
/* This call (even if it failed overall) may have overwritten on-disk
* refcount structures; in that case, the in-memory refcount information
* will probably differ from the on-disk information which makes the BDS
@@ -2647,10 +2739,10 @@ static int make_completely_empty(BlockDriverState *bs)
/* "Create" an empty reftable (one cluster) directly after the image
* header and an empty L1 table three clusters after the image header;
* the cluster between those two will be used as the first refblock */
- cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
- cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
- cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
- ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
+ l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
+ l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
+ l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2681,7 +2773,7 @@ static int make_completely_empty(BlockDriverState *bs)
/* Enter the first refblock into the reftable */
rt_entry = cpu_to_be64(2 * s->cluster_size);
- ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
+ ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
&rt_entry, sizeof(rt_entry));
if (ret < 0) {
goto fail_broken_refcounts;
@@ -2774,14 +2866,14 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ ret = qcow2_cache_write(bs, s->l2_table_cache);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
}
if (qcow2_need_accurate_refcounts(s)) {
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ ret = qcow2_cache_write(bs, s->refcount_block_cache);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -2861,36 +2953,20 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
BDRVQcow2State *s = bs->opaque;
- int64_t total_sectors = bs->total_sectors;
- bool zero_beyond_eof = bs->zero_beyond_eof;
- int ret;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- bs->zero_beyond_eof = false;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
- bs->zero_beyond_eof = zero_beyond_eof;
-
- /* bdrv_co_do_writev will have increased the total_sectors value to include
- * the VM state - the VM state is however not an actual part of the block
- * device, therefore, we need to restore the old value. */
- bs->total_sectors = total_sectors;
-
- return ret;
+ return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0);
}
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
+static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
{
BDRVQcow2State *s = bs->opaque;
- bool zero_beyond_eof = bs->zero_beyond_eof;
- int ret;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- bs->zero_beyond_eof = false;
- ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
- bs->zero_beyond_eof = zero_beyond_eof;
-
- return ret;
+ return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0);
}
/*
@@ -3329,12 +3405,12 @@ BlockDriver bdrv_qcow2 = {
.bdrv_co_get_block_status = qcow2_co_get_block_status,
.bdrv_set_key = qcow2_set_key,
- .bdrv_co_readv = qcow2_co_readv,
- .bdrv_co_writev = qcow2_co_writev,
+ .bdrv_co_preadv = qcow2_co_preadv,
+ .bdrv_co_pwritev = qcow2_co_pwritev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
- .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
- .bdrv_co_discard = qcow2_co_discard,
+ .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = qcow2_co_pdiscard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
.bdrv_make_empty = qcow2_make_empty,
diff --git a/block/qcow2.h b/block/qcow2.h
index a063a3c1a..b36a7bf8a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -302,8 +302,8 @@ typedef struct Qcow2COWRegion {
*/
uint64_t offset;
- /** Number of sectors to copy */
- int nb_sectors;
+ /** Number of bytes to copy */
+ int nb_bytes;
} Qcow2COWRegion;
/**
@@ -318,12 +318,6 @@ typedef struct QCowL2Meta
/** Host offset of the first newly allocated cluster */
uint64_t alloc_offset;
- /**
- * Number of sectors from the start of the first allocated cluster to
- * the end of the (possibly shortened) request
- */
- int nb_available;
-
/** Number of newly allocated clusters */
int nb_clusters;
@@ -471,8 +465,7 @@ static inline uint64_t l2meta_cow_start(QCowL2Meta *m)
static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
{
- return m->offset + m->cow_end.offset
- + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
+ return m->offset + m->cow_end.offset + m->cow_end.nb_bytes;
}
static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
@@ -544,9 +537,10 @@ int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
int nb_sectors, bool enc, Error **errp);
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset);
+ unsigned int *bytes, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *host_offset, QCowL2Meta **m);
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
@@ -583,6 +577,7 @@ int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
void *table);
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
diff --git a/block/qed-check.c b/block/qed-check.c
index 622f30897..dcd4f036b 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -234,8 +234,7 @@ int qed_check(BDRVQEDState *s, BdrvCheckResult *result, bool fix)
}
check.result->bfi.total_clusters =
- (s->header.image_size + s->header.cluster_size - 1) /
- s->header.cluster_size;
+ DIV_ROUND_UP(s->header.image_size, s->header.cluster_size);
ret = qed_check_l1_table(&check, s->l1_table);
if (ret == 0) {
/* Only check for leaks if entire image was scanned successfully */
diff --git a/block/qed-table.c b/block/qed-table.c
index 802945f5e..1a731dff5 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -16,6 +16,7 @@
#include "trace.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "qed.h"
+#include "qemu/bswap.h"
typedef struct {
GenericCB gencb;
@@ -64,7 +65,7 @@ static void qed_read_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
read_table_cb->iov.iov_len = s->header.cluster_size * s->header.table_size,
qemu_iovec_init_external(qiov, &read_table_cb->iov, 1);
- bdrv_aio_readv(s->bs->file->bs, offset / BDRV_SECTOR_SIZE, qiov,
+ bdrv_aio_readv(s->bs->file, offset / BDRV_SECTOR_SIZE, qiov,
qiov->size / BDRV_SECTOR_SIZE,
qed_read_table_cb, read_table_cb);
}
@@ -153,7 +154,7 @@ static void qed_write_table(BDRVQEDState *s, uint64_t offset, QEDTable *table,
/* Adjust for offset into table */
offset += start * sizeof(uint64_t);
- bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&write_table_cb->qiov,
write_table_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_write_table_cb, write_table_cb);
diff --git a/block/qed.c b/block/qed.c
index 0af52741d..426f3cb44 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -15,6 +15,7 @@
#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu/timer.h"
+#include "qemu/bswap.h"
#include "trace.h"
#include "qed.h"
#include "qapi/qmp/qerror.h"
@@ -85,7 +86,7 @@ int qed_write_header_sync(BDRVQEDState *s)
int ret;
qed_header_cpu_to_le(&s->header, &le);
- ret = bdrv_pwrite(s->bs->file->bs, 0, &le, sizeof(le));
+ ret = bdrv_pwrite(s->bs->file, 0, &le, sizeof(le));
if (ret != sizeof(le)) {
return ret;
}
@@ -122,7 +123,7 @@ static void qed_write_header_read_cb(void *opaque, int ret)
/* Update header */
qed_header_cpu_to_le(&s->header, (QEDHeader *)write_header_cb->buf);
- bdrv_aio_writev(s->bs->file->bs, 0, &write_header_cb->qiov,
+ bdrv_aio_writev(s->bs->file, 0, &write_header_cb->qiov,
write_header_cb->nsectors, qed_write_header_cb,
write_header_cb);
}
@@ -142,8 +143,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
* them, and write back.
*/
- int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
- BDRV_SECTOR_SIZE;
+ int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE);
size_t len = nsectors * BDRV_SECTOR_SIZE;
QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
cb, opaque);
@@ -155,7 +155,7 @@ static void qed_write_header(BDRVQEDState *s, BlockCompletionFunc cb,
write_header_cb->iov.iov_len = len;
qemu_iovec_init_external(&write_header_cb->qiov, &write_header_cb->iov, 1);
- bdrv_aio_readv(s->bs->file->bs, 0, &write_header_cb->qiov, nsectors,
+ bdrv_aio_readv(s->bs->file, 0, &write_header_cb->qiov, nsectors,
qed_write_header_read_cb, write_header_cb);
}
@@ -218,7 +218,7 @@ static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
*
* The string is NUL-terminated.
*/
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
+static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
char *buf, size_t buflen)
{
int ret;
@@ -389,7 +389,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
s->bs = bs;
QSIMPLEQ_INIT(&s->allocating_write_reqs);
- ret = bdrv_pread(bs->file->bs, 0, &le_header, sizeof(le_header));
+ ret = bdrv_pread(bs->file, 0, &le_header, sizeof(le_header));
if (ret < 0) {
return ret;
}
@@ -446,7 +446,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
return -EINVAL;
}
- ret = qed_read_string(bs->file->bs, s->header.backing_filename_offset,
+ ret = qed_read_string(bs->file, s->header.backing_filename_offset,
s->header.backing_filename_size, bs->backing_file,
sizeof(bs->backing_file));
if (ret < 0) {
@@ -517,7 +517,7 @@ static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
- bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
+ bs->bl.pwrite_zeroes_alignment = s->header.cluster_size;
}
/* We have nothing to do for QED reopen, stubs just return
@@ -601,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
qed_header_cpu_to_le(&header, &le_header);
- ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
+ ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header), 0);
if (ret < 0) {
goto out;
}
ret = blk_pwrite(blk, sizeof(le_header), backing_file,
- header.backing_filename_size);
+ header.backing_filename_size, 0);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
- ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
+ ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size, 0);
if (ret < 0) {
goto out;
}
@@ -708,7 +708,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
}
if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
+ qemu_coroutine_enter(cb->co);
}
}
@@ -800,7 +800,7 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing->bs, pos / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE,
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
@@ -837,7 +837,7 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
- bdrv_aio_writev(s->bs->file->bs, copy_cb->offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file, copy_cb->offset / BDRV_SECTOR_SIZE,
&copy_cb->qiov, copy_cb->qiov.size / BDRV_SECTOR_SIZE,
qed_copy_from_backing_file_cb, copy_cb);
}
@@ -1087,7 +1087,7 @@ static void qed_aio_write_main(void *opaque, int ret)
}
BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
- bdrv_aio_writev(s->bs->file->bs, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_writev(s->bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
next_fn, acb);
}
@@ -1319,7 +1319,7 @@ static void qed_aio_read_data(void *opaque, int ret,
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- bdrv_aio_readv(bs->file->bs, offset / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(bs->file, offset / BDRV_SECTOR_SIZE,
&acb->cur_qiov, acb->cur_qiov.size / BDRV_SECTOR_SIZE,
qed_aio_next_io, acb);
return;
@@ -1418,21 +1418,21 @@ typedef struct {
bool done;
} QEDWriteZeroesCB;
-static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
+static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
{
QEDWriteZeroesCB *cb = opaque;
cb->done = true;
cb->ret = ret;
if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
+ qemu_coroutine_enter(cb->co);
}
}
-static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- BdrvRequestFlags flags)
+static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset,
+ int count,
+ BdrvRequestFlags flags)
{
BlockAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
@@ -1440,25 +1440,22 @@ static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
QEMUIOVector qiov;
struct iovec iov;
- /* Refuse if there are untouched backing file sectors */
- if (bs->backing) {
- if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
+ /* Fall back if the request is not aligned */
+ if (qed_offset_into_cluster(s, offset) ||
+ qed_offset_into_cluster(s, count)) {
+ return -ENOTSUP;
}
/* Zero writes start without an I/O buffer. If a buffer becomes necessary
* then it will be allocated during request processing.
*/
- iov.iov_base = NULL,
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ iov.iov_base = NULL;
+ iov.iov_len = count;
qemu_iovec_init_external(&qiov, &iov, 1);
- blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
- qed_co_write_zeroes_cb, &cb,
+ blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov,
+ count >> BDRV_SECTOR_BITS,
+ qed_co_pwrite_zeroes_cb, &cb,
QED_AIOCB_WRITE | QED_AIOCB_ZERO);
if (!blockacb) {
return -EIO;
@@ -1578,7 +1575,7 @@ static int bdrv_qed_change_backing_file(BlockDriverState *bs,
}
/* Write new header */
- ret = bdrv_pwrite_sync(bs->file->bs, 0, buffer, buffer_len);
+ ret = bdrv_pwrite_sync(bs->file, 0, buffer, buffer_len);
g_free(buffer);
if (ret == 0) {
memcpy(&s->header, &new_header, sizeof(new_header));
@@ -1594,12 +1591,6 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_qed_close(bs);
- bdrv_invalidate_cache(bs->file->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
@@ -1669,7 +1660,7 @@ static BlockDriver bdrv_qed = {
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
- .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
diff --git a/block/quorum.c b/block/quorum.c
index da15465a9..9cf876fb3 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -14,6 +14,7 @@
*/
#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
@@ -67,6 +68,9 @@ typedef struct QuorumVotes {
typedef struct BDRVQuorumState {
BdrvChild **children; /* children BlockDriverStates */
int num_children; /* children count */
+ unsigned next_child_index; /* the index of the next child that should
+ * be added
+ */
int threshold; /* if less than threshold children reads gave the
* same result a quorum error occurs.
*/
@@ -379,7 +383,7 @@ static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- bdrv_aio_writev(s->children[item->index]->bs, acb->sector_num,
+ bdrv_aio_writev(s->children[item->index], acb->sector_num,
acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
acb);
}
@@ -656,7 +660,7 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+ acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
&acb->qcrs[i].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[i]);
}
@@ -674,7 +678,7 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
acb->qcrs[acb->child_iter].aiocb =
- bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+ bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num,
&acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
quorum_aio_cb, &acb->qcrs[acb->child_iter]);
@@ -715,7 +719,7 @@ static BlockAIOCB *quorum_aio_writev(BlockDriverState *bs,
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i]->bs, sector_num,
+ acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
qiov, nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}
@@ -747,21 +751,6 @@ static int64_t quorum_getlength(BlockDriverState *bs)
return result;
}
-static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
- BDRVQuorumState *s = bs->opaque;
- Error *local_err = NULL;
- int i;
-
- for (i = 0; i < s->num_children; i++) {
- bdrv_invalidate_cache(s->children[i]->bs, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
- }
-}
-
static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
@@ -898,9 +887,9 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
ret = -EINVAL;
goto exit;
}
- if (s->num_children < 2) {
+ if (s->num_children < 1) {
error_setg(&local_err,
- "Number of provided children must be greater than 1");
+ "Number of provided children must be 1 or more");
ret = -EINVAL;
goto exit;
}
@@ -964,6 +953,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
opened[i] = true;
}
+ s->next_child_index = s->num_children;
g_free(opened);
goto exit;
@@ -981,9 +971,7 @@ close_exit:
exit:
qemu_opts_del(opts);
/* propagate error */
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
@@ -999,25 +987,70 @@ static void quorum_close(BlockDriverState *bs)
g_free(s->children);
}
-static void quorum_detach_aio_context(BlockDriverState *bs)
+static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
+ Error **errp)
{
BDRVQuorumState *s = bs->opaque;
- int i;
+ BdrvChild *child;
+ char indexstr[32];
+ int ret;
- for (i = 0; i < s->num_children; i++) {
- bdrv_detach_aio_context(s->children[i]->bs);
+ assert(s->num_children <= INT_MAX / sizeof(BdrvChild *));
+ if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
+ s->next_child_index == UINT_MAX) {
+ error_setg(errp, "Too many children");
+ return;
}
+
+ ret = snprintf(indexstr, 32, "children.%u", s->next_child_index);
+ if (ret < 0 || ret >= 32) {
+ error_setg(errp, "cannot generate child name");
+ return;
+ }
+ s->next_child_index++;
+
+ bdrv_drained_begin(bs);
+
+ /* We can safely add the child now */
+ bdrv_ref(child_bs);
+ child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+ s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
+ s->children[s->num_children++] = child;
+
+ bdrv_drained_end(bs);
}
-static void quorum_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
+static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
+ Error **errp)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_attach_aio_context(s->children[i]->bs, new_context);
+ if (s->children[i] == child) {
+ break;
+ }
}
+
+ /* we have checked it in bdrv_del_child() */
+ assert(i < s->num_children);
+
+ if (s->num_children <= s->threshold) {
+ error_setg(errp,
+ "The number of children cannot be lower than the vote threshold %d",
+ s->threshold);
+ return;
+ }
+
+ bdrv_drained_begin(bs);
+
+ /* We can safely remove this child now */
+ memmove(&s->children[i], &s->children[i + 1],
+ (s->num_children - i - 1) * sizeof(BdrvChild *));
+ s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+ bdrv_unref_child(bs, child);
+
+ bdrv_drained_end(bs);
}
static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
@@ -1070,10 +1103,9 @@ static BlockDriver bdrv_quorum = {
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
- .bdrv_invalidate_cache = quorum_invalidate_cache,
- .bdrv_detach_aio_context = quorum_detach_aio_context,
- .bdrv_attach_aio_context = quorum_attach_aio_context,
+ .bdrv_add_child = quorum_add_child,
+ .bdrv_del_child = quorum_del_child,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
diff --git a/block/raw-aio.h b/block/raw-aio.h
deleted file mode 100644
index 811e37501..000000000
--- a/block/raw-aio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Declarations for AIO in the raw protocol
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-#ifndef QEMU_RAW_AIO_H
-#define QEMU_RAW_AIO_H
-
-#include "qemu/iov.h"
-
-/* AIO request types */
-#define QEMU_AIO_READ 0x0001
-#define QEMU_AIO_WRITE 0x0002
-#define QEMU_AIO_IOCTL 0x0004
-#define QEMU_AIO_FLUSH 0x0008
-#define QEMU_AIO_DISCARD 0x0010
-#define QEMU_AIO_WRITE_ZEROES 0x0020
-#define QEMU_AIO_TYPE_MASK \
- (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
- QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
-
-/* AIO flags */
-#define QEMU_AIO_MISALIGNED 0x1000
-#define QEMU_AIO_BLKDEV 0x2000
-
-
-/* linux-aio.c - Linux native implementation */
-#ifdef CONFIG_LINUX_AIO
-void *laio_init(void);
-void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type);
-void laio_detach_aio_context(void *s, AioContext *old_context);
-void laio_attach_aio_context(void *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-void laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
-#endif
-
-#ifdef _WIN32
-typedef struct QEMUWin32AIOState QEMUWin32AIOState;
-QEMUWin32AIOState *win32_aio_init(void);
-void win32_aio_cleanup(QEMUWin32AIOState *aio);
-int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
- QEMUWin32AIOState *aio, HANDLE hfile,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type);
-void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
- AioContext *old_context);
-void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
- AioContext *new_context);
-#endif
-
-#endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 906d5c941..6ed754739 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -32,7 +32,7 @@
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "qapi/util.h"
#include "qapi/qmp/qstring.h"
@@ -137,10 +137,6 @@ typedef struct BDRVRawState {
int open_flags;
size_t buf_align;
-#ifdef CONFIG_LINUX_AIO
- int use_aio;
- void *aio_ctx;
-#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
#endif
@@ -154,9 +150,6 @@ typedef struct BDRVRawState {
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
-#ifdef CONFIG_LINUX_AIO
- int use_aio;
-#endif
} BDRVRawReopenState;
static int fd_open(BlockDriverState *bs);
@@ -302,22 +295,22 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
/* For SCSI generic devices the alignment is not really used.
With buffered I/O, we don't have any restrictions. */
if (bdrv_is_sg(bs) || !s->needs_alignment) {
- bs->request_alignment = 1;
+ bs->bl.request_alignment = 1;
s->buf_align = 1;
return;
}
- bs->request_alignment = 0;
+ bs->bl.request_alignment = 0;
s->buf_align = 0;
/* Let's try to use the logical blocksize for the alignment. */
- if (probe_logical_blocksize(fd, &bs->request_alignment) < 0) {
- bs->request_alignment = 0;
+ if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) {
+ bs->bl.request_alignment = 0;
}
#ifdef CONFIG_XFS
if (s->is_xfs) {
struct dioattr da;
if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
- bs->request_alignment = da.d_miniosz;
+ bs->bl.request_alignment = da.d_miniosz;
/* The kernel returns wrong information for d_mem */
/* s->buf_align = da.d_mem; */
}
@@ -337,21 +330,21 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
qemu_vfree(buf);
}
- if (!bs->request_alignment) {
+ if (!bs->bl.request_alignment) {
size_t align;
buf = qemu_memalign(s->buf_align, max_align);
for (align = 512; align <= max_align; align <<= 1) {
if (raw_is_io_aligned(fd, buf, align)) {
- bs->request_alignment = align;
+ bs->bl.request_alignment = align;
break;
}
}
qemu_vfree(buf);
}
- if (!s->buf_align || !bs->request_alignment) {
- error_setg(errp, "Could not find working O_DIRECT alignment. "
- "Try cache.direct=off.");
+ if (!s->buf_align || !bs->bl.request_alignment) {
+ error_setg(errp, "Could not find working O_DIRECT alignment");
+ error_append_hint(errp, "Try cache.direct=off\n");
}
}
@@ -374,58 +367,15 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
-static void raw_detach_aio_context(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
-
- if (s->use_aio) {
- laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
- }
-#endif
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
-
- if (s->use_aio) {
- laio_attach_aio_context(s->aio_ctx, new_context);
- }
-#endif
-}
-
#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
+static bool raw_use_aio(int bdrv_flags)
{
- int ret = -1;
- assert(aio_ctx != NULL);
- assert(use_aio != NULL);
/*
* Currently Linux do AIO only for files opened with O_DIRECT
* specified so check NOCACHE flag too
*/
- if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
- (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
- /* if non-NULL, laio_init() has already been run */
- if (*aio_ctx == NULL) {
- *aio_ctx = laio_init();
- if (!*aio_ctx) {
- goto error;
- }
- }
- *use_aio = 1;
- } else {
- *use_aio = 0;
- }
-
- ret = 0;
-
-error:
- return ret;
+ return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+ (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
}
#endif
@@ -494,13 +444,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
- if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
- qemu_close(fd);
- ret = -errno;
- error_setg_errno(errp, -ret, "Could not set AIO state");
- goto fail;
- }
- if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
+ if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
error_setg(errp, "aio=native was specified, but it requires "
"cache.direct=on, which was not specified.");
ret = -EINVAL;
@@ -517,6 +461,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->has_discard = true;
s->has_write_zeroes = true;
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
s->needs_alignment = true;
}
@@ -566,8 +511,6 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
- raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -581,15 +524,9 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
s->type = FTYPE_FILE;
- ret = raw_open_common(bs, options, flags, 0, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ return raw_open_common(bs, options, flags, 0, errp);
}
static int raw_reopen_prepare(BDRVReopenState *state,
@@ -608,18 +545,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
state->opaque = g_new0(BDRVRawReopenState, 1);
raw_s = state->opaque;
-#ifdef CONFIG_LINUX_AIO
- raw_s->use_aio = s->use_aio;
-
- /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
- * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
- * won't override aio_ctx if aio_ctx is non-NULL */
- if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
- error_setg(errp, "Could not set AIO state");
- return -1;
- }
-#endif
-
if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -644,15 +569,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
/* dup the original fd */
- /* TODO: use qemu fcntl wrapper */
-#ifdef F_DUPFD_CLOEXEC
- raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
-#else
- raw_s->fd = dup(s->fd);
- if (raw_s->fd != -1) {
- qemu_set_cloexec(raw_s->fd);
- }
-#endif
+ raw_s->fd = qemu_dup(s->fd);
if (raw_s->fd >= 0) {
ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
if (ret) {
@@ -702,9 +619,6 @@ static void raw_reopen_commit(BDRVReopenState *state)
qemu_close(s->fd);
s->fd = raw_s->fd;
-#ifdef CONFIG_LINUX_AIO
- s->use_aio = raw_s->use_aio;
-#endif
g_free(state->opaque);
state->opaque = NULL;
@@ -728,9 +642,33 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
+static int hdev_get_max_transfer_length(int fd)
+{
+#ifdef BLKSECTGET
+ int max_sectors = 0;
+ if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+ return max_sectors;
+ } else {
+ return -errno;
+ }
+#else
+ return -ENOSYS;
+#endif
+}
+
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
+ struct stat st;
+
+ if (!fstat(s->fd, &st)) {
+ if (S_ISBLK(st.st_mode)) {
+ int ret = hdev_get_max_transfer_length(s->fd);
+ if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
+ bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
+ }
+ }
+ }
raw_probe_alignment(bs, s->fd, errp);
bs->bl.min_mem_alignment = s->buf_align;
@@ -1251,8 +1189,8 @@ static int aio_worker(void *arg)
}
static int paio_submit_co(BlockDriverState *bs, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- int type)
+ int64_t offset, QEMUIOVector *qiov,
+ int count, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
@@ -1261,22 +1199,22 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
- acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
- acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+ acb->aio_nbytes = count;
+ acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
- assert(qiov->size == acb->aio_nbytes);
+ assert(qiov->size == count);
}
- trace_paio_submit_co(sector_num, nb_sectors, type);
+ trace_paio_submit_co(offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_co(pool, aio_worker, acb);
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
@@ -1286,8 +1224,8 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
- acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
- acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+ acb->aio_nbytes = count;
+ acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
@@ -1295,19 +1233,18 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
assert(qiov->size == acb->aio_nbytes);
}
- trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+ trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0)
- return NULL;
+ return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
@@ -1319,61 +1256,50 @@ static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
- } else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, cb, opaque, type);
+ } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ assert(qiov->size == bytes);
+ return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
}
}
- return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
- cb, opaque, type);
+ return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
}
-static void raw_aio_plug(BlockDriverState *bs)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_plug(bs, s->aio_ctx);
- }
-#endif
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
}
-static void raw_aio_unplug(BlockDriverState *bs)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx, true);
- }
-#endif
+ assert(flags == 0);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
}
-static void raw_aio_flush_io_queue(BlockDriverState *bs)
+static void raw_aio_plug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx, false);
+ if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ laio_io_plug(bs, aio);
}
#endif
}
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static void raw_aio_unplug(BlockDriverState *bs)
{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
+#ifdef CONFIG_LINUX_AIO
+ if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ laio_io_unplug(bs, aio);
+ }
+#endif
}
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
@@ -1391,13 +1317,6 @@ static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
- raw_detach_aio_context(bs);
-
-#ifdef CONFIG_LINUX_AIO
- if (s->use_aio) {
- laio_cleanup(s->aio_ctx);
- }
-#endif
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
@@ -1867,27 +1786,27 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
return ret | BDRV_BLOCK_OFFSET_VALID | start;
}
-static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
+static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD);
}
-static int coroutine_fn raw_co_write_zeroes(
- BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+static int coroutine_fn raw_co_pwrite_zeroes(
+ BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_WRITE_ZEROES);
} else if (s->discard_zeroes) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_DISCARD);
}
return -ENOTSUP;
@@ -1940,16 +1859,15 @@ BlockDriver bdrv_file = {
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = raw_co_get_block_status,
- .bdrv_co_write_zeroes = raw_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
- .bdrv_aio_discard = raw_aio_discard,
+ .bdrv_aio_pdiscard = raw_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1957,9 +1875,6 @@ BlockDriver bdrv_file = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
.create_opts = &raw_create_opts,
};
@@ -2225,9 +2140,7 @@ hdev_open_Mac_error:
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (ret < 0) {
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
#if defined(__APPLE__) && defined(__MACH__)
if (*bsd_path) {
filename = bsd_path;
@@ -2290,8 +2203,8 @@ static int fd_open(BlockDriverState *bs)
return -EIO;
}
-static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
+static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -2299,12 +2212,12 @@ static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
if (fd_open(bs) < 0) {
return NULL;
}
- return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
-static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
int rc;
@@ -2314,10 +2227,10 @@ static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
} else if (s->discard_zeroes) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
return -ENOTSUP;
@@ -2389,16 +2302,15 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_co_write_zeroes = hdev_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
- .bdrv_aio_discard = hdev_aio_discard,
+ .bdrv_aio_pdiscard = hdev_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2408,9 +2320,6 @@ static BlockDriver bdrv_host_device = {
.bdrv_probe_blocksizes = hdev_probe_blocksizes,
.bdrv_probe_geometry = hdev_probe_geometry,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* generic scsi device */
#ifdef __linux__
.bdrv_aio_ioctl = hdev_aio_ioctl,
@@ -2433,17 +2342,11 @@ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
s->type = FTYPE_CD;
/* open will not fail even if no CD is inserted, so add O_NONBLOCK */
- ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ return raw_open_common(bs, options, flags, O_NONBLOCK, errp);
}
static int cdrom_probe_device(const char *filename)
@@ -2522,13 +2425,13 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2536,9 +2439,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2561,9 +2461,7 @@ static int cdrom_open(BlockDriverState *bs, QDict *options, int flags,
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (ret) {
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
@@ -2658,13 +2556,12 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2672,9 +2569,6 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index fd2389153..56f45fea9 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -27,7 +27,7 @@
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
@@ -142,7 +142,7 @@ static int aio_worker(void *arg)
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
@@ -155,11 +155,12 @@ static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
+ assert(qiov->size == count);
}
- acb->aio_nbytes = nb_sectors * 512;
- acb->aio_offset = sector_num * 512;
+ acb->aio_nbytes = count;
+ acb->aio_offset = offset;
- trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+ trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -222,7 +223,7 @@ static void raw_attach_aio_context(BlockDriverState *bs,
}
}
-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
DWORD sectorsPerCluster, freeClusters, totalClusters, count;
@@ -230,14 +231,14 @@ static void raw_probe_alignment(BlockDriverState *bs)
BOOL status;
if (s->type == FTYPE_CD) {
- bs->request_alignment = 2048;
+ bs->bl.request_alignment = 2048;
return;
}
if (s->type == FTYPE_HARDDISK) {
status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
NULL, 0, &dg, sizeof(dg), &count, NULL);
if (status != 0) {
- bs->request_alignment = dg.Geometry.BytesPerSector;
+ bs->bl.request_alignment = dg.Geometry.BytesPerSector;
return;
}
/* try GetDiskFreeSpace too */
@@ -247,7 +248,7 @@ static void raw_probe_alignment(BlockDriverState *bs)
GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
&dg.Geometry.BytesPerSector,
&freeClusters, &totalClusters);
- bs->request_alignment = dg.Geometry.BytesPerSector;
+ bs->bl.request_alignment = dg.Geometry.BytesPerSector;
}
}
@@ -365,7 +366,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}
- raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
@@ -379,9 +379,10 @@ static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
- nb_sectors, cb, opaque, QEMU_AIO_READ);
+ nb_sectors, cb, opaque, QEMU_AIO_READ);
} else {
- return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+ return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_READ);
}
}
@@ -393,9 +394,10 @@ static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
- nb_sectors, cb, opaque, QEMU_AIO_WRITE);
+ nb_sectors, cb, opaque, QEMU_AIO_WRITE);
} else {
- return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+ return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_WRITE);
}
}
@@ -550,6 +552,7 @@ BlockDriver bdrv_file = {
.bdrv_needs_filename = true,
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
+ .bdrv_refresh_limits = raw_probe_alignment,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index a6cc7e991..588d4080f 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -1,6 +1,6 @@
/* BlockDriver implementation for "raw"
*
- * Copyright (C) 2010, 2013, Red Hat, Inc.
+ * Copyright (C) 2010-2016 Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
*
@@ -50,33 +50,30 @@ static int raw_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
-static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn
-raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov, int flags)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
void *buf = NULL;
BlockDriver *drv;
QEMUIOVector local_qiov;
int ret;
- if (bs->probed && sector_num == 0) {
- /* As long as these conditions are true, we can't get partial writes to
- * the probe buffer and can just directly check the request. */
+ if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
+ /* Handling partial writes would be a pain - so we just
+ * require that guests have 512-byte request alignment if
+ * probing occurred */
QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
-
- if (nb_sectors == 0) {
- /* qemu_iovec_to_buf() would fail, but we want to return success
- * instead of -EINVAL in this case. */
- return 0;
- }
+ assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
buf = qemu_try_blockalign(bs->file->bs, 512);
if (!buf) {
@@ -105,8 +102,7 @@ raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
+ ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
fail:
if (qiov == &local_qiov) {
@@ -116,13 +112,6 @@ fail:
return ret;
}
-static int coroutine_fn
-raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- QEMUIOVector *qiov)
-{
- return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
-}
-
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors, int *pnum,
@@ -134,17 +123,17 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
(sector_num << BDRV_SECTOR_BITS);
}
-static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BdrvRequestFlags flags)
+static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count,
+ BdrvRequestFlags flags)
{
- return bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+ return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
}
-static int coroutine_fn raw_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
{
- return bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+ return bdrv_co_pdiscard(bs->file->bs, offset, count);
}
static int64_t raw_getlength(BlockDriverState *bs)
@@ -159,7 +148,12 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
- bs->bl = bs->file->bs->bl;
+ if (bs->probed) {
+ /* To make it easier to protect the first sector, any probed
+ * image is restricted to read-modify-write on sub-sector
+ * operations. */
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+ }
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -197,20 +191,17 @@ static int raw_has_zero_init(BlockDriverState *bs)
static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
{
- Error *local_err = NULL;
- int ret;
-
- ret = bdrv_create_file(filename, opts, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ return bdrv_create_file(filename, opts, errp);
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
bs->sg = bs->file->bs->sg;
+ bs->supported_write_flags = BDRV_REQ_FUA &
+ bs->file->bs->supported_write_flags;
+ bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags;
if (bs->probed && !bdrv_is_read_only(bs)) {
fprintf(stderr,
@@ -255,12 +246,10 @@ BlockDriver bdrv_raw = {
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
- .bdrv_co_readv = &raw_co_readv,
- .bdrv_co_writev = &raw_co_writev,
- .bdrv_co_writev_flags = &raw_co_writev_flags,
- .supported_write_flags = BDRV_REQ_FUA,
- .bdrv_co_write_zeroes = &raw_co_write_zeroes,
- .bdrv_co_discard = &raw_co_discard,
+ .bdrv_co_preadv = &raw_co_preadv,
+ .bdrv_co_pwritev = &raw_co_pwritev,
+ .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = &raw_co_pdiscard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
diff --git a/block/rbd.c b/block/rbd.c
index 5bc5b3253..0106fea45 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -290,7 +290,8 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
if (only_read_conf_file) {
ret = rados_conf_read_file(cluster, value);
if (ret < 0) {
- error_setg(errp, "error reading conf file %s", value);
+ error_setg_errno(errp, -ret, "error reading conf file %s",
+ value);
break;
}
}
@@ -299,7 +300,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
} else if (!only_read_conf_file) {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
- error_setg(errp, "invalid conf option %s", name);
+ error_setg_errno(errp, -ret, "invalid conf option %s", name);
ret = -EINVAL;
break;
}
@@ -354,9 +355,10 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
- if (rados_create(&cluster, clientname) < 0) {
- error_setg(errp, "error initializing");
- return -EIO;
+ ret = rados_create(&cluster, clientname);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error initializing");
+ return ret;
}
if (strstr(conf, "conf=") == NULL) {
@@ -381,21 +383,27 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
return -EIO;
}
- if (rados_connect(cluster) < 0) {
- error_setg(errp, "error connecting");
+ ret = rados_connect(cluster);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error connecting");
rados_shutdown(cluster);
- return -EIO;
+ return ret;
}
- if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
- error_setg(errp, "error opening pool %s", pool);
+ ret = rados_ioctx_create(cluster, pool, &io_ctx);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error opening pool %s", pool);
rados_shutdown(cluster);
- return -EIO;
+ return ret;
}
ret = rbd_create(io_ctx, name, bytes, &obj_order);
rados_ioctx_destroy(io_ctx);
rados_shutdown(cluster);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error rbd create");
+ return ret;
+ }
return ret;
}
@@ -500,7 +508,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
r = rados_create(&s->cluster, clientname);
if (r < 0) {
- error_setg(errp, "error initializing");
+ error_setg_errno(errp, -r, "error initializing");
goto failed_opts;
}
@@ -546,19 +554,19 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
r = rados_connect(s->cluster);
if (r < 0) {
- error_setg(errp, "error connecting");
+ error_setg_errno(errp, -r, "error connecting");
goto failed_shutdown;
}
r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
if (r < 0) {
- error_setg(errp, "error opening pool %s", pool);
+ error_setg_errno(errp, -r, "error opening pool %s", pool);
goto failed_shutdown;
}
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
if (r < 0) {
- error_setg(errp, "error reading header from %s", s->name);
+ error_setg_errno(errp, -r, "error reading header from %s", s->name);
goto failed_open;
}
@@ -641,9 +649,9 @@ static int rbd_aio_flush_wrapper(rbd_image_t image,
}
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
- int64_t sector_num,
+ int64_t off,
QEMUIOVector *qiov,
- int nb_sectors,
+ int64_t size,
BlockCompletionFunc *cb,
void *opaque,
RBDAIOCmd cmd)
@@ -651,7 +659,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
- int64_t off, size;
char *buf;
int r;
@@ -660,6 +667,7 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
acb->cmd = cmd;
acb->qiov = qiov;
+ assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
@@ -679,9 +687,6 @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
buf = acb->bounce;
- off = sector_num * BDRV_SECTOR_SIZE;
- size = nb_sectors * BDRV_SECTOR_SIZE;
-
rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
rcb->buf = buf;
@@ -731,7 +736,8 @@ static BlockAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
- return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+ return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_READ);
}
@@ -742,7 +748,8 @@ static BlockAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
- return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+ return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_WRITE);
}
@@ -875,10 +882,8 @@ static int qemu_rbd_snap_rollback(BlockDriverState *bs,
const char *snapshot_name)
{
BDRVRBDState *s = bs->opaque;
- int r;
- r = rbd_snap_rollback(s->image, snapshot_name);
- return r;
+ return rbd_snap_rollback(s->image, snapshot_name);
}
static int qemu_rbd_snap_list(BlockDriverState *bs,
@@ -925,13 +930,13 @@ static int qemu_rbd_snap_list(BlockDriverState *bs,
}
#ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
+ int64_t offset,
+ int count,
+ BlockCompletionFunc *cb,
+ void *opaque)
{
- return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
+ return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
RBD_AIO_DISCARD);
}
#endif
@@ -995,7 +1000,7 @@ static BlockDriver bdrv_rbd = {
#endif
#ifdef LIBRBD_SUPPORTS_DISCARD
- .bdrv_aio_discard = qemu_rbd_aio_discard,
+ .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 33e0a3382..66e1cb2b2 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -294,13 +294,16 @@ static inline size_t count_data_objs(const struct SheepdogInode *inode)
#undef DPRINTF
#ifdef DEBUG_SDOG
-#define DPRINTF(fmt, args...) \
- do { \
- fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
- } while (0)
+#define DEBUG_SDOG_PRINT 1
#else
-#define DPRINTF(fmt, args...)
+#define DEBUG_SDOG_PRINT 0
#endif
+#define DPRINTF(fmt, args...) \
+ do { \
+ if (DEBUG_SDOG_PRINT) { \
+ fprintf(stderr, "%s %d: " fmt, __func__, __LINE__, ##args); \
+ } \
+ } while (0)
typedef struct SheepdogAIOCB SheepdogAIOCB;
@@ -492,7 +495,7 @@ static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
{
- qemu_coroutine_enter(acb->coroutine, NULL);
+ qemu_coroutine_enter(acb->coroutine);
qemu_aio_unref(acb);
}
@@ -633,7 +636,7 @@ static void restart_co_req(void *opaque)
{
Coroutine *co = opaque;
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
}
typedef struct SheepdogReqCo {
@@ -723,8 +726,8 @@ static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
if (qemu_in_coroutine()) {
do_co_req(&srco);
} else {
- co = qemu_coroutine_create(do_co_req);
- qemu_coroutine_enter(co, &srco);
+ co = qemu_coroutine_create(do_co_req, &srco);
+ qemu_coroutine_enter(co);
while (!srco.finished) {
aio_poll(aio_context, true);
}
@@ -922,17 +925,17 @@ static void co_read_response(void *opaque)
BDRVSheepdogState *s = opaque;
if (!s->co_recv) {
- s->co_recv = qemu_coroutine_create(aio_read_response);
+ s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
- qemu_coroutine_enter(s->co_recv, opaque);
+ qemu_coroutine_enter(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
- qemu_coroutine_enter(s->co_send, NULL);
+ qemu_coroutine_enter(s->co_send);
}
/*
@@ -1678,7 +1681,7 @@ static int sd_prealloc(const char *filename, Error **errp)
if (ret < 0) {
goto out;
}
- ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
+ ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0);
if (ret < 0) {
goto out;
}
@@ -2781,17 +2784,24 @@ static int sd_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
return ret;
}
-static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
- int64_t pos, int size)
+static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
{
BDRVSheepdogState *s = bs->opaque;
+ void *buf;
+ int ret;
- return do_load_save_vmstate(s, data, pos, size, 1);
+ buf = qemu_blockalign(bs, qiov->size);
+ ret = do_load_save_vmstate(s, buf, pos, qiov->size, 1);
+ qemu_iovec_from_buf(qiov, 0, buf, qiov->size);
+ qemu_vfree(buf);
+
+ return ret;
}
-static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
+ int count)
{
SheepdogAIOCB *acb;
BDRVSheepdogState *s = bs->opaque;
@@ -2801,7 +2811,7 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
uint32_t zero = 0;
if (!s->discard_supported) {
- return 0;
+ return 0;
}
memset(&discard_iov, 0, sizeof(discard_iov));
@@ -2810,7 +2820,10 @@ static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
iov.iov_len = sizeof(zero);
discard_iov.iov = &iov;
discard_iov.niov = 1;
- acb = sd_aio_setup(bs, &discard_iov, sector_num, nb_sectors);
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
+ acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
@@ -2944,7 +2957,7 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
- .bdrv_co_discard = sd_co_discard,
+ .bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -2980,7 +2993,7 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
- .bdrv_co_discard = sd_co_discard,
+ .bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -3016,7 +3029,7 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
- .bdrv_co_discard = sd_co_discard,
+ .bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
diff --git a/block/snapshot.c b/block/snapshot.c
index e9d721df6..bf5c2ca5e 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -358,9 +358,7 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
}
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
@@ -373,9 +371,10 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
{
bool ok = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
- while (ok && (bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -383,8 +382,12 @@ bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
ok = bdrv_can_snapshot(bs);
}
aio_context_release(ctx);
+ if (!ok) {
+ goto fail;
+ }
}
+fail:
*first_bad_bs = bs;
return ok;
}
@@ -393,10 +396,11 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
Error **err)
{
int ret = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
QEMUSnapshotInfo sn1, *snapshot = &sn1;
- while (ret == 0 && (bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -405,8 +409,12 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
ret = bdrv_snapshot_delete_by_id_or_name(bs, name, err);
}
aio_context_release(ctx);
+ if (ret < 0) {
+ goto fail;
+ }
}
+fail:
*first_bad_bs = bs;
return ret;
}
@@ -415,9 +423,10 @@ int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
{
int err = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
- while (err == 0 && (bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -425,8 +434,12 @@ int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
err = bdrv_snapshot_goto(bs, name);
}
aio_context_release(ctx);
+ if (err < 0) {
+ goto fail;
+ }
}
+fail:
*first_bad_bs = bs;
return err;
}
@@ -435,9 +448,10 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
{
QEMUSnapshotInfo sn;
int err = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
- while (err == 0 && (bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -445,8 +459,12 @@ int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
err = bdrv_snapshot_find(bs, &sn, name);
}
aio_context_release(ctx);
+ if (err < 0) {
+ goto fail;
+ }
}
+fail:
*first_bad_bs = bs;
return err;
}
@@ -457,9 +475,10 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
BlockDriverState **first_bad_bs)
{
int err = 0;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
- while (err == 0 && (bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *ctx = bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
@@ -471,23 +490,32 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
err = bdrv_snapshot_create(bs, sn);
}
aio_context_release(ctx);
+ if (err < 0) {
+ goto fail;
+ }
}
+fail:
*first_bad_bs = bs;
return err;
}
BlockDriverState *bdrv_all_find_vmstate_bs(void)
{
- bool not_found = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
- while (not_found && (bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *ctx = bdrv_get_aio_context(bs);
+ bool found;
aio_context_acquire(ctx);
- not_found = !bdrv_can_snapshot(bs);
+ found = bdrv_can_snapshot(bs);
aio_context_release(ctx);
+
+ if (found) {
+ break;
+ }
}
return bs;
}
diff --git a/block/ssh.c b/block/ssh.c
index 06928ed93..5ce12b633 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -508,36 +508,73 @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
return ret;
}
+static QemuOptsList ssh_runtime_opts = {
+ .name = "ssh",
+ .head = QTAILQ_HEAD_INITIALIZER(ssh_runtime_opts.head),
+ .desc = {
+ {
+ .name = "host",
+ .type = QEMU_OPT_STRING,
+ .help = "Host to connect to",
+ },
+ {
+ .name = "port",
+ .type = QEMU_OPT_NUMBER,
+ .help = "Port to connect to",
+ },
+ {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ .help = "Path of the image on the host",
+ },
+ {
+ .name = "user",
+ .type = QEMU_OPT_STRING,
+ .help = "User as which to connect",
+ },
+ {
+ .name = "host_key_check",
+ .type = QEMU_OPT_STRING,
+ .help = "Defines how and what to check the host key against",
+ },
+ },
+};
+
static int connect_to_ssh(BDRVSSHState *s, QDict *options,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
const char *host, *user, *path, *host_key_check;
int port;
- if (!qdict_haskey(options, "host")) {
+ opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
ret = -EINVAL;
- error_setg(errp, "No hostname was specified");
+ error_propagate(errp, local_err);
goto err;
}
- host = qdict_get_str(options, "host");
- if (qdict_haskey(options, "port")) {
- port = qdict_get_int(options, "port");
- } else {
- port = 22;
+ host = qemu_opt_get(opts, "host");
+ if (!host) {
+ ret = -EINVAL;
+ error_setg(errp, "No hostname was specified");
+ goto err;
}
- if (!qdict_haskey(options, "path")) {
+ port = qemu_opt_get_number(opts, "port", 22);
+
+ path = qemu_opt_get(opts, "path");
+ if (!path) {
ret = -EINVAL;
error_setg(errp, "No path was specified");
goto err;
}
- path = qdict_get_str(options, "path");
- if (qdict_haskey(options, "user")) {
- user = qdict_get_str(options, "user");
- } else {
+ user = qemu_opt_get(opts, "user");
+ if (!user) {
user = g_get_user_name();
if (!user) {
error_setg_errno(errp, errno, "Can't get user name");
@@ -546,9 +583,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
}
- if (qdict_haskey(options, "host_key_check")) {
- host_key_check = qdict_get_str(options, "host_key_check");
- } else {
+ host_key_check = qemu_opt_get(opts, "host_key_check");
+ if (!host_key_check) {
host_key_check = "yes";
}
@@ -612,21 +648,14 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
goto err;
}
+ qemu_opts_del(opts);
+
r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
if (r < 0) {
sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
- /* Delete the options we've used; any not deleted will cause the
- * block layer to give an error about unused options.
- */
- qdict_del(options, "host");
- qdict_del(options, "port");
- qdict_del(options, "user");
- qdict_del(options, "path");
- qdict_del(options, "host_key_check");
-
return 0;
err:
@@ -646,6 +675,8 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
}
s->session = NULL;
+ qemu_opts_del(opts);
+
return ret;
}
@@ -777,7 +808,7 @@ static void restart_coroutine(void *opaque)
DPRINTF("co=%p", co);
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
diff --git a/block/stream.c b/block/stream.c
index 332b9a183..31874817c 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -39,7 +39,7 @@ typedef struct StreamBlockJob {
char *backing_file_str;
} StreamBlockJob;
-static int coroutine_fn stream_populate(BlockDriverState *bs,
+static int coroutine_fn stream_populate(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
void *buf)
{
@@ -52,7 +52,8 @@ static int coroutine_fn stream_populate(BlockDriverState *bs,
qemu_iovec_init_external(&qiov, &iov, 1);
/* Copy-on-read the unallocated clusters */
- return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
+ return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov,
+ BDRV_REQ_COPY_ON_READ);
}
typedef struct {
@@ -64,6 +65,7 @@ static void stream_complete(BlockJob *job, void *opaque)
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common);
StreamCompleteData *data = opaque;
+ BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
@@ -75,8 +77,8 @@ static void stream_complete(BlockJob *job, void *opaque)
base_fmt = base->drv->format_name;
}
}
- data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
- bdrv_set_backing_hd(job->bs, base);
+ data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+ bdrv_set_backing_hd(bs, base);
}
g_free(s->backing_file_str);
@@ -88,10 +90,12 @@ static void coroutine_fn stream_run(void *opaque)
{
StreamBlockJob *s = opaque;
StreamCompleteData *data;
- BlockDriverState *bs = s->common.bs;
+ BlockBackend *blk = s->common.blk;
+ BlockDriverState *bs = blk_bs(blk);
BlockDriverState *base = s->base;
int64_t sector_num = 0;
int64_t end = -1;
+ uint64_t delay_ns = 0;
int error = 0;
int ret = 0;
int n = 0;
@@ -120,10 +124,8 @@ static void coroutine_fn stream_run(void *opaque)
}
for (sector_num = 0; sector_num < end; sector_num += n) {
- uint64_t delay_ns = 0;
bool copy;
-wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -153,18 +155,11 @@ wait:
}
trace_stream_one_iteration(s, sector_num, n, ret);
if (copy) {
- if (s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, n);
- if (delay_ns > 0) {
- goto wait;
- }
- }
- ret = stream_populate(bs, sector_num, n, buf);
+ ret = stream_populate(blk, sector_num, n, buf);
}
if (ret < 0) {
BlockErrorAction action =
- block_job_error_action(&s->common, s->common.bs, s->on_error,
- true, -ret);
+ block_job_error_action(&s->common, s->on_error, true, -ret);
if (action == BLOCK_ERROR_ACTION_STOP) {
n = 0;
continue;
@@ -180,6 +175,9 @@ wait:
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
+ if (copy && s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, n);
+ }
}
if (!base) {
@@ -216,22 +214,15 @@ static const BlockJobDriver stream_job_driver = {
.set_speed = stream_set_speed,
};
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *backing_file_str, int64_t speed,
- BlockdevOnError on_error,
- BlockCompletionFunc *cb,
- void *opaque, Error **errp)
+void stream_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, const char *backing_file_str,
+ int64_t speed, BlockdevOnError on_error,
+ BlockCompletionFunc *cb, void *opaque, Error **errp)
{
StreamBlockJob *s;
- if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
- on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- (!bs->blk || !blk_iostatus_is_enabled(bs->blk))) {
- error_setg(errp, QERR_INVALID_PARAMETER, "on-error");
- return;
- }
-
- s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
+ s = block_job_create(job_id, &stream_job_driver, bs, speed,
+ cb, opaque, errp);
if (!s) {
return;
}
@@ -240,7 +231,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
- s->common.co = qemu_coroutine_create(stream_run);
+ s->common.co = qemu_coroutine_create(stream_run, s);
trace_stream_start(bs, base, s, s->common.co, opaque);
- qemu_coroutine_enter(s->common.co, s);
+ qemu_coroutine_enter(s->common.co);
}
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 4920e0949..59545e287 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -23,13 +23,14 @@
*/
#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
#include "block/throttle-groups.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
#include "sysemu/qtest.h"
/* The ThrottleGroup structure (with its ThrottleState) is shared
- * among different BlockDriverState and it's independent from
+ * among different BlockBackends and it's independent from
* AioContext, so in order to use it from different threads it needs
* its own locking.
*
@@ -39,26 +40,26 @@
* The whole ThrottleGroup structure is private and invisible to
* outside users, that only use it through its ThrottleState.
*
- * In addition to the ThrottleGroup structure, BlockDriverState has
+ * In addition to the ThrottleGroup structure, BlockBackendPublic has
* fields that need to be accessed by other members of the group and
- * therefore also need to be protected by this lock. Once a BDS is
- * registered in a group those fields can be accessed by other threads
- * any time.
+ * therefore also need to be protected by this lock. Once a
+ * BlockBackend is registered in a group those fields can be accessed
+ * by other threads any time.
*
* Again, all this is handled internally and is mostly transparent to
* the outside. The 'throttle_timers' field however has an additional
* constraint because it may be temporarily invalid (see for example
* bdrv_set_aio_context()). Therefore in this file a thread will
- * access some other BDS's timers only after verifying that that BDS
- * has throttled requests in the queue.
+ * access some other BlockBackend's timers only after verifying that
+ * that BlockBackend has throttled requests in the queue.
*/
typedef struct ThrottleGroup {
char *name; /* This is constant during the lifetime of the group */
QemuMutex lock; /* This lock protects the following four fields */
ThrottleState ts;
- QLIST_HEAD(, BlockDriverState) head;
- BlockDriverState *tokens[2];
+ QLIST_HEAD(, BlockBackendPublic) head;
+ BlockBackend *tokens[2];
bool any_timer_armed[2];
/* These two are protected by the global throttle_groups_lock */
@@ -132,93 +133,98 @@ void throttle_group_unref(ThrottleState *ts)
qemu_mutex_unlock(&throttle_groups_lock);
}
-/* Get the name from a BlockDriverState's ThrottleGroup. The name (and
- * the pointer) is guaranteed to remain constant during the lifetime
- * of the group.
+/* Get the name from a BlockBackend's ThrottleGroup. The name (and the pointer)
+ * is guaranteed to remain constant during the lifetime of the group.
*
- * @bs: a BlockDriverState that is member of a throttling group
+ * @blk: a BlockBackend that is member of a throttling group
* @ret: the name of the group.
*/
-const char *throttle_group_get_name(BlockDriverState *bs)
+const char *throttle_group_get_name(BlockBackend *blk)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
return tg->name;
}
-/* Return the next BlockDriverState in the round-robin sequence,
- * simulating a circular list.
+/* Return the next BlockBackend in the round-robin sequence, simulating a
+ * circular list.
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
- * @ret: the next BlockDriverState in the sequence
+ * @blk: the current BlockBackend
+ * @ret: the next BlockBackend in the sequence
*/
-static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs)
+static BlockBackend *throttle_group_next_blk(BlockBackend *blk)
{
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
- BlockDriverState *next = QLIST_NEXT(bs, round_robin);
+ BlockBackendPublic *next = QLIST_NEXT(blkp, round_robin);
if (!next) {
- return QLIST_FIRST(&tg->head);
+ next = QLIST_FIRST(&tg->head);
}
- return next;
+ return blk_by_public(next);
}
-/* Return the next BlockDriverState in the round-robin sequence with
- * pending I/O requests.
+/* Return the next BlockBackend in the round-robin sequence with pending I/O
+ * requests.
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @is_write: the type of operation (read/write)
- * @ret: the next BlockDriverState with pending requests, or bs
- * if there is none.
+ * @ret: the next BlockBackend with pending requests, or blk if there is
+ * none.
*/
-static BlockDriverState *next_throttle_token(BlockDriverState *bs,
- bool is_write)
+static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
- BlockDriverState *token, *start;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+ BlockBackend *token, *start;
start = token = tg->tokens[is_write];
/* get next bs round in round robin style */
- token = throttle_group_next_bs(token);
- while (token != start && !token->pending_reqs[is_write]) {
- token = throttle_group_next_bs(token);
+ token = throttle_group_next_blk(token);
+ while (token != start && !blk_get_public(token)->pending_reqs[is_write]) {
+ token = throttle_group_next_blk(token);
}
/* If no IO are queued for scheduling on the next round robin token
* then decide the token is the current bs because chances are
* the current bs get the current request queued.
*/
- if (token == start && !token->pending_reqs[is_write]) {
- token = bs;
+ if (token == start && !blk_get_public(token)->pending_reqs[is_write]) {
+ token = blk;
}
return token;
}
-/* Check if the next I/O request for a BlockDriverState needs to be
- * throttled or not. If there's no timer set in this group, set one
- * and update the token accordingly.
+/* Check if the next I/O request for a BlockBackend needs to be throttled or
+ * not. If there's no timer set in this group, set one and update the token
+ * accordingly.
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @is_write: the type of operation (read/write)
* @ret: whether the I/O request needs to be throttled or not
*/
-static bool throttle_group_schedule_timer(BlockDriverState *bs,
- bool is_write)
+static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
{
- ThrottleState *ts = bs->throttle_state;
- ThrottleTimers *tt = &bs->throttle_timers;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
+ ThrottleTimers *tt = &blkp->throttle_timers;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool must_wait;
+ if (blkp->io_limits_disabled) {
+ return false;
+ }
+
/* Check if any of the timers in this group is already armed */
if (tg->any_timer_armed[is_write]) {
return true;
@@ -226,9 +232,9 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
must_wait = throttle_schedule_timer(ts, tt, is_write);
- /* If a timer just got armed, set bs as the current token */
+ /* If a timer just got armed, set blk as the current token */
if (must_wait) {
- tg->tokens[is_write] = bs;
+ tg->tokens[is_write] = blk;
tg->any_timer_armed[is_write] = true;
}
@@ -239,18 +245,19 @@ static bool throttle_group_schedule_timer(BlockDriverState *bs,
*
* This assumes that tg->lock is held.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @is_write: the type of operation (read/write)
*/
-static void schedule_next_request(BlockDriverState *bs, bool is_write)
+static void schedule_next_request(BlockBackend *blk, bool is_write)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
bool must_wait;
- BlockDriverState *token;
+ BlockBackend *token;
/* Check if there's any pending request to schedule next */
- token = next_throttle_token(bs, is_write);
- if (!token->pending_reqs[is_write]) {
+ token = next_throttle_token(blk, is_write);
+ if (!blk_get_public(token)->pending_reqs[is_write]) {
return;
}
@@ -259,12 +266,12 @@ static void schedule_next_request(BlockDriverState *bs, bool is_write)
/* If it doesn't have to wait, queue it for immediate execution */
if (!must_wait) {
- /* Give preference to requests from the current bs */
+ /* Give preference to requests from the current blk */
if (qemu_in_coroutine() &&
- qemu_co_queue_next(&bs->throttled_reqs[is_write])) {
- token = bs;
+ qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+ token = blk;
} else {
- ThrottleTimers *tt = &token->throttle_timers;
+ ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
int64_t now = qemu_clock_get_ns(tt->clock_type);
timer_mod(tt->timers[is_write], now + 1);
tg->any_timer_armed[is_write] = true;
@@ -277,53 +284,67 @@ static void schedule_next_request(BlockDriverState *bs, bool is_write)
* if necessary, and schedule the next request using a round robin
* algorithm.
*
- * @bs: the current BlockDriverState
+ * @blk: the current BlockBackend
* @bytes: the number of bytes for this I/O
* @is_write: the type of operation (read/write)
*/
-void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
+void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
unsigned int bytes,
bool is_write)
{
bool must_wait;
- BlockDriverState *token;
+ BlockBackend *token;
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
/* First we check if this I/O has to be throttled. */
- token = next_throttle_token(bs, is_write);
+ token = next_throttle_token(blk, is_write);
must_wait = throttle_group_schedule_timer(token, is_write);
/* Wait if there's a timer set or queued requests of this type */
- if (must_wait || bs->pending_reqs[is_write]) {
- bs->pending_reqs[is_write]++;
+ if (must_wait || blkp->pending_reqs[is_write]) {
+ blkp->pending_reqs[is_write]++;
qemu_mutex_unlock(&tg->lock);
- qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
qemu_mutex_lock(&tg->lock);
- bs->pending_reqs[is_write]--;
+ blkp->pending_reqs[is_write]--;
}
/* The I/O will be executed, so do the accounting */
- throttle_account(bs->throttle_state, is_write, bytes);
+ throttle_account(blkp->throttle_state, is_write, bytes);
/* Schedule the next request */
- schedule_next_request(bs, is_write);
+ schedule_next_request(blk, is_write);
qemu_mutex_unlock(&tg->lock);
}
+void throttle_group_restart_blk(BlockBackend *blk)
+{
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ while (qemu_co_enter_next(&blkp->throttled_reqs[i])) {
+ ;
+ }
+ }
+}
+
/* Update the throttle configuration for a particular group. Similar
* to throttle_config(), but guarantees atomicity within the
* throttling group.
*
- * @bs: a BlockDriverState that is member of the group
+ * @blk: a BlockBackend that is a member of the group
* @cfg: the configuration to set
*/
-void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
+void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
{
- ThrottleTimers *tt = &bs->throttle_timers;
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleTimers *tt = &blkp->throttle_timers;
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
/* throttle_config() cancels the timers */
@@ -335,18 +356,22 @@ void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg)
}
throttle_config(ts, tt, cfg);
qemu_mutex_unlock(&tg->lock);
+
+ qemu_co_enter_next(&blkp->throttled_reqs[0]);
+ qemu_co_enter_next(&blkp->throttled_reqs[1]);
}
/* Get the throttle configuration from a particular group. Similar to
* throttle_get_config(), but guarantees atomicity within the
* throttling group.
*
- * @bs: a BlockDriverState that is member of the group
+ * @blk: a BlockBackend that is a member of the group
* @cfg: the configuration will be written here
*/
-void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
+void throttle_group_get_config(BlockBackend *blk, ThrottleConfig *cfg)
{
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
qemu_mutex_lock(&tg->lock);
throttle_get_config(ts, cfg);
@@ -356,12 +381,13 @@ void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
/* ThrottleTimers callback. This wakes up a request that was waiting
* because it had been throttled.
*
- * @bs: the BlockDriverState whose request had been throttled
+ * @blk: the BlockBackend whose request had been throttled
* @is_write: the type of operation (read/write)
*/
-static void timer_cb(BlockDriverState *bs, bool is_write)
+static void timer_cb(BlockBackend *blk, bool is_write)
{
- ThrottleState *ts = bs->throttle_state;
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
bool empty_queue;
@@ -371,13 +397,13 @@ static void timer_cb(BlockDriverState *bs, bool is_write)
qemu_mutex_unlock(&tg->lock);
/* Run the request that was waiting for this timer */
- empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]);
+ empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
/* If the request queue was empty then we have to take care of
* scheduling the next one */
if (empty_queue) {
qemu_mutex_lock(&tg->lock);
- schedule_next_request(bs, is_write);
+ schedule_next_request(blk, is_write);
qemu_mutex_unlock(&tg->lock);
}
}
@@ -392,17 +418,17 @@ static void write_timer_cb(void *opaque)
timer_cb(opaque, true);
}
-/* Register a BlockDriverState in the throttling group, also
- * initializing its timers and updating its throttle_state pointer to
- * point to it. If a throttling group with that name does not exist
- * yet, it will be created.
+/* Register a BlockBackend in the throttling group, also initializing its
+ * timers and updating its throttle_state pointer to point to it. If a
+ * throttling group with that name does not exist yet, it will be created.
*
- * @bs: the BlockDriverState to insert
+ * @blk: the BlockBackend to insert
* @groupname: the name of the group
*/
-void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
+void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
{
int i;
+ BlockBackendPublic *blkp = blk_get_public(blk);
ThrottleState *ts = throttle_group_incref(groupname);
ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
int clock_type = QEMU_CLOCK_REALTIME;
@@ -412,67 +438,67 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
clock_type = QEMU_CLOCK_VIRTUAL;
}
- bs->throttle_state = ts;
+ blkp->throttle_state = ts;
qemu_mutex_lock(&tg->lock);
- /* If the ThrottleGroup is new set this BlockDriverState as the token */
+ /* If the ThrottleGroup is new set this BlockBackend as the token */
for (i = 0; i < 2; i++) {
if (!tg->tokens[i]) {
- tg->tokens[i] = bs;
+ tg->tokens[i] = blk;
}
}
- QLIST_INSERT_HEAD(&tg->head, bs, round_robin);
+ QLIST_INSERT_HEAD(&tg->head, blkp, round_robin);
- throttle_timers_init(&bs->throttle_timers,
- bdrv_get_aio_context(bs),
+ throttle_timers_init(&blkp->throttle_timers,
+ blk_get_aio_context(blk),
clock_type,
read_timer_cb,
write_timer_cb,
- bs);
+ blk);
qemu_mutex_unlock(&tg->lock);
}
-/* Unregister a BlockDriverState from its group, removing it from the
- * list, destroying the timers and setting the throttle_state pointer
- * to NULL.
+/* Unregister a BlockBackend from its group, removing it from the list,
+ * destroying the timers and setting the throttle_state pointer to NULL.
*
- * The BlockDriverState must not have pending throttled requests, so
- * the caller has to drain them first.
+ * The BlockBackend must not have pending throttled requests, so the caller has
+ * to drain them first.
*
* The group will be destroyed if it's empty after this operation.
*
- * @bs: the BlockDriverState to remove
+ * @blk: the BlockBackend to remove
*/
-void throttle_group_unregister_bs(BlockDriverState *bs)
+void throttle_group_unregister_blk(BlockBackend *blk)
{
- ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
int i;
- assert(bs->pending_reqs[0] == 0 && bs->pending_reqs[1] == 0);
- assert(qemu_co_queue_empty(&bs->throttled_reqs[0]));
- assert(qemu_co_queue_empty(&bs->throttled_reqs[1]));
+ assert(blkp->pending_reqs[0] == 0 && blkp->pending_reqs[1] == 0);
+ assert(qemu_co_queue_empty(&blkp->throttled_reqs[0]));
+ assert(qemu_co_queue_empty(&blkp->throttled_reqs[1]));
qemu_mutex_lock(&tg->lock);
for (i = 0; i < 2; i++) {
- if (tg->tokens[i] == bs) {
- BlockDriverState *token = throttle_group_next_bs(bs);
- /* Take care of the case where this is the last bs in the group */
- if (token == bs) {
+ if (tg->tokens[i] == blk) {
+ BlockBackend *token = throttle_group_next_blk(blk);
+ /* Take care of the case where this is the last blk in the group */
+ if (token == blk) {
token = NULL;
}
tg->tokens[i] = token;
}
}
- /* remove the current bs from the list */
- QLIST_REMOVE(bs, round_robin);
- throttle_timers_destroy(&bs->throttle_timers);
+ /* remove the current blk from the list */
+ QLIST_REMOVE(blkp, round_robin);
+ throttle_timers_destroy(&blkp->throttle_timers);
qemu_mutex_unlock(&tg->lock);
throttle_group_unref(&tg->ts);
- bs->throttle_state = NULL;
+ blkp->throttle_state = NULL;
}
static void throttle_groups_init(void)
diff --git a/block/trace-events b/block/trace-events
new file mode 100644
index 000000000..05fa13c89
--- /dev/null
+++ b/block/trace-events
@@ -0,0 +1,116 @@
+# See docs/tracing.txt for syntax documentation.
+
+# block.c
+bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
+bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
+
+# block/block-backend.c
+blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+
+# block/io.c
+bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
+bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
+bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
+
+# block/stream.c
+stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+
+# block/commit.c
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
+
+# block/mirror.c
+mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
+mirror_before_flush(void *s) "s %p"
+mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
+mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns"
+mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
+mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
+mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+
+# block/backup.c
+backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
+backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d"
+backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+
+# blockdev.c
+qmp_block_job_cancel(void *job) "job %p"
+qmp_block_job_pause(void *job) "job %p"
+qmp_block_job_resume(void *job) "job %p"
+qmp_block_job_complete(void *job) "job %p"
+block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+qmp_block_stream(void *bs, void *job) "bs %p job %p"
+
+# block/raw-win32.c
+# block/raw-posix.c
+paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
+paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
+
+# block/qcow2.c
+qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
+qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
+qcow2_writev_start_part(void *co) "co %p"
+qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
+qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
+qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
+qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
+
+# block/qcow2-cluster.c
+qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
+qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
+qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
+qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
+qcow2_cluster_alloc_phys(void *co) "co %p"
+qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d"
+
+qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d"
+
+# block/qcow2-cache.c
+qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset %" PRIx64 " read_from_disk %d"
+qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d"
+qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+
+# block/qed-l2-cache.c
+qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p"
+qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"
+qed_find_l2_cache_entry(void *l2_cache, void *entry, uint64_t offset, int ref) "l2_cache %p entry %p offset %"PRIu64" ref %d"
+
+# block/qed-table.c
+qed_read_table(void *s, uint64_t offset, void *table) "s %p offset %"PRIu64" table %p"
+qed_read_table_cb(void *s, void *table, int ret) "s %p table %p ret %d"
+qed_write_table(void *s, uint64_t offset, void *table, unsigned int index, unsigned int n) "s %p offset %"PRIu64" table %p index %u n %u"
+qed_write_table_cb(void *s, void *table, int flush, int ret) "s %p table %p flush %d ret %d"
+
+# block/qed.c
+qed_need_check_timer_cb(void *s) "s %p"
+qed_start_need_check_timer(void *s) "s %p"
+qed_cancel_need_check_timer(void *s) "s %p"
+qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d"
+qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int flags) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p flags %#x"
+qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64
+qed_aio_read_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
+qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
+qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
diff --git a/block/vdi.c b/block/vdi.c
index 75d4819ed..8a1cf9792 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -54,6 +54,7 @@
#include "block/block_int.h"
#include "sysemu/block-backend.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "migration/migration.h"
#include "qemu/coroutine.h"
#include "qemu/cutils.h"
@@ -402,7 +403,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
logout("\n");
- ret = bdrv_read(bs->file->bs, 0, (uint8_t *)&header, 1);
+ ret = bdrv_read(bs->file, 0, (uint8_t *)&header, 1);
if (ret < 0) {
goto fail;
}
@@ -499,7 +500,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- ret = bdrv_read(bs->file->bs, s->bmap_sector, (uint8_t *)s->bmap,
+ ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
bmap_size);
if (ret < 0) {
goto fail_free_bmap;
@@ -557,98 +558,109 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
}
-static int vdi_co_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVdiState *s = bs->opaque;
+ QEMUIOVector local_qiov;
uint32_t bmap_entry;
uint32_t block_index;
- uint32_t sector_in_block;
- uint32_t n_sectors;
+ uint32_t offset_in_block;
+ uint32_t n_bytes;
+ uint64_t bytes_done = 0;
int ret = 0;
logout("\n");
- while (ret >= 0 && nb_sectors > 0) {
- block_index = sector_num / s->block_sectors;
- sector_in_block = sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (ret >= 0 && bytes > 0) {
+ block_index = offset / s->block_size;
+ offset_in_block = offset % s->block_size;
+ n_bytes = MIN(bytes, s->block_size - offset_in_block);
- logout("will read %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, sector_num);
+ logout("will read %u bytes starting at offset %" PRIu64 "\n",
+ n_bytes, offset);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
- memset(buf, 0, n_sectors * SECTOR_SIZE);
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
ret = 0;
} else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
- ret = bdrv_read(bs->file->bs, offset, buf, n_sectors);
+ uint64_t data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ offset_in_block;
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->file, data_offset, n_bytes,
+ &local_qiov, 0);
}
- logout("%u sectors read\n", n_sectors);
+ logout("%u bytes read\n", n_bytes);
- nb_sectors -= n_sectors;
- sector_num += n_sectors;
- buf += n_sectors * SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
-static int vdi_co_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVdiState *s = bs->opaque;
+ QEMUIOVector local_qiov;
uint32_t bmap_entry;
uint32_t block_index;
- uint32_t sector_in_block;
- uint32_t n_sectors;
+ uint32_t offset_in_block;
+ uint32_t n_bytes;
uint32_t bmap_first = VDI_UNALLOCATED;
uint32_t bmap_last = VDI_UNALLOCATED;
uint8_t *block = NULL;
+ uint64_t bytes_done = 0;
int ret = 0;
logout("\n");
- while (ret >= 0 && nb_sectors > 0) {
- block_index = sector_num / s->block_sectors;
- sector_in_block = sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (ret >= 0 && bytes > 0) {
+ block_index = offset / s->block_size;
+ offset_in_block = offset % s->block_size;
+ n_bytes = MIN(bytes, s->block_size - offset_in_block);
- logout("will write %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, sector_num);
+ logout("will write %u bytes starting at offset %" PRIu64 "\n",
+ n_bytes, offset);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
- uint64_t offset;
+ uint64_t data_offset;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
- offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors;
+ data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size;
if (block == NULL) {
block = g_malloc(s->block_size);
bmap_first = block_index;
}
bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
- memset(block, 0, sector_in_block * SECTOR_SIZE);
- memcpy(block + sector_in_block * SECTOR_SIZE,
- buf, n_sectors * SECTOR_SIZE);
- memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
- (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
+ memset(block, 0, offset_in_block);
+ qemu_iovec_to_buf(qiov, bytes_done, block + offset_in_block,
+ n_bytes);
+ memset(block + offset_in_block + n_bytes, 0,
+ s->block_size - n_bytes - offset_in_block);
/* Note that this coroutine does not yield anywhere from reading the
* bmap entry until here, so in regards to all the coroutines trying
@@ -658,12 +670,12 @@ static int vdi_co_write(BlockDriverState *bs,
* acquire the lock and thus the padded cluster is written before
* the other coroutines can write to the affected area. */
qemu_co_mutex_lock(&s->write_lock);
- ret = bdrv_write(bs->file->bs, offset, block, s->block_sectors);
+ ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
qemu_co_mutex_unlock(&s->write_lock);
} else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
+ uint64_t data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ offset_in_block;
qemu_co_mutex_lock(&s->write_lock);
/* This lock is only used to make sure the following write operation
* is executed after the write issued by the coroutine allocating
@@ -674,16 +686,23 @@ static int vdi_co_write(BlockDriverState *bs,
* that that write operation has returned (there may be other writes
* in flight, but they do not concern this very operation). */
qemu_co_mutex_unlock(&s->write_lock);
- ret = bdrv_write(bs->file->bs, offset, buf, n_sectors);
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_pwritev(bs->file, data_offset, n_bytes,
+ &local_qiov, 0);
}
- nb_sectors -= n_sectors;
- sector_num += n_sectors;
- buf += n_sectors * SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
- logout("%u sectors written\n", n_sectors);
+ logout("%u bytes written\n", n_bytes);
}
+ qemu_iovec_destroy(&local_qiov);
+
logout("finished data write\n");
if (ret < 0) {
return ret;
@@ -694,12 +713,13 @@ static int vdi_co_write(BlockDriverState *bs,
VdiHeader *header = (VdiHeader *) block;
uint8_t *base;
uint64_t offset;
+ uint32_t n_sectors;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(bmap_first));
*header = s->header;
vdi_header_to_le(header);
- ret = bdrv_write(bs->file->bs, 0, block, 1);
+ ret = bdrv_write(bs->file, 0, block, 1);
g_free(block);
block = NULL;
@@ -717,7 +737,7 @@ static int vdi_co_write(BlockDriverState *bs,
base = ((uint8_t *)&s->bmap[0]) + bmap_first * SECTOR_SIZE;
logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
- ret = bdrv_write(bs->file->bs, offset, base, n_sectors);
+ ret = bdrv_write(bs->file, offset, base, n_sectors);
}
return ret;
@@ -808,7 +828,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- ret = blk_pwrite(blk, offset, &header, sizeof(header));
+ ret = blk_pwrite(blk, offset, &header, sizeof(header), 0);
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -829,7 +849,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- ret = blk_pwrite(blk, offset, bmap, bmap_size);
+ ret = blk_pwrite(blk, offset, bmap, bmap_size, 0);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -903,9 +923,9 @@ static BlockDriver bdrv_vdi = {
.bdrv_co_get_block_status = vdi_co_get_block_status,
.bdrv_make_empty = vdi_make_empty,
- .bdrv_read = vdi_co_read,
+ .bdrv_co_preadv = vdi_co_preadv,
#if defined(CONFIG_VDI_WRITE)
- .bdrv_write = vdi_co_write,
+ .bdrv_co_pwritev = vdi_co_pwritev,
#endif
.bdrv_get_info = vdi_get_info,
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
index da33cd38e..c306b90d5 100644
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -18,6 +18,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
#include <uuid/uuid.h>
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 7ea7187fc..02eb10431 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -23,6 +23,7 @@
#include "block/block_int.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
@@ -83,7 +84,7 @@ static int vhdx_log_peek_hdr(BlockDriverState *bs, VHDXLogEntries *log,
offset = log->offset + read;
- ret = bdrv_pread(bs->file->bs, offset, hdr, sizeof(VHDXLogEntryHeader));
+ ret = bdrv_pread(bs->file, offset, hdr, sizeof(VHDXLogEntryHeader));
if (ret < 0) {
goto exit;
}
@@ -143,7 +144,7 @@ static int vhdx_log_read_sectors(BlockDriverState *bs, VHDXLogEntries *log,
}
offset = log->offset + read;
- ret = bdrv_pread(bs->file->bs, offset, buffer, VHDX_LOG_SECTOR_SIZE);
+ ret = bdrv_pread(bs->file, offset, buffer, VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -193,7 +194,7 @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
/* full */
break;
}
- ret = bdrv_pwrite(bs->file->bs, offset, buffer_tmp,
+ ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -465,7 +466,7 @@ static int vhdx_log_flush_desc(BlockDriverState *bs, VHDXLogDescriptor *desc,
/* count is only > 1 if we are writing zeroes */
for (i = 0; i < count; i++) {
- ret = bdrv_pwrite_sync(bs->file->bs, file_offset, buffer,
+ ret = bdrv_pwrite_sync(bs->file, file_offset, buffer,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -944,7 +945,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
if (i == 0 && leading_length) {
/* partial sector at the front of the buffer */
- ret = bdrv_pread(bs->file->bs, file_offset, merged_sector,
+ ret = bdrv_pread(bs->file, file_offset, merged_sector,
VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
@@ -954,7 +955,7 @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
sector_write = merged_sector;
} else if (i == sectors - 1 && trailing_length) {
/* partial sector at the end of the buffer */
- ret = bdrv_pread(bs->file->bs,
+ ret = bdrv_pread(bs->file,
file_offset,
merged_sector + trailing_length,
VHDX_LOG_SECTOR_SIZE - trailing_length);
diff --git a/block/vhdx.c b/block/vhdx.c
index 2b7b33240..75ef2b1c2 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -22,11 +22,11 @@
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
#include "migration/migration.h"
#include <uuid/uuid.h>
-#include <glib.h>
/* Options for VHDX creation */
@@ -298,9 +298,10 @@ static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
* and then update the header checksum. Header is converted to proper
* endianness before being written to the specified file offset
*/
-static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
+static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr,
uint64_t offset, bool read)
{
+ BlockDriverState *bs_file = file->bs;
uint8_t *buffer = NULL;
int ret;
VHDXHeader *header_le;
@@ -315,7 +316,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
if (read) {
/* if true, we can't assume the extra reserved bytes are 0 */
- ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(file, offset, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto exit;
}
@@ -329,7 +330,7 @@ static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
vhdx_header_le_export(hdr, header_le);
vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
offsetof(VHDXHeader, checksum));
- ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
+ ret = bdrv_pwrite_sync(file, offset, header_le, sizeof(VHDXHeader));
exit:
qemu_vfree(buffer);
@@ -378,7 +379,7 @@ static int vhdx_update_header(BlockDriverState *bs, BDRVVHDXState *s,
inactive_header->log_guid = *log_guid;
}
- ret = vhdx_write_header(bs->file->bs, inactive_header, header_offset, true);
+ ret = vhdx_write_header(bs->file, inactive_header, header_offset, true);
if (ret < 0) {
goto exit;
}
@@ -430,7 +431,7 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
- ret = bdrv_pread(bs->file->bs, VHDX_HEADER1_OFFSET, buffer,
+ ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer,
VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -447,7 +448,7 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
}
}
- ret = bdrv_pread(bs->file->bs, VHDX_HEADER2_OFFSET, buffer,
+ ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer,
VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -521,7 +522,7 @@ static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
* whole block */
buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
- ret = bdrv_pread(bs->file->bs, VHDX_REGION_TABLE_OFFSET, buffer,
+ ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
VHDX_HEADER_BLOCK_SIZE);
if (ret < 0) {
goto fail;
@@ -634,7 +635,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
- ret = bdrv_pread(bs->file->bs, s->metadata_rt.file_offset, buffer,
+ ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
VHDX_METADATA_TABLE_MAX_SIZE);
if (ret < 0) {
goto exit;
@@ -737,7 +738,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
goto exit;
}
- ret = bdrv_pread(bs->file->bs,
+ ret = bdrv_pread(bs->file,
s->metadata_entries.file_parameters_entry.offset
+ s->metadata_rt.file_offset,
&s->params,
@@ -772,7 +773,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
/* determine virtual disk size, logical sector size,
* and phys sector size */
- ret = bdrv_pread(bs->file->bs,
+ ret = bdrv_pread(bs->file,
s->metadata_entries.virtual_disk_size_entry.offset
+ s->metadata_rt.file_offset,
&s->virtual_disk_size,
@@ -780,7 +781,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
- ret = bdrv_pread(bs->file->bs,
+ ret = bdrv_pread(bs->file,
s->metadata_entries.logical_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->logical_sector_size,
@@ -788,7 +789,7 @@ static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
if (ret < 0) {
goto exit;
}
- ret = bdrv_pread(bs->file->bs,
+ ret = bdrv_pread(bs->file,
s->metadata_entries.phys_sector_size_entry.offset
+ s->metadata_rt.file_offset,
&s->physical_sector_size,
@@ -905,7 +906,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
QLIST_INIT(&s->regions);
/* validate the file signature */
- ret = bdrv_pread(bs->file->bs, 0, &signature, sizeof(uint64_t));
+ ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
if (ret < 0) {
goto fail;
}
@@ -964,7 +965,7 @@ static int vhdx_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- ret = bdrv_pread(bs->file->bs, s->bat_offset, s->bat, s->bat_rt.length);
+ ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
if (ret < 0) {
goto fail;
}
@@ -1117,7 +1118,7 @@ static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
break;
case PAYLOAD_BLOCK_FULLY_PRESENT:
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file->bs,
+ ret = bdrv_co_readv(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sinfo.sectors_avail, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1326,7 +1327,7 @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
}
/* block exists, so we can just overwrite it */
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_writev(bs->file->bs,
+ ret = bdrv_co_writev(bs->file,
sinfo.file_offset >> BDRV_SECTOR_BITS,
sectors_to_write, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
@@ -1387,9 +1388,11 @@ exit:
* There are 2 headers, and the highest sequence number will represent
* the active header
*/
-static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
uint32_t log_size)
{
+ BlockDriverState *bs = blk_bs(blk);
+ BdrvChild *child;
int ret = 0;
VHDXHeader *hdr = NULL;
@@ -1404,12 +1407,18 @@ static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
vhdx_guid_generate(&hdr->file_write_guid);
vhdx_guid_generate(&hdr->data_write_guid);
- ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
+ /* XXX Ugly way to get blk->root, but that's a feature, not a bug. This
+ * hack makes it obvious that vhdx_write_header() bypasses the BlockBackend
+ * here, which it really shouldn't be doing. */
+ child = QLIST_FIRST(&bs->parents);
+ assert(!QLIST_NEXT(child, next_parent));
+
+ ret = vhdx_write_header(child, hdr, VHDX_HEADER1_OFFSET, false);
if (ret < 0) {
goto exit;
}
hdr->sequence_number++;
- ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
+ ret = vhdx_write_header(child, hdr, VHDX_HEADER2_OFFSET, false);
if (ret < 0) {
goto exit;
}
@@ -1442,7 +1451,7 @@ exit:
* The first 64KB of the Metadata section is reserved for the metadata
* header and entries; beyond that, the metadata items themselves reside.
*/
-static int vhdx_create_new_metadata(BlockDriverState *bs,
+static int vhdx_create_new_metadata(BlockBackend *blk,
uint64_t image_size,
uint32_t block_size,
uint32_t sector_size,
@@ -1538,13 +1547,13 @@ static int vhdx_create_new_metadata(BlockDriverState *bs,
VHDX_META_FLAGS_IS_VIRTUAL_DISK;
vhdx_metadata_entry_le_export(&md_table_entry[4]);
- ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
+ ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
- ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
- VHDX_METADATA_ENTRY_BUFFER_SIZE);
+ ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer,
+ VHDX_METADATA_ENTRY_BUFFER_SIZE, 0);
if (ret < 0) {
goto exit;
}
@@ -1564,7 +1573,7 @@ exit:
* Fixed images: default state of the BAT is fully populated, with
* file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
*/
-static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
+static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
bool use_zero_blocks, uint64_t file_offset,
uint32_t length)
@@ -1588,12 +1597,12 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
if (type == VHDX_TYPE_DYNAMIC) {
/* All zeroes, so we can just extend the file - the end of the BAT
* is the furthest thing we have written yet */
- ret = bdrv_truncate(bs, data_file_offset);
+ ret = blk_truncate(blk, data_file_offset);
if (ret < 0) {
goto exit;
}
} else if (type == VHDX_TYPE_FIXED) {
- ret = bdrv_truncate(bs, data_file_offset + image_size);
+ ret = blk_truncate(blk, data_file_offset + image_size);
if (ret < 0) {
goto exit;
}
@@ -1604,7 +1613,7 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
if (type == VHDX_TYPE_FIXED ||
use_zero_blocks ||
- bdrv_has_zero_init(bs) == 0) {
+ bdrv_has_zero_init(blk_bs(blk)) == 0) {
/* for a fixed file, the default BAT entry is not zero */
s->bat = g_try_malloc0(length);
if (length && s->bat == NULL) {
@@ -1620,12 +1629,12 @@ static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
sinfo.file_offset = data_file_offset +
(sector_num << s->logical_sector_size_bits);
sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
- vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
+ vhdx_update_bat_table_entry(blk_bs(blk), s, &sinfo, &unused, &unused,
block_state);
cpu_to_le64s(&s->bat[sinfo.bat_idx]);
sector_num += s->sectors_per_block;
}
- ret = bdrv_pwrite(bs, file_offset, s->bat, length);
+ ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
if (ret < 0) {
goto exit;
}
@@ -1645,7 +1654,7 @@ exit:
* to create the BAT itself, we will also cause the BAT to be
* created.
*/
-static int vhdx_create_new_region_table(BlockDriverState *bs,
+static int vhdx_create_new_region_table(BlockBackend *blk,
uint64_t image_size,
uint32_t block_size,
uint32_t sector_size,
@@ -1720,21 +1729,21 @@ static int vhdx_create_new_region_table(BlockDriverState *bs,
/* The region table gives us the data we need to create the BAT,
* so do that now */
- ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
+ ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
bat_file_offset, bat_length);
if (ret < 0) {
goto exit;
}
/* Now write out the region headers to disk */
- ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
- VHDX_HEADER_BLOCK_SIZE);
+ ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
+ VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
- ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
- VHDX_HEADER_BLOCK_SIZE);
+ ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
+ VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
@@ -1856,13 +1865,14 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
- ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
+ 0);
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
- creator, creator_items * sizeof(gunichar2));
+ creator, creator_items * sizeof(gunichar2), 0);
if (ret < 0) {
goto delete_and_exit;
}
@@ -1870,13 +1880,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
/* Creates (B),(C) */
- ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
+ ret = vhdx_create_new_headers(blk, image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
- ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
+ ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1884,7 +1894,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* Creates (H) */
- ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
+ ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;
diff --git a/block/vmdk.c b/block/vmdk.c
index 45f9d3c5b..46d474e44 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -30,10 +30,10 @@
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "migration/migration.h"
#include "qemu/cutils.h"
#include <zlib.h>
-#include <glib.h>
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
@@ -252,7 +252,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
int ret;
desc = g_malloc0(DESC_SIZE);
- ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
g_free(desc);
return 0;
@@ -286,7 +286,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
desc = g_malloc0(DESC_SIZE);
tmp_desc = g_malloc0(DESC_SIZE);
- ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
goto out;
}
@@ -306,7 +306,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
pstrcat(desc, DESC_SIZE, tmp_desc);
}
- ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
out:
g_free(desc);
@@ -350,7 +350,7 @@ static int vmdk_parent_open(BlockDriverState *bs)
int ret;
desc = g_malloc0(DESC_SIZE + 1);
- ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
+ ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
goto out;
}
@@ -454,7 +454,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
return -ENOMEM;
}
- ret = bdrv_pread(extent->file->bs,
+ ret = bdrv_pread(extent->file,
extent->l1_table_offset,
extent->l1_table,
l1_size);
@@ -474,7 +474,7 @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
ret = -ENOMEM;
goto fail_l1;
}
- ret = bdrv_pread(extent->file->bs,
+ ret = bdrv_pread(extent->file,
extent->l1_backup_table_offset,
extent->l1_backup_table,
l1_size);
@@ -508,7 +508,7 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
VMDK3Header header;
VmdkExtent *extent;
- ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
+ ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
@@ -538,14 +538,13 @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
QDict *options, Error **errp);
-static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
- Error **errp)
+static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
{
int64_t size;
char *buf;
int ret;
- size = bdrv_getlength(file);
+ size = bdrv_getlength(file->bs);
if (size < 0) {
error_setg_errno(errp, -size, "Could not access file");
return NULL;
@@ -586,7 +585,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
int64_t l1_backup_offset = 0;
bool compressed;
- ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
+ ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
@@ -596,7 +595,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (header.capacity == 0) {
uint64_t desc_offset = le64_to_cpu(header.desc_offset);
if (desc_offset) {
- char *buf = vmdk_read_desc(file->bs, desc_offset << 9, errp);
+ char *buf = vmdk_read_desc(file, desc_offset << 9, errp);
if (!buf) {
return -EINVAL;
}
@@ -636,7 +635,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
} QEMU_PACKED eos_marker;
} QEMU_PACKED footer;
- ret = bdrv_pread(file->bs,
+ ret = bdrv_pread(file,
bs->file->bs->total_sectors * 512 - 1536,
&footer, sizeof(footer));
if (ret < 0) {
@@ -874,7 +873,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
extent->flat_start_offset = flat_offset << 9;
} else if (!strcmp(type, "SPARSE") || !strcmp(type, "VMFSSPARSE")) {
/* SPARSE extent and VMFSSPARSE extent are both "COWD" sparse file*/
- char *buf = vmdk_read_desc(extent_file->bs, 0, errp);
+ char *buf = vmdk_read_desc(extent_file, 0, errp);
if (!buf) {
ret = -EINVAL;
} else {
@@ -943,7 +942,7 @@ static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
- buf = vmdk_read_desc(bs->file->bs, 0, errp);
+ buf = vmdk_read_desc(bs->file, 0, errp);
if (!buf) {
return -EINVAL;
}
@@ -997,9 +996,9 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
for (i = 0; i < s->num_extents; i++) {
if (!s->extents[i].flat) {
- bs->bl.write_zeroes_alignment =
- MAX(bs->bl.write_zeroes_alignment,
- s->extents[i].cluster_sectors);
+ bs->bl.pwrite_zeroes_alignment =
+ MAX(bs->bl.pwrite_zeroes_alignment,
+ s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
}
}
}
@@ -1016,27 +1015,26 @@ static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
*/
static int get_whole_cluster(BlockDriverState *bs,
VmdkExtent *extent,
- uint64_t cluster_sector_num,
- uint64_t sector_num,
- uint64_t skip_start_sector,
- uint64_t skip_end_sector)
+ uint64_t cluster_offset,
+ uint64_t offset,
+ uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
int ret = VMDK_OK;
int64_t cluster_bytes;
uint8_t *whole_grain;
/* For COW, align request sector_num to cluster start */
- sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+ offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
whole_grain = qemu_blockalign(bs, cluster_bytes);
if (!bs->backing) {
- memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
- memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
- cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+ memset(whole_grain, 0, skip_start_bytes);
+ memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
}
- assert(skip_end_sector <= extent->cluster_sectors);
+ assert(skip_end_bytes <= cluster_bytes);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
if (bs->backing && !vmdk_is_cid_valid(bs)) {
@@ -1045,42 +1043,43 @@ static int get_whole_cluster(BlockDriverState *bs,
}
/* Read backing data before skip range */
- if (skip_start_sector > 0) {
+ if (skip_start_bytes > 0) {
if (bs->backing) {
- ret = bdrv_read(bs->backing->bs, sector_num,
- whole_grain, skip_start_sector);
+ ret = bdrv_pread(bs->backing, offset, whole_grain,
+ skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file->bs, cluster_sector_num, whole_grain,
- skip_start_sector);
+ ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain,
+ skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
/* Read backing data after skip range */
- if (skip_end_sector < extent->cluster_sectors) {
+ if (skip_end_bytes < cluster_bytes) {
if (bs->backing) {
- ret = bdrv_read(bs->backing->bs, sector_num + skip_end_sector,
- whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
- extent->cluster_sectors - skip_end_sector);
+ ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
+ whole_grain + skip_end_bytes,
+ cluster_bytes - skip_end_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file->bs, cluster_sector_num + skip_end_sector,
- whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
- extent->cluster_sectors - skip_end_sector);
+ ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes,
+ whole_grain + skip_end_bytes,
+ cluster_bytes - skip_end_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
+ ret = VMDK_OK;
exit:
qemu_vfree(whole_grain);
return ret;
@@ -1091,8 +1090,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
{
offset = cpu_to_le32(offset);
/* update L2 table */
- if (bdrv_pwrite_sync(
- extent->file->bs,
+ if (bdrv_pwrite_sync(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1101,8 +1099,7 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data,
/* update backup L2 table */
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
- if (bdrv_pwrite_sync(
- extent->file->bs,
+ if (bdrv_pwrite_sync(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1142,8 +1139,8 @@ static int get_cluster_offset(BlockDriverState *bs,
uint64_t offset,
bool allocate,
uint64_t *cluster_offset,
- uint64_t skip_start_sector,
- uint64_t skip_end_sector)
+ uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
@@ -1191,8 +1188,7 @@ static int get_cluster_offset(BlockDriverState *bs,
}
}
l2_table = extent->l2_cache + (min_index * extent->l2_size);
- if (bdrv_pread(
- extent->file->bs,
+ if (bdrv_pread(extent->file,
(int64_t)l2_offset * 512,
l2_table,
extent->l2_size * sizeof(uint32_t)
@@ -1206,13 +1202,6 @@ static int get_cluster_offset(BlockDriverState *bs,
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
cluster_sector = le32_to_cpu(l2_table[l2_index]);
- if (m_data) {
- m_data->valid = 1;
- m_data->l1_index = l1_index;
- m_data->l2_index = l2_index;
- m_data->l2_offset = l2_offset;
- m_data->l2_cache_entry = &l2_table[l2_index];
- }
if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
@@ -1230,13 +1219,18 @@ static int get_cluster_offset(BlockDriverState *bs,
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- ret = get_whole_cluster(bs, extent,
- cluster_sector,
- offset >> BDRV_SECTOR_BITS,
- skip_start_sector, skip_end_sector);
+ ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
+ offset, skip_start_bytes, skip_end_bytes);
if (ret) {
return ret;
}
+ if (m_data) {
+ m_data->valid = 1;
+ m_data->l1_index = l1_index;
+ m_data->l2_index = l2_index;
+ m_data->l2_offset = l2_offset;
+ m_data->l2_cache_entry = &l2_table[l2_index];
+ }
}
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
@@ -1259,15 +1253,24 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
return NULL;
}
+static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
+ int64_t offset)
+{
+ uint64_t extent_begin_offset, extent_relative_offset;
+ uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE;
+
+ extent_begin_offset =
+ (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;
+ extent_relative_offset = offset - extent_begin_offset;
+ return extent_relative_offset % cluster_size;
+}
+
static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
int64_t sector_num)
{
- uint64_t index_in_cluster, extent_begin_sector, extent_relative_sector_num;
-
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- return index_in_cluster;
+ uint64_t offset;
+ offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE);
+ return offset / BDRV_SECTOR_SIZE;
}
static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
@@ -1319,38 +1322,57 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
}
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, const uint8_t *buf,
- int nb_sectors, int64_t sector_num)
+ int64_t offset_in_cluster, QEMUIOVector *qiov,
+ uint64_t qiov_offset, uint64_t n_bytes,
+ uint64_t offset)
{
int ret;
VmdkGrainMarker *data = NULL;
uLongf buf_len;
- const uint8_t *write_buf = buf;
- int write_len = nb_sectors * 512;
+ QEMUIOVector local_qiov;
+ struct iovec iov;
int64_t write_offset;
int64_t write_end_sector;
if (extent->compressed) {
+ void *compressed_data;
+
if (!extent->has_marker) {
ret = -EINVAL;
goto out;
}
buf_len = (extent->cluster_sectors << 9) * 2;
data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
- if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
- buf_len == 0) {
+
+ compressed_data = g_malloc(n_bytes);
+ qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
+ ret = compress(data->data, &buf_len, compressed_data, n_bytes);
+ g_free(compressed_data);
+
+ if (ret != Z_OK || buf_len == 0) {
ret = -EINVAL;
goto out;
}
- data->lba = sector_num;
+
+ data->lba = offset >> BDRV_SECTOR_BITS;
data->size = buf_len;
- write_buf = (uint8_t *)data;
- write_len = buf_len + sizeof(VmdkGrainMarker);
+
+ n_bytes = buf_len + sizeof(VmdkGrainMarker);
+ iov = (struct iovec) {
+ .iov_base = data,
+ .iov_len = n_bytes,
+ };
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+ } else {
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
}
+
write_offset = cluster_offset + offset_in_cluster,
- ret = bdrv_pwrite(extent->file->bs, write_offset, write_buf, write_len);
+ ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
+ &local_qiov, 0);
- write_end_sector = DIV_ROUND_UP(write_offset + write_len, BDRV_SECTOR_SIZE);
+ write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
if (extent->compressed) {
extent->next_cluster_sector = write_end_sector;
@@ -1359,19 +1381,21 @@ static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
write_end_sector);
}
- if (ret != write_len) {
- ret = ret < 0 ? ret : -EIO;
+ if (ret < 0) {
goto out;
}
ret = 0;
out:
g_free(data);
+ if (!extent->compressed) {
+ qemu_iovec_destroy(&local_qiov);
+ }
return ret;
}
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, uint8_t *buf,
- int nb_sectors)
+ int64_t offset_in_cluster, QEMUIOVector *qiov,
+ int bytes)
{
int ret;
int cluster_bytes, buf_bytes;
@@ -1383,21 +1407,20 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
if (!extent->compressed) {
- ret = bdrv_pread(extent->file->bs,
- cluster_offset + offset_in_cluster,
- buf, nb_sectors * 512);
- if (ret == nb_sectors * 512) {
- return 0;
- } else {
- return -EIO;
+ ret = bdrv_co_preadv(extent->file,
+ cluster_offset + offset_in_cluster, bytes,
+ qiov, 0);
+ if (ret < 0) {
+ return ret;
}
+ return 0;
}
cluster_bytes = extent->cluster_sectors * 512;
/* Read two clusters in case GrainMarker + compressed data > one cluster */
buf_bytes = cluster_bytes * 2;
cluster_buf = g_malloc(buf_bytes);
uncomp_buf = g_malloc(cluster_bytes);
- ret = bdrv_pread(extent->file->bs,
+ ret = bdrv_pread(extent->file,
cluster_offset,
cluster_buf, buf_bytes);
if (ret < 0) {
@@ -1422,11 +1445,11 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
}
if (offset_in_cluster < 0 ||
- offset_in_cluster + nb_sectors * 512 > buf_len) {
+ offset_in_cluster + bytes > buf_len) {
ret = -EINVAL;
goto out;
}
- memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
+ qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
ret = 0;
out:
@@ -1435,64 +1458,73 @@ static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
return ret;
}
-static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVmdkState *s = bs->opaque;
int ret;
- uint64_t n, index_in_cluster;
+ uint64_t n_bytes, offset_in_cluster;
VmdkExtent *extent = NULL;
+ QEMUIOVector local_qiov;
uint64_t cluster_offset;
+ uint64_t bytes_done = 0;
- while (nb_sectors > 0) {
- extent = find_extent(s, sector_num, extent);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_co_mutex_lock(&s->lock);
+
+ while (bytes > 0) {
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
if (!extent) {
- return -EIO;
+ ret = -EIO;
+ goto fail;
}
ret = get_cluster_offset(bs, extent, NULL,
- sector_num << 9, false, &cluster_offset,
- 0, 0);
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
+ offset, false, &cluster_offset, 0, 0);
+ offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+
+ n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+ - offset_in_cluster);
+
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
if (bs->backing && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail;
}
- ret = bdrv_read(bs->backing->bs, sector_num, buf, n);
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
+ &local_qiov, 0);
if (ret < 0) {
- return ret;
+ goto fail;
}
} else {
- memset(buf, 0, 512 * n);
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
}
} else {
- ret = vmdk_read_extent(extent,
- cluster_offset, index_in_cluster * 512,
- buf, n);
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
+ &local_qiov, n_bytes);
if (ret) {
- return ret;
+ goto fail;
}
}
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVmdkState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vmdk_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
@@ -1506,38 +1538,38 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
*
* Returns: error code with 0 for success.
*/
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors,
- bool zeroed, bool zero_dry_run)
+static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ bool zeroed, bool zero_dry_run)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int ret;
- int64_t index_in_cluster, n;
+ int64_t offset_in_cluster, n_bytes;
uint64_t cluster_offset;
+ uint64_t bytes_done = 0;
VmdkMetaData m_data;
- if (sector_num > bs->total_sectors) {
- error_report("Wrong offset: sector_num=0x%" PRIx64
+ if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
+ error_report("Wrong offset: offset=0x%" PRIx64
" total_sectors=0x%" PRIx64,
- sector_num, bs->total_sectors);
+ offset, bs->total_sectors);
return -EIO;
}
- while (nb_sectors > 0) {
- extent = find_extent(s, sector_num, extent);
+ while (bytes > 0) {
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
if (!extent) {
return -EIO;
}
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+ n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+ - offset_in_cluster);
+
+ ret = get_cluster_offset(bs, extent, &m_data, offset,
!(extent->compressed || zeroed),
- &cluster_offset,
- index_in_cluster, index_in_cluster + n);
+ &cluster_offset, offset_in_cluster,
+ offset_in_cluster + n_bytes);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@@ -1546,7 +1578,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -EIO;
} else {
/* allocate */
- ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ ret = get_cluster_offset(bs, extent, &m_data, offset,
true, &cluster_offset, 0, 0);
}
}
@@ -1556,9 +1588,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
- index_in_cluster == 0 &&
- n >= extent->cluster_sectors) {
- n = extent->cluster_sectors;
+ offset_in_cluster == 0 &&
+ n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
+ n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
if (!zero_dry_run) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
@@ -1570,9 +1602,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return -ENOTSUP;
}
} else {
- ret = vmdk_write_extent(extent,
- cluster_offset, index_in_cluster * 512,
- buf, n, sector_num);
+ ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
+ qiov, bytes_done, n_bytes, offset);
if (ret) {
return ret;
}
@@ -1585,9 +1616,9 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
}
}
}
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
/* update CID on the first write every time the virtual disk is
* opened */
@@ -1602,43 +1633,84 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
return 0;
}
-static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
+typedef struct VmdkWriteCompressedCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ const uint8_t *buf;
+ int nb_sectors;
+ int ret;
+} VmdkWriteCompressedCo;
+
+static void vmdk_co_write_compressed(void *opaque)
+{
+ VmdkWriteCompressedCo *co = opaque;
+ QEMUIOVector local_qiov;
+ uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+ uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+ struct iovec iov = (struct iovec) {
+ .iov_base = (uint8_t*) co->buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+ co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false);
+}
+
static int vmdk_write_compressed(BlockDriverState *bs,
int64_t sector_num,
const uint8_t *buf,
int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
+
if (s->num_extents == 1 && s->extents[0].compressed) {
- return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ Coroutine *co;
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ VmdkWriteCompressedCo data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS,
+ };
+ co = qemu_coroutine_create(vmdk_co_write_compressed, &data);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
} else {
return -ENOTSUP;
}
}
-static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- BdrvRequestFlags flags)
+static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset,
+ int bytes,
+ BdrvRequestFlags flags)
{
int ret;
BDRVVmdkState *s = bs->opaque;
+
qemu_co_mutex_lock(&s->lock);
/* write zeroes could fail if sectors not aligned to cluster, test it with
* dry_run == true before really updating image */
- ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+ ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true);
if (!ret) {
- ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+ ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false);
}
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -1728,12 +1800,12 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.check_bytes[3] = 0xa;
/* write all the data */
- ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
+ ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
+ ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1753,7 +1825,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
gd_buf[i] = cpu_to_le32(tmp);
}
ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ gd_buf, gd_buf_size, 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1765,7 +1837,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
gd_buf[i] = cpu_to_le32(tmp);
}
ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ gd_buf, gd_buf_size, 0);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1829,8 +1901,8 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size = 0, filesize;
char *adapter_type = NULL;
char *backing_file = NULL;
+ char *hw_version = NULL;
char *fmt = NULL;
- int flags = 0;
int ret = 0;
bool flat, split, compress;
GString *ext_desc_lines;
@@ -1861,7 +1933,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
"# The Disk Data Base\n"
"#DDB\n"
"\n"
- "ddb.virtualHWVersion = \"%d\"\n"
+ "ddb.virtualHWVersion = \"%s\"\n"
"ddb.geometry.cylinders = \"%" PRId64 "\"\n"
"ddb.geometry.heads = \"%" PRIu32 "\"\n"
"ddb.geometry.sectors = \"63\"\n"
@@ -1878,8 +1950,20 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
BDRV_SECTOR_SIZE);
adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
- flags |= BLOCK_FLAG_COMPAT6;
+ if (strcmp(hw_version, "undefined")) {
+ error_setg(errp,
+ "compat6 cannot be enabled with hwversion set");
+ ret = -EINVAL;
+ goto exit;
+ }
+ g_free(hw_version);
+ hw_version = g_strdup("6");
+ }
+ if (strcmp(hw_version, "undefined") == 0) {
+ g_free(hw_version);
+ hw_version = g_strdup("4");
}
fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) {
@@ -2001,7 +2085,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
fmt,
parent_desc_line,
ext_desc_lines->str,
- (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+ hw_version,
total_size /
(int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
number_heads,
@@ -2028,7 +2112,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
blk_set_allow_write_beyond_eof(new_blk, true);
- ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
+ ret = blk_pwrite(new_blk, desc_offset, desc, desc_len, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -2047,6 +2131,7 @@ exit:
}
g_free(adapter_type);
g_free(backing_file);
+ g_free(hw_version);
g_free(fmt);
g_free(desc);
g_free(path);
@@ -2250,27 +2335,6 @@ static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
-static void vmdk_detach_aio_context(BlockDriverState *bs)
-{
- BDRVVmdkState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_extents; i++) {
- bdrv_detach_aio_context(s->extents[i].file->bs);
- }
-}
-
-static void vmdk_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVVmdkState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_extents; i++) {
- bdrv_attach_aio_context(s->extents[i].file->bs, new_context);
- }
-}
-
static QemuOptsList vmdk_create_opts = {
.name = "vmdk-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
@@ -2298,6 +2362,12 @@ static QemuOptsList vmdk_create_opts = {
.def_value_str = "off"
},
{
+ .name = BLOCK_OPT_HWVERSION,
+ .type = QEMU_OPT_STRING,
+ .help = "VMDK hardware version",
+ .def_value_str = "undefined"
+ },
+ {
.name = BLOCK_OPT_SUBFMT,
.type = QEMU_OPT_STRING,
.help =
@@ -2321,10 +2391,10 @@ static BlockDriver bdrv_vmdk = {
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
- .bdrv_read = vmdk_co_read,
- .bdrv_write = vmdk_co_write,
+ .bdrv_co_preadv = vmdk_co_preadv,
+ .bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_write_compressed = vmdk_write_compressed,
- .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
@@ -2334,8 +2404,6 @@ static BlockDriver bdrv_vmdk = {
.bdrv_get_specific_info = vmdk_get_specific_info,
.bdrv_refresh_limits = vmdk_refresh_limits,
.bdrv_get_info = vmdk_get_info,
- .bdrv_detach_aio_context = vmdk_detach_aio_context,
- .bdrv_attach_aio_context = vmdk_attach_aio_context,
.supports_backing = true,
.create_opts = &vmdk_create_opts,
diff --git a/block/vpc.c b/block/vpc.c
index 3e2ea698d..43707ed22 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,6 +29,7 @@
#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
+#include "qemu/bswap.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#endif
@@ -236,7 +237,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
- ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
+ ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
error_setg(errp, "Unable to read VHD header");
goto fail;
@@ -256,7 +257,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
/* If a fixed disk, the footer is found only at the end of the file */
- ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
+ ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
HEADER_SIZE);
if (ret < 0) {
goto fail;
@@ -327,7 +328,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
}
if (disk_type == VHD_DYNAMIC) {
- ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
+ ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
HEADER_SIZE);
if (ret < 0) {
error_setg(errp, "Error reading dynamic VHD header");
@@ -384,7 +385,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
- ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
+ ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
pagetable_size);
if (ret < 0) {
error_setg(errp, "Error reading pagetable");
@@ -454,22 +455,21 @@ static int vpc_reopen_prepare(BDRVReopenState *state,
* The parameter write must be 1 if the offset will be used for a write
* operation (the block bitmaps is updated then), 0 otherwise.
*/
-static inline int64_t get_sector_offset(BlockDriverState *bs,
- int64_t sector_num, int write)
+static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
+ bool write)
{
BDRVVPCState *s = bs->opaque;
- uint64_t offset = sector_num * 512;
uint64_t bitmap_offset, block_offset;
- uint32_t pagetable_index, pageentry_index;
+ uint32_t pagetable_index, offset_in_block;
pagetable_index = offset / s->block_size;
- pageentry_index = (offset % s->block_size) / 512;
+ offset_in_block = offset % s->block_size;
if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
return -1; /* not allocated */
bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
- block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+ block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
/* We must ensure that we don't write to any sectors which are marked as
unused in the bitmap. We get away with setting all bits in the block
@@ -481,12 +481,18 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
s->last_bitmap_offset = bitmap_offset;
memset(bitmap, 0xff, s->bitmap_size);
- bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
+ bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
}
return block_offset;
}
+static inline int64_t get_sector_offset(BlockDriverState *bs,
+ int64_t sector_num, bool write)
+{
+ return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
+}
+
/*
* Writes the footer to the end of the image file. This is needed when the
* file grows as it overwrites the old footer
@@ -499,7 +505,7 @@ static int rewrite_footer(BlockDriverState* bs)
BDRVVPCState *s = bs->opaque;
int64_t offset = s->free_data_block_offset;
- ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
+ ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
if (ret < 0)
return ret;
@@ -513,7 +519,7 @@ static int rewrite_footer(BlockDriverState* bs)
*
* Returns the sectors' offset in the image file on success and < 0 on error
*/
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
{
BDRVVPCState *s = bs->opaque;
int64_t bat_offset;
@@ -522,19 +528,18 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
uint8_t bitmap[s->bitmap_size];
/* Check if sector_num is valid */
- if ((sector_num < 0) || (sector_num > bs->total_sectors))
- return -1;
+ if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
+ return -EINVAL;
+ }
/* Write entry into in-memory BAT */
- index = (sector_num * 512) / s->block_size;
- if (s->pagetable[index] != 0xFFFFFFFF)
- return -1;
-
+ index = offset / s->block_size;
+ assert(s->pagetable[index] == 0xFFFFFFFF);
s->pagetable[index] = s->free_data_block_offset / 512;
/* Initialize the block's bitmap */
memset(bitmap, 0xff, s->bitmap_size);
- ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
+ ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
s->bitmap_size);
if (ret < 0) {
return ret;
@@ -549,15 +554,15 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
/* Write BAT entry to disk */
bat_offset = s->bat_offset + (4 * index);
bat_value = cpu_to_be32(s->pagetable[index]);
- ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
+ ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
if (ret < 0)
goto fail;
- return get_sector_offset(bs, sector_num, 0);
+ return get_image_offset(bs, offset, false);
fail:
s->free_data_block_offset -= (s->block_size + s->bitmap_size);
- return -1;
+ return ret;
}
static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -573,104 +578,105 @@ static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
-static int vpc_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVPCState *s = bs->opaque;
int ret;
- int64_t offset;
- int64_t sectors, sectors_per_block;
+ int64_t image_offset;
+ int64_t n_bytes;
+ int64_t bytes_done = 0;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
+ QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
}
- while (nb_sectors > 0) {
- offset = get_sector_offset(bs, sector_num, 0);
- sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
- sectors = sectors_per_block - (sector_num % sectors_per_block);
- if (sectors > nb_sectors) {
- sectors = nb_sectors;
- }
+ qemu_co_mutex_lock(&s->lock);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (bytes > 0) {
+ image_offset = get_image_offset(bs, offset, false);
+ n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
- if (offset == -1) {
- memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+ if (image_offset == -1) {
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
} else {
- ret = bdrv_pread(bs->file->bs, offset, buf,
- sectors * BDRV_SECTOR_SIZE);
- if (ret != sectors * BDRV_SECTOR_SIZE) {
- return -1;
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
}
}
- nb_sectors -= sectors;
- sector_num += sectors;
- buf += sectors * BDRV_SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVPCState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vpc_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
+ qemu_iovec_destroy(&local_qiov);
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
-static int vpc_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVPCState *s = bs->opaque;
- int64_t offset;
- int64_t sectors, sectors_per_block;
+ int64_t image_offset;
+ int64_t n_bytes;
+ int64_t bytes_done = 0;
int ret;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
+ QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
}
- while (nb_sectors > 0) {
- offset = get_sector_offset(bs, sector_num, 1);
- sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
- sectors = sectors_per_block - (sector_num % sectors_per_block);
- if (sectors > nb_sectors) {
- sectors = nb_sectors;
- }
+ qemu_co_mutex_lock(&s->lock);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (bytes > 0) {
+ image_offset = get_image_offset(bs, offset, true);
+ n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
- if (offset == -1) {
- offset = alloc_block(bs, sector_num);
- if (offset < 0)
- return -1;
+ if (image_offset == -1) {
+ image_offset = alloc_block(bs, offset);
+ if (image_offset < 0) {
+ ret = image_offset;
+ goto fail;
+ }
}
- ret = bdrv_pwrite(bs->file->bs, offset, buf,
- sectors * BDRV_SECTOR_SIZE);
- if (ret != sectors * BDRV_SECTOR_SIZE) {
- return -1;
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
}
- nb_sectors -= sectors;
- sector_num += sectors;
- buf += sectors * BDRV_SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-
-static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVPCState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vpc_write(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
+ qemu_iovec_destroy(&local_qiov);
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
@@ -783,13 +789,13 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {
goto fail;
}
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
- ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {
goto fail;
}
@@ -799,7 +805,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- ret = blk_pwrite(blk, offset, buf, 512);
+ ret = blk_pwrite(blk, offset, buf, 512, 0);
if (ret < 0) {
goto fail;
}
@@ -826,7 +832,7 @@ static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
/* Write the header */
offset = 512;
- ret = blk_pwrite(blk, offset, buf, 1024);
+ ret = blk_pwrite(blk, offset, buf, 1024, 0);
if (ret < 0) {
goto fail;
}
@@ -848,7 +854,7 @@ static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
return ret;
}
- ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
if (ret < 0) {
return ret;
}
@@ -1056,8 +1062,8 @@ static BlockDriver bdrv_vpc = {
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
- .bdrv_read = vpc_co_read,
- .bdrv_write = vpc_co_write,
+ .bdrv_co_preadv = vpc_co_preadv,
+ .bdrv_co_pwritev = vpc_co_pwritev,
.bdrv_co_get_block_status = vpc_co_get_block_status,
.bdrv_get_info = vpc_get_info,
diff --git a/block/vvfat.c b/block/vvfat.c
index 183fc4f04..ba2620f3c 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -27,6 +27,7 @@
#include "qapi/error.h"
#include "block/block_int.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "migration/migration.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qbool.h"
@@ -113,15 +114,12 @@ static inline int array_ensure_allocated(array_t* array, int index)
static inline void* array_get_next(array_t* array) {
unsigned int next = array->next;
- void* result;
if (array_ensure_allocated(array, next) < 0)
return NULL;
array->next = next + 1;
- result = array_get(array, next);
-
- return result;
+ return array_get(array, next);
}
static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) {
@@ -343,9 +341,8 @@ typedef struct BDRVVVFATState {
unsigned int current_cluster;
/* write support */
- BlockDriverState* write_target;
char* qcow_filename;
- BlockDriverState* qcow;
+ BdrvChild* qcow;
void* fat2;
char* used_clusters;
array_t commits;
@@ -983,7 +980,7 @@ static int init_directories(BDRVVVFATState* s,
static BDRVVVFATState *vvv = NULL;
#endif
-static int enable_write_target(BDRVVVFATState *s, Error **errp);
+static int enable_write_target(BlockDriverState *bs, Error **errp);
static int is_consistent(BDRVVVFATState *s);
static QemuOptsList runtime_opts = {
@@ -1160,8 +1157,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
s->current_cluster=0xffffffff;
/* read only is the default for safety */
- bs->read_only = 1;
- s->qcow = s->write_target = NULL;
+ bs->read_only = true;
+ s->qcow = NULL;
s->qcow_filename = NULL;
s->fat2 = NULL;
s->downcase_short_names = 1;
@@ -1172,11 +1169,11 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
- ret = enable_write_target(s, errp);
+ ret = enable_write_target(bs, errp);
if (ret < 0) {
goto fail;
}
- bs->read_only = 0;
+ bs->read_only = false;
}
bs->total_sectors = cyls * heads * secs;
@@ -1210,6 +1207,11 @@ fail:
return ret;
}
+static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static inline void vvfat_close_current_file(BDRVVVFATState *s)
{
if(s->current_mapping) {
@@ -1388,9 +1390,10 @@ static int vvfat_read(BlockDriverState *bs, int64_t sector_num,
return -1;
if (s->qcow) {
int n;
- if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, &n)) {
-DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
- if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) {
+ if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
+ DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
+ (int)sector_num, n));
+ if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
return -1;
}
i += n - 1;
@@ -1421,14 +1424,31 @@ DLOG(fprintf(stderr, "sector %d not allocated\n", (int)sector_num));
return 0;
}
-static coroutine_fn int vvfat_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVVFATState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ void *buf;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+
qemu_co_mutex_lock(&s->lock);
ret = vvfat_read(bs, sector_num, buf, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+
return ret;
}
@@ -1649,12 +1669,15 @@ static inline int cluster_was_modified(BDRVVVFATState* s, uint32_t cluster_num)
int was_modified = 0;
int i, dummy;
- if (s->qcow == NULL)
- return 0;
+ if (s->qcow == NULL) {
+ return 0;
+ }
- for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
- was_modified = bdrv_is_allocated(s->qcow,
- cluster2sector(s, cluster_num) + i, 1, &dummy);
+ for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
+ was_modified = bdrv_is_allocated(s->qcow->bs,
+ cluster2sector(s, cluster_num) + i,
+ 1, &dummy);
+ }
return was_modified;
}
@@ -1803,11 +1826,16 @@ static uint32_t get_cluster_count_for_direntry(BDRVVVFATState* s,
vvfat_close_current_file(s);
for (i = 0; i < s->sectors_per_cluster; i++) {
- if (!bdrv_is_allocated(s->qcow, offset + i, 1, &dummy)) {
- if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) {
+ int res;
+
+ res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
+ if (!res) {
+ res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
+ if (res) {
return -1;
}
- if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) {
+ res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1);
+ if (res) {
return -2;
}
}
@@ -1941,8 +1969,7 @@ DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i))
/* check file size with FAT */
cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2);
if (cluster_count !=
- (le32_to_cpu(direntries[i].size) + s->cluster_size
- - 1) / s->cluster_size) {
+ DIV_ROUND_UP(le32_to_cpu(direntries[i].size), s->cluster_size)) {
DLOG(fprintf(stderr, "Cluster count mismatch\n"));
goto fail;
}
@@ -2764,8 +2791,8 @@ static int do_commit(BDRVVVFATState* s)
return ret;
}
- if (s->qcow->drv->bdrv_make_empty) {
- s->qcow->drv->bdrv_make_empty(s->qcow);
+ if (s->qcow->bs->drv->bdrv_make_empty) {
+ s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
}
memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
@@ -2880,14 +2907,31 @@ DLOG(checkpoint());
return 0;
}
-static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVVFATState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ void *buf;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+ qemu_iovec_to_buf(qiov, 0, buf, bytes);
+
qemu_co_mutex_lock(&s->lock);
ret = vvfat_write(bs, sector_num, buf, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+
+ g_free(buf);
+
return ret;
}
@@ -2904,26 +2948,39 @@ static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
return BDRV_BLOCK_DATA;
}
-static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
- const uint8_t* buffer, int nb_sectors) {
+static int coroutine_fn
+write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
return try_commit(s);
}
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
- bdrv_unref(s->qcow);
+ bdrv_unref_child(s->bs, s->qcow);
g_free(s->qcow_filename);
}
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
- .bdrv_write = write_target_commit,
+ .bdrv_co_pwritev = write_target_commit,
.bdrv_close = write_target_close,
};
-static int enable_write_target(BDRVVVFATState *s, Error **errp)
+static void vvfat_qcow_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
{
+ *child_flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH;
+}
+
+static const BdrvChildRole child_vvfat_qcow = {
+ .inherit_options = vvfat_qcow_options,
+};
+
+static int enable_write_target(BlockDriverState *bs, Error **errp)
+{
+ BDRVVVFATState *s = bs->opaque;
BlockDriver *bdrv_qcow = NULL;
BlockDriverState *backing;
QemuOpts *opts = NULL;
@@ -2960,12 +3017,13 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
goto err;
}
- s->qcow = NULL;
options = qdict_new();
- qdict_put(options, "driver", qstring_from_str("qcow"));
- ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
- if (ret < 0) {
+ qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
+ s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
+ &child_vvfat_qcow, false, errp);
+ QDECREF(options);
+ if (!s->qcow) {
+ ret = -EINVAL;
goto err;
}
@@ -3012,10 +3070,11 @@ static BlockDriver bdrv_vvfat = {
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
+ .bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
- .bdrv_read = vvfat_co_read,
- .bdrv_write = vvfat_co_write,
+ .bdrv_co_preadv = vvfat_co_preadv,
+ .bdrv_co_pwritev = vvfat_co_pwritev,
.bdrv_co_get_block_status = vvfat_co_get_block_status,
};
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 2d509a9a7..95e3ab154 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -27,7 +27,7 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "block/aio.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
#include "qemu/iov.h"
#include <windows.h>