Diffstat (limited to 'block')
-rw-r--r--  block/Makefile.objs     |   6
-rw-r--r--  block/accounting.c      |   1
-rw-r--r--  block/archipelago.c     |   4
-rw-r--r--  block/backup.c          | 105
-rw-r--r--  block/blkdebug.c        | 107
-rwxr-xr-x  block/blkreplay.c       | 160
-rw-r--r--  block/blkverify.c       |   6
-rw-r--r--  block/block-backend.c   | 823
-rw-r--r--  block/bochs.c           |   2
-rw-r--r--  block/cloop.c           |   2
-rw-r--r--  block/commit.c          |   2
-rw-r--r--  block/crypto.c          | 586
-rw-r--r--  block/curl.c            |  69
-rw-r--r--  block/dirty-bitmap.c    | 387
-rw-r--r--  block/dmg.c             |   2
-rw-r--r--  block/gluster.c         |  79
-rw-r--r--  block/io.c              | 180
-rw-r--r--  block/iscsi.c           |  74
-rw-r--r--  block/linux-aio.c       |   1
-rw-r--r--  block/mirror.c          | 411
-rw-r--r--  block/nbd-client.c      | 109
-rw-r--r--  block/nbd-client.h      |  12
-rw-r--r--  block/nbd.c             | 162
-rw-r--r--  block/nfs.c             |  16
-rw-r--r--  block/null.c            |  44
-rw-r--r--  block/parallels.c       |  32
-rw-r--r--  block/qapi.c            | 256
-rw-r--r--  block/qcow.c            |  43
-rw-r--r--  block/qcow2-cache.c     |   3
-rw-r--r--  block/qcow2-cluster.c   |  16
-rw-r--r--  block/qcow2-refcount.c  | 452
-rw-r--r--  block/qcow2-snapshot.c  |   4
-rw-r--r--  block/qcow2.c           | 450
-rw-r--r--  block/qcow2.h           |   7
-rw-r--r--  block/qed-check.c       |   1
-rw-r--r--  block/qed-cluster.c     |   1
-rw-r--r--  block/qed-gencb.c       |   1
-rw-r--r--  block/qed-l2-cache.c    |   1
-rw-r--r--  block/qed-table.c       |   1
-rw-r--r--  block/qed.c             |  66
-rw-r--r--  block/qed.h             |   1
-rw-r--r--  block/quorum.c          |  67
-rw-r--r--  block/raw-aio.h         |   2
-rw-r--r--  block/raw-posix.c       | 204
-rw-r--r--  block/raw-win32.c       |   4
-rw-r--r--  block/raw_bsd.c         |  23
-rw-r--r--  block/rbd.c             |  52
-rw-r--r--  block/sheepdog.c        | 190
-rw-r--r--  block/snapshot.c        |  25
-rw-r--r--  block/ssh.c             |   5
-rw-r--r--  block/stream.c          |  13
-rw-r--r--  block/throttle-groups.c |   1
-rw-r--r--  block/vdi.c             |  28
-rw-r--r--  block/vhdx-endian.c     |   1
-rw-r--r--  block/vhdx-log.c        |  15
-rw-r--r--  block/vhdx.c            |  47
-rw-r--r--  block/vmdk.c            | 214
-rw-r--r--  block/vpc.c             | 272
-rw-r--r--  block/vvfat.c           |  19
-rw-r--r--  block/win32-aio.c       |   1
-rw-r--r--  block/write-threshold.c |   1
61 files changed, 4539 insertions(+), 1330 deletions(-)
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 58ef2ef3f2..44a5416225 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,7 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
@@ -20,9 +20,11 @@ block-obj-$(CONFIG_RBD) += rbd.o
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
+block-obj-y += accounting.o dirty-bitmap.o
block-obj-y += write-threshold.o
+block-obj-y += crypto.o
+
common-obj-y += stream.o
common-obj-y += commit.o
common-obj-y += backup.o
diff --git a/block/accounting.c b/block/accounting.c
index 185025ec1e..3f457c4e73 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -23,6 +23,7 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/accounting.h"
#include "block/block_int.h"
#include "qemu/timer.h"
diff --git a/block/archipelago.c b/block/archipelago.c
index 855655c6bd..b9f5e69d4a 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -50,7 +50,8 @@
*
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qemu/error-report.h"
#include "qemu/thread.h"
@@ -59,7 +60,6 @@
#include "qapi/qmp/qjson.h"
#include "qemu/atomic.h"
-#include <inttypes.h>
#include <xseg/xseg.h>
#include <xseg/protocol.h>
diff --git a/block/backup.c b/block/backup.c
index 705bb77661..491fd14068 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -11,22 +11,20 @@
*
*/
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
-#define BACKUP_CLUSTER_BITS 16
-#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
-#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
-
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CowRequest {
@@ -47,10 +45,17 @@ typedef struct BackupBlockJob {
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
- HBitmap *bitmap;
+ unsigned long *done_bitmap;
+ int64_t cluster_size;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
+/* Size of a cluster in sectors, instead of bytes. */
+static inline int64_t cluster_size_sectors(BackupBlockJob *job)
+{
+ return job->cluster_size / BDRV_SECTOR_SIZE;
+}
+
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
@@ -99,13 +104,14 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
int64_t start, end;
int n;
qemu_co_rwlock_rdlock(&job->flush_rwlock);
- start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
- end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
+ start = sector_num / sectors_per_cluster;
+ end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
@@ -113,19 +119,19 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
- if (hbitmap_get(job->bitmap, start)) {
+ if (test_bit(start, job->done_bitmap)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
- n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+ n = MIN(sectors_per_cluster,
job->common.len / BDRV_SECTOR_SIZE -
- start * BACKUP_SECTORS_PER_CLUSTER);
+ start * sectors_per_cluster);
if (!bounce_buffer) {
- bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+ bounce_buffer = qemu_blockalign(bs, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
@@ -133,10 +139,10 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (is_write_notifier) {
ret = bdrv_co_readv_no_serialising(bs,
- start * BACKUP_SECTORS_PER_CLUSTER,
+ start * sectors_per_cluster,
n, &bounce_qiov);
} else {
- ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+ ret = bdrv_co_readv(bs, start * sectors_per_cluster, n,
&bounce_qiov);
}
if (ret < 0) {
@@ -149,11 +155,11 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
ret = bdrv_co_write_zeroes(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER,
+ start * sectors_per_cluster,
n, BDRV_REQ_MAY_UNMAP);
} else {
ret = bdrv_co_writev(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER, n,
+ start * sectors_per_cluster, n,
&bounce_qiov);
}
if (ret < 0) {
@@ -164,7 +170,7 @@ static int coroutine_fn backup_do_cow(BlockDriverState *bs,
goto out;
}
- hbitmap_set(job->bitmap, start, 1);
+ set_bit(start, job->done_bitmap);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
@@ -324,21 +330,22 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
int64_t cluster;
int64_t end;
int64_t last_cluster = -1;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
BlockDriverState *bs = job->common.bs;
HBitmapIter hbi;
granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
- clusters_per_iter = MAX((granularity / BACKUP_CLUSTER_SIZE), 1);
+ clusters_per_iter = MAX((granularity / job->cluster_size), 1);
bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
/* Find the next dirty sector(s) */
while ((sector = hbitmap_iter_next(&hbi)) != -1) {
- cluster = sector / BACKUP_SECTORS_PER_CLUSTER;
+ cluster = sector / sectors_per_cluster;
/* Fake progress updates for any clusters we skipped */
if (cluster != last_cluster + 1) {
job->common.offset += ((cluster - last_cluster - 1) *
- BACKUP_CLUSTER_SIZE);
+ job->cluster_size);
}
for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
@@ -346,8 +353,8 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
if (yield_and_check(job)) {
return ret;
}
- ret = backup_do_cow(bs, cluster * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read,
+ ret = backup_do_cow(bs, cluster * sectors_per_cluster,
+ sectors_per_cluster, &error_is_read,
false);
if ((ret < 0) &&
backup_error_action(job, error_is_read, -ret) ==
@@ -359,17 +366,17 @@ static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
/* If the bitmap granularity is smaller than the backup granularity,
* we need to advance the iterator pointer to the next cluster. */
- if (granularity < BACKUP_CLUSTER_SIZE) {
- bdrv_set_dirty_iter(&hbi, cluster * BACKUP_SECTORS_PER_CLUSTER);
+ if (granularity < job->cluster_size) {
+ bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
}
last_cluster = cluster - 1;
}
/* Play some final catchup with the progress meter */
- end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+ end = DIV_ROUND_UP(job->common.len, job->cluster_size);
if (last_cluster + 1 < end) {
- job->common.offset += ((end - last_cluster - 1) * BACKUP_CLUSTER_SIZE);
+ job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
}
return ret;
@@ -386,17 +393,17 @@ static void coroutine_fn backup_run(void *opaque)
.notify = backup_before_write_notify,
};
int64_t start, end;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
- end = DIV_ROUND_UP(job->common.len, BACKUP_CLUSTER_SIZE);
+ end = DIV_ROUND_UP(job->common.len, job->cluster_size);
- job->bitmap = hbitmap_alloc(end, 0);
+ job->done_bitmap = bitmap_new(end);
- bdrv_set_enable_write_cache(target, true);
if (target->blk) {
blk_set_on_error(target->blk, on_target_error, on_target_error);
blk_iostatus_enable(target->blk);
@@ -429,7 +436,7 @@ static void coroutine_fn backup_run(void *opaque)
/* Check to see if these blocks are already in the
* backing file. */
- for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
+ for (i = 0; i < sectors_per_cluster;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
@@ -438,8 +445,8 @@ static void coroutine_fn backup_run(void *opaque)
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
- start * BACKUP_SECTORS_PER_CLUSTER + i,
- BACKUP_SECTORS_PER_CLUSTER - i, &n);
+ start * sectors_per_cluster + i,
+ sectors_per_cluster - i, &n);
i += n;
if (alloced == 1 || n == 0) {
@@ -454,8 +461,8 @@ static void coroutine_fn backup_run(void *opaque)
}
}
/* FULL sync mode we copy the whole drive. */
- ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read, false);
+ ret = backup_do_cow(bs, start * sectors_per_cluster,
+ sectors_per_cluster, &error_is_read, false);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
@@ -475,7 +482,7 @@ static void coroutine_fn backup_run(void *opaque)
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
- hbitmap_free(job->bitmap);
+ g_free(job->done_bitmap);
if (target->blk) {
blk_iostatus_disable(target->blk);
@@ -496,6 +503,8 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
BlockJobTxn *txn, Error **errp)
{
int64_t len;
+ BlockDriverInfo bdi;
+ int ret;
assert(bs);
assert(target);
@@ -565,14 +574,32 @@ void backup_start(BlockDriverState *bs, BlockDriverState *target,
goto error;
}
- bdrv_op_block_all(target, job->common.blocker);
-
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
job->target = target;
job->sync_mode = sync_mode;
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
sync_bitmap : NULL;
+
+ /* If there is no backing file on the target, we cannot rely on COW if our
+ * backup cluster size is smaller than the target cluster size. Even for
+ * targets with a backing file, try to avoid COW if possible. */
+ ret = bdrv_get_info(job->target, &bdi);
+ if (ret < 0 && !target->backing) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+ "which has no backing file");
+ error_append_hint(errp,
+ "Aborting, since this may create an unusable destination image\n");
+ goto error;
+ } else if (ret < 0 && target->backing) {
+ /* Not fatal; just trudge on ahead. */
+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+ } else {
+ job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+ }
+
+ bdrv_op_block_all(target, job->common.blocker);
job->common.len = len;
job->common.co = qemu_coroutine_create(backup_run);
block_job_txn_add_job(txn, &job->common);
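The backup.c hunks above replace the fixed 64 KiB backup cluster with a per-job cluster size derived from the target image, so copy-on-write never covers only part of a target cluster. A minimal sketch of that selection, using a hypothetical pick_cluster_size() helper in place of the inline code at the end of backup_start():

    /* Sketch only: pick_cluster_size() and cluster_size_in_sectors() are
     * not part of the patch; they restate the logic shown above. */
    #include <stdint.h>

    #define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)   /* 64 KiB, as in the patch */
    #define BDRV_SECTOR_SIZE 512                     /* QEMU's sector size */

    /* A target_cluster_size <= 0 stands in for "bdrv_get_info() reported
     * nothing useful"; the patch only tolerates that case when the target
     * has a backing file. */
    static int64_t pick_cluster_size(int64_t target_cluster_size)
    {
        if (target_cluster_size <= BACKUP_CLUSTER_SIZE_DEFAULT) {
            return BACKUP_CLUSTER_SIZE_DEFAULT;
        }
        return target_cluster_size;
    }

    /* Byte-sized cluster to sectors, as cluster_size_sectors() does above. */
    static int64_t cluster_size_in_sectors(int64_t cluster_size)
    {
        return cluster_size / BDRV_SECTOR_SIZE;      /* e.g. 65536 / 512 = 128 */
    }

For example, a qcow2 target with 2 MiB clusters gives a 2 MiB backup cluster (4096 sectors), while a target whose driver reports no cluster size keeps the 64 KiB default (128 sectors).
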
diff --git a/block/blkdebug.c b/block/blkdebug.c
index dee3a0edfc..20d25bda67 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -22,7 +22,9 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/config-file.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -36,7 +38,7 @@ typedef struct BDRVBlkdebugState {
int state;
int new_state;
- QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
+ QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
} BDRVBlkdebugState;
@@ -64,7 +66,7 @@ enum {
};
typedef struct BlkdebugRule {
- BlkDebugEvent event;
+ BlkdebugEvent event;
int action;
int state;
union {
@@ -143,69 +145,12 @@ static QemuOptsList *config_groups[] = {
NULL
};
-static const char *event_names[BLKDBG_EVENT_MAX] = {
- [BLKDBG_L1_UPDATE] = "l1_update",
- [BLKDBG_L1_GROW_ALLOC_TABLE] = "l1_grow.alloc_table",
- [BLKDBG_L1_GROW_WRITE_TABLE] = "l1_grow.write_table",
- [BLKDBG_L1_GROW_ACTIVATE_TABLE] = "l1_grow.activate_table",
-
- [BLKDBG_L2_LOAD] = "l2_load",
- [BLKDBG_L2_UPDATE] = "l2_update",
- [BLKDBG_L2_UPDATE_COMPRESSED] = "l2_update_compressed",
- [BLKDBG_L2_ALLOC_COW_READ] = "l2_alloc.cow_read",
- [BLKDBG_L2_ALLOC_WRITE] = "l2_alloc.write",
-
- [BLKDBG_READ_AIO] = "read_aio",
- [BLKDBG_READ_BACKING_AIO] = "read_backing_aio",
- [BLKDBG_READ_COMPRESSED] = "read_compressed",
-
- [BLKDBG_WRITE_AIO] = "write_aio",
- [BLKDBG_WRITE_COMPRESSED] = "write_compressed",
-
- [BLKDBG_VMSTATE_LOAD] = "vmstate_load",
- [BLKDBG_VMSTATE_SAVE] = "vmstate_save",
-
- [BLKDBG_COW_READ] = "cow_read",
- [BLKDBG_COW_WRITE] = "cow_write",
-
- [BLKDBG_REFTABLE_LOAD] = "reftable_load",
- [BLKDBG_REFTABLE_GROW] = "reftable_grow",
- [BLKDBG_REFTABLE_UPDATE] = "reftable_update",
-
- [BLKDBG_REFBLOCK_LOAD] = "refblock_load",
- [BLKDBG_REFBLOCK_UPDATE] = "refblock_update",
- [BLKDBG_REFBLOCK_UPDATE_PART] = "refblock_update_part",
- [BLKDBG_REFBLOCK_ALLOC] = "refblock_alloc",
- [BLKDBG_REFBLOCK_ALLOC_HOOKUP] = "refblock_alloc.hookup",
- [BLKDBG_REFBLOCK_ALLOC_WRITE] = "refblock_alloc.write",
- [BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS] = "refblock_alloc.write_blocks",
- [BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE] = "refblock_alloc.write_table",
- [BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE] = "refblock_alloc.switch_table",
-
- [BLKDBG_CLUSTER_ALLOC] = "cluster_alloc",
- [BLKDBG_CLUSTER_ALLOC_BYTES] = "cluster_alloc_bytes",
- [BLKDBG_CLUSTER_FREE] = "cluster_free",
-
- [BLKDBG_FLUSH_TO_OS] = "flush_to_os",
- [BLKDBG_FLUSH_TO_DISK] = "flush_to_disk",
-
- [BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head",
- [BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head",
- [BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail",
- [BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail",
- [BLKDBG_PWRITEV] = "pwritev",
- [BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
- [BLKDBG_PWRITEV_DONE] = "pwritev_done",
-
- [BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare",
-};
-
-static int get_event_by_name(const char *name, BlkDebugEvent *event)
+static int get_event_by_name(const char *name, BlkdebugEvent *event)
{
int i;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
- if (!strcmp(event_names[i], name)) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
+ if (!strcmp(BlkdebugEvent_lookup[i], name)) {
*event = i;
return 0;
}
@@ -224,7 +169,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
struct add_rule_data *d = opaque;
BDRVBlkdebugState *s = d->s;
const char* event_name;
- BlkDebugEvent event;
+ BlkdebugEvent event;
struct BlkdebugRule *rule;
/* Find the right event for the rule */
@@ -564,7 +509,7 @@ static void blkdebug_close(BlockDriverState *bs)
BlkdebugRule *rule, *next;
int i;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
remove_rule(rule);
}
@@ -627,13 +572,13 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
return injected;
}
-static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule, *next;
bool injected;
- assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
+ assert((int)event >= 0 && event < BLKDBG__MAX);
injected = false;
s->new_state = s->state;
@@ -648,7 +593,7 @@ static int blkdebug_debug_breakpoint(BlockDriverState *bs, const char *event,
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule;
- BlkDebugEvent blkdebug_event;
+ BlkdebugEvent blkdebug_event;
if (get_event_by_name(event, &blkdebug_event) < 0) {
return -ENOENT;
@@ -690,7 +635,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
BlkdebugRule *rule, *next;
int i, ret = -ENOENT;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
if (rule->action == ACTION_SUSPEND &&
!strcmp(rule->options.suspend.tag, tag)) {
@@ -731,17 +676,15 @@ static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
return bdrv_truncate(bs->file->bs, offset);
}
-static void blkdebug_refresh_filename(BlockDriverState *bs)
+static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
QDict *opts;
const QDictEntry *e;
bool force_json = false;
- for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
+ for (e = qdict_first(options); e; e = qdict_next(options, e)) {
if (strcmp(qdict_entry_key(e), "config") &&
- strcmp(qdict_entry_key(e), "x-image") &&
- strcmp(qdict_entry_key(e), "image") &&
- strncmp(qdict_entry_key(e), "image.", strlen("image.")))
+ strcmp(qdict_entry_key(e), "x-image"))
{
force_json = true;
break;
@@ -757,7 +700,7 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
if (!force_json && bs->file->bs->exact_filename[0]) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkdebug:%s:%s",
- qdict_get_try_str(bs->options, "config") ?: "",
+ qdict_get_try_str(options, "config") ?: "",
bs->file->bs->exact_filename);
}
@@ -767,11 +710,8 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
QINCREF(bs->file->bs->full_open_options);
qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
- for (e = qdict_first(bs->options); e; e = qdict_next(bs->options, e)) {
- if (strcmp(qdict_entry_key(e), "x-image") &&
- strcmp(qdict_entry_key(e), "image") &&
- strncmp(qdict_entry_key(e), "image.", strlen("image.")))
- {
+ for (e = qdict_first(options); e; e = qdict_next(options, e)) {
+ if (strcmp(qdict_entry_key(e), "x-image")) {
qobject_incref(qdict_entry_value(e));
qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
}
@@ -780,6 +720,12 @@ static void blkdebug_refresh_filename(BlockDriverState *bs)
bs->full_open_options = opts;
}
+static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static BlockDriver bdrv_blkdebug = {
.format_name = "blkdebug",
.protocol_name = "blkdebug",
@@ -788,6 +734,7 @@ static BlockDriver bdrv_blkdebug = {
.bdrv_parse_filename = blkdebug_parse_filename,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
+ .bdrv_reopen_prepare = blkdebug_reopen_prepare,
.bdrv_getlength = blkdebug_getlength,
.bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
diff --git a/block/blkreplay.c b/block/blkreplay.c
new file mode 100755
index 0000000000..42f1813af1
--- /dev/null
+++ b/block/blkreplay.c
@@ -0,0 +1,160 @@
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+#include "qapi/error.h"
+
+typedef struct Request {
+ Coroutine *co;
+ QEMUBH *bh;
+} Request;
+
+/* Next request id.
+ This counter is global, because requests from different
+ block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ Error *local_err = NULL;
+ int ret;
+
+ /* Open the image file */
+ bs->file = bdrv_open_child(NULL, options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
+ return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+/* This bh is used for synchronization of return from coroutines.
+ It continues yielded coroutine which then finishes its execution.
+ BH is called adjusted to some replay checkpoint, therefore
+ record and replay will always finish coroutines deterministically.
+*/
+static void blkreplay_bh_cb(void *opaque)
+{
+ Request *req = opaque;
+ qemu_coroutine_enter(req->co, NULL);
+ qemu_bh_delete(req->bh);
+ g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+ Coroutine *co)
+{
+ Request *req = g_new(Request, 1);
+ *req = (Request) {
+ .co = co,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
+ };
+ replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_flush(bs->file->bs);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static BlockDriver bdrv_blkreplay = {
+ .format_name = "blkreplay",
+ .protocol_name = "blkreplay",
+ .instance_size = 0,
+
+ .bdrv_file_open = blkreplay_open,
+ .bdrv_close = blkreplay_close,
+ .bdrv_getlength = blkreplay_getlength,
+
+ .bdrv_co_readv = blkreplay_co_readv,
+ .bdrv_co_writev = blkreplay_co_writev,
+
+ .bdrv_co_write_zeroes = blkreplay_co_write_zeroes,
+ .bdrv_co_discard = blkreplay_co_discard,
+ .bdrv_co_flush = blkreplay_co_flush,
+};
+
+static void bdrv_blkreplay_init(void)
+{
+ bdrv_register(&bdrv_blkreplay);
+}
+
+block_init(bdrv_blkreplay_init);
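Every blkreplay_co_* callback in the new driver follows the same shape: forward the request to bs->file, take a fresh global request id, register a bottom half with the record/replay core, and yield until that bottom half re-enters the coroutine. A sketch of that common shape, factored into a hypothetical blkreplay_do_io() helper that does not exist in the driver itself:

    /* Sketch only: the real callbacks above repeat this pattern inline. */
    static int coroutine_fn blkreplay_do_io(BlockDriverState *bs,
                                            int (*issue)(BlockDriverState *file,
                                                         void *arg),
                                            void *arg)
    {
        uint64_t reqid = request_id++;       /* global counter, ids never collide */
        int ret = issue(bs->file->bs, arg);  /* forward to the underlying image   */

        /* Completion is not reported here: the bottom half created by
         * block_request_create() re-enters this coroutine at a replay
         * checkpoint, so the request completes at the same point during
         * recording and during replay. */
        block_request_create(reqid, bs, qemu_coroutine_self());
        qemu_coroutine_yield();

        return ret;
    }

Because request_id is shared by all blkreplay instances, ids stay unique even when several replayed block devices are in use, which is what the comment at the top of the file relies on.
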
diff --git a/block/blkverify.c b/block/blkverify.c
index c5f8e8dcba..9414b7a84e 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -7,11 +7,13 @@
* See the COPYING file in the top-level directory.
*/
-#include <stdarg.h>
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "block/block_int.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
typedef struct {
BdrvChild *test_file;
@@ -307,7 +309,7 @@ static void blkverify_attach_aio_context(BlockDriverState *bs,
bdrv_attach_aio_context(s->test_file->bs, new_context);
}
-static void blkverify_refresh_filename(BlockDriverState *bs)
+static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkverifyState *s = bs->opaque;
diff --git a/block/block-backend.c b/block/block-backend.c
index 36ccc9e616..16c9d5e0f2 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -10,6 +10,7 @@
* or later. See the COPYING.LIB file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/blockjob.h"
@@ -17,18 +18,22 @@
#include "sysemu/blockdev.h"
#include "sysemu/sysemu.h"
#include "qapi-event.h"
+#include "qemu/id.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
struct BlockBackend {
char *name;
int refcnt;
- BlockDriverState *bs;
+ BdrvChild *root;
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
- QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
+ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
@@ -42,12 +47,18 @@ struct BlockBackend {
* can be used to restore those options in the new BDS on insert) */
BlockBackendRootState root_state;
+ bool enable_write_cache;
+
/* I/O stats (display with "info blockstats"). */
BlockAcctStats stats;
BlockdevOnError on_read_error, on_write_error;
bool iostatus_enabled;
BlockDeviceIoStatus iostatus;
+
+ bool allow_write_beyond_eof;
+
+ NotifierList remove_bs_notifiers, insert_bs_notifiers;
};
typedef struct BlockBackendAIOCB {
@@ -64,41 +75,40 @@ static const AIOCBInfo block_backend_aiocb_info = {
static void drive_info_del(DriveInfo *dinfo);
-/* All the BlockBackends (except for hidden ones) */
-static QTAILQ_HEAD(, BlockBackend) blk_backends =
- QTAILQ_HEAD_INITIALIZER(blk_backends);
+/* All BlockBackends */
+static QTAILQ_HEAD(, BlockBackend) block_backends =
+ QTAILQ_HEAD_INITIALIZER(block_backends);
+
+/* All BlockBackends referenced by the monitor and which are iterated through by
+ * blk_next() */
+static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
+ QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
+
+static void blk_root_inherit_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
+{
+ /* We're not supposed to call this function for root nodes */
+ abort();
+}
+
+static const BdrvChildRole child_root = {
+ .inherit_options = blk_root_inherit_options,
+};
/*
- * Create a new BlockBackend with @name, with a reference count of one.
- * @name must not be null or empty.
- * Fail if a BlockBackend with this name already exists.
+ * Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(const char *name, Error **errp)
+BlockBackend *blk_new(Error **errp)
{
BlockBackend *blk;
- assert(name && name[0]);
- if (!id_wellformed(name)) {
- error_setg(errp, "Invalid device name");
- return NULL;
- }
- if (blk_by_name(name)) {
- error_setg(errp, "Device with id '%s' already exists", name);
- return NULL;
- }
- if (bdrv_find_node(name)) {
- error_setg(errp,
- "Device name '%s' conflicts with an existing node name",
- name);
- return NULL;
- }
-
blk = g_new0(BlockBackend, 1);
- blk->name = g_strdup(name);
blk->refcnt = 1;
- QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
+ notifier_list_init(&blk->remove_bs_notifiers);
+ notifier_list_init(&blk->insert_bs_notifiers);
+ QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
@@ -106,18 +116,18 @@ BlockBackend *blk_new(const char *name, Error **errp)
* Create a new BlockBackend with a new BlockDriverState attached.
* Otherwise just like blk_new(), which see.
*/
-BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+BlockBackend *blk_new_with_bs(Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
- blk = blk_new(name, errp);
+ blk = blk_new(errp);
if (!blk) {
return NULL;
}
bs = bdrv_new_root();
- blk->bs = bs;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root);
bs->blk = blk;
return blk;
}
@@ -134,47 +144,44 @@ BlockBackend *blk_new_with_bs(const char *name, Error **errp)
* though, so callers of this function have to be able to specify @filename and
* @flags.
*/
-BlockBackend *blk_new_open(const char *name, const char *filename,
- const char *reference, QDict *options, int flags,
- Error **errp)
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp)
{
BlockBackend *blk;
int ret;
- blk = blk_new_with_bs(name, errp);
+ blk = blk_new_with_bs(errp);
if (!blk) {
QDECREF(options);
return NULL;
}
- ret = bdrv_open(&blk->bs, filename, reference, options, flags, errp);
+ ret = bdrv_open(&blk->root->bs, filename, reference, options, flags, errp);
if (ret < 0) {
blk_unref(blk);
return NULL;
}
+ blk_set_enable_write_cache(blk, true);
+
return blk;
}
static void blk_delete(BlockBackend *blk)
{
assert(!blk->refcnt);
+ assert(!blk->name);
assert(!blk->dev);
- if (blk->bs) {
- assert(blk->bs->blk == blk);
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
- blk->bs = NULL;
+ if (blk->root) {
+ blk_remove_bs(blk);
}
+ assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
+ assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
if (blk->root_state.throttle_state) {
g_free(blk->root_state.throttle_group);
throttle_group_unref(blk->root_state.throttle_state);
}
- /* Avoid double-remove after blk_hide_on_behalf_of_hmp_drive_del() */
- if (blk->name[0]) {
- QTAILQ_REMOVE(&blk_backends, blk, link);
- }
- g_free(blk->name);
+ QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
block_acct_cleanup(&blk->stats);
g_free(blk);
@@ -220,7 +227,32 @@ void blk_unref(BlockBackend *blk)
}
/*
- * Return the BlockBackend after @blk.
+ * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
+ * ones which are hidden (i.e. are not referenced by the monitor).
+ */
+static BlockBackend *blk_all_next(BlockBackend *blk)
+{
+ return blk ? QTAILQ_NEXT(blk, link)
+ : QTAILQ_FIRST(&block_backends);
+}
+
+void blk_remove_all_bs(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *ctx = blk_get_aio_context(blk);
+
+ aio_context_acquire(ctx);
+ if (blk->root) {
+ blk_remove_bs(blk);
+ }
+ aio_context_release(ctx);
+ }
+}
+
+/*
+ * Return the monitor-owned BlockBackend after @blk.
* If @blk is null, return the first one.
* Else, return @blk's next sibling, which may be null.
*
@@ -231,17 +263,91 @@ void blk_unref(BlockBackend *blk)
*/
BlockBackend *blk_next(BlockBackend *blk)
{
- return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
+ return blk ? QTAILQ_NEXT(blk, monitor_link)
+ : QTAILQ_FIRST(&monitor_block_backends);
+}
+
+/*
+ * Iterates over all BlockDriverStates which are attached to a BlockBackend.
+ * This function is for use by bdrv_next().
+ *
+ * @bs must be NULL or a BDS that is attached to a BB.
+ */
+BlockDriverState *blk_next_root_bs(BlockDriverState *bs)
+{
+ BlockBackend *blk;
+
+ if (bs) {
+ assert(bs->blk);
+ blk = bs->blk;
+ } else {
+ blk = NULL;
+ }
+
+ do {
+ blk = blk_all_next(blk);
+ } while (blk && !blk->root);
+
+ return blk ? blk->root->bs : NULL;
+}
+
+/*
+ * Add a BlockBackend into the list of backends referenced by the monitor, with
+ * the given @name acting as the handle for the monitor.
+ * Strictly for use by blockdev.c.
+ *
+ * @name must not be null or empty.
+ *
+ * Returns true on success and false on failure. In the latter case, an Error
+ * object is returned through @errp.
+ */
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
+{
+ assert(!blk->name);
+ assert(name && name[0]);
+
+ if (!id_wellformed(name)) {
+ error_setg(errp, "Invalid device name");
+ return false;
+ }
+ if (blk_by_name(name)) {
+ error_setg(errp, "Device with id '%s' already exists", name);
+ return false;
+ }
+ if (bdrv_find_node(name)) {
+ error_setg(errp,
+ "Device name '%s' conflicts with an existing node name",
+ name);
+ return false;
+ }
+
+ blk->name = g_strdup(name);
+ QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
+ return true;
+}
+
+/*
+ * Remove a BlockBackend from the list of backends referenced by the monitor.
+ * Strictly for use by blockdev.c.
+ */
+void monitor_remove_blk(BlockBackend *blk)
+{
+ if (!blk->name) {
+ return;
+ }
+
+ QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
+ g_free(blk->name);
+ blk->name = NULL;
}
/*
* Return @blk's name, a non-null string.
- * Wart: the name is empty iff @blk has been hidden with
- * blk_hide_on_behalf_of_hmp_drive_del().
+ * Returns an empty string iff @blk is not referenced by the monitor.
*/
const char *blk_name(BlockBackend *blk)
{
- return blk->name;
+ return blk->name ?: "";
}
/*
@@ -250,10 +356,10 @@ const char *blk_name(BlockBackend *blk)
*/
BlockBackend *blk_by_name(const char *name)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
assert(name);
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
return blk;
}
@@ -266,24 +372,7 @@ BlockBackend *blk_by_name(const char *name)
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
- return blk->bs;
-}
-
-/*
- * Changes the BlockDriverState attached to @blk
- */
-void blk_set_bs(BlockBackend *blk, BlockDriverState *bs)
-{
- bdrv_ref(bs);
-
- if (blk->bs) {
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
- }
- assert(bs->blk == NULL);
-
- blk->bs = bs;
- bs->blk = blk;
+ return blk->root ? blk->root->bs : NULL;
}
/*
@@ -311,9 +400,9 @@ DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo)
*/
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
return blk;
}
@@ -322,33 +411,19 @@ BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
}
/*
- * Hide @blk.
- * @blk must not have been hidden already.
- * Make attached BlockDriverState, if any, anonymous.
- * Once hidden, @blk is invisible to all functions that don't receive
- * it as argument. For example, blk_by_name() won't return it.
- * Strictly for use by do_drive_del().
- * TODO get rid of it!
- */
-void blk_hide_on_behalf_of_hmp_drive_del(BlockBackend *blk)
-{
- QTAILQ_REMOVE(&blk_backends, blk, link);
- blk->name[0] = 0;
- if (blk->bs) {
- bdrv_make_anon(blk->bs);
- }
-}
-
-/*
* Disassociates the currently associated BlockDriverState from @blk.
*/
void blk_remove_bs(BlockBackend *blk)
{
+ assert(blk->root->bs->blk == blk);
+
+ notifier_list_notify(&blk->remove_bs_notifiers, blk);
+
blk_update_root_state(blk);
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
- blk->bs = NULL;
+ blk->root->bs->blk = NULL;
+ bdrv_root_unref_child(blk->root);
+ blk->root = NULL;
}
/*
@@ -356,10 +431,12 @@ void blk_remove_bs(BlockBackend *blk)
*/
void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
{
- assert(!blk->bs && !bs->blk);
+ assert(!blk->root && !bs->blk);
bdrv_ref(bs);
- blk->bs = bs;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root);
bs->blk = blk;
+
+ notifier_list_notify(&blk->insert_bs_notifiers, blk);
}
/*
@@ -458,6 +535,14 @@ bool blk_dev_has_removable_media(BlockBackend *blk)
}
/*
+ * Does @blk's attached device model have a tray?
+ */
+bool blk_dev_has_tray(BlockBackend *blk)
+{
+ return blk->dev_ops && blk->dev_ops->is_tray_open;
+}
+
+/*
* Notify @blk's attached device model of a media eject request.
* If @force is true, the medium is about to be yanked out forcefully.
*/
@@ -473,7 +558,7 @@ void blk_dev_eject_request(BlockBackend *blk, bool force)
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
- if (blk->dev_ops && blk->dev_ops->is_tray_open) {
+ if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
return false;
@@ -530,9 +615,10 @@ void blk_iostatus_disable(BlockBackend *blk)
void blk_iostatus_reset(BlockBackend *blk)
{
if (blk_iostatus_is_enabled(blk)) {
+ BlockDriverState *bs = blk_bs(blk);
blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
- if (blk->bs && blk->bs->job) {
- block_job_iostatus_reset(blk->bs->job);
+ if (bs && bs->job) {
+ block_job_iostatus_reset(bs->job);
}
}
}
@@ -546,6 +632,11 @@ void blk_iostatus_set_err(BlockBackend *blk, int error)
}
}
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+{
+ blk->allow_write_beyond_eof = allow;
+}
+
static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
size_t size)
{
@@ -559,17 +650,19 @@ static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
return -ENOMEDIUM;
}
- len = blk_getlength(blk);
- if (len < 0) {
- return len;
- }
-
if (offset < 0) {
return -EIO;
}
- if (offset > len || len - offset < size) {
- return -EIO;
+ if (!blk->allow_write_beyond_eof) {
+ len = blk_getlength(blk);
+ if (len < 0) {
+ return len;
+ }
+
+ if (offset > len || len - offset < size) {
+ return -EIO;
+ }
}
return 0;
@@ -590,48 +683,144 @@ static int blk_check_request(BlockBackend *blk, int64_t sector_num,
nb_sectors * BDRV_SECTOR_SIZE);
}
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+static int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_read(blk->bs, sector_num, buf, nb_sectors);
+ return bdrv_co_do_preadv(blk_bs(blk), offset, bytes, qiov, flags);
}
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ int ret;
+
+ ret = blk_check_byte_request(blk, offset, bytes);
if (ret < 0) {
return ret;
}
- return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors);
+ if (!blk->enable_write_cache) {
+ flags |= BDRV_REQ_FUA;
+ }
+
+ return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+typedef struct BlkRwCo {
+ BlockBackend *blk;
+ int64_t offset;
+ QEMUIOVector *qiov;
+ int ret;
+ BdrvRequestFlags flags;
+} BlkRwCo;
+
+static void blk_read_entry(void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static void blk_write_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int64_t bytes, CoroutineEntry co_entry,
+ BdrvRequestFlags flags)
+{
+ AioContext *aio_context;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ Coroutine *co;
+ BlkRwCo rwco;
+
+ iov = (struct iovec) {
+ .iov_base = buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = &qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+
+ co = qemu_coroutine_create(co_entry);
+ qemu_coroutine_enter(co, &rwco);
+
+ aio_context = blk_get_aio_context(blk);
+ while (rwco.ret == NOT_DONE) {
+ aio_poll(aio_context, true);
}
- return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
+ return rwco.ret;
}
-int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+static int blk_rw(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+ int nb_sectors, CoroutineEntry co_entry,
+ BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return -EINVAL;
+ }
+
+ return blk_prw(blk, sector_num << BDRV_SECTOR_BITS, buf,
+ nb_sectors << BDRV_SECTOR_BITS, co_entry, flags);
+}
+
+int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+ int nb_sectors)
+{
+ return blk_rw(blk, sector_num, buf, nb_sectors, blk_read_entry, 0);
+}
+
+int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
+ int nb_sectors)
+{
+ BlockDriverState *bs = blk_bs(blk);
+ bool enabled;
+ int ret;
+
+ ret = blk_check_request(blk, sector_num, nb_sectors);
if (ret < 0) {
return ret;
}
- return bdrv_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+ enabled = bs->io_limits_enabled;
+ bs->io_limits_enabled = false;
+ ret = blk_read(blk, sector_num, buf, nb_sectors);
+ bs->io_limits_enabled = enabled;
+ return ret;
+}
+
+int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
+ int nb_sectors)
+{
+ return blk_rw(blk, sector_num, (uint8_t*) buf, nb_sectors,
+ blk_write_entry, 0);
+}
+
+int blk_write_zeroes(BlockBackend *blk, int64_t sector_num,
+ int nb_sectors, BdrvRequestFlags flags)
+{
+ return blk_rw(blk, sector_num, NULL, nb_sectors, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
}
static void error_callback_bh(void *opaque)
@@ -660,37 +849,119 @@ BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
return &acb->common;
}
+typedef struct BlkAioEmAIOCB {
+ BlockAIOCB common;
+ BlkRwCo rwco;
+ int bytes;
+ bool has_returned;
+ QEMUBH* bh;
+} BlkAioEmAIOCB;
+
+static const AIOCBInfo blk_aio_em_aiocb_info = {
+ .aiocb_size = sizeof(BlkAioEmAIOCB),
+};
+
+static void blk_aio_complete(BlkAioEmAIOCB *acb)
+{
+ if (acb->bh) {
+ assert(acb->has_returned);
+ qemu_bh_delete(acb->bh);
+ }
+ if (acb->has_returned) {
+ acb->common.cb(acb->common.opaque, acb->rwco.ret);
+ qemu_aio_unref(acb);
+ }
+}
+
+static void blk_aio_complete_bh(void *opaque)
+{
+ blk_aio_complete(opaque);
+}
+
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
+ QEMUIOVector *qiov, CoroutineEntry co_entry,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+ acb->bytes = bytes;
+ acb->bh = NULL;
+ acb->has_returned = false;
+
+ co = qemu_coroutine_create(co_entry);
+ qemu_coroutine_enter(co, acb);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) {
+ acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ }
+
+ return &acb->common;
+}
+
+static void blk_aio_read_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+static void blk_aio_write_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
}
- return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
- cb, opaque);
+ return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, NULL,
+ blk_aio_write_entry, flags | BDRV_REQ_ZERO_WRITE,
+ cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
if (ret < 0) {
return ret;
}
-
- return bdrv_pread(blk->bs, offset, buf, count);
+ return count;
}
int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
{
- int ret = blk_check_byte_request(blk, offset, count);
+ int ret = blk_prw(blk, offset, (void*) buf, count, blk_write_entry, 0);
if (ret < 0) {
return ret;
}
-
- return bdrv_pwrite(blk->bs, offset, buf, count);
+ return count;
}
int64_t blk_getlength(BlockBackend *blk)
@@ -699,15 +970,15 @@ int64_t blk_getlength(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_getlength(blk->bs);
+ return bdrv_getlength(blk_bs(blk));
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
- if (!blk->bs) {
+ if (!blk_bs(blk)) {
*nb_sectors_ptr = 0;
} else {
- bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+ bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
}
}
@@ -717,31 +988,33 @@ int64_t blk_nb_sectors(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_nb_sectors(blk->bs);
+ return bdrv_nb_sectors(blk_bs(blk));
}
BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
}
- return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
+ return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
+ blk_aio_read_entry, 0, cb, opaque);
}
BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
QEMUIOVector *iov, int nb_sectors,
BlockCompletionFunc *cb, void *opaque)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return blk_abort_aio_request(blk, cb, opaque, ret);
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return blk_abort_aio_request(blk, cb, opaque, -EINVAL);
}
- return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ assert(nb_sectors << BDRV_SECTOR_BITS == iov->size);
+ return blk_aio_prwv(blk, sector_num << BDRV_SECTOR_BITS, iov->size, iov,
+ blk_aio_write_entry, 0, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
@@ -751,7 +1024,7 @@ BlockAIOCB *blk_aio_flush(BlockBackend *blk,
return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
}
- return bdrv_aio_flush(blk->bs, cb, opaque);
+ return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
BlockAIOCB *blk_aio_discard(BlockBackend *blk,
@@ -763,7 +1036,7 @@ BlockAIOCB *blk_aio_discard(BlockBackend *blk,
return blk_abort_aio_request(blk, cb, opaque, ret);
}
- return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
+ return bdrv_aio_discard(blk_bs(blk), sector_num, nb_sectors, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -787,7 +1060,7 @@ int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
}
}
- return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs);
+ return bdrv_aio_multiwrite(blk_bs(blk), reqs, num_reqs);
}
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
@@ -796,7 +1069,7 @@ int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
return -ENOMEDIUM;
}
- return bdrv_ioctl(blk->bs, req, buf);
+ return bdrv_ioctl(blk_bs(blk), req, buf);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
@@ -806,7 +1079,7 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
}
- return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
+ return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
}
int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
@@ -816,7 +1089,7 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
return ret;
}
- return bdrv_co_discard(blk->bs, sector_num, nb_sectors);
+ return bdrv_co_discard(blk_bs(blk), sector_num, nb_sectors);
}
int blk_co_flush(BlockBackend *blk)
@@ -825,7 +1098,7 @@ int blk_co_flush(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_co_flush(blk->bs);
+ return bdrv_co_flush(blk_bs(blk));
}
int blk_flush(BlockBackend *blk)
@@ -834,18 +1107,13 @@ int blk_flush(BlockBackend *blk)
return -ENOMEDIUM;
}
- return bdrv_flush(blk->bs);
-}
-
-int blk_flush_all(void)
-{
- return bdrv_flush_all();
+ return bdrv_flush(blk_bs(blk));
}
void blk_drain(BlockBackend *blk)
{
- if (blk->bs) {
- bdrv_drain(blk->bs);
+ if (blk_bs(blk)) {
+ bdrv_drain(blk_bs(blk));
}
}
@@ -933,8 +1201,10 @@ void blk_error_action(BlockBackend *blk, BlockErrorAction action,
int blk_is_read_only(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_is_read_only(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_is_read_only(bs);
} else {
return blk->root_state.read_only;
}
@@ -942,48 +1212,42 @@ int blk_is_read_only(BlockBackend *blk)
int blk_is_sg(BlockBackend *blk)
{
- if (!blk->bs) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
return 0;
}
- return bdrv_is_sg(blk->bs);
+ return bdrv_is_sg(bs);
}
int blk_enable_write_cache(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_enable_write_cache(blk->bs);
- } else {
- return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
- }
+ return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- if (blk->bs) {
- bdrv_set_enable_write_cache(blk->bs, wce);
- } else {
- if (wce) {
- blk->root_state.open_flags |= BDRV_O_CACHE_WB;
- } else {
- blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
- }
- }
+ blk->enable_write_cache = wce;
}
void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
- if (!blk->bs) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
error_setg(errp, "Device '%s' has no medium", blk->name);
return;
}
- bdrv_invalidate_cache(blk->bs, errp);
+ bdrv_invalidate_cache(bs, errp);
}
bool blk_is_inserted(BlockBackend *blk)
{
- return blk->bs && bdrv_is_inserted(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ return bs && bdrv_is_inserted(bs);
}
bool blk_is_available(BlockBackend *blk)
@@ -993,22 +1257,28 @@ bool blk_is_available(BlockBackend *blk)
void blk_lock_medium(BlockBackend *blk, bool locked)
{
- if (blk->bs) {
- bdrv_lock_medium(blk->bs, locked);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_lock_medium(bs, locked);
}
}
void blk_eject(BlockBackend *blk, bool eject_flag)
{
- if (blk->bs) {
- bdrv_eject(blk->bs, eject_flag);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_eject(bs, eject_flag);
}
}
int blk_get_flags(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_get_flags(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_flags(bs);
} else {
return blk->root_state.open_flags;
}
@@ -1016,57 +1286,79 @@ int blk_get_flags(BlockBackend *blk)
int blk_get_max_transfer_length(BlockBackend *blk)
{
- if (blk->bs) {
- return blk->bs->bl.max_transfer_length;
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bs->bl.max_transfer_length;
} else {
return 0;
}
}
+int blk_get_max_iov(BlockBackend *blk)
+{
+ return blk->root->bs->bl.max_iov;
+}
+
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
blk->guest_block_size = align;
}
+void *blk_try_blockalign(BlockBackend *blk, size_t size)
+{
+ return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
+}
+
void *blk_blockalign(BlockBackend *blk, size_t size)
{
- return qemu_blockalign(blk ? blk->bs : NULL, size);
+ return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
- if (!blk->bs) {
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
return false;
}
- return bdrv_op_is_blocked(blk->bs, op, errp);
+ return bdrv_op_is_blocked(bs, op, errp);
}
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
- if (blk->bs) {
- bdrv_op_unblock(blk->bs, op, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock(bs, op, reason);
}
}
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
- if (blk->bs) {
- bdrv_op_block_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_block_all(bs, reason);
}
}
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
- if (blk->bs) {
- bdrv_op_unblock_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock_all(bs, reason);
}
}
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- if (blk->bs) {
- return bdrv_get_aio_context(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_aio_context(bs);
} else {
return qemu_get_aio_context();
}
@@ -1080,8 +1372,10 @@ static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
- if (blk->bs) {
- bdrv_set_aio_context(blk->bs, new_context);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_set_aio_context(bs, new_context);
}
}
@@ -1089,8 +1383,10 @@ void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque)
{
- if (blk->bs) {
- bdrv_add_aio_context_notifier(blk->bs, attached_aio_context,
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_add_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
}
}
@@ -1101,30 +1397,39 @@ void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*detach_aio_context)(void *),
void *opaque)
{
- if (blk->bs) {
- bdrv_remove_aio_context_notifier(blk->bs, attached_aio_context,
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_remove_aio_context_notifier(bs, attached_aio_context,
detach_aio_context, opaque);
}
}
-void blk_add_close_notifier(BlockBackend *blk, Notifier *notify)
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
{
- if (blk->bs) {
- bdrv_add_close_notifier(blk->bs, notify);
- }
+ notifier_list_add(&blk->remove_bs_notifiers, notify);
+}
+
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
+{
+ notifier_list_add(&blk->insert_bs_notifiers, notify);
}
void blk_io_plug(BlockBackend *blk)
{
- if (blk->bs) {
- bdrv_io_plug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_plug(bs);
}
}
void blk_io_unplug(BlockBackend *blk)
{
- if (blk->bs) {
- bdrv_io_unplug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_unplug(bs);
}
}
@@ -1142,12 +1447,13 @@ void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
int coroutine_fn blk_co_write_zeroes(BlockBackend *blk, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
{
- int ret = blk_check_request(blk, sector_num, nb_sectors);
- if (ret < 0) {
- return ret;
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return -EINVAL;
}
- return bdrv_co_write_zeroes(blk->bs, sector_num, nb_sectors, flags);
+ return blk_co_pwritev(blk, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, NULL,
+ flags | BDRV_REQ_ZERO_WRITE);
}
int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
@@ -1158,7 +1464,7 @@ int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
return ret;
}
- return bdrv_write_compressed(blk->bs, sector_num, buf, nb_sectors);
+ return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
}
int blk_truncate(BlockBackend *blk, int64_t offset)
@@ -1167,7 +1473,7 @@ int blk_truncate(BlockBackend *blk, int64_t offset)
return -ENOMEDIUM;
}
- return bdrv_truncate(blk->bs, offset);
+ return bdrv_truncate(blk_bs(blk), offset);
}
int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
@@ -1177,17 +1483,28 @@ int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
return ret;
}
- return bdrv_discard(blk->bs, sector_num, nb_sectors);
+ return bdrv_discard(blk_bs(blk), sector_num, nb_sectors);
}
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size)
{
+ int ret;
+
if (!blk_is_available(blk)) {
return -ENOMEDIUM;
}
- return bdrv_save_vmstate(blk->bs, buf, pos, size);
+ ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret == size && !blk->enable_write_cache) {
+ ret = bdrv_flush(blk_bs(blk));
+ }
+
+ return ret < 0 ? ret : size;
}
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
@@ -1196,7 +1513,7 @@ int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
return -ENOMEDIUM;
}
- return bdrv_load_vmstate(blk->bs, buf, pos, size);
+ return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
}
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
@@ -1205,7 +1522,7 @@ int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
return -ENOMEDIUM;
}
- return bdrv_probe_blocksizes(blk->bs, bsz);
+ return bdrv_probe_blocksizes(blk_bs(blk), bsz);
}
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
@@ -1214,7 +1531,7 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
return -ENOMEDIUM;
}
- return bdrv_probe_geometry(blk->bs, geo);
+ return bdrv_probe_geometry(blk_bs(blk), geo);
}
/*
@@ -1223,18 +1540,18 @@ int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
*/
void blk_update_root_state(BlockBackend *blk)
{
- assert(blk->bs);
+ assert(blk->root);
- blk->root_state.open_flags = blk->bs->open_flags;
- blk->root_state.read_only = blk->bs->read_only;
- blk->root_state.detect_zeroes = blk->bs->detect_zeroes;
+ blk->root_state.open_flags = blk->root->bs->open_flags;
+ blk->root_state.read_only = blk->root->bs->read_only;
+ blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
if (blk->root_state.throttle_group) {
g_free(blk->root_state.throttle_group);
throttle_group_unref(blk->root_state.throttle_state);
}
- if (blk->bs->throttle_state) {
- const char *name = throttle_group_get_name(blk->bs);
+ if (blk->root->bs->throttle_state) {
+ const char *name = throttle_group_get_name(blk->root->bs);
blk->root_state.throttle_group = g_strdup(name);
blk->root_state.throttle_state = throttle_group_incref(name);
} else {
@@ -1274,3 +1591,45 @@ BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
{
return &blk->root_state;
}
+
+int blk_commit_all(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk) && blk->root->bs->backing) {
+ int ret = bdrv_commit(blk->root->bs);
+ if (ret < 0) {
+ aio_context_release(aio_context);
+ return ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+ return 0;
+}
+
+int blk_flush_all(void)
+{
+ BlockBackend *blk = NULL;
+ int result = 0;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+ int ret;
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk)) {
+ ret = blk_flush(blk);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+
+ return result;
+}
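
The two helpers added above, blk_commit_all() and blk_flush_all(), follow one pattern: walk every BlockBackend with blk_all_next(), acquire that backend's AioContext before touching it, and release it before moving on. Below is a minimal sketch of the same pattern; the helper name blk_count_inserted() is hypothetical and exists only to illustrate the iteration, using functions that appear in this patch.

#include "qemu/osdep.h"
#include "block/aio.h"
#include "sysemu/block-backend.h"

/* Hypothetical helper (illustration only): count inserted backends,
 * following the same iterate-and-lock pattern as blk_commit_all() and
 * blk_flush_all() above. */
static int blk_count_inserted(void)
{
    BlockBackend *blk = NULL;
    int count = 0;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *aio_context = blk_get_aio_context(blk);

        /* Serialize against I/O running in that backend's context */
        aio_context_acquire(aio_context);
        if (blk_is_inserted(blk)) {
            count++;
        }
        aio_context_release(aio_context);
    }
    return count;
}
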
diff --git a/block/bochs.c b/block/bochs.c
index 18949b9d4f..af8b7abdfd 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -22,6 +22,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
diff --git a/block/cloop.c b/block/cloop.c
index 4190ae06d7..a84f14019c 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -21,6 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
diff --git a/block/commit.c b/block/commit.c
index a5d02aa560..cba0e8c1e8 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -12,9 +12,11 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
diff --git a/block/crypto.c b/block/crypto.c
new file mode 100644
index 0000000000..1903e84fbd
--- /dev/null
+++ b/block/crypto.c
@@ -0,0 +1,586 @@
+/*
+ * QEMU block full disk encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "crypto/block.h"
+#include "qapi/opts-visitor.h"
+#include "qapi-visit.h"
+#include "qapi/error.h"
+
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+
+typedef struct BlockCrypto BlockCrypto;
+
+struct BlockCrypto {
+ QCryptoBlock *block;
+};
+
+
+static int block_crypto_probe_generic(QCryptoBlockFormat format,
+ const uint8_t *buf,
+ int buf_size,
+ const char *filename)
+{
+ if (qcrypto_block_has_format(format, buf, buf_size)) {
+ return 100;
+ } else {
+ return 0;
+ }
+}
+
+
+static ssize_t block_crypto_read_func(QCryptoBlock *block,
+ size_t offset,
+ uint8_t *buf,
+ size_t buflen,
+ Error **errp,
+ void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ ssize_t ret;
+
+ ret = bdrv_pread(bs->file->bs, offset, buf, buflen);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not read encryption header");
+ return ret;
+ }
+ return ret;
+}
+
+
+struct BlockCryptoCreateData {
+ const char *filename;
+ QemuOpts *opts;
+ BlockBackend *blk;
+ uint64_t size;
+};
+
+
+static ssize_t block_crypto_write_func(QCryptoBlock *block,
+ size_t offset,
+ const uint8_t *buf,
+ size_t buflen,
+ Error **errp,
+ void *opaque)
+{
+ struct BlockCryptoCreateData *data = opaque;
+ ssize_t ret;
+
+ ret = blk_pwrite(data->blk, offset, buf, buflen);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not write encryption header");
+ return ret;
+ }
+ return ret;
+}
+
+
+static ssize_t block_crypto_init_func(QCryptoBlock *block,
+ size_t headerlen,
+ Error **errp,
+ void *opaque)
+{
+ struct BlockCryptoCreateData *data = opaque;
+ int ret;
+
+    /* The user-provided size reflects the space made available to the
+     * guest, so we must add on top of it the space that will be
+     * consumed by the crypto header.
+     */
+ data->size += headerlen;
+
+ qemu_opt_set_number(data->opts, BLOCK_OPT_SIZE, data->size, &error_abort);
+ ret = bdrv_create_file(data->filename, data->opts, errp);
+ if (ret < 0) {
+ return -1;
+ }
+
+ data->blk = blk_new_open(data->filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ if (!data->blk) {
+ return -1;
+ }
+
+ return 0;
+}
+
+
+static QemuOptsList block_crypto_runtime_opts_luks = {
+ .name = "crypto",
+ .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
+ .desc = {
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret that provides the encryption key",
+ },
+ { /* end of list */ }
+ },
+};
+
+
+static QemuOptsList block_crypto_create_opts_luks = {
+ .name = "crypto",
+ .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret that provides the encryption key",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption cipher algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption cipher mode",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of IV generator algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of IV generator hash algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption hash algorithm",
+ },
+ { /* end of list */ }
+ },
+};
+
+
+static QCryptoBlockOpenOptions *
+block_crypto_open_opts_init(QCryptoBlockFormat format,
+ QemuOpts *opts,
+ Error **errp)
+{
+ OptsVisitor *ov;
+ QCryptoBlockOpenOptions *ret = NULL;
+ Error *local_err = NULL;
+ Error *end_err = NULL;
+
+ ret = g_new0(QCryptoBlockOpenOptions, 1);
+ ret->format = format;
+
+ ov = opts_visitor_new(opts);
+
+ visit_start_struct(opts_get_visitor(ov),
+ NULL, NULL, 0, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ switch (format) {
+ case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+ visit_type_QCryptoBlockOptionsLUKS_members(
+ opts_get_visitor(ov), &ret->u.luks, &local_err);
+ break;
+
+ default:
+ error_setg(&local_err, "Unsupported block format %d", format);
+ break;
+ }
+
+ visit_end_struct(opts_get_visitor(ov), &end_err);
+ error_propagate(&local_err, end_err);
+
+ out:
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_QCryptoBlockOpenOptions(ret);
+ ret = NULL;
+ }
+ opts_visitor_cleanup(ov);
+ return ret;
+}
+
+
+static QCryptoBlockCreateOptions *
+block_crypto_create_opts_init(QCryptoBlockFormat format,
+ QemuOpts *opts,
+ Error **errp)
+{
+ OptsVisitor *ov;
+ QCryptoBlockCreateOptions *ret = NULL;
+ Error *local_err = NULL;
+ Error *end_err = NULL;
+
+ ret = g_new0(QCryptoBlockCreateOptions, 1);
+ ret->format = format;
+
+ ov = opts_visitor_new(opts);
+
+ visit_start_struct(opts_get_visitor(ov),
+ NULL, NULL, 0, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ switch (format) {
+ case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+ visit_type_QCryptoBlockCreateOptionsLUKS_members(
+ opts_get_visitor(ov), &ret->u.luks, &local_err);
+ break;
+
+ default:
+ error_setg(&local_err, "Unsupported block format %d", format);
+ break;
+ }
+
+ visit_end_struct(opts_get_visitor(ov), &end_err);
+ error_propagate(&local_err, end_err);
+
+ out:
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_QCryptoBlockCreateOptions(ret);
+ ret = NULL;
+ }
+ opts_visitor_cleanup(ov);
+ return ret;
+}
+
+
+static int block_crypto_open_generic(QCryptoBlockFormat format,
+ QemuOptsList *opts_spec,
+ BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ Error **errp)
+{
+ BlockCrypto *crypto = bs->opaque;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ int ret = -EINVAL;
+ QCryptoBlockOpenOptions *open_opts = NULL;
+ unsigned int cflags = 0;
+
+ opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto cleanup;
+ }
+
+ open_opts = block_crypto_open_opts_init(format, opts, errp);
+ if (!open_opts) {
+ goto cleanup;
+ }
+
+ if (flags & BDRV_O_NO_IO) {
+ cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
+ }
+ crypto->block = qcrypto_block_open(open_opts,
+ block_crypto_read_func,
+ bs,
+ cflags,
+ errp);
+
+ if (!crypto->block) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ bs->encrypted = 1;
+ bs->valid_key = 1;
+
+ ret = 0;
+ cleanup:
+ qapi_free_QCryptoBlockOpenOptions(open_opts);
+ return ret;
+}
+
+
+static int block_crypto_create_generic(QCryptoBlockFormat format,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp)
+{
+ int ret = -EINVAL;
+ QCryptoBlockCreateOptions *create_opts = NULL;
+ QCryptoBlock *crypto = NULL;
+ struct BlockCryptoCreateData data = {
+ .size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+ BDRV_SECTOR_SIZE),
+ .opts = opts,
+ .filename = filename,
+ };
+
+ create_opts = block_crypto_create_opts_init(format, opts, errp);
+ if (!create_opts) {
+ return -1;
+ }
+
+ crypto = qcrypto_block_create(create_opts,
+ block_crypto_init_func,
+ block_crypto_write_func,
+ &data,
+ errp);
+
+ if (!crypto) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ qcrypto_block_free(crypto);
+ blk_unref(data.blk);
+ qapi_free_QCryptoBlockCreateOptions(create_opts);
+ return ret;
+}
+
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
+{
+ BlockCrypto *crypto = bs->opaque;
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block);
+
+ offset += payload_offset;
+
+ return bdrv_truncate(bs->file->bs, offset);
+}
+
+static void block_crypto_close(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ qcrypto_block_free(crypto->block);
+}
+
+
+#define BLOCK_CRYPTO_MAX_SECTORS 32
+
+static coroutine_fn int
+block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done = 0;
+ uint8_t *cipher_data = NULL;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    /* Bounce buffer so we have a linear mem region for the
+     * entire sector. XXX: optimize to avoid the bounce
+     * buffer when qiov->niov == 1
+     */
+ cipher_data =
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qiov->size));
+ if (cipher_data == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ while (remaining_sectors) {
+ cur_nr_sectors = remaining_sectors;
+
+ if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+ cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+ }
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+ ret = bdrv_co_readv(bs->file->bs,
+ payload_offset + sector_num,
+ cur_nr_sectors, &hd_qiov);
+ if (ret < 0) {
+ goto cleanup;
+ }
+
+ if (qcrypto_block_decrypt(crypto->block,
+ sector_num,
+ cipher_data, cur_nr_sectors * 512,
+ NULL) < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ qemu_iovec_from_buf(qiov, bytes_done,
+ cipher_data, cur_nr_sectors * 512);
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+
+ cleanup:
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cipher_data);
+
+ return ret;
+}
+
+
+static coroutine_fn int
+block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done = 0;
+ uint8_t *cipher_data = NULL;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+    /* Bounce buffer so we have a linear mem region for the
+     * entire sector. XXX: optimize to avoid the bounce
+     * buffer when qiov->niov == 1
+     */
+ cipher_data =
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qiov->size));
+ if (cipher_data == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ while (remaining_sectors) {
+ cur_nr_sectors = remaining_sectors;
+
+ if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+ cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+ }
+
+ qemu_iovec_to_buf(qiov, bytes_done,
+ cipher_data, cur_nr_sectors * 512);
+
+ if (qcrypto_block_encrypt(crypto->block,
+ sector_num,
+ cipher_data, cur_nr_sectors * 512,
+ NULL) < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+ ret = bdrv_co_writev(bs->file->bs,
+ payload_offset + sector_num,
+ cur_nr_sectors, &hd_qiov);
+ if (ret < 0) {
+ goto cleanup;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+
+ cleanup:
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cipher_data);
+
+ return ret;
+}
+
+
+static int64_t block_crypto_getlength(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int64_t len = bdrv_getlength(bs->file->bs);
+
+ ssize_t offset = qcrypto_block_get_payload_offset(crypto->block);
+
+ len -= offset;
+
+ return len;
+}
+
+
+static int block_crypto_probe_luks(const uint8_t *buf,
+ int buf_size,
+ const char *filename) {
+ return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ buf, buf_size, filename);
+}
+
+static int block_crypto_open_luks(BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ Error **errp)
+{
+ return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ &block_crypto_runtime_opts_luks,
+ bs, options, flags, errp);
+}
+
+static int block_crypto_create_luks(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
+{
+ return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ filename, opts, errp);
+}
+
+BlockDriver bdrv_crypto_luks = {
+ .format_name = "luks",
+ .instance_size = sizeof(BlockCrypto),
+ .bdrv_probe = block_crypto_probe_luks,
+ .bdrv_open = block_crypto_open_luks,
+ .bdrv_close = block_crypto_close,
+ .bdrv_create = block_crypto_create_luks,
+ .bdrv_truncate = block_crypto_truncate,
+ .create_opts = &block_crypto_create_opts_luks,
+
+ .bdrv_co_readv = block_crypto_co_readv,
+ .bdrv_co_writev = block_crypto_co_writev,
+ .bdrv_getlength = block_crypto_getlength,
+};
+
+static void block_crypto_init(void)
+{
+ bdrv_register(&bdrv_crypto_luks);
+}
+
+block_init(block_crypto_init);
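
Two relations underpin the new driver's geometry: the guest-visible length is the length of the underlying file minus the crypto payload offset, and guest sector N is stored at file sector N + payload_offset / 512. The sketch below restates that arithmetic on its own, assuming the fixed 512-byte sector size used throughout this file; the helper names are illustrative, not part of the driver.

#include <stdint.h>
#include <stddef.h>

#define SECTOR_SIZE 512   /* matches the hard-coded 512 used above */

/* Guest-visible length once the encryption header is subtracted,
 * mirroring block_crypto_getlength(). */
static int64_t crypto_guest_length(int64_t file_length, size_t payload_offset)
{
    return file_length - (int64_t)payload_offset;
}

/* File sector backing a given guest sector, mirroring the
 * payload_offset / 512 adjustment in the read and write paths. */
static int64_t crypto_file_sector(int64_t guest_sector, size_t payload_offset)
{
    return guest_sector + (int64_t)(payload_offset / SECTOR_SIZE);
}
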
diff --git a/block/curl.c b/block/curl.c
index 89941826ed..5a8f8b6239 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -21,12 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
#include <curl/curl.h>
+#include "qemu/cutils.h"
// #define DEBUG_CURL
// #define DEBUG_VERBOSE
@@ -77,6 +81,10 @@ static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
#define CURL_BLOCK_OPT_TIMEOUT "timeout"
#define CURL_BLOCK_OPT_COOKIE "cookie"
+#define CURL_BLOCK_OPT_USERNAME "username"
+#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
+#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
+#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
struct BDRVCURLState;
@@ -119,6 +127,10 @@ typedef struct BDRVCURLState {
char *cookie;
bool accept_range;
AioContext *aio_context;
+ char *username;
+ char *password;
+ char *proxyusername;
+ char *proxypassword;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
@@ -418,6 +430,21 @@ static CURLState *curl_init_state(BlockDriverState *bs, BDRVCURLState *s)
curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
+ if (s->username) {
+ curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
+ }
+ if (s->password) {
+ curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
+ }
+ if (s->proxyusername) {
+ curl_easy_setopt(state->curl,
+ CURLOPT_PROXYUSERNAME, s->proxyusername);
+ }
+ if (s->proxypassword) {
+ curl_easy_setopt(state->curl,
+ CURLOPT_PROXYPASSWORD, s->proxypassword);
+ }
+
/* Restrict supported protocols to avoid security issues in the more
 * obscure protocols. For example, do not allow POP3/SMTP/IMAP; see
* CVE-2013-0249.
@@ -524,10 +551,31 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Pass the cookie or list of cookies with each request"
},
+ {
+ .name = CURL_BLOCK_OPT_USERNAME,
+ .type = QEMU_OPT_STRING,
+ .help = "Username for HTTP auth"
+ },
+ {
+ .name = CURL_BLOCK_OPT_PASSWORD_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret used as password for HTTP auth",
+ },
+ {
+ .name = CURL_BLOCK_OPT_PROXY_USERNAME,
+ .type = QEMU_OPT_STRING,
+ .help = "Username for HTTP proxy auth"
+ },
+ {
+ .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret used as password for HTTP proxy auth",
+ },
{ /* end of list */ }
},
};
+
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -538,6 +586,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
const char *file;
const char *cookie;
double d;
+ const char *secretid;
static int inited = 0;
@@ -579,6 +628,26 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
goto out_noclean;
}
+ s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
+ secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
+
+ if (secretid) {
+ s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!s->password) {
+ goto out_noclean;
+ }
+ }
+
+ s->proxyusername = g_strdup(
+ qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
+ secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
+ if (secretid) {
+ s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!s->proxypassword) {
+ goto out_noclean;
+ }
+ }
+
if (!inited) {
curl_global_init(CURL_GLOBAL_ALL);
inited = 1;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
new file mode 100644
index 0000000000..4902ca557f
--- /dev/null
+++ b/block/dirty-bitmap.c
@@ -0,0 +1,387 @@
+/*
+ * Block Dirty Bitmap
+ *
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ * or enabled. A frozen bitmap can only abdicate() or reclaim().
+ */
+struct BdrvDirtyBitmap {
+ HBitmap *bitmap; /* Dirty sector bitmap implementation */
+ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+ char *name; /* Optional non-empty unique ID */
+ int64_t size; /* Size of the bitmap (Number of sectors) */
+ bool disabled; /* Bitmap is read-only */
+ QLIST_ENTRY(BdrvDirtyBitmap) list;
+};
+
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+ BdrvDirtyBitmap *bm;
+
+ assert(name);
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ if (bm->name && !strcmp(name, bm->name)) {
+ return bm;
+ }
+ }
+ return NULL;
+}
+
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ g_free(bitmap->name);
+ bitmap->name = NULL;
+}
+
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+ uint32_t granularity,
+ const char *name,
+ Error **errp)
+{
+ int64_t bitmap_size;
+ BdrvDirtyBitmap *bitmap;
+ uint32_t sector_granularity;
+
+ assert((granularity & (granularity - 1)) == 0);
+
+ if (name && bdrv_find_dirty_bitmap(bs, name)) {
+ error_setg(errp, "Bitmap already exists: %s", name);
+ return NULL;
+ }
+ sector_granularity = granularity >> BDRV_SECTOR_BITS;
+ assert(sector_granularity);
+ bitmap_size = bdrv_nb_sectors(bs);
+ if (bitmap_size < 0) {
+ error_setg_errno(errp, -bitmap_size, "could not get length of device");
+ errno = -bitmap_size;
+ return NULL;
+ }
+ bitmap = g_new0(BdrvDirtyBitmap, 1);
+ bitmap->bitmap = hbitmap_alloc(bitmap_size, ctz32(sector_granularity));
+ bitmap->size = bitmap_size;
+ bitmap->name = g_strdup(name);
+ bitmap->disabled = false;
+ QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
+ return bitmap;
+}
+
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+ return bitmap->successor;
+}
+
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+ return !(bitmap->disabled || bitmap->successor);
+}
+
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ return DIRTY_BITMAP_STATUS_FROZEN;
+ } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ return DIRTY_BITMAP_STATUS_DISABLED;
+ } else {
+ return DIRTY_BITMAP_STATUS_ACTIVE;
+ }
+}
+
+/**
+ * Create a successor bitmap destined to replace this bitmap after an operation.
+ * Requires that the bitmap is not frozen and has no successor.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap, Error **errp)
+{
+ uint64_t granularity;
+ BdrvDirtyBitmap *child;
+
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ error_setg(errp, "Cannot create a successor for a bitmap that is "
+ "currently frozen");
+ return -1;
+ }
+ assert(!bitmap->successor);
+
+ /* Create an anonymous successor */
+ granularity = bdrv_dirty_bitmap_granularity(bitmap);
+ child = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
+ if (!child) {
+ return -1;
+ }
+
+ /* Successor will be on or off based on our current state. */
+ child->disabled = bitmap->disabled;
+
+ /* Install the successor and freeze the parent */
+ bitmap->successor = child;
+ return 0;
+}
+
+/**
+ * For a bitmap with a successor, yield our name to the successor,
+ * delete the old bitmap, and return a handle to the new bitmap.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ Error **errp)
+{
+ char *name;
+ BdrvDirtyBitmap *successor = bitmap->successor;
+
+ if (successor == NULL) {
+ error_setg(errp, "Cannot relinquish control if "
+ "there's no successor present");
+ return NULL;
+ }
+
+ name = bitmap->name;
+ bitmap->name = NULL;
+ successor->name = name;
+ bitmap->successor = NULL;
+ bdrv_release_dirty_bitmap(bs, bitmap);
+
+ return successor;
+}
+
+/**
+ * In cases of failure where we can no longer safely delete the parent,
+ * we may wish to re-join the parent and child/successor.
+ * The merged parent will be un-frozen, but not explicitly re-enabled.
+ */
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *parent,
+ Error **errp)
+{
+ BdrvDirtyBitmap *successor = parent->successor;
+
+ if (!successor) {
+ error_setg(errp, "Cannot reclaim a successor when none is present");
+ return NULL;
+ }
+
+ if (!hbitmap_merge(parent->bitmap, successor->bitmap)) {
+ error_setg(errp, "Merging of parent and successor bitmap failed");
+ return NULL;
+ }
+ bdrv_release_dirty_bitmap(bs, successor);
+ parent->successor = NULL;
+
+ return parent;
+}
+
+/**
+ * Truncates _all_ bitmaps attached to a BDS.
+ */
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bitmap;
+ uint64_t size = bdrv_nb_sectors(bs);
+
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ hbitmap_truncate(bitmap->bitmap, size);
+ bitmap->size = size;
+ }
+}
+
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap,
+ bool only_named)
+{
+ BdrvDirtyBitmap *bm, *next;
+ QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
+ if ((!bitmap || bm == bitmap) && (!only_named || bm->name)) {
+ assert(!bdrv_dirty_bitmap_frozen(bm));
+ QLIST_REMOVE(bm, list);
+ hbitmap_free(bm->bitmap);
+ g_free(bm->name);
+ g_free(bm);
+
+ if (bitmap) {
+ return;
+ }
+ }
+ }
+}
+
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+ bdrv_do_release_matching_dirty_bitmap(bs, bitmap, false);
+}
+
+/**
+ * Release all named dirty bitmaps attached to a BDS (for use in bdrv_close()).
+ * There must not be any frozen bitmaps attached.
+ */
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
+{
+ bdrv_do_release_matching_dirty_bitmap(bs, NULL, true);
+}
+
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ bitmap->disabled = true;
+}
+
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+ assert(!bdrv_dirty_bitmap_frozen(bitmap));
+ bitmap->disabled = false;
+}
+
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDirtyInfoList *list = NULL;
+ BlockDirtyInfoList **plist = &list;
+
+ QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+ BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
+ BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
+ info->count = bdrv_get_dirty_count(bm);
+ info->granularity = bdrv_dirty_bitmap_granularity(bm);
+ info->has_name = !!bm->name;
+ info->name = g_strdup(bm->name);
+ info->status = bdrv_dirty_bitmap_status(bm);
+ entry->value = info;
+ *plist = entry;
+ plist = &entry->next;
+ }
+
+ return list;
+}
+
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ int64_t sector)
+{
+ if (bitmap) {
+ return hbitmap_get(bitmap->bitmap, sector);
+ } else {
+ return 0;
+ }
+}
+
+/**
+ * Chooses a default granularity based on the existing cluster size,
+ * but clamped to the range [4K, 64K]. Defaults to 64K when no cluster
+ * size information is available.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+ uint32_t granularity;
+
+ if (bdrv_get_info(bs, &bdi) >= 0 && bdi.cluster_size > 0) {
+ granularity = MAX(4096, bdi.cluster_size);
+ granularity = MIN(65536, granularity);
+ } else {
+ granularity = 65536;
+ }
+
+ return granularity;
+}
+
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+ return BDRV_SECTOR_SIZE << hbitmap_granularity(bitmap->bitmap);
+}
+
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+{
+ hbitmap_iter_init(hbi, bitmap->bitmap, 0);
+}
+
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+ int64_t cur_sector, int nr_sectors)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
+{
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ if (!out) {
+ hbitmap_reset_all(bitmap->bitmap);
+ } else {
+ HBitmap *backup = bitmap->bitmap;
+ bitmap->bitmap = hbitmap_alloc(bitmap->size,
+ hbitmap_granularity(backup));
+ *out = backup;
+ }
+}
+
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+ HBitmap *tmp = bitmap->bitmap;
+ assert(bdrv_dirty_bitmap_enabled(bitmap));
+ bitmap->bitmap = in;
+ hbitmap_free(tmp);
+}
+
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+ int nr_sectors)
+{
+ BdrvDirtyBitmap *bitmap;
+ QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
+ if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ continue;
+ }
+ hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+ }
+}
+
+/**
+ * Advance an HBitmapIter to an arbitrary offset.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+ assert(hbi->hb);
+ hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
+{
+ return hbitmap_count(bitmap->bitmap);
+}
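
The successor API introduced above is easiest to read as a three-step lifecycle around a long-running operation: freeze the bitmap by installing an anonymous successor, then either abdicate (the operation succeeded, the successor takes over) or reclaim (the operation failed, the successor is merged back). The sketch below strings the three calls together; it uses only functions defined in this file, while the wrapper name and the job_succeeded flag are placeholders.

#include "qemu/osdep.h"
#include "block/block_int.h"

/* Illustration only: resolve a frozen bitmap once a job finishes. */
static BdrvDirtyBitmap *resolve_bitmap_after_job(BlockDriverState *bs,
                                                 BdrvDirtyBitmap *bitmap,
                                                 bool job_succeeded,
                                                 Error **errp)
{
    /* (1) Freeze: attach an anonymous successor that records new writes. */
    if (bdrv_dirty_bitmap_create_successor(bs, bitmap, errp) < 0) {
        return NULL;
    }

    /* ... the job runs; the frozen parent is read-only meanwhile ... */

    if (job_succeeded) {
        /* (2a) Success: the successor inherits the name, the parent is freed. */
        return bdrv_dirty_bitmap_abdicate(bs, bitmap, errp);
    }
    /* (2b) Failure: merge the successor back and unfreeze the parent. */
    return bdrv_reclaim_dirty_bitmap(bs, bitmap, errp);
}
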
diff --git a/block/dmg.c b/block/dmg.c
index 546a6f5330..a496eb7c9b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -21,6 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/bswap.h"
diff --git a/block/gluster.c b/block/gluster.c
index 0857c14645..a8aaacf645 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -7,8 +7,10 @@
* See the COPYING file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
typedef struct GlusterAIOCB {
@@ -245,7 +247,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
if (!ret || ret == acb->size) {
acb->ret = 0; /* Success */
} else if (ret < 0) {
- acb->ret = ret; /* Read/Write failed */
+ acb->ret = -errno; /* Read/Write failed */
} else {
acb->ret = -EIO; /* Partial read/write - fail it */
}
@@ -312,6 +314,23 @@ static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
goto out;
}
+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+ /* Without this, if fsync fails for a recoverable reason (for instance,
+ * ENOSPC), gluster will dump its cache, preventing retries. This means
+ * almost certain data loss. Not all gluster versions support the
+ * 'resync-failed-syncs-after-fsync' key value, but there is no way to
+ * discover during runtime if it is supported (this api returns success for
+ * unknown key/value pairs) */
+ ret = glfs_set_xlator_option(s->glfs, "*-write-behind",
+ "resync-failed-syncs-after-fsync",
+ "on");
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+ ret = -errno;
+ goto out;
+ }
+#endif
+
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
s->fd = glfs_open(s->glfs, gconf->image, open_flags);
@@ -364,6 +383,16 @@ static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
goto exit;
}
+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+ ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
+ "resync-failed-syncs-after-fsync", "on");
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+ ret = -errno;
+ goto exit;
+ }
+#endif
+
reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
@@ -587,6 +616,17 @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+ BDRVGlusterState *s = bs->opaque;
+
+ if (s->fd) {
+ glfs_close(s->fd);
+ s->fd = NULL;
+ }
+ glfs_fini(s->glfs);
+}
+
static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
{
int ret;
@@ -600,11 +640,35 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
- return -errno;
+ ret = -errno;
+ goto error;
}
qemu_coroutine_yield();
+ if (acb.ret < 0) {
+ ret = acb.ret;
+ goto error;
+ }
+
return acb.ret;
+
+error:
+    /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain their cache
+     * after an fsync failure, so we have no way of allowing the guest to safely
+ * continue. Gluster versions prior to 3.5.6 don't retain the cache
+ * either, but will invalidate the fd on error, so this is again our only
+ * option.
+ *
+ * The 'resync-failed-syncs-after-fsync' xlator option for the
+     * write-behind cache will cause later gluster versions to retain their
+     * cache after an error, so long as the fd remains open. However, we
+ * currently have no way of knowing if this option is supported.
+ *
+ * TODO: Once gluster provides a way for us to determine if the option
+ * is supported, bypass the closure and setting drv to NULL. */
+ qemu_gluster_close(bs);
+ bs->drv = NULL;
+ return ret;
}
#ifdef CONFIG_GLUSTERFS_DISCARD
@@ -659,17 +723,6 @@ static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
}
}
-static void qemu_gluster_close(BlockDriverState *bs)
-{
- BDRVGlusterState *s = bs->opaque;
-
- if (s->fd) {
- glfs_close(s->fd);
- s->fd = NULL;
- }
- glfs_fini(s->glfs);
-}
-
static int qemu_gluster_has_zero_init(BlockDriverState *bs)
{
/* GlusterFS volume could be backed by a block device */
diff --git a/block/io.c b/block/io.c
index e00fb5d690..a7dbf85b19 100644
--- a/block/io.c
+++ b/block/io.c
@@ -22,11 +22,14 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "block/throttle-groups.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
@@ -43,12 +46,6 @@ static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -166,9 +163,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
+ bs->bl.max_iov = bs->file->bs->bl.max_iov;
} else {
bs->bl.min_mem_alignment = 512;
bs->bl.opt_mem_alignment = getpagesize();
+
+ /* Safe default since most protocols use readv()/writev()/etc */
+ bs->bl.max_iov = IOV_MAX;
}
if (bs->backing) {
@@ -189,6 +190,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.min_mem_alignment =
MAX(bs->bl.min_mem_alignment,
bs->backing->bs->bl.min_mem_alignment);
+ bs->bl.max_iov =
+ MIN(bs->bl.max_iov,
+ bs->backing->bs->bl.max_iov);
}
/* Then let the driver override it */
@@ -249,6 +253,47 @@ static void bdrv_drain_recurse(BlockDriverState *bs)
}
}
+typedef struct {
+ Coroutine *co;
+ BlockDriverState *bs;
+ QEMUBH *bh;
+ bool done;
+} BdrvCoDrainData;
+
+static void bdrv_co_drain_bh_cb(void *opaque)
+{
+ BdrvCoDrainData *data = opaque;
+ Coroutine *co = data->co;
+
+ qemu_bh_delete(data->bh);
+ bdrv_drain(data->bs);
+ data->done = true;
+ qemu_coroutine_enter(co, NULL);
+}
+
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+{
+ BdrvCoDrainData data;
+
+ /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
+ * other coroutines run if they were queued from
+ * qemu_co_queue_run_restart(). */
+
+ assert(qemu_in_coroutine());
+ data = (BdrvCoDrainData) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .done = false,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data),
+ };
+ qemu_bh_schedule(data.bh);
+
+ qemu_coroutine_yield();
+ /* If we are resumed from some other event (such as an aio completion or a
+ * timer callback), it is a bug in the caller that should be fixed. */
+ assert(data.done);
+}
+
/*
* Wait for pending requests to complete on a single BlockDriverState subtree,
* and suspend block driver's internal I/O until next request arrives.
@@ -265,6 +310,10 @@ void bdrv_drain(BlockDriverState *bs)
bool busy = true;
bdrv_drain_recurse(bs);
+ if (qemu_in_coroutine()) {
+ bdrv_co_drain(bs);
+ return;
+ }
while (busy) {
/* Keep iterating */
bdrv_flush_io_queue(bs);
@@ -293,6 +342,7 @@ void bdrv_drain_all(void)
if (bs->job) {
block_job_pause(bs->job);
}
+ bdrv_drain_recurse(bs);
aio_context_release(aio_context);
if (!g_slist_find(aio_ctxs, aio_context)) {
@@ -612,20 +662,6 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
}
-/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- bool enabled;
- int ret;
-
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- bs->io_limits_enabled = enabled;
- return ret;
-}
-
/* Return < 0 if error. Important errors are:
-EIO generic I/O error (may happen for all errors)
-ENOMEDIUM No media inserted.
@@ -656,6 +692,7 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
{
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+ BlockDriverState *file;
int n;
target_sectors = bdrv_nb_sectors(bs);
@@ -668,7 +705,7 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
if (nb_sectors <= 0) {
return 0;
}
- ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
+ ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file);
if (ret < 0) {
error_report("error getting block status at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
@@ -755,9 +792,9 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return ret;
}
- /* No flush needed for cache modes that already do it */
- if (bs->enable_write_cache) {
- bdrv_flush(bs);
+ ret = bdrv_flush(bs);
+ if (ret < 0) {
+ return ret;
}
return 0;
@@ -852,6 +889,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
+ assert((bs->open_flags & BDRV_O_NO_IO) == 0);
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -929,7 +967,7 @@ out:
/*
* Handle a read request in coroutine context
*/
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1138,6 +1176,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
assert(!qiov || bytes == qiov->size);
+ assert((bs->open_flags & BDRV_O_NO_IO) == 0);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
@@ -1160,13 +1199,20 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
+ } else if (drv->bdrv_co_writev_flags) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV);
+ ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+ flags);
} else {
+ assert(drv->supported_write_flags == 0);
bdrv_debug_event(bs, BLKDBG_PWRITEV);
ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- if (ret == 0 && !bs->enable_write_cache) {
+ if (ret == 0 && (flags & BDRV_REQ_FUA) &&
+ !(drv->supported_write_flags & BDRV_REQ_FUA))
+ {
ret = bdrv_co_flush(bs);
}
@@ -1274,7 +1320,7 @@ fail:
/*
* Handle a write request in coroutine context
*/
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1293,6 +1339,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
if (bs->read_only) {
return -EPERM;
}
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
ret = bdrv_check_byte_request(bs, offset, bytes);
if (ret < 0) {
@@ -1434,29 +1481,10 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
BDRV_REQ_ZERO_WRITE | flags);
}
-int bdrv_flush_all(void)
-{
- BlockDriverState *bs = NULL;
- int result = 0;
-
- while ((bs = bdrv_next(bs))) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
- int ret;
-
- aio_context_acquire(aio_context);
- ret = bdrv_flush(bs);
- if (ret < 0 && !result) {
- result = ret;
- }
- aio_context_release(aio_context);
- }
-
- return result;
-}
-
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;
+ BlockDriverState **file;
int64_t sector_num;
int nb_sectors;
int *pnum;
@@ -1478,10 +1506,14 @@ typedef struct BdrvCoGetBlockStatusData {
*
* 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
* beyond the end of the disk image it will be clamped.
+ *
+ * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
+ * points to the BDS which the sector range is allocated in.
*/
static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
int64_t total_sectors;
int64_t n;
@@ -1511,7 +1543,9 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
return ret;
}
- ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
+ *file = NULL;
+ ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
+ file);
if (ret < 0) {
*pnum = 0;
return ret;
@@ -1520,7 +1554,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
if (ret & BDRV_BLOCK_RAW) {
assert(ret & BDRV_BLOCK_OFFSET_VALID);
return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
- *pnum, pnum);
+ *pnum, pnum, file);
}
if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
@@ -1537,13 +1571,14 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
}
}
- if (bs->file &&
+ if (*file && *file != bs &&
(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
(ret & BDRV_BLOCK_OFFSET_VALID)) {
+ BlockDriverState *file2;
int file_pnum;
- ret2 = bdrv_co_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
- *pnum, &file_pnum);
+ ret2 = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
+ *pnum, &file_pnum, &file2);
if (ret2 >= 0) {
/* Ignore errors. This is just providing extra information, it
* is useful but not necessary.
@@ -1568,14 +1603,15 @@ static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
int nb_sectors,
- int *pnum)
+ int *pnum,
+ BlockDriverState **file)
{
BlockDriverState *p;
int64_t ret = 0;
assert(bs != base);
for (p = bs; p != base; p = backing_bs(p)) {
- ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum);
+ ret = bdrv_co_get_block_status(p, sector_num, nb_sectors, pnum, file);
if (ret < 0 || ret & BDRV_BLOCK_ALLOCATED) {
break;
}
@@ -1594,7 +1630,8 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
data->ret = bdrv_co_get_block_status_above(data->bs, data->base,
data->sector_num,
data->nb_sectors,
- data->pnum);
+ data->pnum,
+ data->file);
data->done = true;
}
@@ -1606,12 +1643,14 @@ static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
int64_t bdrv_get_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
Coroutine *co;
BdrvCoGetBlockStatusData data = {
.bs = bs,
.base = base,
+ .file = file,
.sector_num = sector_num,
.nb_sectors = nb_sectors,
.pnum = pnum,
@@ -1635,16 +1674,19 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
int64_t bdrv_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
return bdrv_get_block_status_above(bs, backing_bs(bs),
- sector_num, nb_sectors, pnum);
+ sector_num, nb_sectors, pnum, file);
}
int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum)
{
- int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
+ BlockDriverState *file;
+ int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
+ &file);
if (ret < 0) {
return ret;
}
@@ -1882,7 +1924,8 @@ static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
merge = 1;
}
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
+ if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
+ bs->bl.max_iov) {
merge = 0;
}
@@ -2342,6 +2385,13 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
}
tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+
+ /* Write back all layers by calling one driver function */
+ if (bs->drv->bdrv_co_flush) {
+ ret = bs->drv->bdrv_co_flush(bs);
+ goto out;
+ }
+
/* Write back cached data to the OS even with cache=unsafe */
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) {
@@ -2453,6 +2503,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
} else if (bs->read_only) {
return -EPERM;
}
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
/* Do nothing if disabled. */
if (!(bs->open_flags & BDRV_O_UNMAP)) {
@@ -2614,10 +2665,11 @@ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
bdrv_co_ioctl_entry(&data);
} else {
Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
+
qemu_coroutine_enter(co, &data);
- }
- while (data.ret == -EINPROGRESS) {
- aio_poll(bdrv_get_aio_context(bs), true);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(bs), true);
+ }
}
return data.ret;
}
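
bdrv_get_block_status() and bdrv_get_block_status_above() now take an extra BlockDriverState **file out-parameter: when the returned status has BDRV_BLOCK_OFFSET_VALID set, *file points to the BDS in which the range is actually allocated. A minimal caller, modelled on the updated bdrv_is_allocated() above, is sketched below; the wrapper name is illustrative.

#include "qemu/osdep.h"
#include "block/block_int.h"

/* Illustration only: query block status with the new 'file' out-parameter. */
static int64_t query_allocation(BlockDriverState *bs, int64_t sector_num,
                                int nb_sectors, int *pnum)
{
    BlockDriverState *file = NULL;
    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
                                        &file);

    if (ret >= 0 && (ret & BDRV_BLOCK_OFFSET_VALID)) {
        /* 'file' is the BDS that actually holds the data, at offset
         * (ret & BDRV_BLOCK_OFFSET_MASK) within that BDS. */
    }
    return ret;
}
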
diff --git a/block/iscsi.c b/block/iscsi.c
index bd1f1bfcd1..302baf84c1 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -23,7 +23,7 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include <poll.h>
#include <math.h>
@@ -39,6 +39,7 @@
#include "sysemu/sysemu.h"
#include "qmp-commands.h"
#include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
@@ -69,7 +70,6 @@ typedef struct IscsiLun {
bool lbprz;
bool dpofua;
bool has_write_same;
- bool force_next_flush;
bool request_timed_out;
} IscsiLun;
@@ -83,7 +83,6 @@ typedef struct IscsiTask {
QEMUBH *bh;
IscsiLun *iscsilun;
QEMUTimer retry_timer;
- bool force_next_flush;
int err_code;
} IscsiTask;
@@ -281,8 +280,6 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
}
iTask->err_code = iscsi_translate_sense(&task->sense);
error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
- } else {
- iTask->iscsilun->force_next_flush |= iTask->force_next_flush;
}
out:
@@ -451,15 +448,15 @@ static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
}
}
-static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
+static int coroutine_fn
+iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov, int flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
- int fua;
+ bool fua;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
@@ -475,8 +472,7 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- fua = iscsilun->dpofua && !bs->enable_write_cache;
- iTask.force_next_flush = !fua;
+ fua = iscsilun->dpofua && (flags & BDRV_REQ_FUA);
if (iscsilun->use_16_for_rw) {
iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
@@ -517,6 +513,13 @@ retry:
return 0;
}
+static int coroutine_fn
+iscsi_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov)
+{
+ return iscsi_co_writev_flags(bs, sector_num, nb_sectors, iov, 0);
+}
+
static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
int64_t sector_num, int nb_sectors)
@@ -532,7 +535,8 @@ static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
IscsiLun *iscsilun = bs->opaque;
struct scsi_get_lba_status *lbas = NULL;
@@ -624,6 +628,9 @@ out:
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
}
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ *file = bs;
+ }
return ret;
}
@@ -650,7 +657,8 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
!iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int64_t ret;
int pnum;
- ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
+ BlockDriverState *file;
+ ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum, &file);
if (ret < 0) {
return ret;
}
@@ -709,11 +717,6 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
- if (!iscsilun->force_next_flush) {
- return 0;
- }
- iscsilun->force_next_flush = false;
-
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
@@ -1013,7 +1016,6 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
}
iscsi_co_init_iscsitask(iscsilun, &iTask);
- iTask.force_next_flush = true;
retry:
if (use_16_for_ws) {
iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
@@ -1075,6 +1077,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
+ const char *secretid;
+ char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
@@ -1094,8 +1098,20 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
return;
}
+ secretid = qemu_opt_get(opts, "password-secret");
password = qemu_opt_get(opts, "password");
- if (!password) {
+ if (secretid && password) {
+ error_setg(errp, "'password' and 'password-secret' properties are "
+ "mutually exclusive");
+ return;
+ }
+ if (secretid) {
+ secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!secret) {
+ return;
+ }
+ password = secret;
+ } else if (!password) {
error_setg(errp, "CHAP username specified but no password was given");
return;
}
@@ -1103,6 +1119,8 @@ static void parse_chap(struct iscsi_context *iscsi, const char *target,
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
error_setg(errp, "Failed to set initiator username and password");
}
+
+ g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
@@ -1243,8 +1261,13 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
iscsilun->lbprz = !!rc16->lbprz;
iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
}
+ break;
}
- break;
+ if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
+ && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+ break;
+ }
+ /* Fall through and try READ CAPACITY(10) instead. */
case TYPE_ROM:
task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
@@ -1270,7 +1293,7 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
&& retries-- > 0);
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
- error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
+ error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
} else if (!iscsilun->block_size ||
iscsilun->block_size % BDRV_SECTOR_SIZE) {
error_setg(errp, "iSCSI: the target returned an invalid "
@@ -1825,6 +1848,8 @@ static BlockDriver bdrv_iscsi = {
.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
+ .bdrv_co_writev_flags = iscsi_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
@@ -1848,6 +1873,11 @@ static QemuOptsList qemu_iscsi_opts = {
.type = QEMU_OPT_STRING,
.help = "password for CHAP authentication to target",
},{
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret providing password for CHAP "
+ "authentication to target",
+ },{
.name = "header-digest",
.type = QEMU_OPT_STRING,
.help = "HeaderDigest setting. "
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 88b0520a8b..805757e02e 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -7,6 +7,7 @@
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
diff --git a/block/mirror.c b/block/mirror.c
index 0e8f5565a5..039f48125e 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -11,13 +11,16 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "sysemu/block-backend.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
+#include "qemu/error-report.h"
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
@@ -45,7 +48,6 @@ typedef struct MirrorBlockJob {
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
- int64_t sector_num;
int64_t granularity;
size_t buf_size;
int64_t bdev_length;
@@ -62,6 +64,8 @@ typedef struct MirrorBlockJob {
int ret;
bool unmap;
bool waiting_for_io;
+ int target_cluster_sectors;
+ int max_iov;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -104,7 +108,7 @@ static void mirror_iteration_done(MirrorOp *op, int ret)
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = op->sector_num / sectors_per_chunk;
- nb_chunks = op->nb_sectors / sectors_per_chunk;
+ nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
if (ret >= 0) {
if (s->cow_bitmap) {
@@ -157,112 +161,94 @@ static void mirror_read_complete(void *opaque, int ret)
mirror_write_complete, op);
}
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static inline void mirror_clip_sectors(MirrorBlockJob *s,
+ int64_t sector_num,
+ int *nb_sectors)
{
- BlockDriverState *source = s->common.bs;
- int nb_sectors, sectors_per_chunk, nb_chunks;
- int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
- uint64_t delay_ns = 0;
- MirrorOp *op;
- int pnum;
- int64_t ret;
-
- s->sector_num = hbitmap_iter_next(&s->hbi);
- if (s->sector_num < 0) {
- bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
- s->sector_num = hbitmap_iter_next(&s->hbi);
- trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
- assert(s->sector_num >= 0);
- }
-
- hbitmap_next_sector = s->sector_num;
- sector_num = s->sector_num;
- sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- end = s->bdev_length / BDRV_SECTOR_SIZE;
-
- /* Extend the QEMUIOVector to include all adjacent blocks that will
- * be copied in this operation.
- *
- * We have to do this if we have no backing file yet in the destination,
- * and the cluster size is very large. Then we need to do COW ourselves.
- * The first time a cluster is copied, copy it entirely. Note that,
- * because both the granularity and the cluster size are powers of two,
- * the number of sectors to copy cannot exceed one cluster.
- *
- * We also want to extend the QEMUIOVector to include more adjacent
- * dirty blocks if possible, to limit the number of I/O operations and
- * run efficiently even with a small granularity.
- */
- nb_chunks = 0;
- nb_sectors = 0;
- next_sector = sector_num;
- next_chunk = sector_num / sectors_per_chunk;
+ *nb_sectors = MIN(*nb_sectors,
+ s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
+}
- /* Wait for I/O to this cluster (from a previous iteration) to be done. */
- while (test_bit(next_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
+/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
+ * return the offset of the adjusted tail sector against original. */
+static int mirror_cow_align(MirrorBlockJob *s,
+ int64_t *sector_num,
+ int *nb_sectors)
+{
+ bool need_cow;
+ int ret = 0;
+ int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
+ int64_t align_sector_num = *sector_num;
+ int align_nb_sectors = *nb_sectors;
+ int max_sectors = chunk_sectors * s->max_iov;
+
+ need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
+ need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
+ s->cow_bitmap);
+ if (need_cow) {
+ bdrv_round_to_clusters(s->target, *sector_num, *nb_sectors,
+ &align_sector_num, &align_nb_sectors);
+ }
+
+ if (align_nb_sectors > max_sectors) {
+ align_nb_sectors = max_sectors;
+ if (need_cow) {
+ align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
+ s->target_cluster_sectors);
+ }
}
+ /* Clipping may leave align_nb_sectors unaligned to the chunk boundary, but
+ * that doesn't matter because it is already the end of the source image. */
+ mirror_clip_sectors(s, align_sector_num, &align_nb_sectors);
- do {
- int added_sectors, added_chunks;
-
- if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
- test_bit(next_chunk, s->in_flight_bitmap)) {
- assert(nb_sectors > 0);
- break;
- }
+ ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
+ *sector_num = align_sector_num;
+ *nb_sectors = align_nb_sectors;
+ assert(ret >= 0);
+ return ret;
+}
- added_sectors = sectors_per_chunk;
- if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
- bdrv_round_to_clusters(s->target,
- next_sector, added_sectors,
- &next_sector, &added_sectors);
+static inline void mirror_wait_for_io(MirrorBlockJob *s)
+{
+ assert(!s->waiting_for_io);
+ s->waiting_for_io = true;
+ qemu_coroutine_yield();
+ s->waiting_for_io = false;
+}
- /* On the first iteration, the rounding may make us copy
- * sectors before the first dirty one.
- */
- if (next_sector < sector_num) {
- assert(nb_sectors == 0);
- sector_num = next_sector;
- next_chunk = next_sector / sectors_per_chunk;
- }
- }
+/* Submit async read while handling COW.
+ * Returns: nb_sectors if no alignment is necessary, or
+ * (new_end - sector_num) if tail is rounded up or down due to
+ * alignment or buffer limit.
+ */
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+ int nb_sectors)
+{
+ BlockDriverState *source = s->common.bs;
+ int sectors_per_chunk, nb_chunks;
+ int ret = nb_sectors;
+ MirrorOp *op;
- added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
- added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
+ sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- /* When doing COW, it may happen that there is not enough space for
- * a full cluster. Wait if that is the case.
- */
- while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
- trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
- }
- if (s->buf_free_count < nb_chunks + added_chunks) {
- trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
- break;
- }
- if (IOV_MAX < nb_chunks + added_chunks) {
- trace_mirror_break_iov_max(s, nb_chunks, added_chunks);
- break;
- }
+ /* We can only handle as much as buf_size at a time. */
+ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+ assert(nb_sectors);
- /* We have enough free space to copy these sectors. */
- bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
+ if (s->cow_bitmap) {
+ ret += mirror_cow_align(s, &sector_num, &nb_sectors);
+ }
+ assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
+ /* The sector range must meet granularity because:
+ * 1) Caller passes in aligned values;
+ * 2) mirror_cow_align is used only when target cluster is larger. */
+ assert(!(sector_num % sectors_per_chunk));
+ nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
- nb_sectors += added_sectors;
- nb_chunks += added_chunks;
- next_sector += added_sectors;
- next_chunk += added_chunks;
- if (!s->synced && s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
- }
- } while (delay_ns == 0 && next_sector < end);
+ while (s->buf_free_count < nb_chunks) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ mirror_wait_for_io(s);
+ }
/* Allocate a MirrorOp that is used as an AIO callback. */
op = g_new(MirrorOp, 1);
@@ -274,47 +260,158 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
- next_sector = sector_num;
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
- size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+ size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-
- /* Advance the HBitmapIter in parallel, so that we do not examine
- * the same sector twice.
- */
- if (next_sector > hbitmap_next_sector
- && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
- hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
- }
-
- next_sector += sectors_per_chunk;
}
- bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num, nb_sectors);
-
/* Copy the dirty cluster. */
s->in_flight++;
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
- ret = bdrv_get_block_status_above(source, NULL, sector_num,
- nb_sectors, &pnum);
- if (ret < 0 || pnum < nb_sectors ||
- (ret & BDRV_BLOCK_DATA && !(ret & BDRV_BLOCK_ZERO))) {
- bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
- mirror_read_complete, op);
- } else if (ret & BDRV_BLOCK_ZERO) {
+ bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+ mirror_read_complete, op);
+ return ret;
+}
+
+static void mirror_do_zero_or_discard(MirrorBlockJob *s,
+ int64_t sector_num,
+ int nb_sectors,
+ bool is_discard)
+{
+ MirrorOp *op;
+
+ /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+ * so the freeing in mirror_iteration_done is a no-op. */
+ op = g_new0(MirrorOp, 1);
+ op->s = s;
+ op->sector_num = sector_num;
+ op->nb_sectors = nb_sectors;
+
+ s->in_flight++;
+ s->sectors_in_flight += nb_sectors;
+ if (is_discard) {
+ bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
+ mirror_write_complete, op);
+ } else {
bdrv_aio_write_zeroes(s->target, sector_num, op->nb_sectors,
s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
mirror_write_complete, op);
- } else {
- assert(!(ret & BDRV_BLOCK_DATA));
- bdrv_aio_discard(s->target, sector_num, op->nb_sectors,
- mirror_write_complete, op);
+ }
+}
+
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+ BlockDriverState *source = s->common.bs;
+ int64_t sector_num, first_chunk;
+ uint64_t delay_ns = 0;
+ /* At least the first dirty chunk is mirrored in one iteration. */
+ int nb_chunks = 1;
+ int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
+ int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+
+ sector_num = hbitmap_iter_next(&s->hbi);
+ if (sector_num < 0) {
+ bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+ sector_num = hbitmap_iter_next(&s->hbi);
+ trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+ assert(sector_num >= 0);
+ }
+
+ first_chunk = sector_num / sectors_per_chunk;
+ while (test_bit(first_chunk, s->in_flight_bitmap)) {
+ trace_mirror_yield_in_flight(s, first_chunk, s->in_flight);
+ mirror_wait_for_io(s);
+ }
+
+ /* Find the number of consecutive dirty chunks following the first dirty
+ * one, and wait for in-flight requests in them. */
+ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+ int64_t hbitmap_next;
+ int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
+ int64_t next_chunk = next_sector / sectors_per_chunk;
+ if (next_sector >= end ||
+ !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+ break;
+ }
+ if (test_bit(next_chunk, s->in_flight_bitmap)) {
+ break;
+ }
+
+ hbitmap_next = hbitmap_iter_next(&s->hbi);
+ if (hbitmap_next > next_sector || hbitmap_next < 0) {
+ /* The bitmap iterator's cache is stale, refresh it */
+ bdrv_set_dirty_iter(&s->hbi, next_sector);
+ hbitmap_next = hbitmap_iter_next(&s->hbi);
+ }
+ assert(hbitmap_next == next_sector);
+ nb_chunks++;
+ }
+
+ /* Clear dirty bits before querying the block status, because
+ * calling bdrv_get_block_status_above could yield - if some blocks are
+ * marked dirty in this window, we need to know.
+ */
+ bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+ nb_chunks * sectors_per_chunk);
+ bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
+ while (nb_chunks > 0 && sector_num < end) {
+ int ret;
+ int io_sectors;
+ BlockDriverState *file;
+ enum MirrorMethod {
+ MIRROR_METHOD_COPY,
+ MIRROR_METHOD_ZERO,
+ MIRROR_METHOD_DISCARD
+ } mirror_method = MIRROR_METHOD_COPY;
+
+ assert(!(sector_num % sectors_per_chunk));
+ ret = bdrv_get_block_status_above(source, NULL, sector_num,
+ nb_chunks * sectors_per_chunk,
+ &io_sectors, &file);
+ if (ret < 0) {
+ io_sectors = nb_chunks * sectors_per_chunk;
+ }
+
+ io_sectors -= io_sectors % sectors_per_chunk;
+ if (io_sectors < sectors_per_chunk) {
+ io_sectors = sectors_per_chunk;
+ } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
+ int64_t target_sector_num;
+ int target_nb_sectors;
+ bdrv_round_to_clusters(s->target, sector_num, io_sectors,
+ &target_sector_num, &target_nb_sectors);
+ if (target_sector_num == sector_num &&
+ target_nb_sectors == io_sectors) {
+ mirror_method = ret & BDRV_BLOCK_ZERO ?
+ MIRROR_METHOD_ZERO :
+ MIRROR_METHOD_DISCARD;
+ }
+ }
+
+ mirror_clip_sectors(s, sector_num, &io_sectors);
+ switch (mirror_method) {
+ case MIRROR_METHOD_COPY:
+ io_sectors = mirror_do_read(s, sector_num, io_sectors);
+ break;
+ case MIRROR_METHOD_ZERO:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors, false);
+ break;
+ case MIRROR_METHOD_DISCARD:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors, true);
+ break;
+ default:
+ abort();
+ }
+ assert(io_sectors);
+ sector_num += io_sectors;
+ nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
+ delay_ns += ratelimit_calculate_delay(&s->limit, io_sectors);
}
return delay_ns;
}
@@ -339,9 +436,7 @@ static void mirror_free_init(MirrorBlockJob *s)
static void mirror_drain(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
+ mirror_wait_for_io(s);
}
}
@@ -370,11 +465,22 @@ static void mirror_exit(BlockJob *job, void *opaque)
if (s->to_replace) {
to_replace = s->to_replace;
}
+
+ /* This was checked in mirror_start_job(), but meanwhile one of the
+ * nodes could have been newly attached to a BlockBackend. */
+ if (to_replace->blk && s->target->blk) {
+ error_report("block job: Can't create node with two BlockBackends");
+ data->ret = -EINVAL;
+ goto out;
+ }
+
if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
}
bdrv_replace_in_backing_chain(to_replace, s->target);
}
+
+out:
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
error_free(s->replace_blocker);
@@ -389,6 +495,9 @@ static void mirror_exit(BlockJob *job, void *opaque)
block_job_completed(&s->common, data->ret);
g_free(data);
bdrv_drained_end(src);
+ if (qemu_get_aio_context() == bdrv_get_aio_context(src)) {
+ aio_enable_external(iohandler_get_aio_context());
+ }
bdrv_unref(src);
}
@@ -404,6 +513,7 @@ static void coroutine_fn mirror_run(void *opaque)
checking for a NULL string */
int ret = 0;
int n;
+ int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
@@ -433,16 +543,16 @@ static void coroutine_fn mirror_run(void *opaque)
*/
bdrv_get_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
- if (backing_filename[0] && !s->target->backing) {
- ret = bdrv_get_info(s->target, &bdi);
- if (ret < 0) {
- goto immediate_exit;
- }
- if (s->granularity < bdi.cluster_size) {
- s->buf_size = MAX(s->buf_size, bdi.cluster_size);
- s->cow_bitmap = bitmap_new(length);
- }
+ if (!bdrv_get_info(s->target, &bdi) && bdi.cluster_size) {
+ target_cluster_size = bdi.cluster_size;
+ }
+ if (backing_filename[0] && !s->target->backing
+ && s->granularity < target_cluster_size) {
+ s->buf_size = MAX(s->buf_size, target_cluster_size);
+ s->cow_bitmap = bitmap_new(length);
}
+ s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
+ s->max_iov = MIN(s->common.bs->bl.max_iov, s->target->bl.max_iov);
end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
@@ -517,9 +627,7 @@ static void coroutine_fn mirror_run(void *opaque)
if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
- s->waiting_for_io = true;
- qemu_coroutine_yield();
- s->waiting_for_io = false;
+ mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
@@ -562,7 +670,7 @@ static void coroutine_fn mirror_run(void *opaque)
* mirror_populate runs.
*/
trace_mirror_before_drain(s, cnt);
- bdrv_drain(bs);
+ bdrv_co_drain(bs);
cnt = bdrv_get_dirty_count(s->dirty_bitmap);
}
@@ -611,6 +719,12 @@ immediate_exit:
/* Before we switch to target in mirror_exit, make sure data doesn't
* change. */
bdrv_drained_begin(s->common.bs);
+ if (qemu_get_aio_context() == bdrv_get_aio_context(bs)) {
+ /* FIXME: virtio host notifiers run on iohandler_ctx, therefore the
+ * above bdrv_drained_begin isn't enough to quiesce it. This is ugly, we
+ * need a block layer API change to achieve this. */
+ aio_disable_external(iohandler_get_aio_context());
+ }
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
@@ -640,7 +754,7 @@ static void mirror_complete(BlockJob *job, Error **errp)
Error *local_err = NULL;
int ret;
- ret = bdrv_open_backing_file(s->target, NULL, &local_err);
+ ret = bdrv_open_backing_file(s->target, NULL, "backing", &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return;
@@ -705,6 +819,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
bool is_none_mode, BlockDriverState *base)
{
MirrorBlockJob *s;
+ BlockDriverState *replaced_bs;
if (granularity == 0) {
granularity = bdrv_get_default_bitmap_granularity(target);
@@ -728,6 +843,21 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
buf_size = DEFAULT_MIRROR_BUF_SIZE;
}
+ /* We can't support this case as long as the block layer can't handle
+ * multiple BlockBackends per BlockDriverState. */
+ if (replaces) {
+ replaced_bs = bdrv_lookup_bs(replaces, replaces, errp);
+ if (replaced_bs == NULL) {
+ return;
+ }
+ } else {
+ replaced_bs = bs;
+ }
+ if (replaced_bs->blk && target->blk) {
+ error_setg(errp, "Can't create node with two BlockBackends");
+ return;
+ }
+
s = block_job_create(driver, bs, speed, cb, opaque, errp);
if (!s) {
return;
@@ -752,7 +882,6 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
bdrv_op_block_all(s->target, s->common.blocker);
- bdrv_set_enable_write_cache(s->target, true);
if (s->target->blk) {
blk_set_on_error(s->target->blk, on_target_error, on_target_error);
blk_iostatus_enable(s->target->blk);
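
Note: the refactored mirror code above splits the old monolithic mirror_iteration() into helpers, and the clipping plus copy-on-write alignment arithmetic is easy to model in isolation. The standalone sketch below assumes sector-granularity values and a power-of-two cluster size; clip_sectors() and cow_align() follow the intent of mirror_clip_sectors()/mirror_cow_align() but are simplified stand-ins, not the QEMU functions.

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    /* Clip a request so it does not run past the end of the device. */
    static void clip_sectors(int64_t dev_sectors, int64_t sector_num, int *nb_sectors)
    {
        *nb_sectors = MIN(*nb_sectors, (int)(dev_sectors - sector_num));
    }

    /* Round [sector_num, sector_num + nb_sectors) out to cluster boundaries,
     * as done when the target needs copy-on-write of whole clusters. */
    static void cow_align(int cluster_sectors, int64_t *sector_num, int *nb_sectors)
    {
        int64_t start = *sector_num - (*sector_num % cluster_sectors);
        int64_t end = *sector_num + *nb_sectors;

        end = ((end + cluster_sectors - 1) / cluster_sectors) * cluster_sectors;
        *nb_sectors = end - start;
        *sector_num = start;
    }

    int main(void)
    {
        int64_t sector_num = 10;
        int nb_sectors = 3;

        cow_align(8, &sector_num, &nb_sectors);    /* -> [8, 16) */
        clip_sectors(14, sector_num, &nb_sectors); /* device ends at 14 -> 6 */
        printf("aligned: start=%lld count=%d\n", (long long)sector_num, nb_sectors);
        assert(sector_num == 8 && nb_sectors == 6);
        return 0;
    }

The tail clipping is exactly why the comment in mirror_cow_align() tolerates an unaligned end: past end-of-image there is nothing left to copy.
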
diff --git a/block/nbd-client.c b/block/nbd-client.c
index b7fd17a115..878e879ace 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -26,8 +26,8 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "nbd-client.h"
-#include "qemu/sockets.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
@@ -47,13 +47,21 @@ static void nbd_teardown_connection(BlockDriverState *bs)
{
NbdClientSession *client = nbd_get_client_session(bs);
+ if (!client->ioc) { /* Already closed */
+ return;
+ }
+
/* finish any pending coroutines */
- shutdown(client->sock, 2);
+ qio_channel_shutdown(client->ioc,
+ QIO_CHANNEL_SHUTDOWN_BOTH,
+ NULL);
nbd_recv_coroutines_enter_all(client);
nbd_client_detach_aio_context(bs);
- closesocket(client->sock);
- client->sock = -1;
+ object_unref(OBJECT(client->sioc));
+ client->sioc = NULL;
+ object_unref(OBJECT(client->ioc));
+ client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
@@ -63,12 +71,16 @@ static void nbd_reply_ready(void *opaque)
uint64_t i;
int ret;
+ if (!s->ioc) { /* Already closed */
+ return;
+ }
+
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
- ret = nbd_receive_reply(s->sock, &s->reply);
+ ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
@@ -119,32 +131,35 @@ static int nbd_co_send_request(BlockDriverState *bs,
}
}
+ g_assert(qemu_in_coroutine());
assert(i < MAX_NBD_REQUESTS);
request->handle = INDEX_TO_HANDLE(s, i);
+
+ if (!s->ioc) {
+ qemu_co_mutex_unlock(&s->send_mutex);
+ return -EPIPE;
+ }
+
s->send_coroutine = qemu_coroutine_self();
aio_context = bdrv_get_aio_context(bs);
- aio_set_fd_handler(aio_context, s->sock, false,
+ aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, nbd_restart_write, bs);
if (qiov) {
- if (!s->is_unix) {
- socket_set_cork(s->sock, 1);
- }
- rc = nbd_send_request(s->sock, request);
+ qio_channel_set_cork(s->ioc, true);
+ rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
- ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
- offset, request->len);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
+ offset, request->len, 0);
if (ret != request->len) {
rc = -EIO;
}
}
- if (!s->is_unix) {
- socket_set_cork(s->sock, 0);
- }
+ qio_channel_set_cork(s->ioc, false);
} else {
- rc = nbd_send_request(s->sock, request);
+ rc = nbd_send_request(s->ioc, request);
}
- aio_set_fd_handler(aio_context, s->sock, false,
+ aio_set_fd_handler(aio_context, s->sioc->fd, false,
nbd_reply_ready, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
@@ -161,12 +176,13 @@ static void nbd_co_receive_reply(NbdClientSession *s,
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
- if (reply->handle != request->handle) {
+ if (reply->handle != request->handle ||
+ !s->ioc) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
- ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
- offset, request->len);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov,
+ offset, request->len, 1);
if (ret != request->len) {
reply->error = EIO;
}
@@ -227,15 +243,15 @@ static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov,
- int offset)
+ int offset, int *flags)
{
NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_WRITE };
struct nbd_reply reply;
ssize_t ret;
- if (!bdrv_enable_write_cache(bs) &&
- (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+ if ((*flags & BDRV_REQ_FUA) && (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+ *flags &= ~BDRV_REQ_FUA;
request.type |= NBD_CMD_FLAG_FUA;
}
@@ -275,12 +291,13 @@ int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
}
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+ int nb_sectors, QEMUIOVector *qiov, int *flags)
{
int offset = 0;
int ret;
while (nb_sectors > NBD_MAX_SECTORS) {
- ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
+ ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset,
+ flags);
if (ret < 0) {
return ret;
}
@@ -288,7 +305,7 @@ int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
sector_num += NBD_MAX_SECTORS;
nb_sectors -= NBD_MAX_SECTORS;
}
- return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
+ return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset, flags);
}
int nbd_client_co_flush(BlockDriverState *bs)
@@ -302,10 +319,6 @@ int nbd_client_co_flush(BlockDriverState *bs)
return 0;
}
- if (client->nbdflags & NBD_FLAG_SEND_FUA) {
- request.type |= NBD_CMD_FLAG_FUA;
- }
-
request.from = 0;
request.len = 0;
@@ -349,14 +362,14 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
aio_set_fd_handler(bdrv_get_aio_context(bs),
- nbd_get_client_session(bs)->sock,
+ nbd_get_client_session(bs)->sioc->fd,
false, NULL, NULL, NULL);
}
void nbd_client_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
- aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
+ aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
false, nbd_reply_ready, NULL, bs);
}
@@ -369,16 +382,20 @@ void nbd_client_close(BlockDriverState *bs)
.len = 0
};
- if (client->sock == -1) {
+ if (client->ioc == NULL) {
return;
}
- nbd_send_request(client->sock, &request);
+ nbd_send_request(client->ioc, &request);
nbd_teardown_connection(bs);
}
-int nbd_client_init(BlockDriverState *bs, int sock, const char *export,
+int nbd_client_init(BlockDriverState *bs,
+ QIOChannelSocket *sioc,
+ const char *export,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname,
Error **errp)
{
NbdClientSession *client = nbd_get_client_session(bs);
@@ -386,22 +403,32 @@ int nbd_client_init(BlockDriverState *bs, int sock, const char *export,
/* NBD handshake */
logout("session init %s\n", export);
- qemu_set_block(sock);
- ret = nbd_receive_negotiate(sock, export,
- &client->nbdflags, &client->size, errp);
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
+
+ ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
+ &client->nbdflags,
+ tlscreds, hostname,
+ &client->ioc,
+ &client->size, errp);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
- closesocket(sock);
return ret;
}
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
- client->sock = sock;
+ client->sioc = sioc;
+ object_ref(OBJECT(client->sioc));
+
+ if (!client->ioc) {
+ client->ioc = QIO_CHANNEL(sioc);
+ object_ref(OBJECT(client->ioc));
+ }
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
- qemu_set_nonblock(sock);
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
+
nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");
diff --git a/block/nbd-client.h b/block/nbd-client.h
index e8413408b5..bc7aec0795 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -4,6 +4,7 @@
#include "qemu-common.h"
#include "block/nbd.h"
#include "block/block_int.h"
+#include "io/channel-socket.h"
/* #define DEBUG_NBD */
@@ -17,7 +18,8 @@
#define MAX_NBD_REQUESTS 16
typedef struct NbdClientSession {
- int sock;
+ QIOChannelSocket *sioc; /* The master data channel */
+ QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
uint32_t nbdflags;
off_t size;
@@ -34,7 +36,11 @@ typedef struct NbdClientSession {
NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
-int nbd_client_init(BlockDriverState *bs, int sock, const char *export_name,
+int nbd_client_init(BlockDriverState *bs,
+ QIOChannelSocket *sock,
+ const char *export_name,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname,
Error **errp);
void nbd_client_close(BlockDriverState *bs);
@@ -42,7 +48,7 @@ int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors);
int nbd_client_co_flush(BlockDriverState *bs);
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
+ int nb_sectors, QEMUIOVector *qiov, int *flags);
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
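
Note: the client now threads a flags pointer through the write path. When the caller requests FUA and the server advertised NBD_FLAG_SEND_FUA, the request carries NBD_CMD_FLAG_FUA and the BDRV_REQ_FUA bit is cleared so the caller knows no fallback flush is needed. A standalone model of that hand-off follows, using locally defined flag constants rather than the real QEMU/NBD values.

    #include <stdio.h>

    /* Local stand-ins for the real flag values. */
    enum { REQ_FUA = 1 << 0 };          /* caller-side request flag */
    enum { SERVER_SEND_FUA = 1 << 1 };  /* advertised by the server */
    enum { CMD_FLAG_FUA = 1 << 16 };    /* per-command wire flag */

    /* Decide what goes on the wire; clear the caller flag if we handled it. */
    static unsigned build_write_cmd(unsigned server_flags, int *req_flags)
    {
        unsigned cmd = 0; /* base WRITE command */

        if ((*req_flags & REQ_FUA) && (server_flags & SERVER_SEND_FUA)) {
            *req_flags &= ~REQ_FUA;
            cmd |= CMD_FLAG_FUA;
        }
        return cmd;
    }

    int main(void)
    {
        int flags = REQ_FUA;
        unsigned cmd = build_write_cmd(SERVER_SEND_FUA, &flags);

        printf("cmd has FUA: %s, caller still needs flush: %s\n",
               (cmd & CMD_FLAG_FUA) ? "yes" : "no",
               (flags & REQ_FUA) ? "yes" : "no");
        return 0;
    }
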
diff --git a/block/nbd.c b/block/nbd.c
index cd6a587776..f7ea3b3608 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -26,18 +26,17 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/nbd-client.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
#include "block/block_int.h"
#include "qemu/module.h"
-#include "qemu/sockets.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
-
-#include <sys/types.h>
-#include <unistd.h>
+#include "qemu/cutils.h"
#define EN_OPTSTR ":exportname="
@@ -206,18 +205,20 @@ static SocketAddress *nbd_config(BDRVNBDState *s, QDict *options, char **export,
saddr = g_new0(SocketAddress, 1);
if (qdict_haskey(options, "path")) {
+ UnixSocketAddress *q_unix;
saddr->type = SOCKET_ADDRESS_KIND_UNIX;
- saddr->u.q_unix = g_new0(UnixSocketAddress, 1);
- saddr->u.q_unix->path = g_strdup(qdict_get_str(options, "path"));
+ q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+ q_unix->path = g_strdup(qdict_get_str(options, "path"));
qdict_del(options, "path");
} else {
+ InetSocketAddress *inet;
saddr->type = SOCKET_ADDRESS_KIND_INET;
- saddr->u.inet = g_new0(InetSocketAddress, 1);
- saddr->u.inet->host = g_strdup(qdict_get_str(options, "host"));
+ inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
+ inet->host = g_strdup(qdict_get_str(options, "host"));
if (!qdict_get_try_str(options, "port")) {
- saddr->u.inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
+ inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
} else {
- saddr->u.inet->port = g_strdup(qdict_get_str(options, "port"));
+ inet->port = g_strdup(qdict_get_str(options, "port"));
}
qdict_del(options, "host");
qdict_del(options, "port");
@@ -239,55 +240,113 @@ NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
return &s->client;
}
-static int nbd_establish_connection(BlockDriverState *bs,
- SocketAddress *saddr,
- Error **errp)
+static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+ Error **errp)
{
- BDRVNBDState *s = bs->opaque;
- int sock;
+ QIOChannelSocket *sioc;
+ Error *local_err = NULL;
- sock = socket_connect(saddr, errp, NULL, NULL);
+ sioc = qio_channel_socket_new();
- if (sock < 0) {
- logout("Failed to establish connection to NBD server\n");
- return -EIO;
+ qio_channel_socket_connect_sync(sioc,
+ saddr,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return NULL;
}
- if (!s->client.is_unix) {
- socket_set_nodelay(sock);
+ qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+
+ return sioc;
+}
+
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
+{
+ Object *obj;
+ QCryptoTLSCreds *creds;
+
+ obj = object_resolve_path_component(
+ object_get_objects_root(), id);
+ if (!obj) {
+ error_setg(errp, "No TLS credentials with id '%s'",
+ id);
+ return NULL;
+ }
+ creds = (QCryptoTLSCreds *)
+ object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+ if (!creds) {
+ error_setg(errp, "Object with id '%s' is not TLS credentials",
+ id);
+ return NULL;
}
- return sock;
+ if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+ error_setg(errp,
+ "Expecting TLS credentials with a client endpoint");
+ return NULL;
+ }
+ object_ref(obj);
+ return creds;
}
+
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVNBDState *s = bs->opaque;
char *export = NULL;
- int result, sock;
+ QIOChannelSocket *sioc = NULL;
SocketAddress *saddr;
+ const char *tlscredsid;
+ QCryptoTLSCreds *tlscreds = NULL;
+ const char *hostname = NULL;
+ int ret = -EINVAL;
/* Pop the config into our state object. Exit if invalid. */
saddr = nbd_config(s, options, &export, errp);
if (!saddr) {
- return -EINVAL;
+ goto error;
+ }
+
+ tlscredsid = g_strdup(qdict_get_try_str(options, "tls-creds"));
+ if (tlscredsid) {
+ qdict_del(options, "tls-creds");
+ tlscreds = nbd_get_tls_creds(tlscredsid, errp);
+ if (!tlscreds) {
+ goto error;
+ }
+
+ if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
+ error_setg(errp, "TLS only supported over IP sockets");
+ goto error;
+ }
+ hostname = saddr->u.inet.data->host;
}
/* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
- sock = nbd_establish_connection(bs, saddr, errp);
- qapi_free_SocketAddress(saddr);
- if (sock < 0) {
- g_free(export);
- return sock;
+ sioc = nbd_establish_connection(saddr, errp);
+ if (!sioc) {
+ ret = -ECONNREFUSED;
+ goto error;
}
/* NBD handshake */
- result = nbd_client_init(bs, sock, export, errp);
+ ret = nbd_client_init(bs, sioc, export,
+ tlscreds, hostname, errp);
+ error:
+ if (sioc) {
+ object_unref(OBJECT(sioc));
+ }
+ if (tlscreds) {
+ object_unref(OBJECT(tlscreds));
+ }
+ qapi_free_SocketAddress(saddr);
g_free(export);
- return result;
+ return ret;
}
static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
@@ -296,10 +355,29 @@ static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
return nbd_client_co_readv(bs, sector_num, nb_sectors, qiov);
}
+static int nbd_co_writev_flags(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov, int flags)
+{
+ int ret;
+
+ ret = nbd_client_co_writev(bs, sector_num, nb_sectors, qiov, &flags);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* The flag wasn't sent to the server, so we need to emulate it with an
+ * explicit flush */
+ if (flags & BDRV_REQ_FUA) {
+ ret = nbd_client_co_flush(bs);
+ }
+
+ return ret;
+}
+
static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- return nbd_client_co_writev(bs, sector_num, nb_sectors, qiov);
+ return nbd_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
}
static int nbd_co_flush(BlockDriverState *bs)
@@ -342,13 +420,14 @@ static void nbd_attach_aio_context(BlockDriverState *bs,
nbd_client_attach_aio_context(bs, new_context);
}
-static void nbd_refresh_filename(BlockDriverState *bs)
+static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
{
QDict *opts = qdict_new();
- const char *path = qdict_get_try_str(bs->options, "path");
- const char *host = qdict_get_try_str(bs->options, "host");
- const char *port = qdict_get_try_str(bs->options, "port");
- const char *export = qdict_get_try_str(bs->options, "export");
+ const char *path = qdict_get_try_str(options, "path");
+ const char *host = qdict_get_try_str(options, "host");
+ const char *port = qdict_get_try_str(options, "port");
+ const char *export = qdict_get_try_str(options, "export");
+ const char *tlscreds = qdict_get_try_str(options, "tls-creds");
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
@@ -383,6 +462,9 @@ static void nbd_refresh_filename(BlockDriverState *bs)
if (export) {
qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
}
+ if (tlscreds) {
+ qdict_put_obj(opts, "tls-creds", QOBJECT(qstring_from_str(tlscreds)));
+ }
bs->full_open_options = opts;
}
@@ -395,6 +477,8 @@ static BlockDriver bdrv_nbd = {
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_writev_flags = nbd_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
@@ -413,6 +497,8 @@ static BlockDriver bdrv_nbd_tcp = {
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_writev_flags = nbd_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
@@ -431,6 +517,8 @@ static BlockDriver bdrv_nbd_unix = {
.bdrv_file_open = nbd_open,
.bdrv_co_readv = nbd_co_readv,
.bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_writev_flags = nbd_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
.bdrv_co_discard = nbd_co_discard,
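
Note: nbd_co_writev_flags() above relies on the client clearing BDRV_REQ_FUA when the server handled it; if the bit survives the call, the driver emulates FUA with an explicit flush. Below is a standalone model of that fallback, with do_write()/do_flush() as placeholder callbacks invented for illustration.

    #include <stdio.h>

    enum { REQ_FUA = 1 << 0 };

    /* Placeholder back-ends; a real driver would issue I/O here. */
    static int do_write(int *flags)
    {
        /* Pretend the transport could not take the FUA bit: leave it set. */
        (void)flags;
        return 0;
    }

    static int do_flush(void)
    {
        printf("emulating FUA with an explicit flush\n");
        return 0;
    }

    /* Write, then flush only if the FUA request was not consumed downstream. */
    static int write_with_flags(int flags)
    {
        int ret = do_write(&flags);

        if (ret < 0) {
            return ret;
        }
        if (flags & REQ_FUA) {
            ret = do_flush();
        }
        return ret;
    }

    int main(void)
    {
        return write_with_flags(REQ_FUA);
    }
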
diff --git a/block/nfs.c b/block/nfs.c
index fd79f89945..9f51cc3f10 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -22,20 +22,23 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/uri.h"
+#include "qemu/cutils.h"
#include "sysemu/sysemu.h"
#include <nfsc/libnfs.h>
#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
+#define QEMU_NFS_MAX_DEBUG_LEVEL 2
typedef struct NFSClient {
struct nfs_context *context;
@@ -334,6 +337,17 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
}
nfs_set_readahead(client->context, val);
#endif
+#ifdef LIBNFS_FEATURE_DEBUG
+ } else if (!strcmp(qp->p[i].name, "debug")) {
+ /* limit the maximum debug level to avoid potential flooding
+ * of our log files. */
+ if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
+ error_report("NFS Warning: Limiting NFS debug level"
+ " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+ val = QEMU_NFS_MAX_DEBUG_LEVEL;
+ }
+ nfs_set_debug(client->context, val);
+#endif
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
qp->p[i].name);
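
Note: the nfs change above caps a URL-supplied debug level before handing it to libnfs so a mistyped or hostile value cannot flood the logs. The clamp itself is trivial; in the sketch below MAX_DEBUG_LEVEL is a stand-in for QEMU_NFS_MAX_DEBUG_LEVEL.

    #include <stdio.h>

    #define MAX_DEBUG_LEVEL 2   /* stand-in for QEMU_NFS_MAX_DEBUG_LEVEL */

    /* Clamp a user-supplied debug level and warn when it was reduced. */
    static int clamp_debug_level(long requested)
    {
        if (requested > MAX_DEBUG_LEVEL) {
            fprintf(stderr, "warning: limiting debug level to %d\n", MAX_DEBUG_LEVEL);
            return MAX_DEBUG_LEVEL;
        }
        return (int)requested;
    }

    int main(void)
    {
        printf("effective level: %d\n", clamp_debug_level(16));
        return 0;
    }
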
diff --git a/block/null.c b/block/null.c
index 7d083233fb..396500babd 100644
--- a/block/null.c
+++ b/block/null.c
@@ -10,13 +10,17 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#define NULL_OPT_LATENCY "latency-ns"
+#define NULL_OPT_ZEROES "read-zeroes"
typedef struct {
int64_t length;
int64_t latency_ns;
+ bool read_zeroes;
} BDRVNullState;
static QemuOptsList runtime_opts = {
@@ -39,6 +43,11 @@ static QemuOptsList runtime_opts = {
.help = "nanoseconds (approximated) to wait "
"before completing request",
},
+ {
+ .name = NULL_OPT_ZEROES,
+ .type = QEMU_OPT_BOOL,
+ .help = "return zeroes when read",
+ },
{ /* end of list */ }
},
};
@@ -60,6 +69,7 @@ static int null_file_open(BlockDriverState *bs, QDict *options, int flags,
error_setg(errp, "latency-ns is invalid");
ret = -EINVAL;
}
+ s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
qemu_opts_del(opts);
return ret;
}
@@ -89,6 +99,12 @@ static coroutine_fn int null_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
{
+ BDRVNullState *s = bs->opaque;
+
+ if (s->read_zeroes) {
+ qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
return null_co_common(bs);
}
@@ -158,6 +174,12 @@ static BlockAIOCB *null_aio_readv(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
+ BDRVNullState *s = bs->opaque;
+
+ if (s->read_zeroes) {
+ qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
return null_aio_common(bs, cb, opaque);
}
@@ -183,6 +205,24 @@ static int null_reopen_prepare(BDRVReopenState *reopen_state,
return 0;
}
+static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
+{
+ BDRVNullState *s = bs->opaque;
+ off_t start = sector_num * BDRV_SECTOR_SIZE;
+
+ *pnum = nb_sectors;
+ *file = bs;
+
+ if (s->read_zeroes) {
+ return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
+ } else {
+ return BDRV_BLOCK_OFFSET_VALID | start;
+ }
+}
+
static BlockDriver bdrv_null_co = {
.format_name = "null-co",
.protocol_name = "null-co",
@@ -196,6 +236,8 @@ static BlockDriver bdrv_null_co = {
.bdrv_co_writev = null_co_writev,
.bdrv_co_flush_to_disk = null_co_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
+
+ .bdrv_co_get_block_status = null_co_get_block_status,
};
static BlockDriver bdrv_null_aio = {
@@ -211,6 +253,8 @@ static BlockDriver bdrv_null_aio = {
.bdrv_aio_writev = null_aio_writev,
.bdrv_aio_flush = null_aio_flush,
.bdrv_reopen_prepare = null_reopen_prepare,
+
+ .bdrv_co_get_block_status = null_co_get_block_status,
};
static void bdrv_null_init(void)
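
Note: null_co_get_block_status() above packs its result as a bitmask: the byte offset of the sector plus BDRV_BLOCK_OFFSET_VALID, with BDRV_BLOCK_ZERO added when read-zeroes is enabled. The standalone sketch below shows the same encoding with local flag values, not the real BDRV_BLOCK_* constants.

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define SECTOR_SIZE 512
    /* Local stand-ins for the block-status flags; the low bits stay free
     * because the byte offset is always sector aligned. */
    #define STATUS_OFFSET_VALID  0x1
    #define STATUS_ZERO          0x2

    /* Report where the data lives and whether it reads back as zeroes. */
    static int64_t block_status(int64_t sector_num, bool read_zeroes)
    {
        int64_t ret = sector_num * SECTOR_SIZE | STATUS_OFFSET_VALID;

        if (read_zeroes) {
            ret |= STATUS_ZERO;
        }
        return ret;
    }

    int main(void)
    {
        int64_t st = block_status(8, true);
        printf("offset=%" PRId64 " zero=%s\n",
               st & ~(int64_t)(STATUS_OFFSET_VALID | STATUS_ZERO),
               (st & STATUS_ZERO) ? "yes" : "no");
        return 0;
    }
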
diff --git a/block/parallels.c b/block/parallels.c
index f689fdeaff..324ed43ac4 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -27,8 +27,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/bitmap.h"
#include "qapi/util.h"
@@ -61,7 +64,7 @@ typedef struct ParallelsHeader {
typedef enum ParallelsPreallocMode {
PRL_PREALLOC_MODE_FALLOCATE = 0,
PRL_PREALLOC_MODE_TRUNCATE = 1,
- PRL_PREALLOC_MODE_MAX = 2,
+ PRL_PREALLOC_MODE__MAX = 2,
} ParallelsPreallocMode;
static const char *prealloc_mode_lookup[] = {
@@ -260,7 +263,7 @@ static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVParallelsState *s = bs->opaque;
int64_t offset;
@@ -273,6 +276,7 @@ static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
return 0;
}
+ *file = bs->file->bs;
return (offset << BDRV_SECTOR_BITS) |
BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
}
@@ -459,7 +463,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size, cl_size;
uint8_t tmp[BDRV_SECTOR_SIZE];
Error *local_err = NULL;
- BlockDriverState *file;
+ BlockBackend *file;
uint32_t bat_entries, bat_sectors;
ParallelsHeader header;
int ret;
@@ -475,14 +479,16 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
return ret;
}
- file = NULL;
- ret = bdrv_open(&file, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
- if (ret < 0) {
+ file = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (file == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
- ret = bdrv_truncate(file, 0);
+
+ blk_set_allow_write_beyond_eof(file, true);
+
+ ret = blk_truncate(file, 0);
if (ret < 0) {
goto exit;
}
@@ -506,18 +512,18 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
memset(tmp, 0, sizeof(tmp));
memcpy(tmp, &header, sizeof(header));
- ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
+ ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
- ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0);
+ ret = blk_write_zeroes(file, 1, bat_sectors - 1, 0);
if (ret < 0) {
goto exit;
}
ret = 0;
done:
- bdrv_unref(file);
+ blk_unref(file);
return ret;
exit:
@@ -660,7 +666,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
- PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
+ PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
g_free(buf);
if (local_err != NULL) {
goto fail_options;
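
Note: parallels_create() above now goes through a BlockBackend, but the sequence is unchanged: truncate the file, write one sector of header, then zero the remaining BAT sectors. A plain-stdio sketch of that sequence is below; the header layout and file name are made up for illustration and bear no relation to the real Parallels format.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define SECTOR_SIZE 512

    /* Write one header sector followed by n zeroed metadata sectors. */
    static int create_image(const char *filename, int meta_sectors)
    {
        unsigned char sector[SECTOR_SIZE];
        FILE *f = fopen(filename, "wb");  /* truncates, like blk_truncate(..., 0) */
        int i;

        if (!f) {
            return -1;
        }

        memset(sector, 0, sizeof(sector));
        memcpy(sector, "STUBHDR", 7);     /* made-up magic, not the real header */
        if (fwrite(sector, sizeof(sector), 1, f) != 1) {
            goto fail;
        }

        memset(sector, 0, sizeof(sector));
        for (i = 0; i < meta_sectors; i++) {
            if (fwrite(sector, sizeof(sector), 1, f) != 1) {
                goto fail;
            }
        }
        fclose(f);
        return 0;

    fail:
        fclose(f);
        remove(filename);
        return -1;
    }

    int main(void)
    {
        return create_image("stub.img", 3) == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
    }
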
diff --git a/block/qapi.c b/block/qapi.c
index 267f147fe3..c5f6ba643c 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/qapi.h"
#include "block/block_int.h"
#include "block/throttle-groups.h"
@@ -31,8 +32,10 @@
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
+BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
+ BlockDriverState *bs, Error **errp)
{
ImageInfo **p_image_info;
BlockDriverState *bs0;
@@ -46,7 +49,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->cache = g_new(BlockdevCacheInfo, 1);
*info->cache = (BlockdevCacheInfo) {
- .writeback = bdrv_enable_write_cache(bs),
+ .writeback = blk ? blk_enable_write_cache(blk) : true,
.direct = !!(bs->open_flags & BDRV_O_NOCACHE),
.no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH),
};
@@ -91,6 +94,26 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
+ info->has_bps_max_length = info->has_bps_max;
+ info->bps_max_length =
+ cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
+ info->has_bps_rd_max_length = info->has_bps_rd_max;
+ info->bps_rd_max_length =
+ cfg.buckets[THROTTLE_BPS_READ].burst_length;
+ info->has_bps_wr_max_length = info->has_bps_wr_max;
+ info->bps_wr_max_length =
+ cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
+
+ info->has_iops_max_length = info->has_iops_max;
+ info->iops_max_length =
+ cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
+ info->has_iops_rd_max_length = info->has_iops_rd_max;
+ info->iops_rd_max_length =
+ cfg.buckets[THROTTLE_OPS_READ].burst_length;
+ info->has_iops_wr_max_length = info->has_iops_wr_max;
+ info->iops_wr_max_length =
+ cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
+
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
@@ -210,11 +233,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
Error *err = NULL;
ImageInfo *info;
+ aio_context_acquire(bdrv_get_aio_context(bs));
+
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Can't get size of device '%s'",
bdrv_get_device_name(bs));
- return;
+ goto out;
}
info = g_new0(ImageInfo, 1);
@@ -245,15 +270,18 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->has_backing_filename = true;
bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err);
if (err) {
- error_propagate(errp, err);
- qapi_free_ImageInfo(info);
+ /* Can't reconstruct the full backing filename, so we must omit
+ * this field and make a best effort with the rest of this query. */
g_free(backing_filename2);
- return;
+ backing_filename2 = NULL;
+ error_free(err);
+ err = NULL;
}
- if (strcmp(backing_filename, backing_filename2) != 0) {
- info->full_backing_filename =
- g_strdup(backing_filename2);
+ /* Always report the full_backing_filename if present, even if it's the
+ * same as backing_filename. That they are the same is useful information. */
+ if (backing_filename2) {
+ info->full_backing_filename = g_strdup(backing_filename2);
info->has_full_backing_filename = true;
}
@@ -279,10 +307,13 @@ void bdrv_query_image_info(BlockDriverState *bs,
default:
error_propagate(errp, err);
qapi_free_ImageInfo(info);
- return;
+ goto out;
}
*p_info = info;
+
+out:
+ aio_context_release(bdrv_get_aio_context(bs));
}
/* @p_info will be set only on success. */
@@ -296,7 +327,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
info->locked = blk_dev_is_medium_locked(blk);
info->removable = blk_dev_has_removable_media(blk);
- if (blk_dev_has_removable_media(blk)) {
+ if (blk_dev_has_tray(blk)) {
info->has_tray_open = true;
info->tray_open = blk_dev_is_tray_open(blk);
}
@@ -313,7 +344,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
if (bs && bs->drv) {
info->has_inserted = true;
- info->inserted = bdrv_block_device_info(bs, errp);
+ info->inserted = bdrv_block_device_info(blk, bs, errp);
if (info->inserted == NULL) {
goto err;
}
@@ -326,100 +357,115 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
qapi_free_BlockInfo(info);
}
-static BlockStats *bdrv_query_stats(const BlockDriverState *bs,
- bool query_backing)
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing);
+
+static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
{
- BlockStats *s;
+ BlockAcctStats *stats = blk_get_stats(blk);
+ BlockAcctTimedStats *ts = NULL;
- s = g_malloc0(sizeof(*s));
+ ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+ ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+ ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+ ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
- if (bdrv_get_device_name(bs)[0]) {
- s->has_device = true;
- s->device = g_strdup(bdrv_get_device_name(bs));
- }
+ ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+ ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+ ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
- if (bdrv_get_node_name(bs)[0]) {
- s->has_node_name = true;
- s->node_name = g_strdup(bdrv_get_node_name(bs));
+ ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+ ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+ ds->invalid_flush_operations =
+ stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+ ds->rd_merged = stats->merged[BLOCK_ACCT_READ];
+ ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+ ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+ ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+ ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+ ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+ ds->has_idle_time_ns = stats->last_access_time_ns > 0;
+ if (ds->has_idle_time_ns) {
+ ds->idle_time_ns = block_acct_idle_time_ns(stats);
}
- s->stats = g_malloc0(sizeof(*s->stats));
- if (bs->blk) {
- BlockAcctStats *stats = blk_get_stats(bs->blk);
- BlockAcctTimedStats *ts = NULL;
-
- s->stats->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
- s->stats->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
- s->stats->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
- s->stats->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
-
- s->stats->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
- s->stats->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
- s->stats->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
-
- s->stats->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
- s->stats->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
- s->stats->invalid_flush_operations =
- stats->invalid_ops[BLOCK_ACCT_FLUSH];
-
- s->stats->rd_merged = stats->merged[BLOCK_ACCT_READ];
- s->stats->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
- s->stats->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
- s->stats->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
- s->stats->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
- s->stats->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
-
- s->stats->has_idle_time_ns = stats->last_access_time_ns > 0;
- if (s->stats->has_idle_time_ns) {
- s->stats->idle_time_ns = block_acct_idle_time_ns(stats);
- }
+ ds->account_invalid = stats->account_invalid;
+ ds->account_failed = stats->account_failed;
- s->stats->account_invalid = stats->account_invalid;
- s->stats->account_failed = stats->account_failed;
+ while ((ts = block_acct_interval_next(stats, ts))) {
+ BlockDeviceTimedStatsList *timed_stats =
+ g_malloc0(sizeof(*timed_stats));
+ BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+ timed_stats->next = ds->timed_stats;
+ timed_stats->value = dev_stats;
+ ds->timed_stats = timed_stats;
- while ((ts = block_acct_interval_next(stats, ts))) {
- BlockDeviceTimedStatsList *timed_stats =
- g_malloc0(sizeof(*timed_stats));
- BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
- timed_stats->next = s->stats->timed_stats;
- timed_stats->value = dev_stats;
- s->stats->timed_stats = timed_stats;
+ TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+ TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+ TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
- TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
- TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
- TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+ dev_stats->interval_length = ts->interval_length;
- dev_stats->interval_length = ts->interval_length;
+ dev_stats->min_rd_latency_ns = timed_average_min(rd);
+ dev_stats->max_rd_latency_ns = timed_average_max(rd);
+ dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
- dev_stats->min_rd_latency_ns = timed_average_min(rd);
- dev_stats->max_rd_latency_ns = timed_average_max(rd);
- dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+ dev_stats->min_wr_latency_ns = timed_average_min(wr);
+ dev_stats->max_wr_latency_ns = timed_average_max(wr);
+ dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
- dev_stats->min_wr_latency_ns = timed_average_min(wr);
- dev_stats->max_wr_latency_ns = timed_average_max(wr);
- dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+ dev_stats->min_flush_latency_ns = timed_average_min(fl);
+ dev_stats->max_flush_latency_ns = timed_average_max(fl);
+ dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
- dev_stats->min_flush_latency_ns = timed_average_min(fl);
- dev_stats->max_flush_latency_ns = timed_average_max(fl);
- dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+ dev_stats->avg_rd_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+ dev_stats->avg_wr_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+ }
+}
- dev_stats->avg_rd_queue_depth =
- block_acct_queue_depth(ts, BLOCK_ACCT_READ);
- dev_stats->avg_wr_queue_depth =
- block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
- }
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+ bool query_backing)
+{
+ if (bdrv_get_node_name(bs)[0]) {
+ s->has_node_name = true;
+ s->node_name = g_strdup(bdrv_get_node_name(bs));
}
s->stats->wr_highest_offset = bs->wr_highest_offset;
if (bs->file) {
s->has_parent = true;
- s->parent = bdrv_query_stats(bs->file->bs, query_backing);
+ s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
}
if (query_backing && bs->backing) {
s->has_backing = true;
- s->backing = bdrv_query_stats(bs->backing->bs, query_backing);
+ s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+ }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing)
+{
+ BlockStats *s;
+
+ s = g_malloc0(sizeof(*s));
+ s->stats = g_malloc0(sizeof(*s->stats));
+
+ if (blk) {
+ s->has_device = true;
+ s->device = g_strdup(blk_name(blk));
+ bdrv_query_blk_stats(s->stats, blk);
+ }
+ if (bs) {
+ bdrv_query_bds_stats(s, bs, query_backing);
}
return s;
@@ -448,22 +494,38 @@ BlockInfoList *qmp_query_block(Error **errp)
return head;
}
+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+ bool query_nodes)
+{
+ if (query_nodes) {
+ *bs = bdrv_next_node(*bs);
+ return !!*bs;
+ }
+
+ *blk = blk_next(*blk);
+ *bs = *blk ? blk_bs(*blk) : NULL;
+
+ return !!*blk;
+}
+
BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
bool query_nodes,
Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
+ BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
/* Just to be safe if query_nodes is not always initialized */
query_nodes = has_query_nodes && query_nodes;
- while ((bs = query_nodes ? bdrv_next_node(bs) : bdrv_next(bs))) {
+ while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
- AioContext *ctx = bdrv_get_aio_context(bs);
+ AioContext *ctx = blk ? blk_get_aio_context(blk)
+ : bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
- info->value = bdrv_query_stats(bs, !query_nodes);
+ info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;
@@ -588,11 +650,10 @@ static void dump_qlist(fprintf_function func_fprintf, void *f, int indentation,
int i = 0;
for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
- qtype_code type = qobject_type(entry->value);
+ QType type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
- const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";
-
- func_fprintf(f, format, indentation * 4, "", i);
+ func_fprintf(f, "%*s[%i]:%c", indentation * 4, "", i,
+ composite ? '\n' : ' ');
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
@@ -606,10 +667,9 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
const QDictEntry *entry;
for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
- qtype_code type = qobject_type(entry->value);
+ QType type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
- const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
- char key[strlen(entry->key) + 1];
+ char *key = g_malloc(strlen(entry->key) + 1);
int i;
/* replace dashes with spaces in key (variable) names */
@@ -617,12 +677,13 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
}
key[i] = 0;
-
- func_fprintf(f, format, indentation * 4, "", key);
+ func_fprintf(f, "%*s%s:%c", indentation * 4, "", key,
+ composite ? '\n' : ' ');
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
+ g_free(key);
}
}
@@ -632,7 +693,7 @@ void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
- visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
+ visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), NULL, &info_spec,
&error_abort);
obj = qmp_output_get_qobject(ov);
assert(qobject_type(obj) == QTYPE_QDICT);
@@ -676,7 +737,10 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
if (info->has_backing_filename) {
func_fprintf(f, "backing file: %s", info->backing_filename);
- if (info->has_full_backing_filename) {
+ if (!info->has_full_backing_filename) {
+ func_fprintf(f, " (cannot determine actual path)");
+ } else if (strcmp(info->backing_filename,
+ info->full_backing_filename) != 0) {
func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
}
func_fprintf(f, "\n");
diff --git a/block/qcow.c b/block/qcow.c
index 635085e27b..60ddb12eca 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -21,8 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
+#include "qemu/error-report.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "qapi/qmp/qerror.h"
@@ -119,11 +123,7 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %" PRIu32,
- header.version);
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "qcow", version);
+ error_setg(errp, "Unsupported qcow version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -159,6 +159,14 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
}
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
+ if (bdrv_uses_whitelist() &&
+ s->crypt_method_header == QCOW_CRYPT_AES) {
+ error_report("qcow built-in AES encryption is deprecated");
+ error_printf("Support for it will be removed in a future release.\n"
+ "You can use 'qemu-img convert' to switch to an\n"
+ "unencrypted qcow image, or a LUKS raw image.\n");
+ }
+
bs->encrypted = 1;
}
s->cluster_bits = header.cluster_bits;
@@ -488,7 +496,7 @@ static uint64_t get_cluster_offset(BlockDriverState *bs,
}
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n;
@@ -509,6 +517,7 @@ static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
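For context, an editor's caller sketch (not part of the patch) showing what the new BlockDriverState **file out-parameter buys: when BDRV_BLOCK_OFFSET_VALID is set, the caller learns which node the offset refers to. The wrapper call below is assumed to mirror the driver callback signature changed in this series:

    BlockDriverState *file = NULL;
    int pnum;
    int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors,
                                        &pnum, &file);
    if (ret >= 0 && (ret & BDRV_BLOCK_OFFSET_VALID)) {
        /* (ret & BDRV_BLOCK_OFFSET_MASK) is an offset into 'file', which
         * for an allocated qcow cluster is bs->file->bs */
    }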
@@ -778,7 +787,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
int flags = 0;
Error *local_err = NULL;
int ret;
- BlockDriverState *qcow_bs;
+ BlockBackend *qcow_blk;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -794,15 +803,17 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
goto cleanup;
}
- qcow_bs = NULL;
- ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
- if (ret < 0) {
+ qcow_blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (qcow_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto cleanup;
}
- ret = bdrv_truncate(qcow_bs, 0);
+ blk_set_allow_write_beyond_eof(qcow_blk, true);
+
+ ret = blk_truncate(qcow_blk, 0);
if (ret < 0) {
goto exit;
}
@@ -842,13 +853,13 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header));
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
+ ret = blk_pwrite(qcow_blk, sizeof(header),
backing_file, backing_filename_len);
if (ret != backing_filename_len) {
goto exit;
@@ -858,7 +869,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
tmp = g_malloc0(BDRV_SECTOR_SIZE);
for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
+ ret = blk_pwrite(qcow_blk, header_size +
BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
@@ -869,7 +880,7 @@ static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
g_free(tmp);
ret = 0;
exit:
- bdrv_unref(qcow_bs);
+ blk_unref(qcow_blk);
cleanup:
g_free(backing_file);
return ret;
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 86dd7f2bd9..0fe8edae41 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -23,7 +23,7 @@
*/
/* Needed for CONFIG_MADVISE */
-#include "config-host.h"
+#include "qemu/osdep.h"
#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
#include <sys/mman.h>
@@ -31,7 +31,6 @@
#include "block/block_int.h"
#include "qemu-common.h"
-#include "qemu/osdep.h"
#include "qcow2.h"
#include "trace.h"
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 24a60e2236..31ecc10304 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -22,8 +22,10 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include <zlib.h>
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
@@ -1641,7 +1643,8 @@ fail:
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
int l1_size, int64_t *visited_l1_entries,
int64_t l1_entries,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
BDRVQcow2State *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
@@ -1667,7 +1670,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
/* unallocated */
(*visited_l1_entries)++;
if (status_cb) {
- status_cb(bs, *visited_l1_entries, l1_entries);
+ status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
}
continue;
}
@@ -1804,7 +1807,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
(*visited_l1_entries)++;
if (status_cb) {
- status_cb(bs, *visited_l1_entries, l1_entries);
+ status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
}
}
@@ -1828,7 +1831,8 @@ fail:
* qcow2 version which doesn't yet support metadata zero clusters.
*/
int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL;
@@ -1845,7 +1849,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&visited_l1_entries, l1_entries,
- status_cb);
+ status_cb, cb_opaque);
if (ret < 0) {
goto fail;
}
@@ -1881,7 +1885,7 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
&visited_l1_entries, l1_entries,
- status_cb);
+ status_cb, cb_opaque);
if (ret < 0) {
goto fail;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 820f412ab6..ca6094ff5b 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -22,6 +22,8 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
@@ -1345,6 +1347,9 @@ static int inc_refcounts(BlockDriverState *bs,
if (refcount == s->refcount_max) {
fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
"\n", cluster_offset);
+ fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
+ "width or qemu-img convert to create a clean copy if the "
+ "image cannot be opened for writing\n");
res->corruptions++;
continue;
}
@@ -2467,3 +2472,450 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
return 0;
}
+
+/* A pointer to a function of this type is given to walk_over_reftable(). That
+ * function will create refblocks and pass them to a RefblockFinishOp once they
+ * are completed (@refblock). @refblock_empty is set if the refblock is
+ * completely empty.
+ *
+ * Along with the refblock, a corresponding reftable entry is passed, in the
+ * reftable @reftable (which may be reallocated) at @reftable_index.
+ *
+ * @allocated should be set to true if a new cluster has been allocated.
+ */
+typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty,
+ bool *allocated, Error **errp);
+
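To make the callback contract above concrete, here is a minimal do-nothing RefblockFinishOp sketch (editor's illustration; noop_refblock is an invented name, not part of the patch). It neither grows the reftable nor sets *allocated, which is only required when a cluster is actually allocated:

    static int noop_refblock(BlockDriverState *bs, uint64_t **reftable,
                             uint64_t reftable_index, uint64_t *reftable_size,
                             void *refblock, bool refblock_empty,
                             bool *allocated, Error **errp)
    {
        if (!refblock_empty && reftable_index < *reftable_size) {
            /* a real operation would work with (*reftable)[reftable_index]
             * and the refblock data here */
        }
        return 0; /* a negative errno plus a set errp signals failure */
    }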
+/**
+ * This "operation" for walk_over_reftable() allocates the refblock on disk (if
+ * it is not empty) and inserts its offset into the new reftable. The size of
+ * this new reftable is increased as required.
+ */
+static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+
+ if (!refblock_empty && reftable_index >= *reftable_size) {
+ uint64_t *new_reftable;
+ uint64_t new_reftable_size;
+
+ new_reftable_size = ROUND_UP(reftable_index + 1,
+ s->cluster_size / sizeof(uint64_t));
+ if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
+ error_setg(errp,
+ "This operation would make the refcount table grow "
+ "beyond the maximum size supported by QEMU, aborting");
+ return -ENOTSUP;
+ }
+
+ new_reftable = g_try_realloc(*reftable, new_reftable_size *
+ sizeof(uint64_t));
+ if (!new_reftable) {
+ error_setg(errp, "Failed to increase reftable buffer size");
+ return -ENOMEM;
+ }
+
+ memset(new_reftable + *reftable_size, 0,
+ (new_reftable_size - *reftable_size) * sizeof(uint64_t));
+
+ *reftable = new_reftable;
+ *reftable_size = new_reftable_size;
+ }
+
+ if (!refblock_empty && !(*reftable)[reftable_index]) {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ error_setg_errno(errp, -offset, "Failed to allocate refblock");
+ return offset;
+ }
+ (*reftable)[reftable_index] = offset;
+ *allocated = true;
+ }
+
+ return 0;
+}
+
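The reftable buffer is grown in whole clusters; a worked example of the ROUND_UP arithmetic above, assuming 64 KiB clusters (8192 eight-byte entries per cluster):

    /* Illustrative values only:
     *   reftable_index    = 8192 (first entry of a second reftable cluster)
     *   new_reftable_size = ROUND_UP(8192 + 1, 65536 / 8)
     *                     = ROUND_UP(8193, 8192) = 16384 entries
     * so the in-memory table always covers a whole number of clusters,
     * matching what qcow2_alloc_clusters() will later reserve for it. */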
+/**
+ * This "operation" for walk_over_reftable() writes the refblock to disk at the
+ * offset specified by the new reftable's entry. It does not modify the new
+ * reftable or change any refcounts.
+ */
+static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+ int ret;
+
+ if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
+ offset = (*reftable)[reftable_index];
+
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Overlap check failed");
+ return ret;
+ }
+
+ ret = bdrv_pwrite(bs->file->bs, offset, refblock, s->cluster_size);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to write refblock");
+ return ret;
+ }
+ } else {
+ assert(refblock_empty);
+ }
+
+ return 0;
+}
+
+/**
+ * This function walks over the existing reftable and every referenced refblock;
+ * if @new_set_refcount is non-NULL, it is called for every refcount entry to
+ * create an equal new entry in the passed @new_refblock. Once that
+ * @new_refblock is completely filled, @operation will be called.
+ *
+ * @status_cb and @cb_opaque are used for the amend operation's status callback.
+ * @index is the index of the walk_over_reftable() calls and @total is the total
+ * number of walk_over_reftable() calls per amend operation. Both are used for
+ * calculating the parameters for the status callback.
+ *
+ * @allocated is set to true if a new cluster has been allocated.
+ */
+static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
+ uint64_t *new_reftable_index,
+ uint64_t *new_reftable_size,
+ void *new_refblock, int new_refblock_size,
+ int new_refcount_bits,
+ RefblockFinishOp *operation, bool *allocated,
+ Qcow2SetRefcountFunc *new_set_refcount,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, int index, int total,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t reftable_index;
+ bool new_refblock_empty = true;
+ int refblock_index;
+ int new_refblock_index = 0;
+ int ret;
+
+ for (reftable_index = 0; reftable_index < s->refcount_table_size;
+ reftable_index++)
+ {
+ uint64_t refblock_offset = s->refcount_table[reftable_index]
+ & REFT_OFFSET_MASK;
+
+ status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
+ (uint64_t)total * s->refcount_table_size, cb_opaque);
+
+ if (refblock_offset) {
+ void *refblock;
+
+ if (offset_into_cluster(s, refblock_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
+ PRIx64 " unaligned (reftable index: %#"
+ PRIx64 ")", refblock_offset,
+ reftable_index);
+ error_setg(errp,
+ "Image is corrupt (unaligned refblock offset)");
+ return -EIO;
+ }
+
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
+ &refblock);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to retrieve refblock");
+ return ret;
+ }
+
+ for (refblock_index = 0; refblock_index < s->refcount_block_size;
+ refblock_index++)
+ {
+ uint64_t refcount;
+
+ if (new_refblock_index >= new_refblock_size) {
+ /* new_refblock is now complete */
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock,
+ new_refblock_empty, allocated, errp);
+ if (ret < 0) {
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ new_refblock_index = 0;
+ new_refblock_empty = true;
+ }
+
+ refcount = s->get_refcount(refblock, refblock_index);
+ if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
+ uint64_t offset;
+
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+
+ offset = ((reftable_index << s->refcount_block_bits)
+ + refblock_index) << s->cluster_bits;
+
+ error_setg(errp, "Cannot decrease refcount entry width to "
+ "%i bits: Cluster at offset %#" PRIx64 " has a "
+ "refcount of %" PRIu64, new_refcount_bits,
+ offset, refcount);
+ return -EINVAL;
+ }
+
+ if (new_set_refcount) {
+ new_set_refcount(new_refblock, new_refblock_index++,
+ refcount);
+ } else {
+ new_refblock_index++;
+ }
+ new_refblock_empty = new_refblock_empty && refcount == 0;
+ }
+
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ } else {
+ /* No refblock means every refcount is 0 */
+ for (refblock_index = 0; refblock_index < s->refcount_block_size;
+ refblock_index++)
+ {
+ if (new_refblock_index >= new_refblock_size) {
+ /* new_refblock is now complete */
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock,
+ new_refblock_empty, allocated, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ new_refblock_index = 0;
+ new_refblock_empty = true;
+ }
+
+ if (new_set_refcount) {
+ new_set_refcount(new_refblock, new_refblock_index++, 0);
+ } else {
+ new_refblock_index++;
+ }
+ }
+ }
+ }
+
+ if (new_refblock_index > 0) {
+ /* Complete the potentially existing partially filled final refblock */
+ if (new_set_refcount) {
+ for (; new_refblock_index < new_refblock_size;
+ new_refblock_index++)
+ {
+ new_set_refcount(new_refblock, new_refblock_index, 0);
+ }
+ }
+
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock, new_refblock_empty,
+ allocated, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ }
+
+ status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
+ (uint64_t)total * s->refcount_table_size, cb_opaque);
+
+ return 0;
+}
+
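The two status_cb() calls above map each walk onto a fixed, monotonically increasing range; a short worked example with illustrative numbers:

    /* With s->refcount_table_size == 4 and total == 3:
     *   walk index 0 reports offsets 0,1,2,3  then 4   out of 12
     *   walk index 1 reports offsets 4,5,6,7  then 8   out of 12
     *   walk index 2 reports offsets 8,9,10,11 then 12 out of 12
     * so callers see steady progress even though several passes are made
     * over the same reftable. */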
+int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ Qcow2GetRefcountFunc *new_get_refcount;
+ Qcow2SetRefcountFunc *new_set_refcount;
+ void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
+ uint64_t *new_reftable = NULL, new_reftable_size = 0;
+ uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
+ uint64_t new_reftable_index = 0;
+ uint64_t i;
+ int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
+ int new_refblock_size, new_refcount_bits = 1 << refcount_order;
+ int old_refcount_order;
+ int walk_index = 0;
+ int ret;
+ bool new_allocation;
+
+ assert(s->qcow_version >= 3);
+ assert(refcount_order >= 0 && refcount_order <= 6);
+
+ /* see qcow2_open() */
+ new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));
+
+ new_get_refcount = get_refcount_funcs[refcount_order];
+ new_set_refcount = set_refcount_funcs[refcount_order];
+
+
+ do {
+ int total_walks;
+
+ new_allocation = false;
+
+        /* We always need this walk plus the final walk which writes the
+         * refblocks; in addition, this loop normally runs at least twice
+         * (first to do the allocations, then again to confirm that nothing
+         * further had to be allocated), which makes three walks in total */
+ total_walks = MAX(walk_index + 2, 3);
+
+ /* First, allocate the structures so they are present in the refcount
+ * structures */
+ ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
+ &new_reftable_size, NULL, new_refblock_size,
+ new_refcount_bits, &alloc_refblock,
+ &new_allocation, NULL, status_cb, cb_opaque,
+ walk_index++, total_walks, errp);
+ if (ret < 0) {
+ goto done;
+ }
+
+ new_reftable_index = 0;
+
+ if (new_allocation) {
+ if (new_reftable_offset) {
+ qcow2_free_clusters(bs, new_reftable_offset,
+ allocated_reftable_size * sizeof(uint64_t),
+ QCOW2_DISCARD_NEVER);
+ }
+
+ new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
+ sizeof(uint64_t));
+ if (new_reftable_offset < 0) {
+ error_setg_errno(errp, -new_reftable_offset,
+ "Failed to allocate the new reftable");
+ ret = new_reftable_offset;
+ goto done;
+ }
+ allocated_reftable_size = new_reftable_size;
+ }
+ } while (new_allocation);
+
+ /* Second, write the new refblocks */
+ ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
+ &new_reftable_size, new_refblock,
+ new_refblock_size, new_refcount_bits,
+ &flush_refblock, &new_allocation, new_set_refcount,
+ status_cb, cb_opaque, walk_index, walk_index + 1,
+ errp);
+ if (ret < 0) {
+ goto done;
+ }
+ assert(!new_allocation);
+
+
+ /* Write the new reftable */
+ ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
+ new_reftable_size * sizeof(uint64_t));
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Overlap check failed");
+ goto done;
+ }
+
+ for (i = 0; i < new_reftable_size; i++) {
+ cpu_to_be64s(&new_reftable[i]);
+ }
+
+ ret = bdrv_pwrite(bs->file->bs, new_reftable_offset, new_reftable,
+ new_reftable_size * sizeof(uint64_t));
+
+ for (i = 0; i < new_reftable_size; i++) {
+ be64_to_cpus(&new_reftable[i]);
+ }
+
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to write the new reftable");
+ goto done;
+ }
+
+
+ /* Empty the refcount cache */
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
+ goto done;
+ }
+
+ /* Update the image header to point to the new reftable; this only updates
+ * the fields which are relevant to qcow2_update_header(); other fields
+ * such as s->refcount_table or s->refcount_bits stay stale for now
+ * (because we have to restore everything if qcow2_update_header() fails) */
+ old_refcount_order = s->refcount_order;
+ old_reftable_size = s->refcount_table_size;
+ old_reftable_offset = s->refcount_table_offset;
+
+ s->refcount_order = refcount_order;
+ s->refcount_table_size = new_reftable_size;
+ s->refcount_table_offset = new_reftable_offset;
+
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->refcount_order = old_refcount_order;
+ s->refcount_table_size = old_reftable_size;
+ s->refcount_table_offset = old_reftable_offset;
+ error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
+ goto done;
+ }
+
+ /* Now update the rest of the in-memory information */
+ old_reftable = s->refcount_table;
+ s->refcount_table = new_reftable;
+
+ s->refcount_bits = 1 << refcount_order;
+ s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
+ s->refcount_max += s->refcount_max - 1;
+
+ s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
+ s->refcount_block_size = 1 << s->refcount_block_bits;
+
+ s->get_refcount = new_get_refcount;
+ s->set_refcount = new_set_refcount;
+
+ /* For cleaning up all old refblocks and the old reftable below the "done"
+ * label */
+ new_reftable = old_reftable;
+ new_reftable_size = old_reftable_size;
+ new_reftable_offset = old_reftable_offset;
+
+done:
+ if (new_reftable) {
+ /* On success, new_reftable actually points to the old reftable (and
+ * new_reftable_size is the old reftable's size); but that is just
+ * fine */
+ for (i = 0; i < new_reftable_size; i++) {
+ uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
+ if (offset) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+ g_free(new_reftable);
+
+ if (new_reftable_offset > 0) {
+ qcow2_free_clusters(bs, new_reftable_offset,
+ new_reftable_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+
+ qemu_vfree(new_refblock);
+ return ret;
+}
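A worked example of the refcount_order arithmetic used in this function (illustrative values, assuming 64 KiB clusters, i.e. cluster_bits = 16):

    /* refcount_order = 4:
     *   refcount_bits     = 1 << 4                      = 16
     *   new_refblock_size = 1 << (16 - (4 - 3))         = 32768 entries
     *                       (65536 bytes / 2 bytes per entry)
     *   refcount_max      = (1 << 15) + ((1 << 15) - 1) = 65535 = 2^16 - 1
     * refcount_max is built in two steps so that refcount_bits == 64 never
     * shifts a 64-bit value by 64, which would be undefined behaviour. */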
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index def720164d..5f4a17e473 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -22,10 +22,12 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/error-report.h"
+#include "qemu/cutils.h"
void qcow2_free_snapshots(BlockDriverState *bs)
{
diff --git a/block/qcow2.c b/block/qcow2.c
index 88f56c8868..470734be9f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -21,8 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
#include "block/qcow2.h"
@@ -34,6 +35,7 @@
#include "qapi-event.h"
#include "trace.h"
#include "qemu/option_int.h"
+#include "qemu/cutils.h"
/*
Differences with QCOW:
@@ -196,22 +198,8 @@ static void cleanup_unknown_header_ext(BlockDriverState *bs)
}
}
-static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
- Error **errp, const char *fmt, ...)
-{
- char msg[64];
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(msg, sizeof(msg), fmt, ap);
- va_end(ap);
-
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
- Error **errp, Qcow2Feature *table, uint64_t mask)
+static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
+ uint64_t mask)
{
char *features = g_strdup("");
char *old;
@@ -236,7 +224,7 @@ static void report_unsupported_feature(BlockDriverState *bs,
g_free(old);
}
- report_unsupported(bs, errp, "%s", features);
+ error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
g_free(features);
}
@@ -853,7 +841,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version);
+ error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -933,7 +921,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
void *feature_table = NULL;
qcow2_read_extensions(bs, header.header_length, ext_end,
&feature_table, NULL);
- report_unsupported_feature(bs, errp, feature_table,
+ report_unsupported_feature(errp, feature_table,
s->incompatible_features &
~QCOW2_INCOMPAT_MASK);
ret = -ENOTSUP;
@@ -977,6 +965,14 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
+ if (bdrv_uses_whitelist() &&
+ s->crypt_method_header == QCOW_CRYPT_AES) {
+ error_report("qcow2 built-in AES encryption is deprecated");
+ error_printf("Support for it will be removed in a future release.\n"
+ "You can use 'qemu-img convert' to switch to an\n"
+ "unencrypted qcow2 image, or a LUKS raw image.\n");
+ }
+
bs->encrypted = 1;
}
@@ -1140,7 +1136,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
}
/* Clear unknown autoclear feature bits */
- if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) {
+ if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) {
s->autoclear_features = 0;
ret = qcow2_update_header(bs);
if (ret < 0) {
@@ -1153,7 +1149,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
qemu_co_mutex_init(&s->lock);
/* Repair image if dirty */
- if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only &&
+ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
BdrvCheckResult result = {0};
@@ -1282,8 +1278,54 @@ static void qcow2_reopen_abort(BDRVReopenState *state)
g_free(state->opaque);
}
+static void qcow2_join_options(QDict *options, QDict *old_options)
+{
+ bool has_new_overlap_template =
+ qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
+ qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
+ bool has_new_total_cache_size =
+ qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
+ bool has_all_cache_options;
+
+ /* New overlap template overrides all old overlap options */
+ if (has_new_overlap_template) {
+ qdict_del(old_options, QCOW2_OPT_OVERLAP);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
+ qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
+ }
+
+ /* New total cache size overrides all old options */
+ if (qdict_haskey(options, QCOW2_OPT_CACHE_SIZE)) {
+ qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
+ qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
+ }
+
+ qdict_join(options, old_options, false);
+
+    /*
+     * If all cache size options are set after the merge, an old total size
+     * has effectively been overridden and is dropped. Keep all options,
+     * however, if all three were newly given; the resulting error message
+     * is intentional.
+     */
+ has_all_cache_options =
+ qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) ||
+ qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) ||
+ qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
+
+ if (has_all_cache_options && !has_new_total_cache_size) {
+ qdict_del(options, QCOW2_OPT_CACHE_SIZE);
+ }
+}
+
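A hypothetical reopen to illustrate the precedence rules above (the keys are the QCOW2_OPT_* options handled in this function; the values are made up):

    /* new options:  QCOW2_OPT_OVERLAP=<template>, QCOW2_OPT_L2_CACHE_SIZE=4M
     * old options:  QCOW2_OPT_OVERLAP_MAIN_HEADER=off,
     *               QCOW2_OPT_CACHE_SIZE=8M, QCOW2_OPT_REFCOUNT_CACHE_SIZE=1M
     *
     * 1. The new overlap template removes the old per-field overlap option.
     * 2. qdict_join() then leaves all three cache size keys set.
     * 3. Because the total cache size was not newly given,
     *    QCOW2_OPT_CACHE_SIZE is dropped, so the reopened image ends up with
     *    the new L2 cache size and the old refcount cache size. */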
static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
@@ -1302,6 +1344,7 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
!s->cipher) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *file = bs->file->bs;
status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
if (ret == QCOW2_CLUSTER_ZERO) {
@@ -1639,6 +1682,32 @@ fail:
return ret;
}
+static int qcow2_inactivate(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret, result = 0;
+
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the L2 table cache: %s",
+ strerror(-ret));
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the refcount block cache: %s",
+ strerror(-ret));
+ }
+
+ if (result == 0) {
+ qcow2_mark_clean(bs);
+ }
+
+ return result;
+}
+
static void qcow2_close(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
@@ -1646,24 +1715,8 @@ static void qcow2_close(BlockDriverState *bs)
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
- if (!(bs->open_flags & BDRV_O_INCOMING)) {
- int ret1, ret2;
-
- ret1 = qcow2_cache_flush(bs, s->l2_table_cache);
- ret2 = qcow2_cache_flush(bs, s->refcount_block_cache);
-
- if (ret1) {
- error_report("Failed to flush the L2 table cache: %s",
- strerror(-ret1));
- }
- if (ret2) {
- error_report("Failed to flush the refcount block cache: %s",
- strerror(-ret2));
- }
-
- if (!ret1 && !ret2) {
- qcow2_mark_clean(bs);
- }
+ if (!(s->flags & BDRV_O_INACTIVE)) {
+ qcow2_inactivate(bs);
}
cache_clean_timer_del(bs);
@@ -1707,21 +1760,24 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
bdrv_invalidate_cache(bs->file->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
+ bs->drv = NULL;
return;
}
memset(s, 0, sizeof(BDRVQcow2State));
options = qdict_clone_shallow(bs->options);
+ flags &= ~BDRV_O_INACTIVE;
ret = qcow2_open(bs, options, flags, &local_err);
QDECREF(options);
if (local_err) {
- error_setg(errp, "Could not reopen qcow2 layer: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_propagate(errp, local_err);
+ error_prepend(errp, "Could not reopen qcow2 layer: ");
+ bs->drv = NULL;
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
+ bs->drv = NULL;
return;
}
@@ -1849,31 +1905,33 @@ int qcow2_update_header(BlockDriverState *bs)
}
/* Feature table */
- Qcow2Feature features[] = {
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
- .name = "dirty bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
- .name = "corrupt bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_COMPATIBLE,
- .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
- .name = "lazy refcounts",
- },
- };
+ if (s->qcow_version >= 3) {
+ Qcow2Feature features[] = {
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
+ .name = "dirty bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
+ .name = "corrupt bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+ .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+ .name = "lazy refcounts",
+ },
+ };
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
- features, sizeof(features), buflen);
- if (ret < 0) {
- goto fail;
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
}
- buf += ret;
- buflen -= ret;
/* Keep unknown header extensions */
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
@@ -1928,6 +1986,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
{
BDRVQcow2State *s = bs->opaque;
+ if (backing_file && strlen(backing_file) > 1023) {
+ return -EINVAL;
+ }
+
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
@@ -2034,7 +2096,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
* size for any qcow2 image.
*/
- BlockDriverState* bs;
+ BlockBackend *blk;
QCowHeader *header;
uint64_t* refcount_table;
Error *local_err = NULL;
@@ -2109,14 +2171,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Write the header */
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
header = g_malloc0(cluster_size);
@@ -2144,7 +2207,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
- ret = bdrv_pwrite(bs, 0, header, cluster_size);
+ ret = blk_pwrite(blk, 0, header, cluster_size);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2154,7 +2217,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
+ ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
g_free(refcount_table);
if (ret < 0) {
@@ -2162,8 +2225,8 @@ static int qcow2_create2(const char *filename, int64_t total_size,
goto out;
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/*
* And now open the image and make it consistent first (i.e. increase the
@@ -2172,15 +2235,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
*/
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
- ret = bdrv_open(&bs, filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
- ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
+ ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
@@ -2191,8 +2254,15 @@ static int qcow2_create2(const char *filename, int64_t total_size,
abort();
}
+ /* Create a full header (including things like feature table) */
+ ret = qcow2_update_header(blk_bs(blk));
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not update qcow2 header");
+ goto out;
+ }
+
/* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not resize image");
goto out;
@@ -2200,7 +2270,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* Want a backing file? There you go.*/
if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
"with format '%s'", backing_file, backing_format);
@@ -2210,9 +2280,9 @@ static int qcow2_create2(const char *filename, int64_t total_size,
/* And if we're supposed to preallocate metadata, do that now */
if (prealloc != PREALLOC_MODE_OFF) {
- BDRVQcow2State *s = bs->opaque;
+ BDRVQcow2State *s = blk_bs(blk)->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
+ ret = preallocate(blk_bs(blk));
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -2220,24 +2290,24 @@ static int qcow2_create2(const char *filename, int64_t total_size,
}
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow2"));
- ret = bdrv_open(&bs, filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
- &local_err);
- if (local_err) {
+ blk = blk_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
ret = 0;
out:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
return ret;
}
@@ -2269,7 +2339,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
DEFAULT_CLUSTER_SIZE);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
- PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+ PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -2739,15 +2809,15 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
*spec_info = (ImageInfoSpecific){
.type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
- .u.qcow2 = g_new(ImageInfoSpecificQCow2, 1),
+ .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
};
if (s->qcow_version == 2) {
- *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
.compat = g_strdup("0.10"),
.refcount_bits = s->refcount_bits,
};
} else if (s->qcow_version == 3) {
- *spec_info->u.qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
.compat = g_strdup("1.1"),
.lazy_refcounts = s->compatible_features &
QCOW2_COMPAT_LAZY_REFCOUNTS,
@@ -2757,6 +2827,10 @@ static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
.has_corrupt = true,
.refcount_bits = s->refcount_bits,
};
+ } else {
+ /* if this assertion fails, this probably means a new version was
+ * added without having it covered here */
+ assert(false);
}
return spec_info;
@@ -2824,7 +2898,7 @@ static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
* have to be removed.
*/
static int qcow2_downgrade(BlockDriverState *bs, int target_version,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
{
BDRVQcow2State *s = bs->opaque;
int current_version = s->qcow_version;
@@ -2839,13 +2913,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
}
if (s->refcount_order != 4) {
- /* we would have to convert the image to a refcount_order == 4 image
- * here; however, since qemu (at the time of writing this) does not
- * support anything different than 4 anyway, there is no point in doing
- * so right now; however, we should error out (if qemu supports this in
- * the future and this code has not been adapted) */
- error_report("qcow2_downgrade: Image refcount orders other than 4 are "
- "currently not supported.");
+ error_report("compat=0.10 requires refcount_bits=16");
return -ENOTSUP;
}
@@ -2873,7 +2941,7 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
/* clearing autoclear features is trivial */
s->autoclear_features = 0;
- ret = qcow2_expand_zero_clusters(bs, status_cb);
+ ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
if (ret < 0) {
return ret;
}
@@ -2887,8 +2955,79 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version,
return 0;
}
+typedef enum Qcow2AmendOperation {
+ /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
+ * statically initialized to so that the helper CB can discern the first
+ * invocation from an operation change */
+ QCOW2_NO_OPERATION = 0,
+
+ QCOW2_CHANGING_REFCOUNT_ORDER,
+ QCOW2_DOWNGRADING,
+} Qcow2AmendOperation;
+
+typedef struct Qcow2AmendHelperCBInfo {
+ /* The code coordinating the amend operations should only modify
+ * these four fields; the rest will be managed by the CB */
+ BlockDriverAmendStatusCB *original_status_cb;
+ void *original_cb_opaque;
+
+ Qcow2AmendOperation current_operation;
+
+ /* Total number of operations to perform (only set once) */
+ int total_operations;
+
+ /* The following fields are managed by the CB */
+
+ /* Number of operations completed */
+ int operations_completed;
+
+ /* Cumulative offset of all completed operations */
+ int64_t offset_completed;
+
+ Qcow2AmendOperation last_operation;
+ int64_t last_work_size;
+} Qcow2AmendHelperCBInfo;
+
+static void qcow2_amend_helper_cb(BlockDriverState *bs,
+ int64_t operation_offset,
+ int64_t operation_work_size, void *opaque)
+{
+ Qcow2AmendHelperCBInfo *info = opaque;
+ int64_t current_work_size;
+ int64_t projected_work_size;
+
+ if (info->current_operation != info->last_operation) {
+ if (info->last_operation != QCOW2_NO_OPERATION) {
+ info->offset_completed += info->last_work_size;
+ info->operations_completed++;
+ }
+
+ info->last_operation = info->current_operation;
+ }
+
+ assert(info->total_operations > 0);
+ assert(info->operations_completed < info->total_operations);
+
+ info->last_work_size = operation_work_size;
+
+ current_work_size = info->offset_completed + operation_work_size;
+
+    /* current_work_size is the total work size of the (operations_completed
+     * + 1) operations seen so far, including this one. Multiplying it by the
+     * number of operations not yet covered and dividing by the number already
+     * covered yields a projection of the work still to come. */
+ projected_work_size = current_work_size * (info->total_operations -
+ info->operations_completed - 1)
+ / (info->operations_completed + 1);
+
+ info->original_status_cb(bs, info->offset_completed + operation_offset,
+ current_work_size + projected_work_size,
+ info->original_cb_opaque);
+}
+
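A worked example of the projection above (illustrative numbers): with total_operations == 2, suppose the first operation reports operation_offset 25 of operation_work_size 100, and the second later reports 30 of 60:

    /* 1st operation (operations_completed = 0, offset_completed = 0):
     *   current_work_size   = 0 + 100               = 100
     *   projected_work_size = 100 * (2 - 0 - 1) / 1 = 100
     *   reported             -> 25 out of 200
     * 2nd operation (operations_completed = 1, offset_completed = 100):
     *   current_work_size   = 100 + 60              = 160
     *   projected_work_size = 160 * (2 - 1 - 1) / 2 = 0
     *   reported             -> 130 out of 160
     * The estimated total shrinks to the real total once every operation's
     * work size is known. */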
static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
BDRVQcow2State *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
@@ -2898,8 +3037,10 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
const char *compat = NULL;
uint64_t cluster_size = s->cluster_size;
bool encrypt;
+ int refcount_bits = s->refcount_bits;
int ret;
QemuOptDesc *desc = opts->list->desc;
+ Qcow2AmendHelperCBInfo helper_cb_info;
while (desc && desc->name) {
if (!qemu_opt_find(opts, desc->name)) {
@@ -2917,11 +3058,11 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
} else if (!strcmp(compat, "1.1")) {
new_version = 3;
} else {
- fprintf(stderr, "Unknown compatibility level %s.\n", compat);
+ error_report("Unknown compatibility level %s", compat);
return -EINVAL;
}
} else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
- fprintf(stderr, "Cannot change preallocation mode.\n");
+ error_report("Cannot change preallocation mode");
return -ENOTSUP;
} else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
@@ -2934,47 +3075,74 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
!!s->cipher);
if (encrypt != !!s->cipher) {
- fprintf(stderr, "Changing the encryption flag is not "
- "supported.\n");
+ error_report("Changing the encryption flag is not supported");
return -ENOTSUP;
}
} else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
cluster_size);
if (cluster_size != s->cluster_size) {
- fprintf(stderr, "Changing the cluster size is not "
- "supported.\n");
+ error_report("Changing the cluster size is not supported");
return -ENOTSUP;
}
} else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
lazy_refcounts);
} else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
- error_report("Cannot change refcount entry width");
- return -ENOTSUP;
+ refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
+ refcount_bits);
+
+ if (refcount_bits <= 0 || refcount_bits > 64 ||
+ !is_power_of_2(refcount_bits))
+ {
+ error_report("Refcount width must be a power of two and may "
+ "not exceed 64 bits");
+ return -EINVAL;
+ }
} else {
- /* if this assertion fails, this probably means a new option was
+ /* if this point is reached, this probably means a new option was
* added without having it covered here */
- assert(false);
+ abort();
}
desc++;
}
- if (new_version != old_version) {
- if (new_version > old_version) {
- /* Upgrade */
- s->qcow_version = new_version;
- ret = qcow2_update_header(bs);
- if (ret < 0) {
- s->qcow_version = old_version;
- return ret;
- }
- } else {
- ret = qcow2_downgrade(bs, new_version, status_cb);
- if (ret < 0) {
- return ret;
- }
+ helper_cb_info = (Qcow2AmendHelperCBInfo){
+ .original_status_cb = status_cb,
+ .original_cb_opaque = cb_opaque,
+ .total_operations = (new_version < old_version)
+ + (s->refcount_bits != refcount_bits)
+ };
+
+ /* Upgrade first (some features may require compat=1.1) */
+ if (new_version > old_version) {
+ s->qcow_version = new_version;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->qcow_version = old_version;
+ return ret;
+ }
+ }
+
+ if (s->refcount_bits != refcount_bits) {
+ int refcount_order = ctz32(refcount_bits);
+ Error *local_error = NULL;
+
+ if (new_version < 3 && refcount_bits != 16) {
+            error_report("Refcount widths other than 16 bits require "
+ "compatibility level 1.1 or above (use compat=1.1 or "
+ "greater)");
+ return -EINVAL;
+ }
+
+ helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
+ ret = qcow2_change_refcount_order(bs, refcount_order,
+ &qcow2_amend_helper_cb,
+ &helper_cb_info, &local_error);
+ if (ret < 0) {
+ error_report_err(local_error);
+ return ret;
}
}
@@ -2989,9 +3157,9 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
if (s->use_lazy_refcounts != lazy_refcounts) {
if (lazy_refcounts) {
- if (s->qcow_version < 3) {
- fprintf(stderr, "Lazy refcounts only supported with compatibility "
- "level 1.1 and above (use compat=1.1 or greater)\n");
+ if (new_version < 3) {
+ error_report("Lazy refcounts only supported with compatibility "
+ "level 1.1 and above (use compat=1.1 or greater)");
return -EINVAL;
}
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
@@ -3025,6 +3193,16 @@ static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
}
}
+ /* Downgrade last (so unsupported features can be removed before) */
+ if (new_version < old_version) {
+ helper_cb_info.current_operation = QCOW2_DOWNGRADING;
+ ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
+ &helper_cb_info);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
return 0;
}
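For orientation (editor's note, not part of the patch), the amend path now runs its sub-operations in a fixed order, and only the two potentially long-running ones drive the progress callback:

    /* 1. version upgrade (cheap, no progress reporting)
     * 2. refcount width change   -> QCOW2_CHANGING_REFCOUNT_ORDER
     * 3. lazy refcounts and the other in-place option changes
     * 4. version downgrade       -> QCOW2_DOWNGRADING
     *
     * Hence total_operations is computed as
     *   (new_version < old_version) + (s->refcount_bits != refcount_bits)
     * in the Qcow2AmendHelperCBInfo initialization above. */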
@@ -3145,6 +3323,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_reopen_prepare = qcow2_reopen_prepare,
.bdrv_reopen_commit = qcow2_reopen_commit,
.bdrv_reopen_abort = qcow2_reopen_abort,
+ .bdrv_join_options = qcow2_join_options,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
@@ -3176,6 +3355,7 @@ BlockDriver bdrv_qcow2 = {
.bdrv_refresh_limits = qcow2_refresh_limits,
.bdrv_invalidate_cache = qcow2_invalidate_cache,
+ .bdrv_inactivate = qcow2_inactivate,
.create_opts = &qcow2_create_opts,
.bdrv_check = qcow2_check,
diff --git a/block/qcow2.h b/block/qcow2.h
index b8c500b9dc..a063a3c1a1 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -529,6 +529,10 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
+int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp);
+
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
@@ -553,7 +557,8 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb);
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
diff --git a/block/qed-check.c b/block/qed-check.c
index 36ecd290d6..622f308976 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -11,6 +11,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
typedef struct {
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
index f64b2af8f7..c24e75616a 100644
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -12,6 +12,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
/**
diff --git a/block/qed-gencb.c b/block/qed-gencb.c
index b817a8bf50..faf8ecc840 100644
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -11,6 +11,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index e9b2aae44d..5cba794650 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -50,6 +50,7 @@
* table will be deleted in favor of the existing cache entry.
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "qed.h"
diff --git a/block/qed-table.c b/block/qed-table.c
index f4219b8acc..802945f5e5 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -12,6 +12,7 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "qed.h"
diff --git a/block/qed.c b/block/qed.c
index 9b88895038..0af52741df 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -12,11 +12,14 @@
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/timer.h"
#include "trace.h"
#include "qed.h"
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"
+#include "sysemu/block-backend.h"
static const AIOCBInfo qed_aiocb_info = {
.aiocb_size = sizeof(QEDAIOCB),
@@ -344,7 +347,7 @@ static void qed_start_need_check_timer(BDRVQEDState *s)
* migration.
*/
timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+ NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT);
}
/* It's okay to call this multiple times or when no timer is started */
@@ -375,18 +378,6 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
}
}
-static void bdrv_qed_drain(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
-
- /* Cancel timer and start doing I/O that were meant to happen as if it
- * fired, that way we get bdrv_drain() taking care of the ongoing requests
- * correctly. */
- qed_cancel_need_check_timer(s);
- qed_plug_allocating_write_reqs(s);
- bdrv_aio_flush(s->bs, qed_clear_need_check, s);
-}
-
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -410,11 +401,8 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
- char buf[64];
- snprintf(buf, sizeof(buf), "%" PRIx64,
- s->header.features & ~QED_FEATURE_MASK);
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "QED", buf);
+ error_setg(errp, "Unsupported QED features: %" PRIx64,
+ s->header.features & ~QED_FEATURE_MASK);
return -ENOTSUP;
}
if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -477,7 +465,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INCOMING)) {
+ !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@@ -505,7 +493,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
* aid data recovery from an otherwise inconsistent image.
*/
if (!bdrv_is_read_only(bs->file->bs) &&
- !(flags & BDRV_O_INCOMING)) {
+ !(flags & BDRV_O_INACTIVE)) {
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
@@ -579,7 +567,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
- BlockDriverState *bs;
+ BlockBackend *blk;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -587,17 +575,17 @@ static int qed_create(const char *filename, uint32_t cluster_size,
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
+ ret = blk_truncate(blk, 0);
if (ret < 0) {
goto out;
}
@@ -613,18 +601,18 @@ static int qed_create(const char *filename, uint32_t cluster_size,
}
qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+ ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header));
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
+ ret = blk_pwrite(blk, sizeof(le_header), backing_file,
+ header.backing_filename_size);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+ ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size);
if (ret < 0) {
goto out;
}
@@ -632,7 +620,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
ret = 0; /* success */
out:
g_free(l1_table);
- bdrv_unref(bs);
+ blk_unref(blk);
return ret;
}
@@ -692,6 +680,7 @@ typedef struct {
uint64_t pos;
int64_t status;
int *pnum;
+ BlockDriverState **file;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
@@ -703,6 +692,7 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
case QED_CLUSTER_FOUND:
offset |= qed_offset_into_cluster(s, cb->pos);
cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+ *cb->file = cb->bs->file->bs;
break;
case QED_CLUSTER_ZERO:
cb->status = BDRV_BLOCK_ZERO;
@@ -724,7 +714,8 @@ static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t l
static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
BDRVQEDState *s = bs->opaque;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
@@ -733,6 +724,7 @@ static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
.status = BDRV_BLOCK_OFFSET_MASK,
.pnum = pnum,
+ .file = file,
};
QEDRequest request = { .l2_table = NULL };
@@ -1611,9 +1603,8 @@ static void bdrv_qed_invalidate_cache(BlockDriverState *bs, Error **errp)
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
- error_setg(errp, "Could not reopen qed layer: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_propagate(errp, local_err);
+ error_prepend(errp, "Could not reopen qed layer: ");
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qed layer");
@@ -1688,7 +1679,6 @@ static BlockDriver bdrv_qed = {
.bdrv_check = bdrv_qed_check,
.bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
.bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
- .bdrv_drain = bdrv_qed_drain,
};
static void bdrv_qed_init(void)
diff --git a/block/qed.h b/block/qed.h
index 615e676fc8..22b3198751 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -16,6 +16,7 @@
#define BLOCK_QED_H
#include "block/block_int.h"
+#include "qemu/cutils.h"
/* The layout of a QED file is as follows:
*
diff --git a/block/quorum.c b/block/quorum.c
index b9ba028d46..da15465a9a 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -13,6 +13,7 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
@@ -214,14 +215,16 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
return acb;
}
-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+ int nb_sectors, char *node_name, int ret)
{
const char *msg = NULL;
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
- acb->sector_num, acb->nb_sectors, &error_abort);
+
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+ sector_num, nb_sectors, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
@@ -283,9 +286,19 @@ static void quorum_aio_cb(void *opaque, int ret)
BDRVQuorumState *s = acb->common.bs->opaque;
bool rewrite = false;
+ if (ret == 0) {
+ acb->success_count++;
+ } else {
+ QuorumOpType type;
+ type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
+ quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+ sacb->aiocb->bs->node_name, ret);
+ }
+
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
/* We try to read next child in FIFO order if we fail to read */
- if (ret < 0 && ++acb->child_iter < s->num_children) {
+ if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
+ acb->child_iter++;
read_fifo_child(acb);
return;
}
@@ -300,11 +313,6 @@ static void quorum_aio_cb(void *opaque, int ret)
sacb->ret = ret;
acb->count++;
- if (ret == 0) {
- acb->success_count++;
- } else {
- quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
- }
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
if (acb->count < s->num_children) {
@@ -336,7 +344,9 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(acb, s->children[item->index]->bs->node_name, 0);
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+ acb->nb_sectors,
+ s->children[item->index]->bs->node_name, 0);
}
}
}
@@ -646,8 +656,9 @@ static BlockAIOCB *read_quorum_children(QuorumAIOCB *acb)
}
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_readv(s->children[i]->bs, acb->sector_num, &acb->qcrs[i].qiov,
- acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+ acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i]->bs, acb->sector_num,
+ &acb->qcrs[i].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
@@ -662,9 +673,10 @@ static BlockAIOCB *read_fifo_child(QuorumAIOCB *acb)
qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
- bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
- &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+ acb->qcrs[acb->child_iter].aiocb =
+ bdrv_aio_readv(s->children[acb->child_iter]->bs, acb->sector_num,
+ &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[acb->child_iter]);
return &acb->common;
}
@@ -758,19 +770,30 @@ static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
QuorumVoteValue result_value;
int i;
int result = 0;
+ int success_count = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
result = bdrv_co_flush(s->children[i]->bs);
- result_value.l = result;
- quorum_count_vote(&error_votes, &result_value, i);
+ if (result) {
+ quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+ bdrv_nb_sectors(s->children[i]->bs),
+ s->children[i]->bs->node_name, result);
+ result_value.l = result;
+ quorum_count_vote(&error_votes, &result_value, i);
+ } else {
+ success_count++;
+ }
}
- winner = quorum_get_vote_winner(&error_votes);
- result = winner->value.l;
-
+ if (success_count >= s->threshold) {
+ result = 0;
+ } else {
+ winner = quorum_get_vote_winner(&error_votes);
+ result = winner->value.l;
+ }
quorum_free_vote_list(&error_votes);
return result;
@@ -847,7 +870,7 @@ static int parse_read_pattern(const char *opt)
return QUORUM_READ_PATTERN_QUORUM;
}
- for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
+ for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) {
if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
return i;
}
@@ -997,7 +1020,7 @@ static void quorum_attach_aio_context(BlockDriverState *bs,
}
}
-static void quorum_refresh_filename(BlockDriverState *bs)
+static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVQuorumState *s = bs->opaque;
QDict *opts;
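
The quorum_co_flush() hunk above changes the flush semantics: instead of always voting on the per-child return codes, it first counts how many children flushed successfully and only falls back to picking the most common error when that count is below the configured threshold. A minimal standalone sketch of that threshold-then-vote pattern is shown below. It is plain C, not QEMU code; the function name, the inlined vote counting and the errno values are illustrative only.

/* Standalone sketch (not QEMU code): flush N children, succeed if at least
 * `threshold` of them succeeded, otherwise report the most common error. */
#include <stdio.h>

static int flush_children(const int *child_ret, int num_children, int threshold)
{
    int success_count = 0;
    int best_err = 0, best_votes = 0;

    for (int i = 0; i < num_children; i++) {
        if (child_ret[i] == 0) {
            success_count++;
            continue;
        }
        /* crude "vote": count how many children returned this same error */
        int votes = 0;
        for (int j = 0; j < num_children; j++) {
            if (child_ret[j] == child_ret[i]) {
                votes++;
            }
        }
        if (votes > best_votes) {
            best_votes = votes;
            best_err = child_ret[i];
        }
    }

    if (success_count >= threshold) {
        return 0;          /* enough replicas flushed: quorum satisfied */
    }
    return best_err;       /* otherwise report the winning error value */
}

int main(void)
{
    int results[] = { 0, -5 /* -EIO */, 0 };   /* 2 of 3 children flushed OK */
    printf("flush -> %d\n", flush_children(results, 3, 2));   /* prints 0 */
    return 0;
}
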
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 31d791fe67..811e375018 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -15,6 +15,8 @@
#ifndef QEMU_RAW_AIO_H
#define QEMU_RAW_AIO_H
+#include "qemu/iov.h"
+
/* AIO request types */
#define QEMU_AIO_READ 0x0001
#define QEMU_AIO_WRITE 0x0002
diff --git a/block/raw-posix.c b/block/raw-posix.c
index d9162fd306..906d5c9411 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -21,7 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/log.h"
@@ -43,6 +45,7 @@
#include <IOKit/storage/IOMedia.h>
#include <IOKit/storage/IOCDMedia.h>
//#include <IOKit/storage/IOCDTypes.h>
+#include <IOKit/storage/IODVDMedia.h>
#include <CoreFoundation/CoreFoundation.h>
#endif
@@ -51,8 +54,6 @@
#include <sys/dkio.h>
#endif
#ifdef __linux__
-#include <sys/types.h>
-#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <linux/cdrom.h>
@@ -500,21 +501,17 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
goto fail;
}
if (!s->use_aio && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
- error_printf("WARNING: aio=native was specified for '%s', but "
- "it requires cache.direct=on, which was not "
- "specified. Falling back to aio=threads.\n"
- " This will become an error condition in "
- "future QEMU versions.\n",
- bs->filename);
+ error_setg(errp, "aio=native was specified, but it requires "
+ "cache.direct=on, which was not specified.");
+ ret = -EINVAL;
+ goto fail;
}
#else
if (bdrv_flags & BDRV_O_NATIVE_AIO) {
- error_printf("WARNING: aio=native was specified for '%s', but "
- "is not supported in this build. Falling back to "
- "aio=threads.\n"
- " This will become an error condition in "
- "future QEMU versions.\n",
- bs->filename);
+ error_setg(errp, "aio=native was specified, but is not supported "
+ "in this build.");
+ ret = -EINVAL;
+ goto fail;
}
#endif /* !defined(CONFIG_LINUX_AIO) */
@@ -783,7 +780,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{
BDRVRawState *s = bs->opaque;
struct hd_geometry ioctl_geo = {0};
- uint32_t blksize;
/* If DASD, get its geometry */
if (check_for_dasd(s->fd) < 0) {
@@ -803,12 +799,6 @@ static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
}
geo->heads = ioctl_geo.heads;
geo->sectors = ioctl_geo.sectors;
- if (!probe_physical_blocksize(s->fd, &blksize)) {
- /* overwrite cyls: HDIO_GETGEO result is incorrect for big drives */
- geo->cylinders = bdrv_nb_sectors(bs) / (blksize / BDRV_SECTOR_SIZE)
- / (geo->heads * geo->sectors);
- return 0;
- }
geo->cylinders = ioctl_geo.cylinders;
return 0;
@@ -1636,7 +1626,7 @@ static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
- PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+ PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(buf);
if (local_err) {
@@ -1830,7 +1820,8 @@ static int find_allocation(BlockDriverState *bs, off_t start,
*/
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
off_t start, data = 0, hole = 0;
int64_t total_size;
@@ -1872,6 +1863,7 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
ret = BDRV_BLOCK_ZERO;
}
+ *file = bs;
return ret | BDRV_BLOCK_OFFSET_VALID | start;
}
@@ -1975,33 +1967,47 @@ BlockDriver bdrv_file = {
/* host device */
#if defined(__APPLE__) && defined(__MACH__)
-static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
CFIndex maxPathSize, int flags);
-kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
{
- kern_return_t kernResult;
+ kern_return_t kernResult = KERN_FAILURE;
mach_port_t masterPort;
CFMutableDictionaryRef classesToMatch;
+ const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
+ char *mediaType = NULL;
kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
if ( KERN_SUCCESS != kernResult ) {
printf( "IOMasterPort returned %d\n", kernResult );
}
- classesToMatch = IOServiceMatching( kIOCDMediaClass );
- if ( classesToMatch == NULL ) {
- printf( "IOServiceMatching returned a NULL dictionary.\n" );
- } else {
- CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
- }
- kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
- if ( KERN_SUCCESS != kernResult )
- {
- printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
- }
+ int index;
+ for (index = 0; index < ARRAY_SIZE(matching_array); index++) {
+ classesToMatch = IOServiceMatching(matching_array[index]);
+ if (classesToMatch == NULL) {
+ error_report("IOServiceMatching returned NULL for %s",
+ matching_array[index]);
+ continue;
+ }
+ CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
+ kCFBooleanTrue);
+ kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
+ mediaIterator);
+ if (kernResult != KERN_SUCCESS) {
+ error_report("Note: IOServiceGetMatchingServices returned %d",
+ kernResult);
+ continue;
+ }
- return kernResult;
+ /* If a match was found, leave the loop */
+ if (*mediaIterator != 0) {
+ DPRINTF("Matching using %s\n", matching_array[index]);
+ mediaType = g_strdup(matching_array[index]);
+ break;
+ }
+ }
+ return mediaType;
}
kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
@@ -2033,7 +2039,46 @@ kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
return kernResult;
}
-#endif
+/* Sets up a real cdrom for use in QEMU */
+static bool setup_cdrom(char *bsd_path, Error **errp)
+{
+ int index, num_of_test_partitions = 2, fd;
+ char test_partition[MAXPATHLEN];
+ bool partition_found = false;
+
+ /* look for a working partition */
+ for (index = 0; index < num_of_test_partitions; index++) {
+ snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path,
+ index);
+ fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd >= 0) {
+ partition_found = true;
+ qemu_close(fd);
+ break;
+ }
+ }
+
+ /* if a working partition on the device was not found */
+ if (partition_found == false) {
+ error_setg(errp, "Failed to find a working partition on disc");
+ } else {
+ DPRINTF("Using %s as optical disc\n", test_partition);
+ pstrcpy(bsd_path, MAXPATHLEN, test_partition);
+ }
+ return partition_found;
+}
+
+/* Prints directions on mounting and unmounting a device */
+static void print_unmounting_directions(const char *file_name)
+{
+ error_report("If device %s is mounted on the desktop, unmount"
+ " it first before using it in QEMU", file_name);
+ error_report("Command to unmount device: diskutil unmountDisk %s",
+ file_name);
+ error_report("Command to mount device: diskutil mountDisk %s", file_name);
+}
+
+#endif /* defined(__APPLE__) && defined(__MACH__) */
static int hdev_probe_device(const char *filename)
{
@@ -2124,33 +2169,57 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
#if defined(__APPLE__) && defined(__MACH__)
const char *filename = qdict_get_str(options, "filename");
+ char bsd_path[MAXPATHLEN] = "";
+ bool error_occurred = false;
+
+ /* If using a real cdrom */
+ if (strcmp(filename, "/dev/cdrom") == 0) {
+ char *mediaType = NULL;
+ kern_return_t ret_val;
+ io_iterator_t mediaIterator = 0;
+
+ mediaType = FindEjectableOpticalMedia(&mediaIterator);
+ if (mediaType == NULL) {
+ error_setg(errp, "Please make sure your CD/DVD is in the optical"
+ " drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
- if (strstart(filename, "/dev/cdrom", NULL)) {
- kern_return_t kernResult;
- io_iterator_t mediaIterator;
- char bsdPath[ MAXPATHLEN ];
- int fd;
-
- kernResult = FindEjectableCDMedia( &mediaIterator );
- kernResult = GetBSDPath(mediaIterator, bsdPath, sizeof(bsdPath),
- flags);
- if ( bsdPath[ 0 ] != '\0' ) {
- strcat(bsdPath,"s0");
- /* some CDs don't have a partition 0 */
- fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
- if (fd < 0) {
- bsdPath[strlen(bsdPath)-1] = '1';
- } else {
- qemu_close(fd);
- }
- filename = bsdPath;
- qdict_put(options, "filename", qstring_from_str(filename));
+ ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags);
+ if (ret_val != KERN_SUCCESS) {
+ error_setg(errp, "Could not get BSD path for optical drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
}
- if ( mediaIterator )
- IOObjectRelease( mediaIterator );
+ /* If a real optical drive was not found */
+ if (bsd_path[0] == '\0') {
+ error_setg(errp, "Failed to obtain bsd path for optical drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ /* If using a cdrom disc and finding a partition on the disc failed */
+ if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 &&
+ setup_cdrom(bsd_path, errp) == false) {
+ print_unmounting_directions(bsd_path);
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ qdict_put(options, "filename", qstring_from_str(bsd_path));
+
+hdev_open_Mac_error:
+ g_free(mediaType);
+ if (mediaIterator) {
+ IOObjectRelease(mediaIterator);
+ }
+ if (error_occurred) {
+ return -ENOENT;
+ }
}
-#endif
+#endif /* defined(__APPLE__) && defined(__MACH__) */
s->type = FTYPE_FILE;
@@ -2159,6 +2228,15 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
if (local_err) {
error_propagate(errp, local_err);
}
+#if defined(__APPLE__) && defined(__MACH__)
+ if (*bsd_path) {
+ filename = bsd_path;
+ }
+ /* if a physical device experienced an error while being opened */
+ if (strncmp(filename, "/dev/", 5) == 0) {
+ print_unmounting_directions(filename);
+ }
+#endif /* defined(__APPLE__) && defined(__MACH__) */
return ret;
}
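
The new setup_cdrom() helper in the raw-posix.c hunks probes partition nodes by appending "s0", "s1", ... to the BSD device path and keeping the first one that opens read-only. The sketch below shows that probing pattern on its own, using plain open(2) rather than QEMU's wrappers; the device path, buffer size and partition count are hypothetical.

/* Standalone sketch (not the QEMU implementation): probe "<bsd_path>s0",
 * "<bsd_path>s1", ... and keep the first partition node that can be opened
 * read-only. */
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define PATH_MAX_LEN 1024

static bool find_readable_partition(char *bsd_path, size_t len, int num_partitions)
{
    char candidate[PATH_MAX_LEN];

    for (int i = 0; i < num_partitions; i++) {
        snprintf(candidate, sizeof(candidate), "%ss%d", bsd_path, i);
        int fd = open(candidate, O_RDONLY);
        if (fd >= 0) {
            close(fd);
            /* overwrite the device path with the working partition path */
            snprintf(bsd_path, len, "%s", candidate);
            return true;
        }
    }
    return false;
}

int main(void)
{
    char path[PATH_MAX_LEN] = "/dev/disk2";   /* hypothetical optical device */
    if (find_readable_partition(path, sizeof(path), 2)) {
        printf("using %s\n", path);
    } else {
        fprintf(stderr, "no readable partition found on %s\n", path);
    }
    return 0;
}
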
diff --git a/block/raw-win32.c b/block/raw-win32.c
index f250503112..949bf6dc3e 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -21,7 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 915d6fd0e6..a6cc7e9918 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -26,7 +26,9 @@
* IN THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/option.h"
static QemuOptsList raw_create_opts = {
@@ -55,8 +57,9 @@ static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
return bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
}
-static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn
+raw_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int flags)
{
void *buf = NULL;
BlockDriver *drv;
@@ -102,7 +105,8 @@ static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
}
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- ret = bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_pwritev(bs->file->bs, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE, qiov, flags);
fail:
if (qiov == &local_qiov) {
@@ -112,11 +116,20 @@ fail:
return ret;
}
+static int coroutine_fn
+raw_co_writev(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov)
+{
+ return raw_co_writev_flags(bs, sector_num, nb_sectors, qiov, 0);
+}
+
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
*pnum = nb_sectors;
+ *file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
@@ -244,6 +257,8 @@ BlockDriver bdrv_raw = {
.bdrv_create = &raw_create,
.bdrv_co_readv = &raw_co_readv,
.bdrv_co_writev = &raw_co_writev,
+ .bdrv_co_writev_flags = &raw_co_writev_flags,
+ .supported_write_flags = BDRV_REQ_FUA,
.bdrv_co_write_zeroes = &raw_co_write_zeroes,
.bdrv_co_discard = &raw_co_discard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
diff --git a/block/rbd.c b/block/rbd.c
index a60a19d58d..5bc5b32530 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -11,11 +11,13 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include <inttypes.h>
+#include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
+#include "crypto/secret.h"
+#include "qemu/cutils.h"
#include <rbd/librbd.h>
@@ -228,6 +230,27 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
return NULL;
}
+
+static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
+ Error **errp)
+{
+ if (secretid == 0) {
+ return 0;
+ }
+
+ gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
+ errp);
+ if (!secret) {
+ return -1;
+ }
+
+ rados_conf_set(cluster, "key", secret);
+ g_free(secret);
+
+ return 0;
+}
+
+
static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
bool only_read_conf_file,
Error **errp)
@@ -299,10 +322,13 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
+ const char *secretid;
rados_t cluster;
rados_ioctx_t io_ctx;
int ret;
+ secretid = qemu_opt_get(opts, "password-secret");
+
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
name, sizeof(name),
@@ -350,6 +376,11 @@ static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
return -EIO;
}
+ if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
+ rados_shutdown(cluster);
+ return -EIO;
+ }
+
if (rados_connect(cluster) < 0) {
error_setg(errp, "error connecting");
rados_shutdown(cluster);
@@ -423,6 +454,11 @@ static QemuOptsList runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
+ {
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret providing the password",
+ },
{ /* end of list */ }
},
};
@@ -436,6 +472,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
+ const char *secretid;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -450,6 +487,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
filename = qemu_opt_get(opts, "filename");
+ secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
@@ -488,6 +526,11 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
}
+ if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
+ r = -EIO;
+ goto failed_shutdown;
+ }
+
/*
* Fallback to more conservative semantics if setting cache
* options fails. Ignore errors from setting rbd_cache because the
@@ -919,6 +962,11 @@ static QemuOptsList qemu_rbd_create_opts = {
.type = QEMU_OPT_SIZE,
.help = "RBD object size"
},
+ {
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret providing the password",
+ },
{ /* end of list */ }
}
};
diff --git a/block/sheepdog.c b/block/sheepdog.c
index d80e4ed18d..33e0a33824 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -12,12 +12,15 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/bitops.h"
+#include "qemu/cutils.h"
#define SD_PROTO_VER 0x01
@@ -283,6 +286,12 @@ static inline bool is_snapshot(struct SheepdogInode *inode)
return !!inode->snap_ctime;
}
+static inline size_t count_data_objs(const struct SheepdogInode *inode)
+{
+ return DIV_ROUND_UP(inode->vdi_size,
+ (1UL << inode->block_size_shift));
+}
+
#undef DPRINTF
#ifdef DEBUG_SDOG
#define DPRINTF(fmt, args...) \
@@ -608,14 +617,13 @@ static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
- ret = -socket_error();
- return ret;
+ return -errno;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret != *wlen) {
- ret = -socket_error();
error_report("failed to send a req, %s", strerror(errno));
+ return -errno;
}
return ret;
@@ -1630,7 +1638,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
static int sd_prealloc(const char *filename, Error **errp)
{
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
BDRVSheepdogState *base = NULL;
unsigned long buf_size;
uint32_t idx, max_idx;
@@ -1639,19 +1647,22 @@ static int sd_prealloc(const char *filename, Error **errp)
void *buf = NULL;
int ret;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- errp);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out_with_err_set;
}
- vdi_size = bdrv_getlength(bs);
+ blk_set_allow_write_beyond_eof(blk, true);
+
+ vdi_size = blk_getlength(blk);
if (vdi_size < 0) {
ret = vdi_size;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
object_size = (UINT32_C(1) << base->inode.block_size_shift);
buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
buf = g_malloc0(buf_size);
@@ -1663,23 +1674,24 @@ static int sd_prealloc(const char *filename, Error **errp)
* The created image can be a cloned image, so we need to read
* a data from the source image.
*/
- ret = bdrv_pread(bs, idx * buf_size, buf, buf_size);
+ ret = blk_pread(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, idx * buf_size, buf, buf_size);
+ ret = blk_pwrite(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
}
+ ret = 0;
out:
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't pre-allocate");
}
out_with_err_set:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(buf);
@@ -1819,7 +1831,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
}
if (backing_file) {
- BlockDriverState *bs;
+ BlockBackend *blk;
BDRVSheepdogState *base;
BlockDriver *drv;
@@ -1831,22 +1843,23 @@ static int sd_create(const char *filename, QemuOpts *opts,
goto out;
}
- bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, errp);
- if (ret < 0) {
+ blk = blk_new_open(backing_file, NULL, NULL,
+ BDRV_O_PROTOCOL, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
if (!is_snapshot(&base->inode)) {
error_setg(errp, "cannot clone from a non snapshot vdi");
- bdrv_unref(bs);
+ blk_unref(blk);
ret = -EINVAL;
goto out;
}
s->inode.vdi_id = base->inode.vdi_id;
- bdrv_unref(bs);
+ blk_unref(blk);
}
s->aio_context = qemu_get_aio_context();
@@ -1861,8 +1874,7 @@ static int sd_create(const char *filename, QemuOpts *opts,
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
+ error_report_err(local_err);
ret = -EIO;
goto out;
}
@@ -2406,9 +2418,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
ret = do_sd_create(s, &new_vid, 1, &local_err);
if (ret < 0) {
- error_report("failed to create inode for snapshot: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_reportf_err(local_err,
+ "failed to create inode for snapshot: ");
goto cleanup;
}
@@ -2479,13 +2490,131 @@ out:
return ret;
}
+#define NR_BATCHED_DISCARD 128
+
+static bool remove_objects(BDRVSheepdogState *s)
+{
+ int fd, i = 0, nr_objs = 0;
+ Error *local_err = NULL;
+ int ret = 0;
+ bool result = true;
+ SheepdogInode *inode = &s->inode;
+
+ fd = connect_to_sdog(s, &local_err);
+ if (fd < 0) {
+ error_report_err(local_err);
+ return false;
+ }
+
+ nr_objs = count_data_objs(inode);
+ while (i < nr_objs) {
+ int start_idx, nr_filled_idx;
+
+ while (i < nr_objs && !inode->data_vdi_id[i]) {
+ i++;
+ }
+ start_idx = i;
+
+ nr_filled_idx = 0;
+ while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
+ if (inode->data_vdi_id[i]) {
+ inode->data_vdi_id[i] = 0;
+ nr_filled_idx++;
+ }
+
+ i++;
+ }
+
+ ret = write_object(fd, s->aio_context,
+ (char *)&inode->data_vdi_id[start_idx],
+ vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
+ (i - start_idx) * sizeof(uint32_t),
+ offsetof(struct SheepdogInode,
+ data_vdi_id[start_idx]),
+ false, s->cache_flags);
+ if (ret < 0) {
+ error_report("failed to discard snapshot inode.");
+ result = false;
+ goto out;
+ }
+ }
+
+out:
+ closesocket(fd);
+ return result;
+}
+
static int sd_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
{
- /* FIXME: Delete specified snapshot id. */
- return 0;
+ unsigned long snap_id = 0;
+ char snap_tag[SD_MAX_VDI_TAG_LEN];
+ Error *local_err = NULL;
+ int fd, ret;
+ char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
+ BDRVSheepdogState *s = bs->opaque;
+ unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
+ uint32_t vid;
+ SheepdogVdiReq hdr = {
+ .opcode = SD_OP_DEL_VDI,
+ .data_length = wlen,
+ .flags = SD_FLAG_CMD_WRITE,
+ };
+ SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
+
+ if (!remove_objects(s)) {
+ return -1;
+ }
+
+ memset(buf, 0, sizeof(buf));
+ memset(snap_tag, 0, sizeof(snap_tag));
+ pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
+ ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
+ if (ret || snap_id > UINT32_MAX) {
+ error_setg(errp, "Invalid snapshot ID: %s",
+ snapshot_id ? snapshot_id : "<null>");
+ return -EINVAL;
+ }
+
+ if (snap_id) {
+ hdr.snapid = (uint32_t) snap_id;
+ } else {
+ pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
+ pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
+ }
+
+ ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
+ &local_err);
+ if (ret) {
+ return ret;
+ }
+
+ fd = connect_to_sdog(s, &local_err);
+ if (fd < 0) {
+ error_report_err(local_err);
+ return -1;
+ }
+
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ buf, &wlen, &rlen);
+ closesocket(fd);
+ if (ret) {
+ return ret;
+ }
+
+ switch (rsp->result) {
+ case SD_RES_NO_VDI:
+ error_report("%s was already deleted", s->name);
+ case SD_RES_SUCCESS:
+ break;
+ default:
+ error_report("%s, %s", sd_strerror(rsp->result), s->name);
+ return -1;
+ }
+
+ return ret;
}
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
@@ -2709,7 +2838,7 @@ retry:
static coroutine_fn int64_t
sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
+ int *pnum, BlockDriverState **file)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
@@ -2740,6 +2869,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ *file = bs;
+ }
return ret;
}
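
The remove_objects() function added to sheepdog.c walks the inode's object index, skips entries that are already unallocated, clears at most NR_BATCHED_DISCARD allocated entries per pass, and writes back only the slice of the table it touched. A minimal standalone sketch of that batching loop follows; it operates on a plain array with a stubbed write_back() and a small batch size, so none of it is sheepdog or QEMU API.

/* Standalone sketch: clear allocated entries in an index table in batches of
 * at most BATCH non-zero entries, writing back only the touched range. */
#include <stdint.h>
#include <stdio.h>

#define BATCH 4   /* the real driver batches 128 entries (NR_BATCHED_DISCARD) */

static void write_back(const uint32_t *table, int start, int end)
{
    (void)table;
    printf("write back entries [%d, %d)\n", start, end);
}

static void discard_all(uint32_t *table, int nr_entries)
{
    int i = 0;

    while (i < nr_entries) {
        /* skip entries that are already unallocated */
        while (i < nr_entries && !table[i]) {
            i++;
        }
        int start = i;

        /* clear at most BATCH allocated entries, keeping the range contiguous */
        int cleared = 0;
        while (i < nr_entries && cleared < BATCH) {
            if (table[i]) {
                table[i] = 0;
                cleared++;
            }
            i++;
        }
        if (i > start) {
            write_back(table, start, i);
        }
    }
}

int main(void)
{
    uint32_t table[] = { 0, 7, 7, 0, 7, 7, 7, 0, 0, 7 };
    discard_all(table, (int)(sizeof(table) / sizeof(table[0])));
    return 0;
}
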
diff --git a/block/snapshot.c b/block/snapshot.c
index 6e9fa8da98..e9d721df68 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -22,8 +22,10 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/snapshot.h"
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
QemuOptsList internal_snapshot_opts = {
@@ -229,6 +231,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
Error **errp)
{
BlockDriver *drv = bs->drv;
+ int ret;
+
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@@ -239,18 +243,21 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
}
/* drain all pending i/o before deleting snapshot */
- bdrv_drain(bs);
+ bdrv_drained_begin(bs);
if (drv->bdrv_snapshot_delete) {
- return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
+ ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
+ } else if (bs->file) {
+ ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
+ } else {
+ error_setg(errp, "Block format '%s' used by device '%s' "
+ "does not support internal snapshot deletion",
+ drv->format_name, bdrv_get_device_name(bs));
+ ret = -ENOTSUP;
}
- if (bs->file) {
- return bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
- }
- error_setg(errp, "Block format '%s' used by device '%s' "
- "does not support internal snapshot deletion",
- drv->format_name, bdrv_get_device_name(bs));
- return -ENOTSUP;
+
+ bdrv_drained_end(bs);
+ return ret;
}
int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
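
The bdrv_snapshot_delete() hunk replaces bdrv_drain() plus several early returns with a bdrv_drained_begin()/bdrv_drained_end() pair around a single exit path, so the drained section is always closed whichever branch runs. A generic sketch of that single-exit pairing pattern is shown below; begin(), end() and do_delete() are placeholders, not QEMU functions.

/* Standalone sketch of the refactoring pattern: every begin() is matched by
 * exactly one end() because all outcomes funnel through one exit path. */
#include <stdio.h>

static int depth;   /* stands in for the "drained" state */

static void begin(void) { depth++; }
static void end(void)   { depth--; }

static int do_delete(int supported)
{
    return supported ? 0 : -95;   /* an ENOTSUP-style error code */
}

static int delete_snapshot(int driver_supported)
{
    int ret;

    begin();                       /* quiesce I/O */
    ret = do_delete(driver_supported);
    end();                         /* always reached, even on error */
    return ret;
}

int main(void)
{
    printf("ok: %d, unsupported: %d, depth: %d\n",
           delete_snapshot(1), delete_snapshot(0), depth);
    return 0;
}
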
diff --git a/block/ssh.c b/block/ssh.c
index af025c08a0..06928ed939 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -22,14 +22,13 @@
* THE SOFTWARE.
*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
+#include "qemu/osdep.h"
#include <libssh2.h>
#include <libssh2_sftp.h>
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "qemu/uri.h"
diff --git a/block/stream.c b/block/stream.c
index 25af7eff62..332b9a183e 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -11,9 +11,11 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "sysemu/block-backend.h"
@@ -88,21 +90,21 @@ static void coroutine_fn stream_run(void *opaque)
StreamCompleteData *data;
BlockDriverState *bs = s->common.bs;
BlockDriverState *base = s->base;
- int64_t sector_num, end;
+ int64_t sector_num = 0;
+ int64_t end = -1;
int error = 0;
int ret = 0;
int n = 0;
void *buf;
if (!bs->backing) {
- block_job_completed(&s->common, 0);
- return;
+ goto out;
}
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
- block_job_completed(&s->common, s->common.len);
- return;
+ ret = s->common.len;
+ goto out;
}
end = s->common.len >> BDRV_SECTOR_BITS;
@@ -189,6 +191,7 @@ wait:
qemu_vfree(buf);
+out:
/* Modify backing chain and close BDSes in main loop */
data = g_malloc(sizeof(*data));
data->ret = ret;
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 13b5baa5d7..4920e09495 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -22,6 +22,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "qemu/osdep.h"
#include "block/throttle-groups.h"
#include "qemu/queue.h"
#include "qemu/thread.h"
diff --git a/block/vdi.c b/block/vdi.c
index 17f435fad6..75d4819edb 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -49,11 +49,14 @@
* so this seems to be reasonable.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -526,7 +529,7 @@ static int vdi_reopen_prepare(BDRVReopenState *state,
}
static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
@@ -550,6 +553,7 @@ static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
sector_in_block * SECTOR_SIZE;
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
}
@@ -731,7 +735,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
size_t bmap_size;
int64_t offset = 0;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
uint32_t *bmap = NULL;
logout("\n");
@@ -764,13 +768,17 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
error_propagate(errp, local_err);
goto exit;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* We need enough blocks to store the given disk size,
so always round up. */
blocks = DIV_ROUND_UP(bytes, block_size);
@@ -800,7 +808,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+ ret = blk_pwrite(blk, offset, &header, sizeof(header));
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -821,7 +829,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
bmap[i] = VDI_UNALLOCATED;
}
}
- ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+ ret = blk_pwrite(blk, offset, bmap, bmap_size);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -830,7 +838,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
if (image_type == VDI_TYPE_STATIC) {
- ret = bdrv_truncate(bs, offset + blocks * block_size);
+ ret = blk_truncate(blk, offset + blocks * block_size);
if (ret < 0) {
error_setg(errp, "Failed to statically allocate %s", filename);
goto exit;
@@ -838,7 +846,7 @@ static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
}
exit:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(bmap);
return ret;
}
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
index 0640d3f4a9..da33cd38ef 100644
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -15,6 +15,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/vhdx.h"
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 47ae4b1351..7ea7187fc4 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -17,6 +17,8 @@
* See the COPYING.LIB file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/error-report.h"
@@ -784,12 +786,13 @@ int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
if (logs.valid) {
if (bs->read_only) {
ret = -EPERM;
- error_setg_errno(errp, EPERM,
- "VHDX image file '%s' opened read-only, but "
- "contains a log that needs to be replayed. To "
- "replay the log, execute:\n qemu-img check -r "
- "all '%s'",
- bs->filename, bs->filename);
+ error_setg(errp,
+ "VHDX image file '%s' opened read-only, but "
+ "contains a log that needs to be replayed",
+ bs->filename);
+ error_append_hint(errp, "To replay the log, run:\n"
+ "qemu-img check -r all '%s'\n",
+ bs->filename);
goto exit;
}
/* now flush the log */
diff --git a/block/vhdx.c b/block/vhdx.c
index 2fe9a5e0cf..2b7b332404 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -15,8 +15,11 @@
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
#include "block/vhdx.h"
@@ -263,10 +266,10 @@ static void vhdx_region_unregister_all(BDRVVHDXState *s)
static void vhdx_set_shift_bits(BDRVVHDXState *s)
{
- s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
- s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
- s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
- s->block_size_bits = 31 - clz32(s->block_size);
+ s->logical_sector_size_bits = ctz32(s->logical_sector_size);
+ s->sectors_per_block_bits = ctz32(s->sectors_per_block);
+ s->chunk_ratio_bits = ctz64(s->chunk_ratio);
+ s->block_size_bits = ctz32(s->block_size);
}
/*
@@ -856,14 +859,8 @@ static void vhdx_calc_bat_entries(BDRVVHDXState *s)
{
uint32_t data_blocks_cnt, bitmap_blocks_cnt;
- data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
- if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
- data_blocks_cnt++;
- }
- bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
- if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
- bitmap_blocks_cnt++;
- }
+ data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size);
+ bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio);
if (s->parent_entries) {
s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
@@ -1777,7 +1774,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
gunichar2 *creator = NULL;
glong creator_items;
- BlockDriverState *bs;
+ BlockBackend *blk;
char *type = NULL;
VHDXImageType image_type;
Error *local_err = NULL;
@@ -1842,14 +1839,16 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Create (A) */
/* The creator field is optional, but may be useful for
@@ -1857,13 +1856,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
- creator, creator_items * sizeof(gunichar2));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+ creator, creator_items * sizeof(gunichar2));
if (ret < 0) {
goto delete_and_exit;
}
@@ -1871,13 +1870,13 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
/* Creates (B),(C) */
- ret = vhdx_create_new_headers(bs, image_size, log_size);
+ ret = vhdx_create_new_headers(blk_bs(blk), image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
- ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_region_table(blk_bs(blk), image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1885,7 +1884,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
}
/* Creates (H) */
- ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_metadata(blk_bs(blk), image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;
@@ -1893,7 +1892,7 @@ static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
delete_and_exit:
- bdrv_unref(bs);
+ blk_unref(blk);
exit:
g_free(type);
g_free(creator);
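
The vhdx_calc_bat_entries() hunk replaces the shift-and-remainder round-up with DIV_ROUND_UP. The sketch below checks that both forms give the same block count when the block size is a power of two; DIV_ROUND_UP is redefined locally so the example compiles on its own, and the sizes chosen are arbitrary.

/* Standalone sketch: the removed shift-based round-up and the ceiling
 * division that replaced it agree for power-of-two block sizes. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
    uint64_t virtual_disk_size = 10ULL * 1024 * 1024 + 1;  /* not block aligned */
    uint32_t block_size = 1U << 20;                        /* 1 MiB blocks */
    uint32_t block_size_bits = 20;

    /* old form: shift down, then add one if anything was left over */
    uint64_t old_cnt = virtual_disk_size >> block_size_bits;
    if (virtual_disk_size - (old_cnt << block_size_bits)) {
        old_cnt++;
    }

    /* new form: one ceiling division */
    uint64_t new_cnt = DIV_ROUND_UP(virtual_disk_size, block_size);

    assert(old_cnt == new_cnt);
    printf("data blocks: %llu\n", (unsigned long long)new_cnt);
    return 0;
}
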
diff --git a/block/vmdk.c b/block/vmdk.c
index 6f819e413f..45f9d3c5b9 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -23,12 +23,15 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qapi/qmp/qerror.h"
#include "qemu/error-report.h"
#include "qemu/module.h"
#include "migration/migration.h"
+#include "qemu/cutils.h"
#include <zlib.h>
#include <glib.h>
@@ -241,15 +244,17 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
- char desc[DESC_SIZE];
+ char *desc;
uint32_t cid = 0xffffffff;
const char *p_name, *cid_str;
size_t cid_str_size;
BDRVVmdkState *s = bs->opaque;
int ret;
+ desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
+ g_free(desc);
return 0;
}
@@ -268,41 +273,45 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
sscanf(p_name, "%" SCNx32, &cid);
}
+ g_free(desc);
return cid;
}
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
- char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
+ char *desc, *tmp_desc;
char *p_name, *tmp_str;
BDRVVmdkState *s = bs->opaque;
- int ret;
+ int ret = 0;
+ desc = g_malloc0(DESC_SIZE);
+ tmp_desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
desc[DESC_SIZE - 1] = '\0';
tmp_str = strstr(desc, "parentCID");
if (tmp_str == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
- pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+ pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
p_name = strstr(desc, "CID");
if (p_name != NULL) {
p_name += sizeof("CID");
- snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
- pstrcat(desc, sizeof(desc), tmp_desc);
+ snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+ pstrcat(desc, DESC_SIZE, tmp_desc);
}
ret = bdrv_pwrite_sync(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
- if (ret < 0) {
- return ret;
- }
- return 0;
+out:
+ g_free(desc);
+ g_free(tmp_desc);
+ return ret;
}
static int vmdk_is_cid_valid(BlockDriverState *bs)
@@ -336,15 +345,16 @@ static int vmdk_reopen_prepare(BDRVReopenState *state,
static int vmdk_parent_open(BlockDriverState *bs)
{
char *p_name;
- char desc[DESC_SIZE + 1];
+ char *desc;
BDRVVmdkState *s = bs->opaque;
int ret;
- desc[DESC_SIZE] = '\0';
+ desc = g_malloc0(DESC_SIZE + 1);
ret = bdrv_pread(bs->file->bs, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
+ ret = 0;
p_name = strstr(desc, "parentFileNameHint");
if (p_name != NULL) {
@@ -353,16 +363,20 @@ static int vmdk_parent_open(BlockDriverState *bs)
p_name += sizeof("parentFileNameHint") + 1;
end_name = strchr(p_name, '\"');
if (end_name == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
}
- return 0;
+out:
+ g_free(desc);
+ return ret;
}
/* Create and append extent to the extent array. Return the added VmdkExtent
@@ -570,6 +584,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
int64_t l1_backup_offset = 0;
+ bool compressed;
ret = bdrv_pread(file->bs, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
@@ -644,14 +659,14 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
header = footer.header;
}
+ compressed =
+ le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
if (le32_to_cpu(header.version) > 3) {
- char buf[64];
- snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
- le32_to_cpu(header.version));
- error_setg(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_or_node_name(bs), "vmdk", buf);
+ error_setg(errp, "Unsupported VMDK version %" PRIu32,
+ le32_to_cpu(header.version));
return -ENOTSUP;
- } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
+ } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
+ !compressed) {
/* VMware KB 2064959 explains that version 3 added support for
* persistent changed block tracking (CBT), and backup software can
* read it as version=1 if it doesn't care about the changed area
@@ -760,6 +775,17 @@ static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
}
}
+static const char *next_line(const char *s)
+{
+ while (*s) {
+ if (*s == '\n') {
+ return s + 1;
+ }
+ s++;
+ }
+ return s;
+}
+
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
const char *desc_file_path, QDict *options,
Error **errp)
@@ -769,7 +795,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
char access[11];
char type[11];
char fname[512];
- const char *p = desc;
+ const char *p, *np;
int64_t sectors = 0;
int64_t flat_offset;
char *extent_path;
@@ -779,7 +805,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
char extent_opt_prefix[32];
Error *local_err = NULL;
- while (*p) {
+ for (p = desc; *p; p = next_line(p)) {
/* parse extent line in one of below formats:
*
* RW [size in sectors] FLAT "file-name.vmdk" OFFSET
@@ -791,29 +817,26 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
access, &sectors, type, fname, &flat_offset);
if (matches < 4 || strcmp(access, "RW")) {
- goto next_line;
+ continue;
} else if (!strcmp(type, "FLAT")) {
if (matches != 5 || flat_offset < 0) {
- error_setg(errp, "Invalid extent lines: \n%s", p);
- return -EINVAL;
+ goto invalid;
}
} else if (!strcmp(type, "VMFS")) {
if (matches == 4) {
flat_offset = 0;
} else {
- error_setg(errp, "Invalid extent lines:\n%s", p);
- return -EINVAL;
+ goto invalid;
}
} else if (matches != 4) {
- error_setg(errp, "Invalid extent lines:\n%s", p);
- return -EINVAL;
+ goto invalid;
}
if (sectors <= 0 ||
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
(strcmp(access, "RW"))) {
- goto next_line;
+ continue;
}
if (!path_is_absolute(fname) && !path_has_protocol(fname) &&
@@ -870,17 +893,17 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
return -ENOTSUP;
}
extent->type = g_strdup(type);
-next_line:
- /* move to next line */
- while (*p) {
- if (*p == '\n') {
- p++;
- break;
- }
- p++;
- }
}
return 0;
+
+invalid:
+ np = next_line(p);
+ assert(np != p);
+ if (np[-1] == '\n') {
+ np--;
+ }
+ error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
+ return -EINVAL;
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
@@ -1248,7 +1271,7 @@ static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
}
static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
@@ -1265,6 +1288,7 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
0, 0);
qemu_co_mutex_unlock(&s->lock);
+ index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
switch (ret) {
case VMDK_ERROR:
ret = -EIO;
@@ -1277,14 +1301,15 @@ static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
break;
case VMDK_OK:
ret = BDRV_BLOCK_DATA;
- if (extent->file == bs->file && !extent->compressed) {
- ret |= BDRV_BLOCK_OFFSET_VALID | offset;
+ if (!extent->compressed) {
+ ret |= BDRV_BLOCK_OFFSET_VALID;
+ ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS))
+ & BDRV_BLOCK_OFFSET_MASK;
}
-
+ *file = extent->file->bs;
break;
}
- index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
n = nb_sectors;
@@ -1494,8 +1519,8 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
if (sector_num > bs->total_sectors) {
error_report("Wrong offset: sector_num=0x%" PRIx64
- " total_sectors=0x%" PRIx64 "\n",
- sector_num, bs->total_sectors);
+ " total_sectors=0x%" PRIx64,
+ sector_num, bs->total_sectors);
return -EIO;
}
@@ -1624,7 +1649,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
QemuOpts *opts, Error **errp)
{
int ret, i;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
VMDK4Header header;
Error *local_err = NULL;
uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1637,16 +1662,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
goto exit;
}
- assert(bs == NULL);
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
if (flat) {
- ret = bdrv_truncate(bs, filesize);
+ ret = blk_truncate(blk, filesize);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
@@ -1654,7 +1681,13 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
}
magic = cpu_to_be32(VMDK4_MAGIC);
memset(&header, 0, sizeof(header));
- header.version = zeroed_grain ? 2 : 1;
+ if (compress) {
+ header.version = 3;
+ } else if (zeroed_grain) {
+ header.version = 2;
+ } else {
+ header.version = 1;
+ }
header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
@@ -1695,18 +1728,18 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
header.check_bytes[3] = 0xa;
/* write all the data */
- ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+ ret = blk_pwrite(blk, 0, &magic, sizeof(magic));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+ ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
+ ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
goto exit;
@@ -1719,8 +1752,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1731,8 +1764,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size);
if (ret < 0) {
error_setg(errp, QERR_IO_ERROR);
goto exit;
@@ -1740,8 +1773,8 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
ret = 0;
exit:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(gd_buf);
return ret;
@@ -1790,7 +1823,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
{
int idx = 0;
- BlockDriverState *new_bs = NULL;
+ BlockBackend *new_blk = NULL;
Error *local_err = NULL;
char *desc = NULL;
int64_t total_size = 0, filesize;
@@ -1901,7 +1934,7 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
if (backing_file) {
- BlockDriverState *bs = NULL;
+ BlockBackend *blk;
char *full_backing = g_new0(char, PATH_MAX);
bdrv_get_full_backing_filename_from_filename(filename, backing_file,
full_backing, PATH_MAX,
@@ -1912,18 +1945,21 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
ret = -ENOENT;
goto exit;
}
- ret = bdrv_open(&bs, full_backing, NULL, NULL, BDRV_O_NO_BACKING, errp);
+
+ blk = blk_new_open(full_backing, NULL, NULL,
+ BDRV_O_NO_BACKING, errp);
g_free(full_backing);
- if (ret != 0) {
+ if (blk == NULL) {
+ ret = -EIO;
goto exit;
}
- if (strcmp(bs->drv->format_name, "vmdk")) {
- bdrv_unref(bs);
+ if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
+ blk_unref(blk);
ret = -EINVAL;
goto exit;
}
- parent_cid = vmdk_read_cid(bs, 0);
- bdrv_unref(bs);
+ parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+ blk_unref(blk);
snprintf(parent_desc_line, BUF_SIZE,
"parentFileNameHint=\"%s\"", backing_file);
}
@@ -1981,14 +2017,18 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
goto exit;
}
}
- assert(new_bs == NULL);
- ret = bdrv_open(&new_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
- if (ret < 0) {
+
+ new_blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (new_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
- ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+
+ blk_set_allow_write_beyond_eof(new_blk, true);
+
+ ret = blk_pwrite(new_blk, desc_offset, desc, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -1996,14 +2036,14 @@ static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
* for description file */
if (desc_offset == 0) {
- ret = bdrv_truncate(new_bs, desc_len);
+ ret = blk_truncate(new_blk, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
}
exit:
- if (new_bs) {
- bdrv_unref(new_bs);
+ if (new_blk) {
+ blk_unref(new_blk);
}
g_free(adapter_type);
g_free(backing_file);
@@ -2162,18 +2202,18 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
*spec_info = (ImageInfoSpecific){
.type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
- {
- .vmdk = g_new0(ImageInfoSpecificVmdk, 1),
+ .u = {
+ .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
},
};
- *spec_info->u.vmdk = (ImageInfoSpecificVmdk) {
+ *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
.create_type = g_strdup(s->create_type),
.cid = s->cid,
.parent_cid = s->parent_cid,
};
- next = &spec_info->u.vmdk->extents;
+ next = &spec_info->u.vmdk.data->extents;
for (i = 0; i < s->num_extents; i++) {
*next = g_new0(ImageInfoList, 1);
(*next)->value = vmdk_get_extent_info(&s->extents[i]);
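
The vmdk.c hunks above introduce a next_line() helper and rewrite the extent-descriptor loop around it, so each descriptor line is parsed with one sscanf and invalid lines are reported without the trailing newline. The sketch below walks a small descriptor buffer the same way; it is illustrative only, not the VMDK driver, and the file names in the sample descriptor are made up.

/* Standalone sketch: walk a descriptor buffer line by line with next_line()
 * and parse extent lines of the form:  RW <sectors> <TYPE> "<file>" [offset] */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>

static const char *next_line(const char *s)
{
    while (*s) {
        if (*s == '\n') {
            return s + 1;
        }
        s++;
    }
    return s;
}

int main(void)
{
    const char *desc =
        "# Extent description\n"
        "RW 4192256 SPARSE \"disk-s001.vmdk\"\n"
        "RW 2048 FLAT \"disk-f001.vmdk\" 0\n";

    for (const char *p = desc; *p; p = next_line(p)) {
        char access[11], type[11], fname[512];
        int64_t sectors = 0, flat_offset = -1;

        int matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
                             access, &sectors, type, fname, &flat_offset);
        if (matches < 4 || strcmp(access, "RW")) {
            continue;   /* comments and non-RW lines are skipped */
        }
        printf("extent: %s, %" PRId64 " sectors, type %s\n", fname, sectors, type);
    }
    return 0;
}
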
diff --git a/block/vpc.c b/block/vpc.c
index 299d373092..3e2ea698d9 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -22,8 +22,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
#if defined(CONFIG_UUID)
@@ -42,28 +45,34 @@ enum vhd_type {
VHD_DIFFERENCING = 4,
};
-// Seconds since Jan 1, 2000 0:00:00 (UTC)
+/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
#define VHD_TIMESTAMP_BASE 946684800
-#define VHD_MAX_SECTORS (65535LL * 255 * 255)
-#define VHD_MAX_GEOMETRY (65535LL * 16 * 255)
+#define VHD_CHS_MAX_C 65535LL
+#define VHD_CHS_MAX_H 16
+#define VHD_CHS_MAX_S 255
-// always big-endian
+#define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */
+#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
+
+#define VPC_OPT_FORCE_SIZE "force_size"
+
+/* always big-endian */
typedef struct vhd_footer {
- char creator[8]; // "conectix"
+ char creator[8]; /* "conectix" */
uint32_t features;
uint32_t version;
- // Offset of next header structure, 0xFFFFFFFF if none
+ /* Offset of next header structure, 0xFFFFFFFF if none */
uint64_t data_offset;
- // Seconds since Jan 1, 2000 0:00:00 (UTC)
+ /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
uint32_t timestamp;
- char creator_app[4]; // "vpc "
+ char creator_app[4]; /* e.g., "vpc " */
uint16_t major;
uint16_t minor;
- char creator_os[4]; // "Wi2k"
+ char creator_os[4]; /* "Wi2k" */
uint64_t orig_size;
uint64_t current_size;
@@ -74,29 +83,29 @@ typedef struct vhd_footer {
uint32_t type;
- // Checksum of the Hard Disk Footer ("one's complement of the sum of all
- // the bytes in the footer without the checksum field")
+ /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
+ the bytes in the footer without the checksum field") */
uint32_t checksum;
- // UUID used to identify a parent hard disk (backing file)
+ /* UUID used to identify a parent hard disk (backing file) */
uint8_t uuid[16];
uint8_t in_saved_state;
} QEMU_PACKED VHDFooter;
typedef struct vhd_dyndisk_header {
- char magic[8]; // "cxsparse"
+ char magic[8]; /* "cxsparse" */
- // Offset of next header structure, 0xFFFFFFFF if none
+ /* Offset of next header structure, 0xFFFFFFFF if none */
uint64_t data_offset;
- // Offset of the Block Allocation Table (BAT)
+ /* Offset of the Block Allocation Table (BAT) */
uint64_t table_offset;
uint32_t version;
- uint32_t max_table_entries; // 32bit/entry
+ uint32_t max_table_entries; /* 32bit/entry */
- // 2 MB by default, must be a power of two
+ /* 2 MB by default, must be a power of two */
uint32_t block_size;
uint32_t checksum;
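
Editor's aside: the checksum rule quoted in the footer comment above is simple enough to restate as a standalone sketch. The helper below is hypothetical and only illustrates what vpc_checksum() plus the zero-the-field-first convention in vpc_open() implement; it is not part of the patch.

#include <stddef.h>
#include <stdint.h>

/* Illustration only: one's complement of the sum of all footer bytes,
 * with the 4-byte checksum field treated as zero. */
static uint32_t vhd_footer_checksum(const uint8_t *footer, size_t size,
                                    size_t checksum_offset)
{
    uint32_t sum = 0;
    size_t i;

    for (i = 0; i < size; i++) {
        if (i >= checksum_offset && i < checksum_offset + 4) {
            continue;   /* skip the checksum field itself */
        }
        sum += footer[i];
    }
    return ~sum;
}

vpc_open() gets the same effect by zeroing footer->checksum before calling vpc_checksum() and writing the saved value back afterwards (see the "Write 'checksum' back to footer" hunk further down).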
@@ -104,7 +113,7 @@ typedef struct vhd_dyndisk_header {
uint32_t parent_timestamp;
uint32_t reserved;
- // Backing file name (in UTF-16)
+ /* Backing file name (in UTF-16) */
uint8_t parent_name[512];
struct {
@@ -127,6 +136,8 @@ typedef struct BDRVVPCState {
uint32_t block_size;
uint32_t bitmap_size;
+ bool force_use_chs;
+ bool force_use_sz;
#ifdef CACHE
uint8_t *pageentry_u8;
@@ -139,6 +150,22 @@ typedef struct BDRVVPCState {
Error *migration_blocker;
} BDRVVPCState;
+#define VPC_OPT_SIZE_CALC "force_size_calc"
+static QemuOptsList vpc_runtime_opts = {
+ .name = "vpc-runtime-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
+ .desc = {
+ {
+ .name = VPC_OPT_SIZE_CALC,
+ .type = QEMU_OPT_STRING,
+ .help = "Force disk size calculation to use either CHS geometry, "
+ "or use the disk current_size specified in the VHD footer. "
+ "{chs, current_size}"
+ },
+ { /* end of list */ }
+ }
+};
+
static uint32_t vpc_checksum(uint8_t* buf, size_t size)
{
uint32_t res = 0;
@@ -158,6 +185,25 @@ static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
return 0;
}
+static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
+ Error **errp)
+{
+ BDRVVPCState *s = bs->opaque;
+ const char *size_calc;
+
+ size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
+
+ if (!size_calc) {
+ /* no override, use autodetect only */
+ } else if (!strcmp(size_calc, "current_size")) {
+ s->force_use_sz = true;
+ } else if (!strcmp(size_calc, "chs")) {
+ s->force_use_chs = true;
+ } else {
+ error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
+ }
+}
+
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
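
Editor's note on usage: the force_size_calc runtime option defined above should be reachable through the usual per-driver option plumbing, presumably along the lines of -drive file=disk.vhd,format=vpc,force_size_calc=current_size (or force_size_calc=chs). The exact command line is not part of this patch and is shown only as an illustration.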
@@ -165,6 +211,9 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ bool use_chs;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
@@ -172,8 +221,24 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int disk_type = VHD_DYNAMIC;
int ret;
+ opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ vpc_parse_options(bs, opts, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
+ error_setg(errp, "Unable to read VHD header");
goto fail;
}
@@ -182,9 +247,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
int64_t offset = bdrv_getlength(bs->file->bs);
if (offset < 0) {
ret = offset;
+ error_setg(errp, "Invalid file size");
goto fail;
} else if (offset < HEADER_SIZE) {
ret = -EINVAL;
+ error_setg(errp, "File too small for a VHD header");
goto fail;
}
@@ -211,22 +278,50 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
/* Write 'checksum' back to footer, or else it will be left as zero. */
footer->checksum = cpu_to_be32(checksum);
- // The visible size of a image in Virtual PC depends on the geometry
- // rather than on the size stored in the footer (the size in the footer
- // is too large usually)
+ /* The visible size of an image in Virtual PC depends on the geometry
+ rather than on the size stored in the footer (the size in the footer
+ is usually too large) */
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
- /* Images that have exactly the maximum geometry are probably bigger and
- * would be truncated if we adhered to the geometry for them. Rely on
- * footer->current_size for them. */
- if (bs->total_sectors == VHD_MAX_GEOMETRY) {
+ /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
+ * VHD image sizes differently. VPC will rely on CHS geometry,
+ * while Hyper-V and disk2vhd use the size specified in the footer.
+ *
+ * We use a couple of approaches to try to determine the correct method:
+ * inspect the Creator App field, and check whether the CHS geometry is
+ * the maximum value.
+ *
+ * If the CHS geometry is the maximum CHS geometry, then we assume that
+ * the size is the footer->current_size to avoid truncation. Otherwise,
+ * we follow the table based on footer->creator_app:
+ *
+ * Known creator apps:
+ *   'vpc '  : CHS           Virtual PC (uses disk geometry)
+ *   'qemu'  : CHS           QEMU (uses disk geometry)
+ *   'qem2'  : current_size  QEMU (uses current_size)
+ *   'win '  : current_size  Hyper-V
+ *   'd2v '  : current_size  Disk2vhd
+ *   'tap\0' : current_size  XenServer
+ *   'CTXS'  : current_size  XenConverter
+ *
+ * The user can override the table values via drive options; however,
+ * even with an override we will still use current_size for images
+ * that have CHS geometry of the maximum size.
+ */
+ use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
+ !!strncmp(footer->creator_app, "qem2", 4) &&
+ !!strncmp(footer->creator_app, "d2v ", 4) &&
+ !!strncmp(footer->creator_app, "CTXS", 4) &&
+ !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
+
+ if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
bs->total_sectors = be64_to_cpu(footer->current_size) /
- BDRV_SECTOR_SIZE;
+ BDRV_SECTOR_SIZE;
}
- /* Allow a maximum disk size of approximately 2 TB */
- if (bs->total_sectors >= VHD_MAX_SECTORS) {
+ /* Allow a maximum disk size of 2040 GiB */
+ if (bs->total_sectors > VHD_MAX_SECTORS) {
ret = -EFBIG;
goto fail;
}
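
To make the thresholds in the hunk above concrete, here is a small editor's sketch (not part of the patch) that simply prints the two limits the new constants encode:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const int64_t chs_max  = 65535LL * 16 * 255;  /* VHD_MAX_GEOMETRY */
    const int64_t size_max = 0xff000000LL;        /* VHD_MAX_SECTORS */

    /* Largest size a conformant CHS geometry can express: ~127.5 GiB. */
    printf("CHS limit : %" PRId64 " sectors = %.1f GiB\n",
           chs_max, chs_max * 512.0 / (1 << 30));

    /* The driver's hard cap on image size: exactly 2040 GiB. */
    printf("Size limit: %" PRId64 " sectors = %.1f GiB\n",
           size_max, size_max * 512.0 / (1 << 30));
    return 0;
}

Anything between roughly 127 GiB and 2040 GiB cannot be expressed by a conformant CHS geometry at all, which is why images at the maximum geometry always fall back to footer->current_size regardless of creator app.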
@@ -235,12 +330,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
HEADER_SIZE);
if (ret < 0) {
+ error_setg(errp, "Error reading dynamic VHD header");
goto fail;
}
dyndisk_header = (VHDDynDiskHeader *) buf;
if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
+ error_setg(errp, "Invalid header magic");
ret = -EINVAL;
goto fail;
}
@@ -256,16 +353,14 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
- ret = -EINVAL;
- goto fail;
- }
- if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
+ error_setg(errp, "Too many blocks");
ret = -EINVAL;
goto fail;
}
computed_size = (uint64_t) s->max_table_entries * s->block_size;
if (computed_size < bs->total_sectors * 512) {
+ error_setg(errp, "Page table too small");
ret = -EINVAL;
goto fail;
}
@@ -282,6 +377,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
if (s->pagetable == NULL) {
+ error_setg(errp, "Unable to allocate memory for page table");
ret = -ENOMEM;
goto fail;
}
@@ -291,6 +387,7 @@ static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
pagetable_size);
if (ret < 0) {
+ error_setg(errp, "Error reading pagetable");
goto fail;
}
@@ -369,16 +466,16 @@ static inline int64_t get_sector_offset(BlockDriverState *bs,
pageentry_index = (offset % s->block_size) / 512;
if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
- return -1; // not allocated
+ return -1; /* not allocated */
bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
- // We must ensure that we don't write to any sectors which are marked as
- // unused in the bitmap. We get away with setting all bits in the block
- // bitmap each time we write to a new block. This might cause Virtual PC to
- // miss sparse read optimization, but it's not a problem in terms of
- // correctness.
+ /* We must ensure that we don't write to any sectors which are marked as
+ unused in the bitmap. We get away with setting all bits in the block
+ bitmap each time we write to a new block. This might cause Virtual PC to
+ miss sparse read optimization, but it's not a problem in terms of
+ correctness. */
if (write && (s->last_bitmap_offset != bitmap_offset)) {
uint8_t bitmap[s->bitmap_size];
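
For readers new to the sparse VHD layout, the translation performed by get_sector_offset() above can be written as a standalone sketch (editor's illustration only; names are hypothetical, 'bat' mirrors s->pagetable):

#include <stdint.h>

static int64_t vhd_guest_to_image_offset(const uint32_t *bat,
                                         uint32_t max_table_entries,
                                         uint32_t block_size,
                                         uint32_t bitmap_size,
                                         uint64_t offset)
{
    uint64_t pagetable_index = offset / block_size;          /* BAT entry */
    uint64_t pageentry_index = (offset % block_size) / 512;  /* sector in block */

    if (pagetable_index >= max_table_entries ||
        bat[pagetable_index] == 0xffffffff) {
        return -1;  /* block not allocated */
    }

    /* Each data block is preceded by its sector bitmap. */
    return 512ULL * bat[pagetable_index] + bitmap_size +
           512 * pageentry_index;
}

The per-block sector bitmap sitting in front of each data block is the reason the comment above sets every bit before writing to a freshly touched block.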
@@ -424,18 +521,18 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
int ret;
uint8_t bitmap[s->bitmap_size];
- // Check if sector_num is valid
+ /* Check if sector_num is valid */
if ((sector_num < 0) || (sector_num > bs->total_sectors))
return -1;
- // Write entry into in-memory BAT
+ /* Write entry into in-memory BAT */
index = (sector_num * 512) / s->block_size;
if (s->pagetable[index] != 0xFFFFFFFF)
return -1;
s->pagetable[index] = s->free_data_block_offset / 512;
- // Initialize the block's bitmap
+ /* Initialize the block's bitmap */
memset(bitmap, 0xff, s->bitmap_size);
ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
s->bitmap_size);
@@ -443,13 +540,13 @@ static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
return ret;
}
- // Write new footer (the old one will be overwritten)
+ /* Write new footer (the old one will be overwritten) */
s->free_data_block_offset += s->block_size + s->bitmap_size;
ret = rewrite_footer(bs);
if (ret < 0)
goto fail;
- // Write BAT entry to disk
+ /* Write BAT entry to disk */
bat_offset = s->bat_offset + (4 * index);
bat_value = cpu_to_be32(s->pagetable[index]);
ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
@@ -578,7 +675,7 @@ static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
}
static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVVPCState *s = bs->opaque;
VHDFooter *footer = (VHDFooter*) s->footer_buf;
@@ -588,6 +685,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
if (be32_to_cpu(footer->type) == VHD_FIXED) {
*pnum = nb_sectors;
+ *file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
@@ -609,6 +707,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
/* *pnum can't be greater than one block for allocated
* sectors since there is always a bitmap in between. */
if (allocated) {
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
}
if (nb_sectors == 0) {
@@ -628,7 +727,7 @@ static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
* Note that the geometry doesn't always exactly match total_sectors but
* may round it down.
*
- * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
* the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
* and instead allow up to 255 heads.
*/
@@ -670,7 +769,7 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
@@ -680,34 +779,34 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
int ret;
int64_t offset = 0;
- // Write the footer (twice: at the beginning and at the end)
+ /* Write the footer (twice: at the beginning and at the end) */
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
- if (ret) {
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
+ if (ret < 0) {
goto fail;
}
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
if (ret < 0) {
goto fail;
}
- // Write the initial BAT
+ /* Write the initial BAT */
offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+ ret = blk_pwrite(blk, offset, buf, 512);
if (ret < 0) {
goto fail;
}
offset += 512;
}
- // Prepare the Dynamic Disk Header
+ /* Prepare the Dynamic Disk Header */
memset(buf, 0, 1024);
memcpy(dyndisk_header->magic, "cxsparse", 8);
@@ -724,10 +823,10 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
- // Write the header
+ /* Write the header */
offset = 512;
- ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+ ret = blk_pwrite(blk, offset, buf, 1024);
if (ret < 0) {
goto fail;
}
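
Summarising the writes above, a freshly created dynamic image ends up with the layout sketched below (editor's illustration; the helper and its name are hypothetical):

#include <stdint.h>

/*
 *   0x000             footer copy, 512 bytes
 *   0x200             dynamic disk header, 1024 bytes
 *   0x600             BAT: num_bat_entries * 4 bytes of 0xFF,
 *                     padded to the next 512-byte boundary
 *   0x600 + bat_len   trailing footer copy, 512 bytes
 *
 * New data blocks (sector bitmap + block_size bytes each) take over the
 * trailing footer's slot as they are allocated, and the footer is then
 * rewritten past them so it always stays at the end of the file
 * (cf. alloc_block() earlier in this diff).
 */
static inline int64_t vhd_trailing_footer_offset(int64_t num_bat_entries)
{
    return 1536 + ((num_bat_entries * 4 + 511) & ~511);
}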
@@ -736,7 +835,7 @@ static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
return ret;
}
-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_size)
{
int ret;
@@ -744,12 +843,12 @@ static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
/* Add footer to total size */
total_size += HEADER_SIZE;
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
return ret;
}
- ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
if (ret < 0) {
return ret;
}
@@ -770,8 +869,9 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
int64_t total_size;
int disk_type;
int ret = -EIO;
+ bool force_size;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -783,6 +883,7 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
} else if (!strcmp(disk_type_param, "fixed")) {
disk_type = VHD_FIXED;
} else {
+ error_setg(errp, "Invalid disk type, %s", disk_type_param);
ret = -EINVAL;
goto out;
}
@@ -790,36 +891,50 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
disk_type = VHD_DYNAMIC;
}
+ force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
+
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/*
* Calculate matching total_size and geometry. Increase the number of
* sectors requested until we get enough (or fail). This ensures that
* qemu-img convert doesn't truncate images, but rather rounds up.
*
- * If the image size can't be represented by a spec conform CHS geometry,
+ * If the image size can't be represented by a spec conformant CHS geometry,
* we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
* the image size from the VHD footer to calculate total_sectors.
*/
- total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
- for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
- calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+ if (force_size) {
+ /* This will force the use of total_size for sector count, below */
+ cyls = VHD_CHS_MAX_C;
+ heads = VHD_CHS_MAX_H;
+ secs_per_cyl = VHD_CHS_MAX_S;
+ } else {
+ total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+ for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+ calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
+ }
}
if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
total_sectors = total_size / BDRV_SECTOR_SIZE;
- /* Allow a maximum disk size of approximately 2 TB */
+ /* Allow a maximum disk size of 2040 GiB */
if (total_sectors > VHD_MAX_SECTORS) {
+ error_setg(errp, "Disk size is too large, max size is 2040 GiB");
ret = -EFBIG;
goto out;
}
@@ -832,8 +947,11 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
memset(buf, 0, 1024);
memcpy(footer->creator, "conectix", 8);
- /* TODO Check if "qemu" creator_app is ok for VPC */
- memcpy(footer->creator_app, "qemu", 4);
+ if (force_size) {
+ memcpy(footer->creator_app, "qem2", 4);
+ } else {
+ memcpy(footer->creator_app, "qemu", 4);
+ }
memcpy(footer->creator_os, "Wi2k", 4);
footer->features = cpu_to_be32(0x02);
@@ -863,13 +981,16 @@ static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
- ret = create_dynamic_disk(bs, buf, total_sectors);
+ ret = create_dynamic_disk(blk, buf, total_sectors);
} else {
- ret = create_fixed_disk(bs, buf, total_size);
+ ret = create_fixed_disk(blk, buf, total_size);
+ }
+ if (ret < 0) {
+ error_setg(errp, "Unable to create or write VHD header");
}
out:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(disk_type_param);
return ret;
}
@@ -914,6 +1035,13 @@ static QemuOptsList vpc_create_opts = {
"Type of virtual hard disk format. Supported formats are "
"{dynamic (default) | fixed} "
},
+ {
+ .name = VPC_OPT_FORCE_SIZE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Force disk size calculation to use the actual size "
+ "specified, rather than using the nearest CHS-based "
+ "calculation"
+ },
{ /* end of list */ }
}
};
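
Editor's note: at create time the matching knob is the force_size boolean just added to vpc_create_opts; something like qemu-img create -f vpc -o force_size=on disk.vhd 500G would request it (illustrative command line, not taken from the patch). Per the vpc_create() hunk above, such images get 'qem2' stamped into creator_app, so on reopen they are sized from current_size automatically.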
diff --git a/block/vvfat.c b/block/vvfat.c
index b184eca6fc..183fc4f049 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -22,15 +22,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include <sys/stat.h>
+#include "qemu/osdep.h"
#include <dirent.h>
-#include "qemu-common.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
#ifndef S_IWGRP
#define S_IWGRP 0
@@ -1108,6 +1109,8 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
memcpy(s->volume_label, label, label_length);
+ } else {
+ memcpy(s->volume_label, "QEMU VVFAT", 10);
}
if (floppy) {
@@ -2282,12 +2285,17 @@ DLOG(fprintf(stderr, "commit_direntries for %s, parent_mapping_index %d\n", mapp
factor * (old_cluster_count - new_cluster_count));
for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
+ direntry_t *first_direntry;
void* direntry = array_get(&(s->directory), current_dir_index);
int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
s->sectors_per_cluster);
if (ret)
return ret;
- assert(!strncmp(s->directory.pointer, "QEMU", 4));
+
+ /* The first directory entry on the filesystem is the volume name */
+ first_direntry = (direntry_t*) s->directory.pointer;
+ assert(!memcmp(first_direntry->name, s->volume_label, 11));
+
current_dir_index += factor;
}
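
For context on the new assert, the first 32-byte directory entry of the FAT volume vvfat builds carries the volume label in its 11-byte name field. A rough, simplified sketch of that entry (editor's illustration; field names hypothetical, layout per FAT):

#include <stdint.h>

struct fat_direntry_sketch {
    uint8_t name[11];      /* space-padded 8.3 name; holds the volume
                              label in the first entry */
    uint8_t attributes;    /* 0x08 marks a volume-label entry */
    uint8_t reserved[20];  /* timestamps, start cluster, file size, ... */
};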
@@ -2884,7 +2892,7 @@ static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
}
static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int* n)
+ int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
{
BDRVVVFATState* s = bs->opaque;
*n = s->sector_count - sector_num;
@@ -2956,8 +2964,7 @@ static int enable_write_target(BDRVVVFATState *s, Error **errp)
options = qdict_new();
qdict_put(options, "driver", qstring_from_str("qcow"));
ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, options,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- errp);
+ BDRV_O_RDWR | BDRV_O_NO_FLUSH, errp);
if (ret < 0) {
goto err;
}
diff --git a/block/win32-aio.c b/block/win32-aio.c
index bbf2f01c12..2d509a9a7b 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/timer.h"
#include "block/block_int.h"
diff --git a/block/write-threshold.c b/block/write-threshold.c
index 0fe38917c5..cc2ca71835 100644
--- a/block/write-threshold.c
+++ b/block/write-threshold.c
@@ -10,6 +10,7 @@
* See the COPYING.LIB file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
#include "qemu/coroutine.h"
#include "block/write-threshold.h"