summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorChanho Park <chanho61.park@samsung.com>2014-09-05 20:35:53 +0900
committerChanho Park <chanho61.park@samsung.com>2014-09-05 20:35:53 +0900
commit16b1353a36171ae06d63fd309f4772dbfb1da113 (patch)
treecf6c297ee81aba0d9b47f23d78a889667e7bce48 /block
parenta15119db2ff5c2fdfdeb913b297bf8aa3399132e (diff)
downloadqemu-16b1353a36171ae06d63fd309f4772dbfb1da113.tar.gz
qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.tar.bz2
qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.zip
Imported Upstream version 2.1.0upstream/2.1.0
Diffstat (limited to 'block')
-rw-r--r--block/backup.c4
-rw-r--r--block/blkdebug.c2
-rw-r--r--block/blkverify.c47
-rw-r--r--block/bochs.c23
-rw-r--r--block/cloop.c12
-rw-r--r--block/commit.c11
-rw-r--r--block/cow.c62
-rw-r--r--block/curl.c409
-rw-r--r--block/dmg.c8
-rw-r--r--block/gluster.c87
-rw-r--r--block/iscsi.c600
-rw-r--r--block/linux-aio.c123
-rw-r--r--block/mirror.c104
-rw-r--r--block/nbd-client.c24
-rw-r--r--block/nbd-client.h4
-rw-r--r--block/nbd.c89
-rw-r--r--block/nfs.c123
-rw-r--r--block/qapi.c11
-rw-r--r--block/qcow.c120
-rw-r--r--block/qcow2-cluster.c36
-rw-r--r--block/qcow2-refcount.c28
-rw-r--r--block/qcow2.c311
-rw-r--r--block/qed-table.c8
-rw-r--r--block/qed.c210
-rw-r--r--block/qed.h4
-rw-r--r--block/quorum.c174
-rw-r--r--block/raw-aio.h10
-rw-r--r--block/raw-posix.c423
-rw-r--r--block/raw-win32.c95
-rw-r--r--block/raw_bsd.c28
-rw-r--r--block/rbd.c138
-rw-r--r--block/sheepdog.c422
-rw-r--r--block/ssh.c217
-rw-r--r--block/stream.c21
-rw-r--r--block/vdi.c129
-rw-r--r--block/vhdx.c106
-rw-r--r--block/vhdx.h1
-rw-r--r--block/vmdk.c221
-rw-r--r--block/vpc.c89
-rw-r--r--block/vvfat.c53
-rw-r--r--block/win32-aio.c27
41 files changed, 2895 insertions, 1719 deletions
diff --git a/block/backup.c b/block/backup.c
index 15a2e55e8..d0b02255c 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque)
BACKUP_SECTORS_PER_CLUSTER - i, &n);
i += n;
- if (alloced == 1) {
+ if (alloced == 1 || n == 0) {
break;
}
}
@@ -325,7 +325,7 @@ static void coroutine_fn backup_run(void *opaque)
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
backup_error_action(job, error_is_read, -ret);
- if (action == BDRV_ACTION_REPORT) {
+ if (action == BLOCK_ERROR_ACTION_REPORT) {
break;
} else {
start--;
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 380c73610..f51407de3 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -471,7 +471,7 @@ static BlockDriverAIOCB *inject_error(BlockDriverState *bs,
acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
acb->ret = -error;
- bh = qemu_bh_new(error_callback_bh, acb);
+ bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
acb->bh = bh;
qemu_bh_schedule(bh);
diff --git a/block/blkverify.c b/block/blkverify.c
index e1c31171c..621b78593 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -39,12 +39,13 @@ struct BlkverifyAIOCB {
static void blkverify_aio_cancel(BlockDriverAIOCB *blockacb)
{
BlkverifyAIOCB *acb = (BlkverifyAIOCB *)blockacb;
+ AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
bool finished = false;
/* Wait until request completes, invokes its callback, and frees itself */
acb->finished = &finished;
while (!finished) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -228,7 +229,8 @@ static void blkverify_aio_cb(void *opaque, int ret)
acb->verify(acb);
}
- acb->bh = qemu_bh_new(blkverify_aio_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+ blkverify_aio_bh, acb);
qemu_bh_schedule(acb->bh);
break;
}
@@ -302,21 +304,40 @@ static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
}
+/* Propagate AioContext changes to ->test_file */
+static void blkverify_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_detach_aio_context(s->test_file);
+}
+
+static void blkverify_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVBlkverifyState *s = bs->opaque;
+
+ bdrv_attach_aio_context(s->test_file, new_context);
+}
+
static BlockDriver bdrv_blkverify = {
- .format_name = "blkverify",
- .protocol_name = "blkverify",
- .instance_size = sizeof(BDRVBlkverifyState),
+ .format_name = "blkverify",
+ .protocol_name = "blkverify",
+ .instance_size = sizeof(BDRVBlkverifyState),
+
+ .bdrv_parse_filename = blkverify_parse_filename,
+ .bdrv_file_open = blkverify_open,
+ .bdrv_close = blkverify_close,
+ .bdrv_getlength = blkverify_getlength,
- .bdrv_parse_filename = blkverify_parse_filename,
- .bdrv_file_open = blkverify_open,
- .bdrv_close = blkverify_close,
- .bdrv_getlength = blkverify_getlength,
+ .bdrv_aio_readv = blkverify_aio_readv,
+ .bdrv_aio_writev = blkverify_aio_writev,
+ .bdrv_aio_flush = blkverify_aio_flush,
- .bdrv_aio_readv = blkverify_aio_readv,
- .bdrv_aio_writev = blkverify_aio_writev,
- .bdrv_aio_flush = blkverify_aio_flush,
+ .bdrv_attach_aio_context = blkverify_attach_aio_context,
+ .bdrv_detach_aio_context = blkverify_detach_aio_context,
- .is_filter = true,
+ .is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
};
diff --git a/block/bochs.c b/block/bochs.c
index eacf956e7..eba23df33 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -187,13 +187,14 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
uint64_t offset = sector_num * 512;
uint64_t extent_index, extent_offset, bitmap_offset;
char bitmap_entry;
+ int ret;
// seek to sector
extent_index = offset / s->extent_size;
extent_offset = (offset % s->extent_size) / 512;
if (s->catalog_bitmap[extent_index] == 0xffffffff) {
- return -1; /* not allocated */
+ return 0; /* not allocated */
}
bitmap_offset = s->data_offset +
@@ -201,13 +202,14 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
(s->extent_blocks + s->bitmap_blocks));
/* read in bitmap for current extent */
- if (bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
- &bitmap_entry, 1) != 1) {
- return -1;
+ ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8),
+ &bitmap_entry, 1);
+ if (ret < 0) {
+ return ret;
}
if (!((bitmap_entry >> (extent_offset % 8)) & 1)) {
- return -1; /* not allocated */
+ return 0; /* not allocated */
}
return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
@@ -220,13 +222,16 @@ static int bochs_read(BlockDriverState *bs, int64_t sector_num,
while (nb_sectors > 0) {
int64_t block_offset = seek_to_sector(bs, sector_num);
- if (block_offset >= 0) {
+ if (block_offset < 0) {
+ return block_offset;
+ } else if (block_offset > 0) {
ret = bdrv_pread(bs->file, block_offset, buf, 512);
- if (ret != 512) {
- return -1;
+ if (ret < 0) {
+ return ret;
}
- } else
+ } else {
memset(buf, 0, 512);
+ }
nb_sectors--;
sector_num++;
buf += 512;
diff --git a/block/cloop.c b/block/cloop.c
index b6ad50fbb..845773792 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -72,7 +72,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
}
s->block_size = be32_to_cpu(s->block_size);
if (s->block_size % 512) {
- error_setg(errp, "block_size %u must be a multiple of 512",
+ error_setg(errp, "block_size %" PRIu32 " must be a multiple of 512",
s->block_size);
return -EINVAL;
}
@@ -86,7 +86,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
* need a buffer this big.
*/
if (s->block_size > MAX_BLOCK_SIZE) {
- error_setg(errp, "block_size %u must be %u MB or less",
+ error_setg(errp, "block_size %" PRIu32 " must be %u MB or less",
s->block_size,
MAX_BLOCK_SIZE / (1024 * 1024));
return -EINVAL;
@@ -101,7 +101,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
/* read offsets */
if (s->n_blocks > (UINT32_MAX - 1) / sizeof(uint64_t)) {
/* Prevent integer overflow */
- error_setg(errp, "n_blocks %u must be %zu or less",
+ error_setg(errp, "n_blocks %" PRIu32 " must be %zu or less",
s->n_blocks,
(UINT32_MAX - 1) / sizeof(uint64_t));
return -EINVAL;
@@ -133,7 +133,7 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
if (s->offsets[i] < s->offsets[i - 1]) {
error_setg(errp, "offsets not monotonically increasing at "
- "index %u, image file is corrupt", i);
+ "index %" PRIu32 ", image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
@@ -146,8 +146,8 @@ static int cloop_open(BlockDriverState *bs, QDict *options, int flags,
* ridiculous s->compressed_block allocation.
*/
if (size > 2 * MAX_BLOCK_SIZE) {
- error_setg(errp, "invalid compressed block size at index %u, "
- "image file is corrupt", i);
+ error_setg(errp, "invalid compressed block size at index %" PRIu32
+ ", image file is corrupt", i);
ret = -EINVAL;
goto fail;
}
diff --git a/block/commit.c b/block/commit.c
index acec4ac5a..91517d351 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -37,6 +37,7 @@ typedef struct CommitBlockJob {
BlockdevOnError on_error;
int base_flags;
int orig_overlay_flags;
+ char *backing_file_str;
} CommitBlockJob;
static int coroutine_fn commit_populate(BlockDriverState *bs,
@@ -141,7 +142,7 @@ wait:
if (!block_job_is_cancelled(&s->common) && sector_num == end) {
/* success */
- ret = bdrv_drop_intermediate(active, top, base);
+ ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
}
exit_free_buf:
@@ -158,7 +159,7 @@ exit_restore_reopen:
if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
}
-
+ g_free(s->backing_file_str);
block_job_completed(&s->common, ret);
}
@@ -182,7 +183,7 @@ static const BlockJobDriver commit_job_driver = {
void commit_start(BlockDriverState *bs, BlockDriverState *base,
BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp)
+ void *opaque, const char *backing_file_str, Error **errp)
{
CommitBlockJob *s;
BlockReopenQueue *reopen_queue = NULL;
@@ -194,7 +195,7 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
!bdrv_iostatus_is_enabled(bs)) {
- error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+ error_setg(errp, "Invalid parameter combination");
return;
}
@@ -244,6 +245,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base,
s->base_flags = orig_base_flags;
s->orig_overlay_flags = orig_overlay_flags;
+ s->backing_file_str = g_strdup(backing_file_str);
+
s->on_error = on_error;
s->common.co = qemu_coroutine_create(commit_run);
diff --git a/block/cow.c b/block/cow.c
index 30deb88de..6ee483327 100644
--- a/block/cow.c
+++ b/block/cow.c
@@ -82,7 +82,7 @@ static int cow_open(BlockDriverState *bs, QDict *options, int flags,
if (be32_to_cpu(cow_header.version) != COW_VERSION) {
char version[64];
snprintf(version, sizeof(version),
- "COW version %d", cow_header.version);
+ "COW version %" PRIu32, cow_header.version);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "cow", version);
ret = -ENOTSUP;
@@ -324,39 +324,31 @@ static void cow_close(BlockDriverState *bs)
{
}
-static int cow_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int cow_create(const char *filename, QemuOpts *opts, Error **errp)
{
struct cow_header_v2 cow_header;
struct stat st;
int64_t image_sectors = 0;
- const char *image_filename = NULL;
+ char *image_filename = NULL;
Error *local_err = NULL;
int ret;
- BlockDriverState *cow_bs;
+ BlockDriverState *cow_bs = NULL;
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- image_sectors = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- image_filename = options->value.s;
- }
- options++;
- }
+ image_sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+ image_filename = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
- ret = bdrv_create_file(filename, options, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return ret;
+ goto exit;
}
- cow_bs = NULL;
ret = bdrv_open(&cow_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return ret;
+ goto exit;
}
memset(&cow_header, 0, sizeof(cow_header));
@@ -389,22 +381,29 @@ static int cow_create(const char *filename, QEMUOptionParameter *options,
}
exit:
- bdrv_unref(cow_bs);
+ g_free(image_filename);
+ if (cow_bs) {
+ bdrv_unref(cow_bs);
+ }
return ret;
}
-static QEMUOptionParameter cow_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- { NULL }
+static QemuOptsList cow_create_opts = {
+ .name = "cow-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(cow_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = QEMU_OPT_STRING,
+ .help = "File name of a base image"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_cow = {
@@ -416,12 +415,13 @@ static BlockDriver bdrv_cow = {
.bdrv_close = cow_close,
.bdrv_create = cow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .supports_backing = true,
.bdrv_read = cow_co_read,
.bdrv_write = cow_co_write,
.bdrv_co_get_block_status = cow_co_get_block_status,
- .create_options = cow_create_options,
+ .create_opts = &cow_create_opts,
};
static void bdrv_cow_init(void)
diff --git a/block/curl.c b/block/curl.c
index 1b9b1f634..79ff2f1e4 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -23,6 +23,7 @@
*/
#include "qemu-common.h"
#include "block/block_int.h"
+#include "qapi/qmp/qbool.h"
#include <curl/curl.h>
// #define DEBUG
@@ -37,6 +38,21 @@
#if LIBCURL_VERSION_NUM >= 0x071000
/* The multi interface timer callback was introduced in 7.16.0 */
#define NEED_CURL_TIMER_CALLBACK
+#define HAVE_SOCKET_ACTION
+#endif
+
+#ifndef HAVE_SOCKET_ACTION
+/* If curl_multi_socket_action isn't available, define it statically here in
+ * terms of curl_multi_socket. Note that ev_bitmask will be ignored, which is
+ * less efficient but still safe. */
+static CURLMcode __curl_multi_socket_action(CURLM *multi_handle,
+ curl_socket_t sockfd,
+ int ev_bitmask,
+ int *running_handles)
+{
+ return curl_multi_socket(multi_handle, sockfd, running_handles);
+}
+#define curl_multi_socket_action __curl_multi_socket_action
#endif
#define PROTOCOLS (CURLPROTO_HTTP | CURLPROTO_HTTPS | \
@@ -46,12 +62,16 @@
#define CURL_NUM_STATES 8
#define CURL_NUM_ACB 8
#define SECTOR_SIZE 512
-#define READ_AHEAD_SIZE (256 * 1024)
+#define READ_AHEAD_DEFAULT (256 * 1024)
#define FIND_RET_NONE 0
#define FIND_RET_OK 1
#define FIND_RET_WAIT 2
+#define CURL_BLOCK_OPT_URL "url"
+#define CURL_BLOCK_OPT_READAHEAD "readahead"
+#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
+
struct BDRVCURLState;
typedef struct CURLAIOCB {
@@ -71,6 +91,7 @@ typedef struct CURLState
struct BDRVCURLState *s;
CURLAIOCB *acb[CURL_NUM_ACB];
CURL *curl;
+ curl_socket_t sock_fd;
char *orig_buf;
size_t buf_start;
size_t buf_off;
@@ -87,11 +108,14 @@ typedef struct BDRVCURLState {
CURLState states[CURL_NUM_STATES];
char *url;
size_t readahead_size;
+ bool sslverify;
bool accept_range;
+ AioContext *aio_context;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
static void curl_multi_do(void *arg);
+static void curl_multi_read(void *arg);
#ifdef NEED_CURL_TIMER_CALLBACK
static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
@@ -111,21 +135,29 @@ static int curl_timer_cb(CURLM *multi, long timeout_ms, void *opaque)
#endif
static int curl_sock_cb(CURL *curl, curl_socket_t fd, int action,
- void *s, void *sp)
+ void *userp, void *sp)
{
+ BDRVCURLState *s;
+ CURLState *state = NULL;
+ curl_easy_getinfo(curl, CURLINFO_PRIVATE, (char **)&state);
+ state->sock_fd = fd;
+ s = state->s;
+
DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
switch (action) {
case CURL_POLL_IN:
- qemu_aio_set_fd_handler(fd, curl_multi_do, NULL, s);
+ aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+ NULL, state);
break;
case CURL_POLL_OUT:
- qemu_aio_set_fd_handler(fd, NULL, curl_multi_do, s);
+ aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
break;
case CURL_POLL_INOUT:
- qemu_aio_set_fd_handler(fd, curl_multi_do, curl_multi_do, s);
+ aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
+ curl_multi_do, state);
break;
case CURL_POLL_REMOVE:
- qemu_aio_set_fd_handler(fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
break;
}
@@ -155,7 +187,7 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
DPRINTF("CURL: Just reading %zd bytes\n", realsize);
if (!s || !s->orig_buf)
- goto read_end;
+ return 0;
if (s->buf_off >= s->buf_len) {
/* buffer full, read nothing */
@@ -180,7 +212,6 @@ static size_t curl_read_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
}
}
-read_end:
return realsize;
}
@@ -215,7 +246,8 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
}
// Wait for unfinished chunks
- if ((start >= state->buf_start) &&
+ if (state->in_use &&
+ (start >= state->buf_start) &&
(start <= buf_fend) &&
(end >= state->buf_start) &&
(end <= buf_fend))
@@ -237,68 +269,69 @@ static int curl_find_buf(BDRVCURLState *s, size_t start, size_t len,
return FIND_RET_NONE;
}
-static void curl_multi_read(BDRVCURLState *s)
+static void curl_multi_check_completion(BDRVCURLState *s)
{
int msgs_in_queue;
/* Try to find done transfers, so we can free the easy
* handle again. */
- do {
+ for (;;) {
CURLMsg *msg;
msg = curl_multi_info_read(s->multi, &msgs_in_queue);
+ /* Quit when there are no more completions */
if (!msg)
break;
- if (msg->msg == CURLMSG_NONE)
- break;
- switch (msg->msg) {
- case CURLMSG_DONE:
- {
- CURLState *state = NULL;
- curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, (char**)&state);
-
- /* ACBs for successful messages get completed in curl_read_cb */
- if (msg->data.result != CURLE_OK) {
- int i;
- for (i = 0; i < CURL_NUM_ACB; i++) {
- CURLAIOCB *acb = state->acb[i];
-
- if (acb == NULL) {
- continue;
- }
-
- acb->common.cb(acb->common.opaque, -EIO);
- qemu_aio_release(acb);
- state->acb[i] = NULL;
+ if (msg->msg == CURLMSG_DONE) {
+ CURLState *state = NULL;
+ curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE,
+ (char **)&state);
+
+ /* ACBs for successful messages get completed in curl_read_cb */
+ if (msg->data.result != CURLE_OK) {
+ int i;
+ for (i = 0; i < CURL_NUM_ACB; i++) {
+ CURLAIOCB *acb = state->acb[i];
+
+ if (acb == NULL) {
+ continue;
}
- }
- curl_clean_state(state);
- break;
+ acb->common.cb(acb->common.opaque, -EIO);
+ qemu_aio_release(acb);
+ state->acb[i] = NULL;
+ }
}
- default:
- msgs_in_queue = 0;
- break;
+
+ curl_clean_state(state);
+ break;
}
- } while(msgs_in_queue);
+ }
}
static void curl_multi_do(void *arg)
{
- BDRVCURLState *s = (BDRVCURLState *)arg;
+ CURLState *s = (CURLState *)arg;
int running;
int r;
- if (!s->multi) {
+ if (!s->s->multi) {
return;
}
do {
- r = curl_multi_socket_all(s->multi, &running);
+ r = curl_multi_socket_action(s->s->multi, s->sock_fd, 0, &running);
} while(r == CURLM_CALL_MULTI_PERFORM);
- curl_multi_read(s);
+}
+
+static void curl_multi_read(void *arg)
+{
+ CURLState *s = (CURLState *)arg;
+
+ curl_multi_do(arg);
+ curl_multi_check_completion(s->s);
}
static void curl_multi_timeout_do(void *arg)
@@ -313,7 +346,7 @@ static void curl_multi_timeout_do(void *arg)
curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
- curl_multi_read(s);
+ curl_multi_check_completion(s);
#else
abort();
#endif
@@ -337,44 +370,44 @@ static CURLState *curl_init_state(BDRVCURLState *s)
break;
}
if (!state) {
- g_usleep(100);
- curl_multi_do(s);
+ aio_poll(state->s->aio_context, true);
}
} while(!state);
- if (state->curl)
- goto has_curl;
-
- state->curl = curl_easy_init();
- if (!state->curl)
- return NULL;
- curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
- curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
- curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION, (void *)curl_read_cb);
- curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
- curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
- curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
- curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
- curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
- curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
- curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
-
- /* Restrict supported protocols to avoid security issues in the more
- * obscure protocols. For example, do not allow POP3/SMTP/IMAP see
- * CVE-2013-0249.
- *
- * Restricting protocols is only supported from 7.19.4 upwards.
- */
+ if (!state->curl) {
+ state->curl = curl_easy_init();
+ if (!state->curl) {
+ return NULL;
+ }
+ curl_easy_setopt(state->curl, CURLOPT_URL, s->url);
+ curl_easy_setopt(state->curl, CURLOPT_SSL_VERIFYPEER,
+ (long) s->sslverify);
+ curl_easy_setopt(state->curl, CURLOPT_TIMEOUT, 5);
+ curl_easy_setopt(state->curl, CURLOPT_WRITEFUNCTION,
+ (void *)curl_read_cb);
+ curl_easy_setopt(state->curl, CURLOPT_WRITEDATA, (void *)state);
+ curl_easy_setopt(state->curl, CURLOPT_PRIVATE, (void *)state);
+ curl_easy_setopt(state->curl, CURLOPT_AUTOREFERER, 1);
+ curl_easy_setopt(state->curl, CURLOPT_FOLLOWLOCATION, 1);
+ curl_easy_setopt(state->curl, CURLOPT_NOSIGNAL, 1);
+ curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
+ curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
+
+ /* Restrict supported protocols to avoid security issues in the more
+ * obscure protocols. For example, do not allow POP3/SMTP/IMAP see
+ * CVE-2013-0249.
+ *
+ * Restricting protocols is only supported from 7.19.4 upwards.
+ */
#if LIBCURL_VERSION_NUM >= 0x071304
- curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
- curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
+ curl_easy_setopt(state->curl, CURLOPT_PROTOCOLS, PROTOCOLS);
+ curl_easy_setopt(state->curl, CURLOPT_REDIR_PROTOCOLS, PROTOCOLS);
#endif
#ifdef DEBUG_VERBOSE
- curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
+ curl_easy_setopt(state->curl, CURLOPT_VERBOSE, 1);
#endif
-
-has_curl:
+ }
state->s = s;
@@ -391,43 +424,50 @@ static void curl_clean_state(CURLState *s)
static void curl_parse_filename(const char *filename, QDict *options,
Error **errp)
{
+ qdict_put(options, CURL_BLOCK_OPT_URL, qstring_from_str(filename));
+}
- #define RA_OPTSTR ":readahead="
- char *file;
- char *ra;
- const char *ra_val;
- int parse_state = 0;
-
- file = g_strdup(filename);
-
- /* Parse a trailing ":readahead=#:" param, if present. */
- ra = file + strlen(file) - 1;
- while (ra >= file) {
- if (parse_state == 0) {
- if (*ra == ':') {
- parse_state++;
- } else {
- break;
- }
- } else if (parse_state == 1) {
- if (*ra > '9' || *ra < '0') {
- char *opt_start = ra - strlen(RA_OPTSTR) + 1;
- if (opt_start > file &&
- strncmp(opt_start, RA_OPTSTR, strlen(RA_OPTSTR)) == 0) {
- ra_val = ra + 1;
- ra -= strlen(RA_OPTSTR) - 1;
- *ra = '\0';
- qdict_put(options, "readahead", qstring_from_str(ra_val));
- }
- break;
- }
+static void curl_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVCURLState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < CURL_NUM_STATES; i++) {
+ if (s->states[i].in_use) {
+ curl_clean_state(&s->states[i]);
}
- ra--;
+ if (s->states[i].curl) {
+ curl_easy_cleanup(s->states[i].curl);
+ s->states[i].curl = NULL;
+ }
+ g_free(s->states[i].orig_buf);
+ s->states[i].orig_buf = NULL;
+ }
+ if (s->multi) {
+ curl_multi_cleanup(s->multi);
+ s->multi = NULL;
}
- qdict_put(options, "url", qstring_from_str(file));
+ timer_del(&s->timer);
+}
+
+static void curl_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVCURLState *s = bs->opaque;
+
+ aio_timer_init(new_context, &s->timer,
+ QEMU_CLOCK_REALTIME, SCALE_NS,
+ curl_multi_timeout_do, s);
- g_free(file);
+ assert(!s->multi);
+ s->multi = curl_multi_init();
+ s->aio_context = new_context;
+ curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
+#ifdef NEED_CURL_TIMER_CALLBACK
+ curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
+ curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
+#endif
}
static QemuOptsList runtime_opts = {
@@ -435,15 +475,20 @@ static QemuOptsList runtime_opts = {
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
- .name = "url",
+ .name = CURL_BLOCK_OPT_URL,
.type = QEMU_OPT_STRING,
.help = "URL to open",
},
{
- .name = "readahead",
+ .name = CURL_BLOCK_OPT_READAHEAD,
.type = QEMU_OPT_SIZE,
.help = "Readahead size",
},
+ {
+ .name = CURL_BLOCK_OPT_SSLVERIFY,
+ .type = QEMU_OPT_BOOL,
+ .help = "Verify SSL certificate"
+ },
{ /* end of list */ }
},
};
@@ -472,14 +517,17 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
goto out_noclean;
}
- s->readahead_size = qemu_opt_get_size(opts, "readahead", READ_AHEAD_SIZE);
+ s->readahead_size = qemu_opt_get_size(opts, CURL_BLOCK_OPT_READAHEAD,
+ READ_AHEAD_DEFAULT);
if ((s->readahead_size & 0x1ff) != 0) {
error_setg(errp, "HTTP_READAHEAD_SIZE %zd is not a multiple of 512",
s->readahead_size);
goto out_noclean;
}
- file = qemu_opt_get(opts, "url");
+ s->sslverify = qemu_opt_get_bool(opts, CURL_BLOCK_OPT_SSLVERIFY, true);
+
+ file = qemu_opt_get(opts, CURL_BLOCK_OPT_URL);
if (file == NULL) {
error_setg(errp, "curl block driver requires an 'url' option");
goto out_noclean;
@@ -491,6 +539,7 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
}
DPRINTF("CURL: Opening %s\n", file);
+ s->aio_context = bdrv_get_aio_context(bs);
s->url = g_strdup(file);
state = curl_init_state(s);
if (!state)
@@ -523,27 +572,13 @@ static int curl_open(BlockDriverState *bs, QDict *options, int flags,
curl_easy_cleanup(state->curl);
state->curl = NULL;
- aio_timer_init(bdrv_get_aio_context(bs), &s->timer,
- QEMU_CLOCK_REALTIME, SCALE_NS,
- curl_multi_timeout_do, s);
-
- // Now we know the file exists and its size, so let's
- // initialize the multi interface!
-
- s->multi = curl_multi_init();
- curl_multi_setopt(s->multi, CURLMOPT_SOCKETDATA, s);
- curl_multi_setopt(s->multi, CURLMOPT_SOCKETFUNCTION, curl_sock_cb);
-#ifdef NEED_CURL_TIMER_CALLBACK
- curl_multi_setopt(s->multi, CURLMOPT_TIMERDATA, s);
- curl_multi_setopt(s->multi, CURLMOPT_TIMERFUNCTION, curl_timer_cb);
-#endif
- curl_multi_do(s);
+ curl_attach_aio_context(bs, bdrv_get_aio_context(bs));
qemu_opts_del(opts);
return 0;
out:
- fprintf(stderr, "CURL: Error opening file: %s\n", state->errmsg);
+ error_setg(errp, "CURL: Error opening file: %s", state->errmsg);
curl_easy_cleanup(state->curl);
state->curl = NULL;
out_noclean:
@@ -566,6 +601,7 @@ static const AIOCBInfo curl_aiocb_info = {
static void curl_readv_bh_cb(void *p)
{
CURLState *state;
+ int running;
CURLAIOCB *acb = p;
BDRVCURLState *s = acb->common.bs->opaque;
@@ -600,8 +636,7 @@ static void curl_readv_bh_cb(void *p)
acb->end = (acb->nb_sectors * SECTOR_SIZE);
state->buf_off = 0;
- if (state->orig_buf)
- g_free(state->orig_buf);
+ g_free(state->orig_buf);
state->buf_start = start;
state->buf_len = acb->end + s->readahead_size;
end = MIN(start + state->buf_len, s->len) - 1;
@@ -614,8 +649,9 @@ static void curl_readv_bh_cb(void *p)
curl_easy_setopt(state->curl, CURLOPT_RANGE, state->range);
curl_multi_add_handle(s->multi, state->curl);
- curl_multi_do(s);
+ /* Tell curl it needs to kick things off */
+ curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
}
static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
@@ -630,7 +666,7 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
- acb->bh = qemu_bh_new(curl_readv_bh_cb, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), curl_readv_bh_cb, acb);
qemu_bh_schedule(acb->bh);
return &acb->common;
}
@@ -638,25 +674,9 @@ static BlockDriverAIOCB *curl_aio_readv(BlockDriverState *bs,
static void curl_close(BlockDriverState *bs)
{
BDRVCURLState *s = bs->opaque;
- int i;
DPRINTF("CURL: Close\n");
- for (i=0; i<CURL_NUM_STATES; i++) {
- if (s->states[i].in_use)
- curl_clean_state(&s->states[i]);
- if (s->states[i].curl) {
- curl_easy_cleanup(s->states[i].curl);
- s->states[i].curl = NULL;
- }
- if (s->states[i].orig_buf) {
- g_free(s->states[i].orig_buf);
- s->states[i].orig_buf = NULL;
- }
- }
- if (s->multi)
- curl_multi_cleanup(s->multi);
-
- timer_del(&s->timer);
+ curl_detach_aio_context(bs);
g_free(s->url);
}
@@ -668,68 +688,83 @@ static int64_t curl_getlength(BlockDriverState *bs)
}
static BlockDriver bdrv_http = {
- .format_name = "http",
- .protocol_name = "http",
+ .format_name = "http",
+ .protocol_name = "http",
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_aio_readv = curl_aio_readv,
+
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_https = {
- .format_name = "https",
- .protocol_name = "https",
+ .format_name = "https",
+ .protocol_name = "https",
+
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .bdrv_aio_readv = curl_aio_readv,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_ftp = {
- .format_name = "ftp",
- .protocol_name = "ftp",
+ .format_name = "ftp",
+ .protocol_name = "ftp",
+
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .bdrv_aio_readv = curl_aio_readv,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_ftps = {
- .format_name = "ftps",
- .protocol_name = "ftps",
+ .format_name = "ftps",
+ .protocol_name = "ftps",
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_aio_readv = curl_aio_readv,
+
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static BlockDriver bdrv_tftp = {
- .format_name = "tftp",
- .protocol_name = "tftp",
+ .format_name = "tftp",
+ .protocol_name = "tftp",
+
+ .instance_size = sizeof(BDRVCURLState),
+ .bdrv_parse_filename = curl_parse_filename,
+ .bdrv_file_open = curl_open,
+ .bdrv_close = curl_close,
+ .bdrv_getlength = curl_getlength,
- .instance_size = sizeof(BDRVCURLState),
- .bdrv_parse_filename = curl_parse_filename,
- .bdrv_file_open = curl_open,
- .bdrv_close = curl_close,
- .bdrv_getlength = curl_getlength,
+ .bdrv_aio_readv = curl_aio_readv,
- .bdrv_aio_readv = curl_aio_readv,
+ .bdrv_detach_aio_context = curl_detach_aio_context,
+ .bdrv_attach_aio_context = curl_attach_aio_context,
};
static void curl_block_init(void)
diff --git a/block/dmg.c b/block/dmg.c
index 856402e1f..1e153cd76 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -248,8 +248,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
offset += 8;
if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
- error_report("sector count %" PRIu64 " for chunk %u is "
- "larger than max (%u)",
+ error_report("sector count %" PRIu64 " for chunk %" PRIu32
+ " is larger than max (%u)",
s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
ret = -EINVAL;
goto fail;
@@ -269,8 +269,8 @@ static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
offset += 8;
if (s->lengths[i] > DMG_LENGTHS_MAX) {
- error_report("length %" PRIu64 " for chunk %u is larger "
- "than max (%u)",
+ error_report("length %" PRIu64 " for chunk %" PRIu32
+ " is larger than max (%u)",
s->lengths[i], i, DMG_LENGTHS_MAX);
ret = -EINVAL;
goto fail;
diff --git a/block/gluster.c b/block/gluster.c
index 883608564..9274dead7 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -16,6 +16,7 @@ typedef struct GlusterAIOCB {
int ret;
QEMUBH *bh;
Coroutine *coroutine;
+ AioContext *aio_context;
} GlusterAIOCB;
typedef struct BDRVGlusterState {
@@ -207,6 +208,11 @@ static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
"volume=%s image=%s transport=%s", gconf->server,
gconf->port, gconf->volname, gconf->image,
gconf->transport);
+
+ /* glfs_init sometimes doesn't set errno although docs suggest that */
+ if (errno == 0)
+ errno = EINVAL;
+
goto out;
}
return glfs;
@@ -244,7 +250,7 @@ static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
acb->ret = -EIO; /* Partial read/write - fail it */
}
- acb->bh = qemu_bh_new(qemu_gluster_complete_aio, acb);
+ acb->bh = aio_bh_new(acb->aio_context, qemu_gluster_complete_aio, acb);
qemu_bh_schedule(acb->bh);
}
@@ -431,6 +437,7 @@ static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -471,39 +478,37 @@ static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
#endif
static int qemu_gluster_create(const char *filename,
- QEMUOptionParameter *options, Error **errp)
+ QemuOpts *opts, Error **errp)
{
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
+ char *tmp = NULL;
GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
glfs = qemu_gluster_init(gconf, filename, errp);
if (!glfs) {
- ret = -EINVAL;
+ ret = -errno;
goto out;
}
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / BDRV_SECTOR_SIZE;
- } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
- if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = 0;
- } else if (!strcmp(options->value.s, "full") &&
- gluster_supports_zerofill()) {
- prealloc = 1;
- } else {
- error_setg(errp, "Invalid preallocation mode: '%s'"
- " or GlusterFS doesn't support zerofill API",
- options->value.s);
- ret = -EINVAL;
- goto out;
- }
- }
- options++;
+ total_size =
+ qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+
+ tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
+ if (!tmp || !strcmp(tmp, "off")) {
+ prealloc = 0;
+ } else if (!strcmp(tmp, "full") &&
+ gluster_supports_zerofill()) {
+ prealloc = 1;
+ } else {
+ error_setg(errp, "Invalid preallocation mode: '%s'"
+ " or GlusterFS doesn't support zerofill API",
+ tmp);
+ ret = -EINVAL;
+ goto out;
}
fd = glfs_creat(glfs, gconf->image,
@@ -525,6 +530,7 @@ static int qemu_gluster_create(const char *filename,
}
}
out:
+ g_free(tmp);
qemu_gluster_gconf_free(gconf);
if (glfs) {
glfs_fini(glfs);
@@ -544,6 +550,7 @@ static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
acb->size = size;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
@@ -600,6 +607,7 @@ static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -628,6 +636,7 @@ static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
acb->size = 0;
acb->ret = 0;
acb->coroutine = qemu_coroutine_self();
+ acb->aio_context = bdrv_get_aio_context(bs);
ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
if (ret < 0) {
@@ -688,18 +697,22 @@ static int qemu_gluster_has_zero_init(BlockDriverState *bs)
return 0;
}
-static QEMUOptionParameter qemu_gluster_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, full)"
- },
- { NULL }
+static QemuOptsList qemu_gluster_create_opts = {
+ .name = "qemu-gluster-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = QEMU_OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, full)"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_gluster = {
@@ -726,7 +739,7 @@ static BlockDriver bdrv_gluster = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
- .create_options = qemu_gluster_create_options,
+ .create_opts = &qemu_gluster_create_opts,
};
static BlockDriver bdrv_gluster_tcp = {
@@ -753,7 +766,7 @@ static BlockDriver bdrv_gluster_tcp = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
- .create_options = qemu_gluster_create_options,
+ .create_opts = &qemu_gluster_create_opts,
};
static BlockDriver bdrv_gluster_unix = {
@@ -780,7 +793,7 @@ static BlockDriver bdrv_gluster_unix = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
- .create_options = qemu_gluster_create_options,
+ .create_opts = &qemu_gluster_create_opts,
};
static BlockDriver bdrv_gluster_rdma = {
@@ -807,7 +820,7 @@ static BlockDriver bdrv_gluster_rdma = {
#ifdef CONFIG_GLUSTERFS_ZEROFILL
.bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
#endif
- .create_options = qemu_gluster_create_options,
+ .create_opts = &qemu_gluster_create_opts,
};
static void bdrv_gluster_init(void)
diff --git a/block/iscsi.c b/block/iscsi.c
index f425573df..a7bb6970a 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2,7 +2,7 @@
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2013 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -26,10 +26,13 @@
#include "config-host.h"
#include <poll.h>
+#include <math.h>
#include <arpa/inet.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
#include "block/block_int.h"
#include "trace.h"
#include "block/scsi.h"
@@ -47,6 +50,7 @@
typedef struct IscsiLun {
struct iscsi_context *iscsi;
+ AioContext *aio_context;
int lun;
enum scsi_inquiry_peripheral_device_type type;
int block_size;
@@ -59,6 +63,9 @@ typedef struct IscsiLun {
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
+ unsigned long *allocationmap;
+ int cluster_sectors;
+ bool use_16_for_rw;
} IscsiLun;
typedef struct IscsiTask {
@@ -69,6 +76,8 @@ typedef struct IscsiTask {
struct scsi_task *task;
Coroutine *co;
QEMUBH *bh;
+ IscsiLun *iscsilun;
+ QEMUTimer retry_timer;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -80,7 +89,6 @@ typedef struct IscsiAIOCB {
uint8_t *buf;
int status;
int canceled;
- int retries;
int64_t sector_num;
int nb_sectors;
#ifdef __linux__
@@ -90,7 +98,17 @@ typedef struct IscsiAIOCB {
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
-#define ISCSI_CMD_RETRIES 5
+#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
+static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
+
+/* this threshold is a trade-off knob to choose between
+ * the potential additional overhead of an extra GET_LBA_STATUS request
+ * vs. unnecessarily reading a lot of zero sectors over the wire.
+ * If a read request is greater or equal than ISCSI_CHECKALLOC_THRES
+ * sectors we check the allocation status of the area covered by the
+ * request first if the allocationmap indicates that the area might be
+ * unallocated. */
+#define ISCSI_CHECKALLOC_THRES 64
static void
iscsi_bh_cb(void *p)
@@ -120,17 +138,32 @@ iscsi_schedule_bh(IscsiAIOCB *acb)
if (acb->bh) {
return;
}
- acb->bh = qemu_bh_new(iscsi_bh_cb, acb);
+ acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
qemu_bh_schedule(acb->bh);
}
static void iscsi_co_generic_bh_cb(void *opaque)
{
struct IscsiTask *iTask = opaque;
+ iTask->complete = 1;
qemu_bh_delete(iTask->bh);
qemu_coroutine_enter(iTask->co, NULL);
}
+static void iscsi_retry_timer_expired(void *opaque)
+{
+ struct IscsiTask *iTask = opaque;
+ iTask->complete = 1;
+ if (iTask->co) {
+ qemu_coroutine_enter(iTask->co, NULL);
+ }
+}
+
+static inline unsigned exp_random(double mean)
+{
+ return -mean * log((double)rand() / RAND_MAX);
+}
+
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -138,26 +171,44 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
struct IscsiTask *iTask = opaque;
struct scsi_task *task = command_data;
- iTask->complete = 1;
iTask->status = status;
iTask->do_retry = 0;
iTask->task = task;
- if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION
- && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
- error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi));
- iTask->do_retry = 1;
- goto out;
- }
-
if (status != SCSI_STATUS_GOOD) {
+ if (iTask->retries++ < ISCSI_CMD_RETRIES) {
+ if (status == SCSI_STATUS_CHECK_CONDITION
+ && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+ error_report("iSCSI CheckCondition: %s",
+ iscsi_get_error(iscsi));
+ iTask->do_retry = 1;
+ goto out;
+ }
+ if (status == SCSI_STATUS_BUSY) {
+ unsigned retry_time =
+ exp_random(iscsi_retry_times[iTask->retries - 1]);
+ error_report("iSCSI Busy (retry #%u in %u ms): %s",
+ iTask->retries, retry_time,
+ iscsi_get_error(iscsi));
+ aio_timer_init(iTask->iscsilun->aio_context,
+ &iTask->retry_timer, QEMU_CLOCK_REALTIME,
+ SCALE_MS, iscsi_retry_timer_expired, iTask);
+ timer_mod(&iTask->retry_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
+ iTask->do_retry = 1;
+ return;
+ }
+ }
error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
}
out:
if (iTask->co) {
- iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask);
+ iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
+ iscsi_co_generic_bh_cb, iTask);
qemu_bh_schedule(iTask->bh);
+ } else {
+ iTask->complete = 1;
}
}
@@ -165,7 +216,7 @@ static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
{
*iTask = (struct IscsiTask) {
.co = qemu_coroutine_self(),
- .retries = ISCSI_CMD_RETRIES,
+ .iscsilun = iscsilun,
};
}
@@ -196,7 +247,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb)
iscsi_abort_task_cb, acb);
while (acb->status == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(iscsilun->aio_context, true);
}
}
@@ -219,10 +270,11 @@ iscsi_set_events(IscsiLun *iscsilun)
ev = POLLIN;
ev |= iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
- qemu_aio_set_fd_handler(iscsi_get_fd(iscsi),
- iscsi_process_read,
- (ev & POLLOUT) ? iscsi_process_write : NULL,
- iscsilun);
+ aio_set_fd_handler(iscsilun->aio_context,
+ iscsi_get_fd(iscsi),
+ iscsi_process_read,
+ (ev & POLLOUT) ? iscsi_process_write : NULL,
+ iscsilun);
}
@@ -273,6 +325,32 @@ static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
return 1;
}
+static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ if (iscsilun->allocationmap == NULL) {
+ return;
+ }
+ bitmap_set(iscsilun->allocationmap,
+ sector_num / iscsilun->cluster_sectors,
+ DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
+}
+
+static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors)
+{
+ int64_t cluster_num, nb_clusters;
+ if (iscsilun->allocationmap == NULL) {
+ return;
+ }
+ cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
+ nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
+ - cluster_num;
+ if (nb_clusters > 0) {
+ bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
+ }
+}
+
static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
@@ -281,8 +359,6 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
- uint8_t *data = NULL;
- uint8_t *buf = NULL;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
@@ -290,31 +366,24 @@ static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
-#if !defined(LIBISCSI_FEATURE_IOVECTOR)
- /* if the iovec only contains one buffer we can pass it directly */
- if (iov->niov == 1) {
- data = iov->iov[0].iov_base;
- } else {
- size_t size = MIN(nb_sectors * BDRV_SECTOR_SIZE, iov->size);
- buf = g_malloc(size);
- qemu_iovec_to_buf(iov, 0, buf, size);
- data = buf;
- }
-#endif
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
- data, num_sectors * iscsilun->block_size,
- iscsilun->block_size, 0, 0, 0, 0, 0,
- iscsi_co_generic_cb, &iTask);
+ if (iscsilun->use_16_for_rw) {
+ iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
+ NULL, num_sectors * iscsilun->block_size,
+ iscsilun->block_size, 0, 0, 0, 0, 0,
+ iscsi_co_generic_cb, &iTask);
+ } else {
+ iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
+ NULL, num_sectors * iscsilun->block_size,
+ iscsilun->block_size, 0, 0, 0, 0, 0,
+ iscsi_co_generic_cb, &iTask);
+ }
if (iTask.task == NULL) {
- g_free(buf);
return -ENOMEM;
}
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
iov->niov);
-#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
qemu_coroutine_yield();
@@ -330,15 +399,125 @@ retry:
goto retry;
}
- g_free(buf);
-
if (iTask.status != SCSI_STATUS_GOOD) {
return -EIO;
}
+ iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+
return 0;
}
+
+static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
+ int64_t sector_num, int nb_sectors)
+{
+ unsigned long size;
+ if (iscsilun->allocationmap == NULL) {
+ return true;
+ }
+ size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
+ return !(find_next_bit(iscsilun->allocationmap, size,
+ sector_num / iscsilun->cluster_sectors) == size);
+}
+
+static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ struct scsi_get_lba_status *lbas = NULL;
+ struct scsi_lba_status_descriptor *lbasd = NULL;
+ struct IscsiTask iTask;
+ int64_t ret;
+
+ iscsi_co_init_iscsitask(iscsilun, &iTask);
+
+ if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* default to all sectors allocated */
+ ret = BDRV_BLOCK_DATA;
+ ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
+ *pnum = nb_sectors;
+
+ /* LUN does not support logical block provisioning */
+ if (iscsilun->lbpme == 0) {
+ goto out;
+ }
+
+retry:
+ if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
+ sector_qemu2lun(sector_num, iscsilun),
+ 8 + 16, iscsi_co_generic_cb,
+ &iTask) == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ while (!iTask.complete) {
+ iscsi_set_events(iscsilun);
+ qemu_coroutine_yield();
+ }
+
+ if (iTask.do_retry) {
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ iTask.task = NULL;
+ }
+ iTask.complete = 0;
+ goto retry;
+ }
+
+ if (iTask.status != SCSI_STATUS_GOOD) {
+ /* in case the get_lba_status_callout fails (i.e.
+ * because the device is busy or the cmd is not
+ * supported) we pretend all blocks are allocated
+ * for backwards compatibility */
+ goto out;
+ }
+
+ lbas = scsi_datain_unmarshall(iTask.task);
+ if (lbas == NULL) {
+ ret = -EIO;
+ goto out;
+ }
+
+ lbasd = &lbas->descriptors[0];
+
+ if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
+ ret = -EIO;
+ goto out;
+ }
+
+ *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
+
+ if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
+ lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
+ ret &= ~BDRV_BLOCK_DATA;
+ if (iscsilun->lbprz) {
+ ret |= BDRV_BLOCK_ZERO;
+ }
+ }
+
+ if (ret & BDRV_BLOCK_ZERO) {
+ iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
+ } else {
+ iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
+ }
+
+ if (*pnum > nb_sectors) {
+ *pnum = nb_sectors;
+ }
+out:
+ if (iTask.task != NULL) {
+ scsi_free_scsi_task(iTask.task);
+ }
+ return ret;
+}
+
static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *iov)
@@ -347,48 +526,46 @@ static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
-#if !defined(LIBISCSI_FEATURE_IOVECTOR)
- int i;
-#endif
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
+ if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
+ !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+ int64_t ret;
+ int pnum;
+ ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
+ if (ret < 0) {
+ return ret;
+ }
+ if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
+ qemu_iovec_memset(iov, 0, 0x00, iov->size);
+ return 0;
+ }
+ }
+
lba = sector_qemu2lun(sector_num, iscsilun);
num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- switch (iscsilun->type) {
- case TYPE_DISK:
+ if (iscsilun->use_16_for_rw) {
iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
num_sectors * iscsilun->block_size,
iscsilun->block_size, 0, 0, 0, 0, 0,
iscsi_co_generic_cb, &iTask);
- break;
- default:
+ } else {
iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
num_sectors * iscsilun->block_size,
iscsilun->block_size,
-#if !defined(CONFIG_LIBISCSI_1_4) /* API change from 1.4.0 to 1.5.0 */
0, 0, 0, 0, 0,
-#endif
iscsi_co_generic_cb, &iTask);
- break;
}
if (iTask.task == NULL) {
return -ENOMEM;
}
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
-#else
- for (i = 0; i < iov->niov; i++) {
- scsi_task_add_data_in_buffer(iTask.task,
- iov->iov[i].iov_len,
- iov->iov[i].iov_base);
- }
-#endif
while (!iTask.complete) {
iscsi_set_events(iscsilun);
@@ -543,18 +720,9 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
data.data = acb->ioh->dxferp;
data.size = acb->ioh->dxfer_len;
} else {
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
scsi_task_set_iov_out(acb->task,
(struct scsi_iovec *) acb->ioh->dxferp,
acb->ioh->iovec_count);
-#else
- struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
-
- acb->buf = g_malloc(acb->ioh->dxfer_len);
- data.data = acb->buf;
- data.size = iov_to_buf(iov, acb->ioh->iovec_count, 0,
- acb->buf, acb->ioh->dxfer_len);
-#endif
}
}
@@ -574,20 +742,9 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
acb->ioh->dxfer_len,
acb->ioh->dxferp);
} else {
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
scsi_task_set_iov_in(acb->task,
(struct scsi_iovec *) acb->ioh->dxferp,
acb->ioh->iovec_count);
-#else
- int i;
- for (i = 0; i < acb->ioh->iovec_count; i++) {
- struct iovec *iov = (struct iovec *)acb->ioh->dxferp;
-
- scsi_task_add_data_in_buffer(acb->task,
- iov[i].iov_len,
- iov[i].iov_base);
- }
-#endif
}
}
@@ -596,7 +753,6 @@ static BlockDriverAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
return &acb->common;
}
-
static void ioctl_cb(void *opaque, int status)
{
int *p_status = opaque;
@@ -620,7 +776,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
while (status == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(iscsilun->aio_context, true);
}
return 0;
@@ -643,101 +799,6 @@ iscsi_getlength(BlockDriverState *bs)
return len;
}
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
-
-static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- IscsiLun *iscsilun = bs->opaque;
- struct scsi_get_lba_status *lbas = NULL;
- struct scsi_lba_status_descriptor *lbasd = NULL;
- struct IscsiTask iTask;
- int64_t ret;
-
- iscsi_co_init_iscsitask(iscsilun, &iTask);
-
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- ret = -EINVAL;
- goto out;
- }
-
- /* default to all sectors allocated */
- ret = BDRV_BLOCK_DATA;
- ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
- *pnum = nb_sectors;
-
- /* LUN does not support logical block provisioning */
- if (iscsilun->lbpme == 0) {
- goto out;
- }
-
-retry:
- if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
- sector_qemu2lun(sector_num, iscsilun),
- 8 + 16, iscsi_co_generic_cb,
- &iTask) == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- while (!iTask.complete) {
- iscsi_set_events(iscsilun);
- qemu_coroutine_yield();
- }
-
- if (iTask.do_retry) {
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- iTask.task = NULL;
- }
- iTask.complete = 0;
- goto retry;
- }
-
- if (iTask.status != SCSI_STATUS_GOOD) {
- /* in case the get_lba_status_callout fails (i.e.
- * because the device is busy or the cmd is not
- * supported) we pretend all blocks are allocated
- * for backwards compatibility */
- goto out;
- }
-
- lbas = scsi_datain_unmarshall(iTask.task);
- if (lbas == NULL) {
- ret = -EIO;
- goto out;
- }
-
- lbasd = &lbas->descriptors[0];
-
- if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
- ret = -EIO;
- goto out;
- }
-
- *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
- if (*pnum > nb_sectors) {
- *pnum = nb_sectors;
- }
-
- if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
- lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
- ret &= ~BDRV_BLOCK_DATA;
- if (iscsilun->lbprz) {
- ret |= BDRV_BLOCK_ZERO;
- }
- }
-
-out:
- if (iTask.task != NULL) {
- scsi_free_scsi_task(iTask.task);
- }
- return ret;
-}
-
-#endif /* LIBISCSI_FEATURE_IOVECTOR */
-
static int
coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
int nb_sectors)
@@ -791,11 +852,11 @@ retry:
return -EIO;
}
+ iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+
return 0;
}
-#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
-
static int
coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, BdrvRequestFlags flags)
@@ -804,18 +865,27 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
struct IscsiTask iTask;
uint64_t lba;
uint32_t nb_blocks;
+ bool use_16_for_ws = iscsilun->use_16_for_rw;
if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
- if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
- /* WRITE SAME without UNMAP is not supported by the target */
- return -ENOTSUP;
+ if (flags & BDRV_REQ_MAY_UNMAP) {
+ if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
+ /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
+ use_16_for_ws = true;
+ }
+ if (use_16_for_ws && !iscsilun->lbp.lbpws) {
+ /* WRITESAME16 with UNMAP is not supported by the target,
+ * fall back and try WRITESAME10/16 without UNMAP */
+ flags &= ~BDRV_REQ_MAY_UNMAP;
+ use_16_for_ws = iscsilun->use_16_for_rw;
+ }
}
- if ((flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->lbp.lbpws) {
- /* WRITE SAME with UNMAP is not supported by the target */
+ if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
+ /* WRITESAME without UNMAP is not supported by the target */
return -ENOTSUP;
}
@@ -828,10 +898,18 @@ coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
- if (iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
- iscsilun->zeroblock, iscsilun->block_size,
- nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
- 0, 0, iscsi_co_generic_cb, &iTask) == NULL) {
+ if (use_16_for_ws) {
+ iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iscsilun->zeroblock, iscsilun->block_size,
+ nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
+ 0, 0, iscsi_co_generic_cb, &iTask);
+ } else {
+ iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
+ iscsilun->zeroblock, iscsilun->block_size,
+ nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
+ 0, 0, iscsi_co_generic_cb, &iTask);
+ }
+ if (iTask.task == NULL) {
return -ENOMEM;
}
@@ -864,11 +942,15 @@ retry:
return -EIO;
}
+ if (flags & BDRV_REQ_MAY_UNMAP) {
+ iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+ } else {
+ iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+ }
+
return 0;
}
-#endif /* SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED */
-
static void parse_chap(struct iscsi_context *iscsi, const char *target,
Error **errp)
{
@@ -978,7 +1060,6 @@ static char *parse_initiator_name(const char *target)
return iscsi_name;
}
-#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
@@ -996,7 +1077,6 @@ static void iscsi_nop_timed_event(void *opaque)
timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
iscsi_set_events(iscsilun);
}
-#endif
static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
{
@@ -1023,6 +1103,7 @@ static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
iscsilun->num_blocks = rc16->returned_lba + 1;
iscsilun->lbpme = rc16->lbpme;
iscsilun->lbprz = rc16->lbprz;
+ iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
}
}
break;
@@ -1095,22 +1176,53 @@ static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
*inq = scsi_datain_unmarshall(task);
if (*inq == NULL) {
error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
- goto fail;
+ goto fail_with_err;
}
return task;
fail:
- if (!error_is_set(errp)) {
- error_setg(errp, "iSCSI: Inquiry command failed : %s",
- iscsi_get_error(iscsi));
- }
+ error_setg(errp, "iSCSI: Inquiry command failed : %s",
+ iscsi_get_error(iscsi));
+fail_with_err:
if (task != NULL) {
scsi_free_scsi_task(task);
}
return NULL;
}
+static void iscsi_detach_aio_context(BlockDriverState *bs)
+{
+ IscsiLun *iscsilun = bs->opaque;
+
+ aio_set_fd_handler(iscsilun->aio_context,
+ iscsi_get_fd(iscsilun->iscsi),
+ NULL, NULL, NULL);
+ iscsilun->events = 0;
+
+ if (iscsilun->nop_timer) {
+ timer_del(iscsilun->nop_timer);
+ timer_free(iscsilun->nop_timer);
+ iscsilun->nop_timer = NULL;
+ }
+}
+
+static void iscsi_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ IscsiLun *iscsilun = bs->opaque;
+
+ iscsilun->aio_context = new_context;
+ iscsi_set_events(iscsilun);
+
+ /* Set up a timer for sending out iSCSI NOPs */
+ iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
+ QEMU_CLOCK_REALTIME, SCALE_MS,
+ iscsi_nop_timed_event, iscsilun);
+ timer_mod(iscsilun->nop_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+}
+
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -1217,6 +1329,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
}
iscsilun->iscsi = iscsi;
+ iscsilun->aio_context = bdrv_get_aio_context(bs);
iscsilun->lun = iscsi_url->lun;
iscsilun->has_write_same = true;
@@ -1290,17 +1403,25 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
scsi_free_scsi_task(task);
task = NULL;
-#if defined(LIBISCSI_FEATURE_NOP_COUNTER)
- /* Set up a timer for sending out iSCSI NOPs */
- iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun);
- timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
-#endif
+ iscsi_attach_aio_context(bs, iscsilun->aio_context);
+
+ /* Guess the internal cluster (page) size of the iscsi target by the means
+ * of opt_unmap_gran. Transfer the unmap granularity only if it has a
+ * reasonable size */
+ if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
+ iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
+ iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
+ iscsilun->block_size) >> BDRV_SECTOR_BITS;
+ if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
+ iscsilun->allocationmap =
+ bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+ iscsilun->cluster_sectors));
+ }
+ }
out:
qemu_opts_del(opts);
- if (initiator_name != NULL) {
- g_free(initiator_name);
- }
+ g_free(initiator_name);
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
@@ -1322,17 +1443,14 @@ static void iscsi_close(BlockDriverState *bs)
IscsiLun *iscsilun = bs->opaque;
struct iscsi_context *iscsi = iscsilun->iscsi;
- if (iscsilun->nop_timer) {
- timer_del(iscsilun->nop_timer);
- timer_free(iscsilun->nop_timer);
- }
- qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL);
+ iscsi_detach_aio_context(bs);
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
+ g_free(iscsilun->allocationmap);
memset(iscsilun, 0, sizeof(IscsiLun));
}
-static int iscsi_refresh_limits(BlockDriverState *bs)
+static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
{
IscsiLun *iscsilun = bs->opaque;
@@ -1357,7 +1475,6 @@ static int iscsi_refresh_limits(BlockDriverState *bs)
}
bs->bl.opt_transfer_length = sector_lun2qemu(iscsilun->bl.opt_xfer_len,
iscsilun);
- return 0;
}
/* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
@@ -1389,11 +1506,17 @@ static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL;
}
+ if (iscsilun->allocationmap != NULL) {
+ g_free(iscsilun->allocationmap);
+ iscsilun->allocationmap =
+ bitmap_new(DIV_ROUND_UP(bs->total_sectors,
+ iscsilun->cluster_sectors));
+ }
+
return 0;
}
-static int iscsi_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
{
int ret = 0;
int64_t total_size = 0;
@@ -1401,16 +1524,11 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options,
IscsiLun *iscsilun = NULL;
QDict *bs_options;
- bs = bdrv_new("");
+ bs = bdrv_new("", &error_abort);
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, "size")) {
- total_size = options->value.n / BDRV_SECTOR_SIZE;
- }
- options++;
- }
-
+ total_size =
+ qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
bs->opaque = g_malloc0(sizeof(struct IscsiLun));
iscsilun = bs->opaque;
@@ -1422,10 +1540,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options,
if (ret != 0) {
goto out;
}
- if (iscsilun->nop_timer) {
- timer_del(iscsilun->nop_timer);
- timer_free(iscsilun->nop_timer);
- }
+ iscsi_detach_aio_context(bs);
if (iscsilun->type != TYPE_DISK) {
ret = -ENODEV;
goto out;
@@ -1451,23 +1566,21 @@ static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
IscsiLun *iscsilun = bs->opaque;
bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
- /* Guess the internal cluster (page) size of the iscsi target by the means
- * of opt_unmap_gran. Transfer the unmap granularity only if it has a
- * reasonable size for bdi->cluster_size */
- if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 64 * 1024 &&
- iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
- bdi->cluster_size = iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
- }
+ bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
return 0;
}
-static QEMUOptionParameter iscsi_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- { NULL }
+static QemuOptsList iscsi_create_opts = {
+ .name = "iscsi-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_iscsi = {
@@ -1479,7 +1592,7 @@ static BlockDriver bdrv_iscsi = {
.bdrv_file_open = iscsi_open,
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
- .create_options = iscsi_create_options,
+ .create_opts = &iscsi_create_opts,
.bdrv_reopen_prepare = iscsi_reopen_prepare,
.bdrv_getlength = iscsi_getlength,
@@ -1487,13 +1600,9 @@ static BlockDriver bdrv_iscsi = {
.bdrv_truncate = iscsi_truncate,
.bdrv_refresh_limits = iscsi_refresh_limits,
-#if defined(LIBISCSI_FEATURE_IOVECTOR)
.bdrv_co_get_block_status = iscsi_co_get_block_status,
-#endif
.bdrv_co_discard = iscsi_co_discard,
-#if defined(SCSI_SENSE_ASCQ_CAPACITY_DATA_HAS_CHANGED)
.bdrv_co_write_zeroes = iscsi_co_write_zeroes,
-#endif
.bdrv_co_readv = iscsi_co_readv,
.bdrv_co_writev = iscsi_co_writev,
.bdrv_co_flush_to_disk = iscsi_co_flush,
@@ -1502,6 +1611,9 @@ static BlockDriver bdrv_iscsi = {
.bdrv_ioctl = iscsi_ioctl,
.bdrv_aio_ioctl = iscsi_aio_ioctl,
#endif
+
+ .bdrv_detach_aio_context = iscsi_detach_aio_context,
+ .bdrv_attach_aio_context = iscsi_attach_aio_context,
};
static QemuOptsList qemu_iscsi_opts = {
diff --git a/block/linux-aio.c b/block/linux-aio.c
index 53434e2df..7ac7e8c99 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -25,6 +25,8 @@
*/
#define MAX_EVENTS 128
+#define MAX_QUEUED_IO 128
+
struct qemu_laiocb {
BlockDriverAIOCB common;
struct qemu_laio_state *ctx;
@@ -36,9 +38,19 @@ struct qemu_laiocb {
QLIST_ENTRY(qemu_laiocb) node;
};
+typedef struct {
+ struct iocb *iocbs[MAX_QUEUED_IO];
+ int plugged;
+ unsigned int size;
+ unsigned int idx;
+} LaioQueue;
+
struct qemu_laio_state {
io_context_t ctx;
EventNotifier e;
+
+ /* io queue for submit at batch */
+ LaioQueue io_q;
};
static inline ssize_t io_event_ret(struct io_event *ev)
@@ -135,6 +147,79 @@ static const AIOCBInfo laio_aiocb_info = {
.cancel = laio_cancel,
};
+static void ioq_init(LaioQueue *io_q)
+{
+ io_q->size = MAX_QUEUED_IO;
+ io_q->idx = 0;
+ io_q->plugged = 0;
+}
+
+static int ioq_submit(struct qemu_laio_state *s)
+{
+ int ret, i = 0;
+ int len = s->io_q.idx;
+
+ do {
+ ret = io_submit(s->ctx, len, s->io_q.iocbs);
+ } while (i++ < 3 && ret == -EAGAIN);
+
+ /* empty io queue */
+ s->io_q.idx = 0;
+
+ if (ret < 0) {
+ i = 0;
+ } else {
+ i = ret;
+ }
+
+ for (; i < len; i++) {
+ struct qemu_laiocb *laiocb =
+ container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
+
+ laiocb->ret = (ret < 0) ? ret : -EIO;
+ qemu_laio_process_completion(s, laiocb);
+ }
+ return ret;
+}
+
+static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
+{
+ unsigned int idx = s->io_q.idx;
+
+ s->io_q.iocbs[idx++] = iocb;
+ s->io_q.idx = idx;
+
+ /* submit immediately if queue is full */
+ if (idx == s->io_q.size) {
+ ioq_submit(s);
+ }
+}
+
+void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
+{
+ struct qemu_laio_state *s = aio_ctx;
+
+ s->io_q.plugged++;
+}
+
+int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
+{
+ struct qemu_laio_state *s = aio_ctx;
+ int ret = 0;
+
+ assert(s->io_q.plugged > 0 || !unplug);
+
+ if (unplug && --s->io_q.plugged > 0) {
+ return 0;
+ }
+
+ if (s->io_q.idx > 0) {
+ ret = ioq_submit(s);
+ }
+
+ return ret;
+}
+
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type)
@@ -168,8 +253,13 @@ BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
- if (io_submit(s->ctx, 1, &iocbs) < 0)
- goto out_free_aiocb;
+ if (!s->io_q.plugged) {
+ if (io_submit(s->ctx, 1, &iocbs) < 0) {
+ goto out_free_aiocb;
+ }
+ } else {
+ ioq_enqueue(s, iocbs);
+ }
return &laiocb->common;
out_free_aiocb:
@@ -177,6 +267,20 @@ out_free_aiocb:
return NULL;
}
+void laio_detach_aio_context(void *s_, AioContext *old_context)
+{
+ struct qemu_laio_state *s = s_;
+
+ aio_set_event_notifier(old_context, &s->e, NULL);
+}
+
+void laio_attach_aio_context(void *s_, AioContext *new_context)
+{
+ struct qemu_laio_state *s = s_;
+
+ aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+}
+
void *laio_init(void)
{
struct qemu_laio_state *s;
@@ -190,7 +294,7 @@ void *laio_init(void)
goto out_close_efd;
}
- qemu_aio_set_event_notifier(&s->e, qemu_laio_completion_cb);
+ ioq_init(&s->io_q);
return s;
@@ -200,3 +304,16 @@ out_free_state:
g_free(s);
return NULL;
}
+
+void laio_cleanup(void *s_)
+{
+ struct qemu_laio_state *s = s_;
+
+ event_notifier_cleanup(&s->e);
+
+ if (io_destroy(s->ctx) != 0) {
+ fprintf(stderr, "%s: destroy AIO context %p failed\n",
+ __func__, &s->ctx);
+ }
+ g_free(s);
+}
diff --git a/block/mirror.c b/block/mirror.c
index 0ef41f999..c7a655fc5 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -32,6 +32,12 @@ typedef struct MirrorBlockJob {
RateLimit limit;
BlockDriverState *target;
BlockDriverState *base;
+ /* The name of the graph node to replace */
+ char *replaces;
+ /* The BDS to replace */
+ BlockDriverState *to_replace;
+ /* Used to block operations on the drive-mirror-replace target */
+ Error *replace_blocker;
bool is_none_mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
@@ -118,7 +124,7 @@ static void mirror_write_complete(void *opaque, int ret)
bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
action = mirror_error_action(s, false, -ret);
- if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+ if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
}
}
@@ -135,7 +141,7 @@ static void mirror_read_complete(void *opaque, int ret)
bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
action = mirror_error_action(s, true, -ret);
- if (action == BDRV_ACTION_REPORT && s->ret >= 0) {
+ if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
}
@@ -259,9 +265,11 @@ static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
next_sector = sector_num;
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
+ size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
- qemu_iovec_add(&op->qiov, buf, s->granularity);
+ qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
/* Advance the HBitmapIter in parallel, so that we do not examine
* the same sector twice.
@@ -324,12 +332,21 @@ static void coroutine_fn mirror_run(void *opaque)
}
s->common.len = bdrv_getlength(bs);
- if (s->common.len <= 0) {
- block_job_completed(&s->common, s->common.len);
- return;
+ if (s->common.len < 0) {
+ ret = s->common.len;
+ goto immediate_exit;
+ } else if (s->common.len == 0) {
+ /* Report BLOCK_JOB_READY and wait for complete. */
+ block_job_event_ready(&s->common);
+ s->synced = true;
+ while (!block_job_is_cancelled(&s->common) && !s->should_complete) {
+ block_job_yield(&s->common);
+ }
+ s->common.cancelled = false;
+ goto immediate_exit;
}
- length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
+ length = DIV_ROUND_UP(s->common.len, s->granularity);
s->in_flight_bitmap = bitmap_new(length);
/* If we have no backing file yet in the destination, we cannot let
@@ -339,7 +356,10 @@ static void coroutine_fn mirror_run(void *opaque)
bdrv_get_backing_filename(s->target, backing_filename,
sizeof(backing_filename));
if (backing_filename[0] && !s->target->backing_hd) {
- bdrv_get_info(s->target, &bdi);
+ ret = bdrv_get_info(s->target, &bdi);
+ if (ret < 0) {
+ goto immediate_exit;
+ }
if (s->granularity < bdi.cluster_size) {
s->buf_size = MAX(s->buf_size, bdi.cluster_size);
s->cow_bitmap = bitmap_new(length);
@@ -412,7 +432,8 @@ static void coroutine_fn mirror_run(void *opaque)
trace_mirror_before_flush(s);
ret = bdrv_flush(s->target);
if (ret < 0) {
- if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
+ if (mirror_error_action(s, false, -ret) ==
+ BLOCK_ERROR_ACTION_REPORT) {
goto immediate_exit;
}
} else {
@@ -423,7 +444,7 @@ static void coroutine_fn mirror_run(void *opaque)
*/
s->common.offset = end * BDRV_SECTOR_SIZE;
if (!s->synced) {
- block_job_ready(&s->common);
+ block_job_event_ready(&s->common);
s->synced = true;
}
@@ -487,18 +508,28 @@ immediate_exit:
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
bdrv_iostatus_disable(s->target);
if (s->should_complete && ret == 0) {
- if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
- bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
+ BlockDriverState *to_replace = s->common.bs;
+ if (s->to_replace) {
+ to_replace = s->to_replace;
}
- bdrv_swap(s->target, s->common.bs);
+ if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
+ bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+ }
+ bdrv_swap(s->target, to_replace);
if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
/* drop the bs loop chain formed by the swap: break the loop then
* trigger the unref from the top one */
BlockDriverState *p = s->base->backing_hd;
- s->base->backing_hd = NULL;
+ bdrv_set_backing_hd(s->base, NULL);
bdrv_unref(p);
}
}
+ if (s->to_replace) {
+ bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
+ error_free(s->replace_blocker);
+ bdrv_unref(s->to_replace);
+ }
+ g_free(s->replaces);
bdrv_unref(s->target);
block_job_completed(&s->common, ret);
}
@@ -537,6 +568,20 @@ static void mirror_complete(BlockJob *job, Error **errp)
return;
}
+ /* check the target bs is not blocked and block all operations on it */
+ if (s->replaces) {
+ s->to_replace = check_to_replace_node(s->replaces, &local_err);
+ if (!s->to_replace) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ error_setg(&s->replace_blocker,
+ "block device is in use by block-job-complete");
+ bdrv_op_block_all(s->to_replace, s->replace_blocker);
+ bdrv_ref(s->to_replace);
+ }
+
s->should_complete = true;
block_job_resume(job);
}
@@ -559,14 +604,15 @@ static const BlockJobDriver commit_active_job_driver = {
};
static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, int64_t granularity,
- int64_t buf_size,
- BlockdevOnError on_source_error,
- BlockdevOnError on_target_error,
- BlockDriverCompletionFunc *cb,
- void *opaque, Error **errp,
- const BlockJobDriver *driver,
- bool is_none_mode, BlockDriverState *base)
+ const char *replaces,
+ int64_t speed, int64_t granularity,
+ int64_t buf_size,
+ BlockdevOnError on_source_error,
+ BlockdevOnError on_target_error,
+ BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp,
+ const BlockJobDriver *driver,
+ bool is_none_mode, BlockDriverState *base)
{
MirrorBlockJob *s;
@@ -597,6 +643,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
return;
}
+ s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
s->target = target;
@@ -605,7 +652,10 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
s->granularity = granularity;
s->buf_size = MAX(buf_size, granularity);
- s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity);
+ s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
+ if (!s->dirty_bitmap) {
+ return;
+ }
bdrv_set_enable_write_cache(s->target, true);
bdrv_set_on_error(s->target, on_target_error, on_target_error);
bdrv_iostatus_enable(s->target);
@@ -615,6 +665,7 @@ static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
}
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
+ const char *replaces,
int64_t speed, int64_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
@@ -626,7 +677,8 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
- mirror_start_job(bs, target, speed, granularity, buf_size,
+ mirror_start_job(bs, target, replaces,
+ speed, granularity, buf_size,
on_source_error, on_target_error, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
@@ -674,10 +726,10 @@ void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
}
bdrv_ref(base);
- mirror_start_job(bs, base, speed, 0, 0,
+ mirror_start_job(bs, base, NULL, speed, 0, 0,
on_error, on_error, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
- if (error_is_set(&local_err)) {
+ if (local_err) {
error_propagate(errp, local_err);
goto error_restore_flags;
}
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 7d698cb61..6e1c97cad 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -49,7 +49,7 @@ static void nbd_teardown_connection(NbdClientSession *client)
shutdown(client->sock, 2);
nbd_recv_coroutines_enter_all(client);
- qemu_aio_set_fd_handler(client->sock, NULL, NULL, NULL);
+ nbd_client_session_detach_aio_context(client);
closesocket(client->sock);
client->sock = -1;
}
@@ -103,11 +103,14 @@ static int nbd_co_send_request(NbdClientSession *s,
struct nbd_request *request,
QEMUIOVector *qiov, int offset)
{
+ AioContext *aio_context;
int rc, ret;
qemu_co_mutex_lock(&s->send_mutex);
s->send_coroutine = qemu_coroutine_self();
- qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write, s);
+ aio_context = bdrv_get_aio_context(s->bs);
+ aio_set_fd_handler(aio_context, s->sock,
+ nbd_reply_ready, nbd_restart_write, s);
if (qiov) {
if (!s->is_unix) {
socket_set_cork(s->sock, 1);
@@ -126,7 +129,7 @@ static int nbd_co_send_request(NbdClientSession *s,
} else {
rc = nbd_send_request(s->sock, request);
}
- qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL, s);
+ aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
@@ -335,6 +338,19 @@ int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
}
+void nbd_client_session_detach_aio_context(NbdClientSession *client)
+{
+ aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock,
+ NULL, NULL, NULL);
+}
+
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+ AioContext *new_context)
+{
+ aio_set_fd_handler(new_context, client->sock,
+ nbd_reply_ready, NULL, client);
+}
+
void nbd_client_session_close(NbdClientSession *client)
{
struct nbd_request request = {
@@ -381,7 +397,7 @@ int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
qemu_set_nonblock(sock);
- qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL, client);
+ nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");
return 0;
diff --git a/block/nbd-client.h b/block/nbd-client.h
index f2a63378b..cd478f3a9 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -47,4 +47,8 @@ int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
+void nbd_client_session_detach_aio_context(NbdClientSession *client);
+void nbd_client_session_attach_aio_context(NbdClientSession *client,
+ AioContext *new_context);
+
#endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index 55124239d..4eda0958d 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -175,7 +175,7 @@ static void nbd_parse_filename(const char *filename, QDict *options,
InetSocketAddress *addr = NULL;
addr = inet_parse(host_spec, errp);
- if (error_is_set(errp)) {
+ if (!addr) {
goto out;
}
@@ -323,46 +323,67 @@ static int64_t nbd_getlength(BlockDriverState *bs)
return s->client.size;
}
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ nbd_client_session_detach_aio_context(&s->client);
+}
+
+static void nbd_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVNBDState *s = bs->opaque;
+
+ nbd_client_session_attach_aio_context(&s->client, new_context);
+}
+
static BlockDriver bdrv_nbd = {
- .format_name = "nbd",
- .protocol_name = "nbd",
- .instance_size = sizeof(BDRVNBDState),
- .bdrv_parse_filename = nbd_parse_filename,
- .bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_close = nbd_close,
- .bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
- .bdrv_getlength = nbd_getlength,
+ .format_name = "nbd",
+ .protocol_name = "nbd",
+ .instance_size = sizeof(BDRVNBDState),
+ .bdrv_parse_filename = nbd_parse_filename,
+ .bdrv_file_open = nbd_open,
+ .bdrv_co_readv = nbd_co_readv,
+ .bdrv_co_writev = nbd_co_writev,
+ .bdrv_close = nbd_close,
+ .bdrv_co_flush_to_os = nbd_co_flush,
+ .bdrv_co_discard = nbd_co_discard,
+ .bdrv_getlength = nbd_getlength,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
};
static BlockDriver bdrv_nbd_tcp = {
- .format_name = "nbd",
- .protocol_name = "nbd+tcp",
- .instance_size = sizeof(BDRVNBDState),
- .bdrv_parse_filename = nbd_parse_filename,
- .bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_close = nbd_close,
- .bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
- .bdrv_getlength = nbd_getlength,
+ .format_name = "nbd",
+ .protocol_name = "nbd+tcp",
+ .instance_size = sizeof(BDRVNBDState),
+ .bdrv_parse_filename = nbd_parse_filename,
+ .bdrv_file_open = nbd_open,
+ .bdrv_co_readv = nbd_co_readv,
+ .bdrv_co_writev = nbd_co_writev,
+ .bdrv_close = nbd_close,
+ .bdrv_co_flush_to_os = nbd_co_flush,
+ .bdrv_co_discard = nbd_co_discard,
+ .bdrv_getlength = nbd_getlength,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
};
static BlockDriver bdrv_nbd_unix = {
- .format_name = "nbd",
- .protocol_name = "nbd+unix",
- .instance_size = sizeof(BDRVNBDState),
- .bdrv_parse_filename = nbd_parse_filename,
- .bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
- .bdrv_close = nbd_close,
- .bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
- .bdrv_getlength = nbd_getlength,
+ .format_name = "nbd",
+ .protocol_name = "nbd+unix",
+ .instance_size = sizeof(BDRVNBDState),
+ .bdrv_parse_filename = nbd_parse_filename,
+ .bdrv_file_open = nbd_open,
+ .bdrv_co_readv = nbd_co_readv,
+ .bdrv_co_writev = nbd_co_writev,
+ .bdrv_close = nbd_close,
+ .bdrv_co_flush_to_os = nbd_co_flush,
+ .bdrv_co_discard = nbd_co_discard,
+ .bdrv_getlength = nbd_getlength,
+ .bdrv_detach_aio_context = nbd_detach_aio_context,
+ .bdrv_attach_aio_context = nbd_attach_aio_context,
};
static void bdrv_nbd_init(void)
diff --git a/block/nfs.c b/block/nfs.c
index 98aa363e4..8439e0d38 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -40,6 +40,7 @@ typedef struct NFSClient {
struct nfsfh *fh;
int events;
bool has_zero_init;
+ AioContext *aio_context;
} NFSClient;
typedef struct NFSRPC {
@@ -49,6 +50,7 @@ typedef struct NFSRPC {
struct stat *st;
Coroutine *co;
QEMUBH *bh;
+ NFSClient *client;
} NFSRPC;
static void nfs_process_read(void *arg);
@@ -58,10 +60,11 @@ static void nfs_set_events(NFSClient *client)
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
- qemu_aio_set_fd_handler(nfs_get_fd(client->context),
- (ev & POLLIN) ? nfs_process_read : NULL,
- (ev & POLLOUT) ? nfs_process_write : NULL,
- client);
+ aio_set_fd_handler(client->aio_context,
+ nfs_get_fd(client->context),
+ (ev & POLLIN) ? nfs_process_read : NULL,
+ (ev & POLLOUT) ? nfs_process_write : NULL,
+ client);
}
client->events = ev;
@@ -84,13 +87,15 @@ static void nfs_process_write(void *arg)
static void nfs_co_init_task(NFSClient *client, NFSRPC *task)
{
*task = (NFSRPC) {
- .co = qemu_coroutine_self(),
+ .co = qemu_coroutine_self(),
+ .client = client,
};
}
static void nfs_co_generic_bh_cb(void *opaque)
{
NFSRPC *task = opaque;
+ task->complete = 1;
qemu_bh_delete(task->bh);
qemu_coroutine_enter(task->co, NULL);
}
@@ -100,7 +105,6 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
void *private_data)
{
NFSRPC *task = private_data;
- task->complete = 1;
task->ret = ret;
if (task->ret > 0 && task->iov) {
if (task->ret <= task->iov->size) {
@@ -116,8 +120,11 @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
error_report("NFS Error: %s", nfs_get_error(nfs));
}
if (task->co) {
- task->bh = qemu_bh_new(nfs_co_generic_bh_cb, task);
+ task->bh = aio_bh_new(task->client->aio_context,
+ nfs_co_generic_bh_cb, task);
qemu_bh_schedule(task->bh);
+ } else {
+ task->complete = 1;
}
}
@@ -224,13 +231,34 @@ static QemuOptsList runtime_opts = {
},
};
+static void nfs_detach_aio_context(BlockDriverState *bs)
+{
+ NFSClient *client = bs->opaque;
+
+ aio_set_fd_handler(client->aio_context,
+ nfs_get_fd(client->context),
+ NULL, NULL, NULL);
+ client->events = 0;
+}
+
+static void nfs_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ NFSClient *client = bs->opaque;
+
+ client->aio_context = new_context;
+ nfs_set_events(client);
+}
+
static void nfs_client_close(NFSClient *client)
{
if (client->context) {
if (client->fh) {
nfs_close(client->context, client->fh);
}
- qemu_aio_set_fd_handler(nfs_get_fd(client->context), NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context,
+ nfs_get_fd(client->context),
+ NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
@@ -256,6 +284,10 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
error_setg(errp, "Invalid URL specified");
goto fail;
}
+ if (!uri->server) {
+ error_setg(errp, "Invalid URL specified");
+ goto fail;
+ }
strp = strrchr(uri->path, '/');
if (strp == NULL) {
error_setg(errp, "Invalid URL specified");
@@ -272,17 +304,27 @@ static int64_t nfs_client_open(NFSClient *client, const char *filename,
qp = query_params_parse(uri->query);
for (i = 0; i < qp->n; i++) {
+ unsigned long long val;
if (!qp->p[i].value) {
error_setg(errp, "Value for NFS parameter expected: %s",
qp->p[i].name);
goto fail;
}
- if (!strncmp(qp->p[i].name, "uid", 3)) {
- nfs_set_uid(client->context, atoi(qp->p[i].value));
- } else if (!strncmp(qp->p[i].name, "gid", 3)) {
- nfs_set_gid(client->context, atoi(qp->p[i].value));
- } else if (!strncmp(qp->p[i].name, "tcp-syncnt", 10)) {
- nfs_set_tcp_syncnt(client->context, atoi(qp->p[i].value));
+ if (parse_uint_full(qp->p[i].value, &val, 0)) {
+ error_setg(errp, "Illegal value for NFS parameter: %s",
+ qp->p[i].name);
+ goto fail;
+ }
+ if (!strcmp(qp->p[i].name, "uid")) {
+ nfs_set_uid(client->context, val);
+ } else if (!strcmp(qp->p[i].name, "gid")) {
+ nfs_set_gid(client->context, val);
+ } else if (!strcmp(qp->p[i].name, "tcp-syncnt")) {
+ nfs_set_tcp_syncnt(client->context, val);
+#ifdef LIBNFS_FEATURE_READAHEAD
+ } else if (!strcmp(qp->p[i].name, "readahead")) {
+ nfs_set_readahead(client->context, val);
+#endif
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
qp->p[i].name);
@@ -341,9 +383,11 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
QemuOpts *opts;
Error *local_err = NULL;
+ client->aio_context = bdrv_get_aio_context(bs);
+
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
- if (error_is_set(&local_err)) {
+ if (local_err) {
error_propagate(errp, local_err);
return -EINVAL;
}
@@ -357,20 +401,16 @@ static int nfs_file_open(BlockDriverState *bs, QDict *options, int flags,
return 0;
}
-static int nfs_file_create(const char *url, QEMUOptionParameter *options,
- Error **errp)
+static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
{
int ret = 0;
int64_t total_size = 0;
NFSClient *client = g_malloc0(sizeof(NFSClient));
+ client->aio_context = qemu_get_aio_context();
+
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, "size")) {
- total_size = options->value.n;
- }
- options++;
- }
+ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
ret = nfs_client_open(client, url, O_CREAT, errp);
if (ret < 0) {
@@ -403,7 +443,7 @@ static int64_t nfs_get_allocated_file_size(BlockDriverState *bs)
while (!task.complete) {
nfs_set_events(client);
- qemu_aio_wait();
+ aio_poll(client->aio_context, true);
}
return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
@@ -416,22 +456,25 @@ static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
}
static BlockDriver bdrv_nfs = {
- .format_name = "nfs",
- .protocol_name = "nfs",
-
- .instance_size = sizeof(NFSClient),
- .bdrv_needs_filename = true,
- .bdrv_has_zero_init = nfs_has_zero_init,
- .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
- .bdrv_truncate = nfs_file_truncate,
-
- .bdrv_file_open = nfs_file_open,
- .bdrv_close = nfs_file_close,
- .bdrv_create = nfs_file_create,
-
- .bdrv_co_readv = nfs_co_readv,
- .bdrv_co_writev = nfs_co_writev,
- .bdrv_co_flush_to_disk = nfs_co_flush,
+ .format_name = "nfs",
+ .protocol_name = "nfs",
+
+ .instance_size = sizeof(NFSClient),
+ .bdrv_needs_filename = true,
+ .bdrv_has_zero_init = nfs_has_zero_init,
+ .bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
+ .bdrv_truncate = nfs_file_truncate,
+
+ .bdrv_file_open = nfs_file_open,
+ .bdrv_close = nfs_file_close,
+ .bdrv_create = nfs_file_create,
+
+ .bdrv_co_readv = nfs_co_readv,
+ .bdrv_co_writev = nfs_co_writev,
+ .bdrv_co_flush_to_disk = nfs_co_flush,
+
+ .bdrv_detach_aio_context = nfs_detach_aio_context,
+ .bdrv_attach_aio_context = nfs_attach_aio_context,
};
static void nfs_block_init(void)
diff --git a/block/qapi.c b/block/qapi.c
index 8f2b4dbe7..f44f6b401 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -50,6 +50,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
}
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
+ info->detect_zeroes = bs->detect_zeroes;
if (bs->io_limits_enabled) {
ThrottleConfig cfg;
@@ -292,7 +293,7 @@ void bdrv_query_info(BlockDriverState *bs,
qapi_free_BlockInfo(info);
}
-BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
{
BlockStats *s;
@@ -359,7 +360,11 @@ BlockStatsList *qmp_query_blockstats(Error **errp)
while ((bs = bdrv_next(bs))) {
BlockStatsList *info = g_malloc0(sizeof(*info));
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
info->value = bdrv_query_stats(bs);
+ aio_context_release(ctx);
*p_next = info;
p_next = &info->next;
@@ -474,6 +479,7 @@ static void dump_qobject(fprintf_function func_fprintf, void *f,
case QTYPE_QERROR: {
QString *value = qerror_human((QError *)obj);
func_fprintf(f, "%s", qstring_get_str(value));
+ QDECREF(value);
break;
}
case QTYPE_NONE:
@@ -532,12 +538,11 @@ static void dump_qdict(fprintf_function func_fprintf, void *f, int indentation,
void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
ImageInfoSpecific *info_spec)
{
- Error *local_err = NULL;
QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
- &local_err);
+ &error_abort);
obj = qmp_output_get_qobject(ov);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
diff --git a/block/qcow.c b/block/qcow.c
index d5a7d5fd1..a874056cf 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -48,9 +48,10 @@ typedef struct QCowHeader {
uint64_t size; /* in bytes */
uint8_t cluster_bits;
uint8_t l2_bits;
+ uint16_t padding;
uint32_t crypt_method;
uint64_t l1_table_offset;
-} QCowHeader;
+} QEMU_PACKED QCowHeader;
#define L2_CACHE_SIZE 16
@@ -60,7 +61,7 @@ typedef struct BDRVQcowState {
int cluster_sectors;
int l2_bits;
int l2_size;
- int l1_size;
+ unsigned int l1_size;
uint64_t cluster_offset_mask;
uint64_t l1_table_offset;
uint64_t *l1_table;
@@ -96,7 +97,8 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVQcowState *s = bs->opaque;
- int len, i, shift, ret;
+ unsigned int len, i, shift;
+ int ret;
QCowHeader header;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
@@ -119,18 +121,33 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
}
if (header.version != QCOW_VERSION) {
char version[64];
- snprintf(version, sizeof(version), "QCOW version %d", header.version);
+ snprintf(version, sizeof(version), "QCOW version %" PRIu32,
+ header.version);
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "qcow", version);
ret = -ENOTSUP;
goto fail;
}
- if (header.size <= 1 || header.cluster_bits < 9) {
- error_setg(errp, "invalid value in qcow header");
+ if (header.size <= 1) {
+ error_setg(errp, "Image size is too small (must be at least 2 bytes)");
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (header.cluster_bits < 9 || header.cluster_bits > 16) {
+ error_setg(errp, "Cluster size must be between 512 and 64k");
ret = -EINVAL;
goto fail;
}
+
+ /* l2_bits specifies number of entries; storing a uint64_t in each entry,
+ * so bytes = num_entries << 3. */
+ if (header.l2_bits < 9 - 3 || header.l2_bits > 16 - 3) {
+ error_setg(errp, "L2 table size must be between 512 and 64k");
+ ret = -EINVAL;
+ goto fail;
+ }
+
if (header.crypt_method > QCOW_CRYPT_AES) {
error_setg(errp, "invalid encryption method in qcow header");
ret = -EINVAL;
@@ -150,7 +167,19 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
/* read the level 1 table */
shift = s->cluster_bits + s->l2_bits;
- s->l1_size = (header.size + (1LL << shift) - 1) >> shift;
+ if (header.size > UINT64_MAX - (1LL << shift)) {
+ error_setg(errp, "Image too large");
+ ret = -EINVAL;
+ goto fail;
+ } else {
+ uint64_t l1_size = (header.size + (1LL << shift) - 1) >> shift;
+ if (l1_size > INT_MAX / sizeof(uint64_t)) {
+ error_setg(errp, "Image too large");
+ ret = -EINVAL;
+ goto fail;
+ }
+ s->l1_size = l1_size;
+ }
s->l1_table_offset = header.l1_table_offset;
s->l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
@@ -174,7 +203,9 @@ static int qcow_open(BlockDriverState *bs, QDict *options, int flags,
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
if (len > 1023) {
- len = 1023;
+ error_setg(errp, "Backing file name too long");
+ ret = -EINVAL;
+ goto fail;
}
ret = bdrv_pread(bs->file, header.backing_file_offset,
bs->backing_file, len);
@@ -662,35 +693,29 @@ static void qcow_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static int qcow_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int qcow_create(const char *filename, QemuOpts *opts, Error **errp)
{
int header_size, backing_filename_len, l1_size, shift, i;
QCowHeader header;
uint8_t *tmp;
int64_t total_size = 0;
- const char *backing_file = NULL;
+ char *backing_file = NULL;
int flags = 0;
Error *local_err = NULL;
int ret;
BlockDriverState *qcow_bs;
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- }
- options++;
+ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+ backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
+ flags |= BLOCK_FLAG_ENCRYPT;
}
- ret = bdrv_create_file(filename, options, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return ret;
+ goto cleanup;
}
qcow_bs = NULL;
@@ -698,7 +723,7 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
- return ret;
+ goto cleanup;
}
ret = bdrv_truncate(qcow_bs, 0);
@@ -769,6 +794,8 @@ static int qcow_create(const char *filename, QEMUOptionParameter *options,
ret = 0;
exit:
bdrv_unref(qcow_bs);
+cleanup:
+ g_free(backing_file);
return ret;
}
@@ -881,24 +908,28 @@ static int qcow_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
-
-static QEMUOptionParameter qcow_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- { NULL }
+static QemuOptsList qcow_create_opts = {
+ .name = "qcow-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qcow_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = QEMU_OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = QEMU_OPT_BOOL,
+ .help = "Encrypt the image",
+ .def_value_str = "off"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_qcow = {
@@ -907,9 +938,10 @@ static BlockDriver bdrv_qcow = {
.bdrv_probe = qcow_probe,
.bdrv_open = qcow_open,
.bdrv_close = qcow_close,
- .bdrv_reopen_prepare = qcow_reopen_prepare,
- .bdrv_create = qcow_create,
+ .bdrv_reopen_prepare = qcow_reopen_prepare,
+ .bdrv_create = qcow_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .supports_backing = true,
.bdrv_co_readv = qcow_co_readv,
.bdrv_co_writev = qcow_co_writev,
@@ -920,7 +952,7 @@ static BlockDriver bdrv_qcow = {
.bdrv_write_compressed = qcow_write_compressed,
.bdrv_get_info = qcow_get_info,
- .create_options = qcow_create_options,
+ .create_opts = &qcow_create_opts,
};
static void bdrv_qcow_init(void)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 331ab0802..4208dc08b 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -42,6 +42,13 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
if (min_size <= s->l1_size)
return 0;
+ /* Do a sanity check on min_size before trying to calculate new_l1_size
+ * (this prevents overflows during the while loop for the calculation of
+ * new_l1_size) */
+ if (min_size > INT_MAX / sizeof(uint64_t)) {
+ return -EFBIG;
+ }
+
if (exact_size) {
new_l1_size = min_size;
} else {
@@ -372,7 +379,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
if (!bs->drv) {
- return -ENOMEDIUM;
+ ret = -ENOMEDIUM;
+ goto out;
}
/* Call .bdrv_co_readv() directly instead of using the public block-layer
@@ -1360,9 +1368,9 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
for (i = 0; i < nb_clusters; i++) {
- uint64_t old_offset;
+ uint64_t old_l2_entry;
- old_offset = be64_to_cpu(l2_table[l2_index + i]);
+ old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
/*
* Make sure that a discarded area reads back as zeroes for v3 images
@@ -1373,12 +1381,22 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
* TODO We might want to use bdrv_get_block_status(bs) here, but we're
* holding s->lock, so that doesn't work today.
*/
- if (old_offset & QCOW_OFLAG_ZERO) {
- continue;
- }
+ switch (qcow2_get_cluster_type(old_l2_entry)) {
+ case QCOW2_CLUSTER_UNALLOCATED:
+ if (!bs->backing_hd) {
+ continue;
+ }
+ break;
- if ((old_offset & L2E_OFFSET_MASK) == 0 && !bs->backing_hd) {
- continue;
+ case QCOW2_CLUSTER_ZERO:
+ continue;
+
+ case QCOW2_CLUSTER_NORMAL:
+ case QCOW2_CLUSTER_COMPRESSED:
+ break;
+
+ default:
+ abort();
}
/* First remove L2 entries */
@@ -1390,7 +1408,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
}
/* Then decrease the refcount */
- qcow2_free_any_clusters(bs, old_offset, 1, type);
+ qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
}
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index a37ee4501..cc6cf743d 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -27,6 +27,7 @@
#include "block/qcow2.h"
#include "qemu/range.h"
#include "qapi/qmp/types.h"
+#include "qapi-event.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
@@ -653,6 +654,15 @@ retry:
goto retry;
}
}
+
+ /* Make sure that all offsets in the "allocated" range are representable
+ * in an int64_t */
+ if (s->free_cluster_index > 0 &&
+ s->free_cluster_index - 1 > (INT64_MAX >> s->cluster_bits))
+ {
+ return -EFBIG;
+ }
+
#ifdef DEBUG_ALLOC2
fprintf(stderr, "alloc_clusters: size=%" PRId64 " -> %" PRId64 "\n",
size,
@@ -1480,6 +1490,11 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
int ret;
size = bdrv_getlength(bs->file);
+ if (size < 0) {
+ res->check_errors++;
+ return size;
+ }
+
nb_clusters = size_to_clusters(s, size);
if (nb_clusters > INT_MAX) {
res->check_errors++;
@@ -1793,7 +1808,6 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
} else if (ret > 0) {
int metadata_ol_bitnr = ffs(ret) - 1;
char *message;
- QObject *data;
assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
@@ -1802,12 +1816,14 @@ int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
metadata_ol_names[metadata_ol_bitnr]);
message = g_strdup_printf("Prevented %s overwrite",
metadata_ol_names[metadata_ol_bitnr]);
- data = qobject_from_jsonf("{ 'device': %s, 'msg': %s, 'offset': %"
- PRId64 ", 'size': %" PRId64 " }", bs->device_name, message,
- offset, size);
- monitor_protocol_event(QEVENT_BLOCK_IMAGE_CORRUPTED, data);
+ qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
+ message,
+ true,
+ offset,
+ true,
+ size,
+ &error_abort);
g_free(message);
- qobject_decref(data);
qcow2_mark_corrupt(bs);
bs->drv = NULL; /* make BDS unusable */
diff --git a/block/qcow2.c b/block/qcow2.c
index e903d971c..1e3ab6bd0 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -31,6 +31,7 @@
#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qbool.h"
#include "trace.h"
+#include "qemu/option_int.h"
/*
Differences with QCOW:
@@ -124,8 +125,9 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
case QCOW2_EXT_MAGIC_BACKING_FORMAT:
if (ext.len >= sizeof(bs->backing_format)) {
- error_setg(errp, "ERROR: ext_backing_format: len=%u too large"
- " (>=%zu)", ext.len, sizeof(bs->backing_format));
+ error_setg(errp, "ERROR: ext_backing_format: len=%" PRIu32
+ " too large (>=%zu)", ext.len,
+ sizeof(bs->backing_format));
return 2;
}
ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
@@ -208,20 +210,31 @@ static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
static void report_unsupported_feature(BlockDriverState *bs,
Error **errp, Qcow2Feature *table, uint64_t mask)
{
+ char *features = g_strdup("");
+ char *old;
+
while (table && table->name[0] != '\0') {
if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
- if (mask & (1 << table->bit)) {
- report_unsupported(bs, errp, "%.46s", table->name);
- mask &= ~(1 << table->bit);
+ if (mask & (1ULL << table->bit)) {
+ old = features;
+ features = g_strdup_printf("%s%s%.46s", old, *old ? ", " : "",
+ table->name);
+ g_free(old);
+ mask &= ~(1ULL << table->bit);
}
}
table++;
}
if (mask) {
- report_unsupported(bs, errp, "Unknown incompatible feature: %" PRIx64,
- mask);
+ old = features;
+ features = g_strdup_printf("%s%sUnknown incompatible feature: %" PRIx64,
+ old, *old ? ", " : "", mask);
+ g_free(old);
}
+
+ report_unsupported(bs, errp, "%s", features);
+ g_free(features);
}
/*
@@ -483,7 +496,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, errp, "QCOW version %d", header.version);
+ report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -493,7 +506,8 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
/* Initialise cluster size */
if (header.cluster_bits < MIN_CLUSTER_BITS ||
header.cluster_bits > MAX_CLUSTER_BITS) {
- error_setg(errp, "Unsupported cluster size: 2^%i", header.cluster_bits);
+ error_setg(errp, "Unsupported cluster size: 2^%" PRIu32,
+ header.cluster_bits);
ret = -EINVAL;
goto fail;
}
@@ -591,7 +605,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
s->refcount_order = header.refcount_order;
if (header.crypt_method > QCOW_CRYPT_AES) {
- error_setg(errp, "Unsupported encryption method: %i",
+ error_setg(errp, "Unsupported encryption method: %" PRIu32,
header.crypt_method);
ret = -EINVAL;
goto fail;
@@ -852,13 +866,11 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
return ret;
}
-static int qcow2_refresh_limits(BlockDriverState *bs)
+static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQcowState *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->cluster_sectors;
-
- return 0;
}
static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -1017,11 +1029,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
sector_num, cur_nr_sectors);
if (n1 > 0) {
+ QEMUIOVector local_qiov;
+
+ qemu_iovec_init(&local_qiov, hd_qiov.niov);
+ qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
+ n1 * BDRV_SECTOR_SIZE);
+
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n1, &hd_qiov);
+ n1, &local_qiov);
qemu_co_mutex_lock(&s->lock);
+
+ qemu_iovec_destroy(&local_qiov);
+
if (ret < 0) {
goto fail;
}
@@ -1306,6 +1327,7 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
options = qdict_clone_shallow(bs->options);
ret = qcow2_open(bs, options, flags, &local_err);
+ QDECREF(options);
if (local_err) {
error_setg(errp, "Could not reopen qcow2 layer: %s",
error_get_pretty(local_err));
@@ -1316,8 +1338,6 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
return;
}
- QDECREF(options);
-
if (crypt_method) {
s->crypt_method = crypt_method;
memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
@@ -1593,7 +1613,7 @@ static int preallocate(BlockDriverState *bs)
static int qcow2_create2(const char *filename, int64_t total_size,
const char *backing_file, const char *backing_format,
int flags, size_t cluster_size, int prealloc,
- QEMUOptionParameter *options, int version,
+ QemuOpts *opts, int version,
Error **errp)
{
/* Calculate cluster_bits */
@@ -1625,7 +1645,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
Error *local_err = NULL;
int ret;
- ret = bdrv_create_file(filename, options, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
@@ -1761,11 +1781,11 @@ out:
return ret;
}
-static int qcow2_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
{
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
+ char *backing_file = NULL;
+ char *backing_fmt = NULL;
+ char *buf = NULL;
uint64_t sectors = 0;
int flags = 0;
size_t cluster_size = DEFAULT_CLUSTER_SIZE;
@@ -1775,64 +1795,66 @@ static int qcow2_create(const char *filename, QEMUOptionParameter *options,
int ret;
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- sectors = options->value.n / 512;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
- flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
- if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = 0;
- } else if (!strcmp(options->value.s, "metadata")) {
- prealloc = 1;
- } else {
- error_setg(errp, "Invalid preallocation mode: '%s'",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
- if (!options->value.s) {
- /* keep the default */
- } else if (!strcmp(options->value.s, "0.10")) {
- version = 2;
- } else if (!strcmp(options->value.s, "1.1")) {
- version = 3;
- } else {
- error_setg(errp, "Invalid compatibility level: '%s'",
- options->value.s);
- return -EINVAL;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
- flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
- }
- options++;
+ sectors = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
+ backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
+ if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ENCRYPT, false)) {
+ flags |= BLOCK_FLAG_ENCRYPT;
+ }
+ cluster_size = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
+ DEFAULT_CLUSTER_SIZE);
+ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
+ if (!buf || !strcmp(buf, "off")) {
+ prealloc = 0;
+ } else if (!strcmp(buf, "metadata")) {
+ prealloc = 1;
+ } else {
+ error_setg(errp, "Invalid preallocation mode: '%s'", buf);
+ ret = -EINVAL;
+ goto finish;
+ }
+ g_free(buf);
+ buf = qemu_opt_get_del(opts, BLOCK_OPT_COMPAT_LEVEL);
+ if (!buf) {
+ /* keep the default */
+ } else if (!strcmp(buf, "0.10")) {
+ version = 2;
+ } else if (!strcmp(buf, "1.1")) {
+ version = 3;
+ } else {
+ error_setg(errp, "Invalid compatibility level: '%s'", buf);
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ if (qemu_opt_get_bool_del(opts, BLOCK_OPT_LAZY_REFCOUNTS, false)) {
+ flags |= BLOCK_FLAG_LAZY_REFCOUNTS;
}
if (backing_file && prealloc) {
error_setg(errp, "Backing file and preallocation cannot be used at "
"the same time");
- return -EINVAL;
+ ret = -EINVAL;
+ goto finish;
}
if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
error_setg(errp, "Lazy refcounts only supported with compatibility "
"level 1.1 and above (use compat=1.1 or greater)");
- return -EINVAL;
+ ret = -EINVAL;
+ goto finish;
}
ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
- cluster_size, prealloc, options, version, &local_err);
+ cluster_size, prealloc, opts, version, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
+
+finish:
+ g_free(backing_file);
+ g_free(backing_fmt);
+ g_free(buf);
return ret;
}
@@ -2197,64 +2219,72 @@ static int qcow2_downgrade(BlockDriverState *bs, int target_version)
return 0;
}
-static int qcow2_amend_options(BlockDriverState *bs,
- QEMUOptionParameter *options)
+static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts)
{
BDRVQcowState *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
uint64_t new_size = 0;
const char *backing_file = NULL, *backing_format = NULL;
bool lazy_refcounts = s->use_lazy_refcounts;
+ const char *compat = NULL;
+ uint64_t cluster_size = s->cluster_size;
+ bool encrypt;
int ret;
- int i;
+ QemuOptDesc *desc = opts->list->desc;
- for (i = 0; options[i].name; i++)
- {
- if (!options[i].assigned) {
+ while (desc && desc->name) {
+ if (!qemu_opt_find(opts, desc->name)) {
/* only change explicitly defined options */
+ desc++;
continue;
}
- if (!strcmp(options[i].name, "compat")) {
- if (!options[i].value.s) {
+ if (!strcmp(desc->name, "compat")) {
+ compat = qemu_opt_get(opts, "compat");
+ if (!compat) {
/* preserve default */
- } else if (!strcmp(options[i].value.s, "0.10")) {
+ } else if (!strcmp(compat, "0.10")) {
new_version = 2;
- } else if (!strcmp(options[i].value.s, "1.1")) {
+ } else if (!strcmp(compat, "1.1")) {
new_version = 3;
} else {
- fprintf(stderr, "Unknown compatibility level %s.\n",
- options[i].value.s);
+ fprintf(stderr, "Unknown compatibility level %s.\n", compat);
return -EINVAL;
}
- } else if (!strcmp(options[i].name, "preallocation")) {
+ } else if (!strcmp(desc->name, "preallocation")) {
fprintf(stderr, "Cannot change preallocation mode.\n");
return -ENOTSUP;
- } else if (!strcmp(options[i].name, "size")) {
- new_size = options[i].value.n;
- } else if (!strcmp(options[i].name, "backing_file")) {
- backing_file = options[i].value.s;
- } else if (!strcmp(options[i].name, "backing_fmt")) {
- backing_format = options[i].value.s;
- } else if (!strcmp(options[i].name, "encryption")) {
- if ((options[i].value.n != !!s->crypt_method)) {
+ } else if (!strcmp(desc->name, "size")) {
+ new_size = qemu_opt_get_size(opts, "size", 0);
+ } else if (!strcmp(desc->name, "backing_file")) {
+ backing_file = qemu_opt_get(opts, "backing_file");
+ } else if (!strcmp(desc->name, "backing_fmt")) {
+ backing_format = qemu_opt_get(opts, "backing_fmt");
+ } else if (!strcmp(desc->name, "encryption")) {
+ encrypt = qemu_opt_get_bool(opts, "encryption", s->crypt_method);
+ if (encrypt != !!s->crypt_method) {
fprintf(stderr, "Changing the encryption flag is not "
"supported.\n");
return -ENOTSUP;
}
- } else if (!strcmp(options[i].name, "cluster_size")) {
- if (options[i].value.n != s->cluster_size) {
+ } else if (!strcmp(desc->name, "cluster_size")) {
+ cluster_size = qemu_opt_get_size(opts, "cluster_size",
+ cluster_size);
+ if (cluster_size != s->cluster_size) {
fprintf(stderr, "Changing the cluster size is not "
"supported.\n");
return -ENOTSUP;
}
- } else if (!strcmp(options[i].name, "lazy_refcounts")) {
- lazy_refcounts = options[i].value.n;
+ } else if (!strcmp(desc->name, "lazy_refcounts")) {
+ lazy_refcounts = qemu_opt_get_bool(opts, "lazy_refcounts",
+ lazy_refcounts);
} else {
/* if this assertion fails, this probably means a new option was
* added without having it covered here */
assert(false);
}
+
+ desc++;
}
if (new_version != old_version) {
@@ -2323,49 +2353,55 @@ static int qcow2_amend_options(BlockDriverState *bs,
return 0;
}
-static QEMUOptionParameter qcow2_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_COMPAT_LEVEL,
- .type = OPT_STRING,
- .help = "Compatibility level (0.10 or 1.1)"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- },
- {
- .name = BLOCK_OPT_ENCRYPT,
- .type = OPT_FLAG,
- .help = "Encrypt the image"
- },
- {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "qcow2 cluster size",
- .value = { .n = DEFAULT_CLUSTER_SIZE },
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, metadata)"
- },
- {
- .name = BLOCK_OPT_LAZY_REFCOUNTS,
- .type = OPT_FLAG,
- .help = "Postpone refcount updates",
- },
- { NULL }
+static QemuOptsList qcow2_create_opts = {
+ .name = "qcow2-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qcow2_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_COMPAT_LEVEL,
+ .type = QEMU_OPT_STRING,
+ .help = "Compatibility level (0.10 or 1.1)"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = QEMU_OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = QEMU_OPT_STRING,
+ .help = "Image format of the base image"
+ },
+ {
+ .name = BLOCK_OPT_ENCRYPT,
+ .type = QEMU_OPT_BOOL,
+ .help = "Encrypt the image",
+ .def_value_str = "off"
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "qcow2 cluster size",
+ .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = QEMU_OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, metadata)"
+ },
+ {
+ .name = BLOCK_OPT_LAZY_REFCOUNTS,
+ .type = QEMU_OPT_BOOL,
+ .help = "Postpone refcount updates",
+ .def_value_str = "off"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_qcow2 = {
@@ -2393,21 +2429,22 @@ static BlockDriver bdrv_qcow2 = {
.bdrv_snapshot_goto = qcow2_snapshot_goto,
.bdrv_snapshot_delete = qcow2_snapshot_delete,
.bdrv_snapshot_list = qcow2_snapshot_list,
- .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
- .bdrv_get_info = qcow2_get_info,
+ .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
+ .bdrv_get_info = qcow2_get_info,
.bdrv_get_specific_info = qcow2_get_specific_info,
.bdrv_save_vmstate = qcow2_save_vmstate,
.bdrv_load_vmstate = qcow2_load_vmstate,
+ .supports_backing = true,
.bdrv_change_backing_file = qcow2_change_backing_file,
.bdrv_refresh_limits = qcow2_refresh_limits,
.bdrv_invalidate_cache = qcow2_invalidate_cache,
- .create_options = qcow2_create_options,
- .bdrv_check = qcow2_check,
- .bdrv_amend_options = qcow2_amend_options,
+ .create_opts = &qcow2_create_opts,
+ .bdrv_check = qcow2_check,
+ .bdrv_amend_options = qcow2_amend_options,
};
static void bdrv_qcow2_init(void)
diff --git a/block/qed-table.c b/block/qed-table.c
index 76d2dcccf..f61107a1c 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -173,7 +173,7 @@ int qed_read_l1_table_sync(BDRVQEDState *s)
qed_read_table(s, s->header.l1_table_offset,
s->l1_table, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
@@ -194,7 +194,7 @@ int qed_write_l1_table_sync(BDRVQEDState *s, unsigned int index,
qed_write_l1_table(s, index, n, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
@@ -267,7 +267,7 @@ int qed_read_l2_table_sync(BDRVQEDState *s, QEDRequest *request, uint64_t offset
qed_read_l2_table(s, request, offset, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
@@ -289,7 +289,7 @@ int qed_write_l2_table_sync(BDRVQEDState *s, QEDRequest *request,
qed_write_l2_table(s, request, index, n, flush, qed_sync_cb, &ret);
while (ret == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(s->bs), true);
}
return ret;
diff --git a/block/qed.c b/block/qed.c
index 3bd9db9c8..794483218 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -21,12 +21,13 @@
static void qed_aio_cancel(BlockDriverAIOCB *blockacb)
{
QEDAIOCB *acb = (QEDAIOCB *)blockacb;
+ AioContext *aio_context = bdrv_get_aio_context(blockacb->bs);
bool finished = false;
/* Wait for the request to finish */
acb->finished = &finished;
while (!finished) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -373,6 +374,27 @@ static void bdrv_qed_rebind(BlockDriverState *bs)
s->bs = bs;
}
+static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ qed_cancel_need_check_timer(s);
+ timer_free(s->need_check_timer);
+}
+
+static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVQEDState *s = bs->opaque;
+
+ s->need_check_timer = aio_timer_new(new_context,
+ QEMU_CLOCK_VIRTUAL, SCALE_NS,
+ qed_need_check_timer_cb, s);
+ if (s->header.features & QED_F_NEED_CHECK) {
+ qed_start_need_check_timer(s);
+ }
+}
+
static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -496,8 +518,7 @@ static int bdrv_qed_open(BlockDriverState *bs, QDict *options, int flags,
}
}
- s->need_check_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
- qed_need_check_timer_cb, s);
+ bdrv_qed_attach_aio_context(bs, bdrv_get_aio_context(bs));
out:
if (ret) {
@@ -507,13 +528,11 @@ out:
return ret;
}
-static int bdrv_qed_refresh_limits(BlockDriverState *bs)
+static void bdrv_qed_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVQEDState *s = bs->opaque;
bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
-
- return 0;
}
/* We have nothing to do for QED reopen, stubs just return
@@ -528,8 +547,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
- qed_cancel_need_check_timer(s);
- timer_free(s->need_check_timer);
+ bdrv_qed_detach_aio_context(bs);
/* Ensure writes reach stable storage */
bdrv_flush(bs->file);
@@ -547,7 +565,7 @@ static void bdrv_qed_close(BlockDriverState *bs)
static int qed_create(const char *filename, uint32_t cluster_size,
uint64_t image_size, uint32_t table_size,
const char *backing_file, const char *backing_fmt,
- Error **errp)
+ QemuOpts *opts, Error **errp)
{
QEDHeader header = {
.magic = QED_MAGIC,
@@ -566,7 +584,7 @@ static int qed_create(const char *filename, uint32_t cluster_size,
int ret = 0;
BlockDriverState *bs;
- ret = bdrv_create_file(filename, NULL, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
return ret;
@@ -621,53 +639,53 @@ out:
return ret;
}
-static int bdrv_qed_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int bdrv_qed_create(const char *filename, QemuOpts *opts, Error **errp)
{
uint64_t image_size = 0;
uint32_t cluster_size = QED_DEFAULT_CLUSTER_SIZE;
uint32_t table_size = QED_DEFAULT_TABLE_SIZE;
- const char *backing_file = NULL;
- const char *backing_fmt = NULL;
-
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- image_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
- backing_fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- cluster_size = options->value.n;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_TABLE_SIZE)) {
- if (options->value.n) {
- table_size = options->value.n;
- }
- }
- options++;
- }
+ char *backing_file = NULL;
+ char *backing_fmt = NULL;
+ int ret;
+
+ image_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
+ cluster_size = qemu_opt_get_size_del(opts,
+ BLOCK_OPT_CLUSTER_SIZE,
+ QED_DEFAULT_CLUSTER_SIZE);
+ table_size = qemu_opt_get_size_del(opts, BLOCK_OPT_TABLE_SIZE,
+ QED_DEFAULT_TABLE_SIZE);
if (!qed_is_cluster_size_valid(cluster_size)) {
- fprintf(stderr, "QED cluster size must be within range [%u, %u] and power of 2\n",
- QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
- return -EINVAL;
+ error_setg(errp, "QED cluster size must be within range [%u, %u] "
+ "and power of 2",
+ QED_MIN_CLUSTER_SIZE, QED_MAX_CLUSTER_SIZE);
+ ret = -EINVAL;
+ goto finish;
}
if (!qed_is_table_size_valid(table_size)) {
- fprintf(stderr, "QED table size must be within range [%u, %u] and power of 2\n",
- QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
- return -EINVAL;
+ error_setg(errp, "QED table size must be within range [%u, %u] "
+ "and power of 2",
+ QED_MIN_TABLE_SIZE, QED_MAX_TABLE_SIZE);
+ ret = -EINVAL;
+ goto finish;
}
if (!qed_is_image_size_valid(image_size, cluster_size, table_size)) {
- fprintf(stderr, "QED image size must be a non-zero multiple of "
- "cluster size and less than %" PRIu64 " bytes\n",
- qed_max_image_size(cluster_size, table_size));
- return -EINVAL;
+ error_setg(errp, "QED image size must be a non-zero multiple of "
+ "cluster size and less than %" PRIu64 " bytes",
+ qed_max_image_size(cluster_size, table_size));
+ ret = -EINVAL;
+ goto finish;
}
- return qed_create(filename, cluster_size, image_size, table_size,
- backing_file, backing_fmt, errp);
+ ret = qed_create(filename, cluster_size, image_size, table_size,
+ backing_file, backing_fmt, opts, errp);
+
+finish:
+ g_free(backing_file);
+ g_free(backing_fmt);
+ return ret;
}
typedef struct {
@@ -741,17 +759,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb)
/**
* Read from the backing file or zero-fill if no backing file
*
- * @s: QED state
- * @pos: Byte position in device
- * @qiov: Destination I/O vector
- * @cb: Completion function
- * @opaque: User data for completion function
+ * @s: QED state
+ * @pos: Byte position in device
+ * @qiov: Destination I/O vector
+ * @backing_qiov: Possibly shortened copy of qiov, to be allocated here
+ * @cb: Completion function
+ * @opaque: User data for completion function
*
* This function reads qiov->size bytes starting at pos from the backing file.
* If there is no backing file then zeroes are read.
*/
static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
QEMUIOVector *qiov,
+ QEMUIOVector **backing_qiov,
BlockDriverCompletionFunc *cb, void *opaque)
{
uint64_t backing_length = 0;
@@ -784,15 +804,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos,
/* If the read straddles the end of the backing file, shorten it */
size = MIN((uint64_t)backing_length - pos, qiov->size);
+ assert(*backing_qiov == NULL);
+ *backing_qiov = g_new(QEMUIOVector, 1);
+ qemu_iovec_init(*backing_qiov, qiov->niov);
+ qemu_iovec_concat(*backing_qiov, qiov, 0, size);
+
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
- qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
+ *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
typedef struct {
GenericCB gencb;
BDRVQEDState *s;
QEMUIOVector qiov;
+ QEMUIOVector *backing_qiov;
struct iovec iov;
uint64_t offset;
} CopyFromBackingFileCB;
@@ -809,6 +835,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret)
CopyFromBackingFileCB *copy_cb = opaque;
BDRVQEDState *s = copy_cb->s;
+ if (copy_cb->backing_qiov) {
+ qemu_iovec_destroy(copy_cb->backing_qiov);
+ g_free(copy_cb->backing_qiov);
+ copy_cb->backing_qiov = NULL;
+ }
+
if (ret) {
qed_copy_from_backing_file_cb(copy_cb, ret);
return;
@@ -846,11 +878,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos,
copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque);
copy_cb->s = s;
copy_cb->offset = offset;
+ copy_cb->backing_qiov = NULL;
copy_cb->iov.iov_base = qemu_blockalign(s->bs, len);
copy_cb->iov.iov_len = len;
qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1);
- qed_read_backing_file(s, pos, &copy_cb->qiov,
+ qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov,
qed_copy_from_backing_file_write, copy_cb);
}
@@ -917,7 +950,8 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
/* Arrange for a bh to invoke the completion function */
acb->bh_ret = ret;
- acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+ qed_aio_complete_bh, acb);
qemu_bh_schedule(acb->bh);
/* Start next allocating write request waiting behind this one. Note that
@@ -1292,7 +1326,7 @@ static void qed_aio_read_data(void *opaque, int ret,
return;
} else if (ret != QED_CLUSTER_FOUND) {
qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov,
- qed_aio_next_io, acb);
+ &acb->backing_qiov, qed_aio_next_io, acb);
return;
}
@@ -1318,6 +1352,12 @@ static void qed_aio_next_io(void *opaque, int ret)
trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
+ if (acb->backing_qiov) {
+ qemu_iovec_destroy(acb->backing_qiov);
+ g_free(acb->backing_qiov);
+ acb->backing_qiov = NULL;
+ }
+
/* Handle I/O error */
if (ret) {
qed_aio_complete(acb, ret);
@@ -1357,6 +1397,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
acb->qiov_offset = 0;
acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE;
acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE;
+ acb->backing_qiov = NULL;
acb->request.l2_table = NULL;
qemu_iovec_init(&acb->cur_qiov, qiov->niov);
@@ -1593,36 +1634,45 @@ static int bdrv_qed_check(BlockDriverState *bs, BdrvCheckResult *result,
return qed_check(s, result, !!fix);
}
-static QEMUOptionParameter qed_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size (in bytes)"
- }, {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- }, {
- .name = BLOCK_OPT_BACKING_FMT,
- .type = OPT_STRING,
- .help = "Image format of the base image"
- }, {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "Cluster size (in bytes)",
- .value = { .n = QED_DEFAULT_CLUSTER_SIZE },
- }, {
- .name = BLOCK_OPT_TABLE_SIZE,
- .type = OPT_SIZE,
- .help = "L1/L2 table size (in clusters)"
- },
- { /* end of list */ }
+static QemuOptsList qed_create_opts = {
+ .name = "qed-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qed_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = QEMU_OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FMT,
+ .type = QEMU_OPT_STRING,
+ .help = "Image format of the base image"
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Cluster size (in bytes)",
+ .def_value_str = stringify(QED_DEFAULT_CLUSTER_SIZE)
+ },
+ {
+ .name = BLOCK_OPT_TABLE_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "L1/L2 table size (in clusters)"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_qed = {
.format_name = "qed",
.instance_size = sizeof(BDRVQEDState),
- .create_options = qed_create_options,
+ .create_opts = &qed_create_opts,
+ .supports_backing = true,
.bdrv_probe = bdrv_qed_probe,
.bdrv_rebind = bdrv_qed_rebind,
@@ -1642,6 +1692,8 @@ static BlockDriver bdrv_qed = {
.bdrv_change_backing_file = bdrv_qed_change_backing_file,
.bdrv_invalidate_cache = bdrv_qed_invalidate_cache,
.bdrv_check = bdrv_qed_check,
+ .bdrv_detach_aio_context = bdrv_qed_detach_aio_context,
+ .bdrv_attach_aio_context = bdrv_qed_attach_aio_context,
};
static void bdrv_qed_init(void)
diff --git a/block/qed.h b/block/qed.h
index 5d65bea07..2b0e724e0 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -43,7 +43,7 @@
*
* All fields are little-endian on disk.
*/
-
+#define QED_DEFAULT_CLUSTER_SIZE 65536
enum {
QED_MAGIC = 'Q' | 'E' << 8 | 'D' << 16 | '\0' << 24,
@@ -69,7 +69,6 @@ enum {
*/
QED_MIN_CLUSTER_SIZE = 4 * 1024, /* in bytes */
QED_MAX_CLUSTER_SIZE = 64 * 1024 * 1024,
- QED_DEFAULT_CLUSTER_SIZE = 64 * 1024,
/* Allocated clusters are tracked using a 2-level pagetable. Table size is
* a multiple of clusters so large maximum image sizes can be supported
@@ -143,6 +142,7 @@ typedef struct QEDAIOCB {
/* Current cluster scatter-gather list */
QEMUIOVector cur_qiov;
+ QEMUIOVector *backing_qiov;
uint64_t cur_pos; /* position on block device, in bytes */
uint64_t cur_cluster; /* cluster offset in image file */
unsigned int cur_nclusters; /* number of clusters being accessed */
diff --git a/block/quorum.c b/block/quorum.c
index 7f580a83b..d5ee9c005 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -17,11 +17,13 @@
#include <gnutls/crypto.h>
#include "block/block_int.h"
#include "qapi/qmp/qjson.h"
+#include "qapi-event.h"
#define HASH_LENGTH 32
#define QUORUM_OPT_VOTE_THRESHOLD "vote-threshold"
#define QUORUM_OPT_BLKVERIFY "blkverify"
+#define QUORUM_OPT_REWRITE "rewrite-corrupted"
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
@@ -69,6 +71,9 @@ typedef struct BDRVQuorumState {
* It is useful to debug other block drivers by
* comparing them with a reference one.
*/
+ bool rewrite_corrupted;/* true if the driver must rewrite-on-read corrupted
+ * block if Quorum is reached.
+ */
} BDRVQuorumState;
typedef struct QuorumAIOCB QuorumAIOCB;
@@ -104,13 +109,17 @@ struct QuorumAIOCB {
int count; /* number of completed AIOCB */
int success_count; /* number of successfully completed AIOCB */
+ int rewrite_count; /* number of replica to rewrite: count down to
+ * zero once writes are fired
+ */
+
QuorumVotes votes;
bool is_read;
int vote_ret;
};
-static void quorum_vote(QuorumAIOCB *acb);
+static bool quorum_vote(QuorumAIOCB *acb);
static void quorum_aio_cancel(BlockDriverAIOCB *blockacb)
{
@@ -182,6 +191,7 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
acb->qcrs = g_new0(QuorumChildRequest, s->num_children);
acb->count = 0;
acb->success_count = 0;
+ acb->rewrite_count = 0;
acb->votes.compare = quorum_sha256_compare;
QLIST_INIT(&acb->votes.vote_list);
acb->is_read = false;
@@ -198,32 +208,22 @@ static QuorumAIOCB *quorum_aio_get(BDRVQuorumState *s,
static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
{
- QObject *data;
- assert(node_name);
- data = qobject_from_jsonf("{ 'node-name': %s"
- ", 'sector-num': %" PRId64
- ", 'sectors-count': %d }",
- node_name, acb->sector_num, acb->nb_sectors);
+ const char *msg = NULL;
if (ret < 0) {
- QDict *dict = qobject_to_qdict(data);
- qdict_put(dict, "error", qstring_from_str(strerror(-ret)));
+ msg = strerror(-ret);
}
- monitor_protocol_event(QEVENT_QUORUM_REPORT_BAD, data);
- qobject_decref(data);
+ qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
+ acb->sector_num, acb->nb_sectors, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
- QObject *data;
const char *reference = acb->common.bs->device_name[0] ?
acb->common.bs->device_name :
acb->common.bs->node_name;
- data = qobject_from_jsonf("{ 'reference': %s"
- ", 'sector-num': %" PRId64
- ", 'sectors-count': %d }",
- reference, acb->sector_num, acb->nb_sectors);
- monitor_protocol_event(QEVENT_QUORUM_FAILURE, data);
- qobject_decref(data);
+
+ qapi_event_send_quorum_failure(reference, acb->sector_num,
+ acb->nb_sectors, &error_abort);
}
static int quorum_vote_error(QuorumAIOCB *acb);
@@ -241,11 +241,27 @@ static bool quorum_has_too_much_io_failed(QuorumAIOCB *acb)
return false;
}
+static void quorum_rewrite_aio_cb(void *opaque, int ret)
+{
+ QuorumAIOCB *acb = opaque;
+
+ /* one less rewrite to do */
+ acb->rewrite_count--;
+
+ /* wait until all rewrite callbacks have completed */
+ if (acb->rewrite_count) {
+ return;
+ }
+
+ quorum_aio_finalize(acb);
+}
+
static void quorum_aio_cb(void *opaque, int ret)
{
QuorumChildRequest *sacb = opaque;
QuorumAIOCB *acb = sacb->parent;
BDRVQuorumState *s = acb->common.bs->opaque;
+ bool rewrite = false;
sacb->ret = ret;
acb->count++;
@@ -262,12 +278,15 @@ static void quorum_aio_cb(void *opaque, int ret)
/* Do the vote on read */
if (acb->is_read) {
- quorum_vote(acb);
+ rewrite = quorum_vote(acb);
} else {
quorum_has_too_much_io_failed(acb);
}
- quorum_aio_finalize(acb);
+ /* if no rewrite is done the code will finish right away */
+ if (!rewrite) {
+ quorum_aio_finalize(acb);
+ }
}
static void quorum_report_bad_versions(BDRVQuorumState *s,
@@ -287,6 +306,43 @@ static void quorum_report_bad_versions(BDRVQuorumState *s,
}
}
+static bool quorum_rewrite_bad_versions(BDRVQuorumState *s, QuorumAIOCB *acb,
+ QuorumVoteValue *value)
+{
+ QuorumVoteVersion *version;
+ QuorumVoteItem *item;
+ int count = 0;
+
+ /* first count the number of bad versions: done first to avoid concurrency
+ * issues.
+ */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ count++;
+ }
+ }
+
+ /* quorum_rewrite_aio_cb will count down this to zero */
+ acb->rewrite_count = count;
+
+ /* now fire the correcting rewrites */
+ QLIST_FOREACH(version, &acb->votes.vote_list, next) {
+ if (acb->votes.compare(&version->value, value)) {
+ continue;
+ }
+ QLIST_FOREACH(item, &version->items, next) {
+ bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
+ acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ }
+ }
+
+ /* return true if any rewrite is done else false */
+ return count;
+}
+
static void quorum_copy_qiov(QEMUIOVector *dest, QEMUIOVector *source)
{
int i;
@@ -477,16 +533,17 @@ static int quorum_vote_error(QuorumAIOCB *acb)
return ret;
}
-static void quorum_vote(QuorumAIOCB *acb)
+static bool quorum_vote(QuorumAIOCB *acb)
{
bool quorum = true;
+ bool rewrite = false;
int i, j, ret;
QuorumVoteValue hash;
BDRVQuorumState *s = acb->common.bs->opaque;
QuorumVoteVersion *winner;
if (quorum_has_too_much_io_failed(acb)) {
- return;
+ return false;
}
/* get the index of the first successful read */
@@ -514,7 +571,7 @@ static void quorum_vote(QuorumAIOCB *acb)
/* Every successful read agrees */
if (quorum) {
quorum_copy_qiov(acb->qiov, &acb->qcrs[i].qiov);
- return;
+ return false;
}
/* compute hashes for each successful read, also store indexes */
@@ -547,9 +604,15 @@ static void quorum_vote(QuorumAIOCB *acb)
/* some versions are bad print them */
quorum_report_bad_versions(s, acb, &winner->value);
+ /* corruption correction is enabled */
+ if (s->rewrite_corrupted) {
+ rewrite = quorum_rewrite_bad_versions(s, acb, &winner->value);
+ }
+
free_exit:
/* free lists */
quorum_free_vote_list(&acb->votes);
+ return rewrite;
}
static BlockDriverAIOCB *quorum_aio_readv(BlockDriverState *bs,
@@ -714,6 +777,11 @@ static QemuOptsList quorum_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Trigger block verify mode if set",
},
+ {
+ .name = QUORUM_OPT_REWRITE,
+ .type = QEMU_OPT_BOOL,
+ .help = "Rewrite corrupted block on read quorum",
+ },
{ /* end of list */ }
},
};
@@ -753,7 +821,7 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
opts = qemu_opts_create(&quorum_runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
- if (error_is_set(&local_err)) {
+ if (local_err) {
ret = -EINVAL;
goto exit;
}
@@ -775,6 +843,14 @@ static int quorum_open(BlockDriverState *bs, QDict *options, int flags,
"and using two files with vote_threshold=2\n");
}
+ s->rewrite_corrupted = qemu_opt_get_bool(opts, QUORUM_OPT_REWRITE, false);
+ if (s->rewrite_corrupted && s->is_blkverify) {
+ error_setg(&local_err,
+ "rewrite-corrupted=on cannot be used with blkverify=on");
+ ret = -EINVAL;
+ goto exit;
+ }
+
/* allocate the children BlockDriverState array */
s->bs = g_new0(BlockDriverState *, s->num_children);
opened = g_new0(bool, s->num_children);
@@ -828,7 +904,7 @@ close_exit:
g_free(opened);
exit:
/* propagate error */
- if (error_is_set(&local_err)) {
+ if (local_err) {
error_propagate(errp, local_err);
}
QDECREF(list);
@@ -848,25 +924,49 @@ static void quorum_close(BlockDriverState *bs)
g_free(s->bs);
}
+static void quorum_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_children; i++) {
+ bdrv_detach_aio_context(s->bs[i]);
+ }
+}
+
+static void quorum_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVQuorumState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_children; i++) {
+ bdrv_attach_aio_context(s->bs[i], new_context);
+ }
+}
+
static BlockDriver bdrv_quorum = {
- .format_name = "quorum",
- .protocol_name = "quorum",
+ .format_name = "quorum",
+ .protocol_name = "quorum",
+
+ .instance_size = sizeof(BDRVQuorumState),
- .instance_size = sizeof(BDRVQuorumState),
+ .bdrv_file_open = quorum_open,
+ .bdrv_close = quorum_close,
- .bdrv_file_open = quorum_open,
- .bdrv_close = quorum_close,
+ .bdrv_co_flush_to_disk = quorum_co_flush,
- .bdrv_co_flush_to_disk = quorum_co_flush,
+ .bdrv_getlength = quorum_getlength,
- .bdrv_getlength = quorum_getlength,
+ .bdrv_aio_readv = quorum_aio_readv,
+ .bdrv_aio_writev = quorum_aio_writev,
+ .bdrv_invalidate_cache = quorum_invalidate_cache,
- .bdrv_aio_readv = quorum_aio_readv,
- .bdrv_aio_writev = quorum_aio_writev,
- .bdrv_invalidate_cache = quorum_invalidate_cache,
+ .bdrv_detach_aio_context = quorum_detach_aio_context,
+ .bdrv_attach_aio_context = quorum_attach_aio_context,
- .is_filter = true,
- .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
+ .is_filter = true,
+ .bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
};
static void bdrv_quorum_init(void)
diff --git a/block/raw-aio.h b/block/raw-aio.h
index 7ad0a8a0a..e18c97509 100644
--- a/block/raw-aio.h
+++ b/block/raw-aio.h
@@ -34,19 +34,29 @@
/* linux-aio.c - Linux native implementation */
#ifdef CONFIG_LINUX_AIO
void *laio_init(void);
+void laio_cleanup(void *s);
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type);
+void laio_detach_aio_context(void *s, AioContext *old_context);
+void laio_attach_aio_context(void *s, AioContext *new_context);
+void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
+int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
#endif
#ifdef _WIN32
typedef struct QEMUWin32AIOState QEMUWin32AIOState;
QEMUWin32AIOState *win32_aio_init(void);
+void win32_aio_cleanup(QEMUWin32AIOState *aio);
int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
BlockDriverAIOCB *win32_aio_submit(BlockDriverState *bs,
QEMUWin32AIOState *aio, HANDLE hfile,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque, int type);
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *old_context);
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *new_context);
#endif
#endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 1688e16c6..8e9758e92 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -55,6 +55,9 @@
#include <linux/cdrom.h>
#include <linux/fd.h>
#include <linux/fs.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#endif
#endif
#ifdef CONFIG_FIEMAP
#include <linux/fiemap.h>
@@ -146,6 +149,9 @@ typedef struct BDRVRawState {
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
+#ifdef CONFIG_FIEMAP
+ bool skip_fiemap;
+#endif
} BDRVRawState;
typedef struct BDRVRawReopenState {
@@ -215,7 +221,7 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
{
BDRVRawState *s = bs->opaque;
char *buf;
@@ -234,24 +240,24 @@ static void raw_probe_alignment(BlockDriverState *bs)
s->buf_align = 0;
#ifdef BLKSSZGET
- if (ioctl(s->fd, BLKSSZGET, &sector_size) >= 0) {
+ if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef DKIOCGETBLOCKSIZE
- if (ioctl(s->fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
+ if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef DIOCGSECTORSIZE
- if (ioctl(s->fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
+ if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
bs->request_alignment = sector_size;
}
#endif
#ifdef CONFIG_XFS
if (s->is_xfs) {
struct dioattr da;
- if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+ if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
bs->request_alignment = da.d_miniosz;
/* The kernel returns wrong information for d_mem */
/* s->buf_align = da.d_mem; */
@@ -264,7 +270,7 @@ static void raw_probe_alignment(BlockDriverState *bs)
size_t align;
buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
- if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+ if (pread(fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
s->buf_align = align;
break;
}
@@ -276,13 +282,18 @@ static void raw_probe_alignment(BlockDriverState *bs)
size_t align;
buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
- if (pread(s->fd, buf, align, 0) >= 0) {
+ if (pread(fd, buf, align, 0) >= 0) {
bs->request_alignment = align;
break;
}
}
qemu_vfree(buf);
}
+
+ if (!s->buf_align || !bs->request_alignment) {
+ error_setg(errp, "Could not find working O_DIRECT alignment. "
+ "Try cache.direct=off.");
+ }
}
static void raw_parse_flags(int bdrv_flags, int *open_flags)
@@ -304,6 +315,29 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+
+ if (s->use_aio) {
+ laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
+ }
+#endif
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+
+ if (s->use_aio) {
+ laio_attach_aio_context(s->aio_ctx, new_context);
+ }
+#endif
+}
+
#ifdef CONFIG_LINUX_AIO
static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
{
@@ -366,7 +400,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
BDRVRawState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
- const char *filename;
+ const char *filename = NULL;
int fd, ret;
struct stat st;
@@ -444,8 +478,13 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
}
#endif
+ raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
+
ret = 0;
fail:
+ if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
+ unlink(filename);
+ }
qemu_opts_del(opts);
return ret;
}
@@ -471,6 +510,7 @@ static int raw_reopen_prepare(BDRVReopenState *state,
BDRVRawState *s;
BDRVRawReopenState *raw_s;
int ret = 0;
+ Error *local_err = NULL;
assert(state != NULL);
assert(state->bs != NULL);
@@ -543,6 +583,19 @@ static int raw_reopen_prepare(BDRVReopenState *state,
ret = -1;
}
}
+
+ /* Fail already reopen_prepare() if we can't get a working O_DIRECT
+ * alignment with the new fd. */
+ if (raw_s->fd != -1) {
+ raw_probe_alignment(state->bs, raw_s->fd, &local_err);
+ if (local_err) {
+ qemu_close(raw_s->fd);
+ raw_s->fd = -1;
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ }
+ }
+
return ret;
}
@@ -581,14 +634,12 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
-static int raw_refresh_limits(BlockDriverState *bs)
+static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
- raw_probe_alignment(bs);
+ raw_probe_alignment(bs, s->fd, errp);
bs->bl.opt_mem_alignment = s->buf_align;
-
- return 0;
}
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
@@ -756,6 +807,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len);
p += aiocb->aio_iov[i].iov_len;
}
+ assert(p - buf == aiocb->aio_nbytes);
}
nbytes = handle_aiocb_rw_linear(aiocb, buf);
@@ -770,9 +822,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb)
copy = aiocb->aio_iov[i].iov_len;
}
memcpy(aiocb->aio_iov[i].iov_base, p, copy);
+ assert(count >= copy);
p += copy;
count -= copy;
}
+ assert(count == 0);
}
qemu_vfree(buf);
@@ -959,12 +1013,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
+ acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+ acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
+ assert(qiov->size == acb->aio_nbytes);
}
- acb->aio_nbytes = nb_sectors * 512;
- acb->aio_offset = sector_num * 512;
trace_paio_submit_co(sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
@@ -982,12 +1038,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd,
acb->aio_type = type;
acb->aio_fildes = fd;
+ acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+ acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
+ assert(qiov->size == acb->aio_nbytes);
}
- acb->aio_nbytes = nb_sectors * 512;
- acb->aio_offset = sector_num * 512;
trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
@@ -1023,6 +1081,36 @@ static BlockDriverAIOCB *raw_aio_submit(BlockDriverState *bs,
cb, opaque, type);
}
+static void raw_aio_plug(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+ if (s->use_aio) {
+ laio_io_plug(bs, s->aio_ctx);
+ }
+#endif
+}
+
+static void raw_aio_unplug(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+ if (s->use_aio) {
+ laio_io_unplug(bs, s->aio_ctx, true);
+ }
+#endif
+}
+
+static void raw_aio_flush_io_queue(BlockDriverState *bs)
+{
+#ifdef CONFIG_LINUX_AIO
+ BDRVRawState *s = bs->opaque;
+ if (s->use_aio) {
+ laio_io_unplug(bs, s->aio_ctx, false);
+ }
+#endif
+}
+
static BlockDriverAIOCB *raw_aio_readv(BlockDriverState *bs,
int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb, void *opaque)
@@ -1053,6 +1141,14 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+
+ raw_detach_aio_context(bs);
+
+#ifdef CONFIG_LINUX_AIO
+ if (s->use_aio) {
+ laio_cleanup(s->aio_ctx);
+ }
+#endif
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
@@ -1091,12 +1187,12 @@ static int64_t raw_getlength(BlockDriverState *bs)
struct stat st;
if (fstat(fd, &st))
- return -1;
+ return -errno;
if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
struct disklabel dl;
if (ioctl(fd, DIOCGDINFO, &dl))
- return -1;
+ return -errno;
return (uint64_t)dl.d_secsize *
dl.d_partitions[DISKPART(st.st_rdev)].p_size;
} else
@@ -1110,7 +1206,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
struct stat st;
if (fstat(fd, &st))
- return -1;
+ return -errno;
if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
struct dkwedge_info dkw;
@@ -1120,7 +1216,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
struct disklabel dl;
if (ioctl(fd, DIOCGDINFO, &dl))
- return -1;
+ return -errno;
return (uint64_t)dl.d_secsize *
dl.d_partitions[DISKPART(st.st_rdev)].p_size;
}
@@ -1133,6 +1229,7 @@ static int64_t raw_getlength(BlockDriverState *bs)
BDRVRawState *s = bs->opaque;
struct dk_minfo minfo;
int ret;
+ int64_t size;
ret = fd_open(bs);
if (ret < 0) {
@@ -1151,7 +1248,11 @@ static int64_t raw_getlength(BlockDriverState *bs)
* There are reports that lseek on some devices fails, but
* irc discussion said that contingency on contingency was overkill.
*/
- return lseek(s->fd, 0, SEEK_END);
+ size = lseek(s->fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ return size;
}
#elif defined(CONFIG_BSD)
static int64_t raw_getlength(BlockDriverState *bs)
@@ -1186,9 +1287,12 @@ again:
if (size == 0)
#endif
#if defined(__APPLE__) && defined(__MACH__)
- size = LONG_LONG_MAX;
+ size = LLONG_MAX;
#else
size = lseek(fd, 0LL, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
switch(s->type) {
@@ -1205,6 +1309,9 @@ again:
#endif
} else {
size = lseek(fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
}
return size;
}
@@ -1213,13 +1320,18 @@ static int64_t raw_getlength(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int ret;
+ int64_t size;
ret = fd_open(bs);
if (ret < 0) {
return ret;
}
- return lseek(s->fd, 0, SEEK_END);
+ size = lseek(s->fd, 0, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ return size;
}
#endif
@@ -1234,22 +1346,19 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return (int64_t)st.st_blocks * 512;
}
-static int raw_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
{
int fd;
int result = 0;
int64_t total_size = 0;
+ bool nocow = false;
strstart(filename, "file:", &filename);
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / BDRV_SECTOR_SIZE;
- }
- options++;
- }
+ total_size =
+ qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
+ nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
@@ -1257,6 +1366,21 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
result = -errno;
error_setg_errno(errp, -result, "Could not create file");
} else {
+ if (nocow) {
+#ifdef __linux__
+ /* Set NOCOW flag to solve performance issue on fs like btrfs.
+ * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value
+ * will be ignored since any failure of this operation should not
+ * block the left work.
+ */
+ int attr;
+ if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+ attr |= FS_NOCOW_FL;
+ ioctl(fd, FS_IOC_SETFLAGS, &attr);
+ }
+#endif
+ }
+
if (ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
result = -errno;
error_setg_errno(errp, -result, "Could not resize file");
@@ -1269,53 +1393,29 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
return result;
}
-/*
- * Returns true iff the specified sector is present in the disk image. Drivers
- * not implementing the functionality are assumed to not support backing files,
- * hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
+static int64_t try_fiemap(BlockDriverState *bs, off_t start, off_t *data,
+ off_t *hole, int nb_sectors, int *pnum)
{
- off_t start, data, hole;
- int64_t ret;
-
- ret = fd_open(bs);
- if (ret < 0) {
- return ret;
- }
-
- start = sector_num * BDRV_SECTOR_SIZE;
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
-
#ifdef CONFIG_FIEMAP
-
BDRVRawState *s = bs->opaque;
+ int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
struct {
struct fiemap fm;
struct fiemap_extent fe;
} f;
+ if (s->skip_fiemap) {
+ return -ENOTSUP;
+ }
+
f.fm.fm_start = start;
f.fm.fm_length = (int64_t)nb_sectors * BDRV_SECTOR_SIZE;
f.fm.fm_flags = 0;
f.fm.fm_extent_count = 1;
f.fm.fm_reserved = 0;
if (ioctl(s->fd, FS_IOC_FIEMAP, &f) == -1) {
- /* Assume everything is allocated. */
- *pnum = nb_sectors;
- return ret;
+ s->skip_fiemap = true;
+ return -errno;
}
if (f.fm.fm_mapped_extents == 0) {
@@ -1323,44 +1423,92 @@ static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
* f.fm.fm_start + f.fm.fm_length must be clamped to the file size!
*/
off_t length = lseek(s->fd, 0, SEEK_END);
- hole = f.fm.fm_start;
- data = MIN(f.fm.fm_start + f.fm.fm_length, length);
+ *hole = f.fm.fm_start;
+ *data = MIN(f.fm.fm_start + f.fm.fm_length, length);
} else {
- data = f.fe.fe_logical;
- hole = f.fe.fe_logical + f.fe.fe_length;
+ *data = f.fe.fe_logical;
+ *hole = f.fe.fe_logical + f.fe.fe_length;
if (f.fe.fe_flags & FIEMAP_EXTENT_UNWRITTEN) {
ret |= BDRV_BLOCK_ZERO;
}
}
-#elif defined SEEK_HOLE && defined SEEK_DATA
+ return ret;
+#else
+ return -ENOTSUP;
+#endif
+}
+static int64_t try_seek_hole(BlockDriverState *bs, off_t start, off_t *data,
+ off_t *hole, int *pnum)
+{
+#if defined SEEK_HOLE && defined SEEK_DATA
BDRVRawState *s = bs->opaque;
- hole = lseek(s->fd, start, SEEK_HOLE);
- if (hole == -1) {
+ *hole = lseek(s->fd, start, SEEK_HOLE);
+ if (*hole == -1) {
/* -ENXIO indicates that sector_num was past the end of the file.
* There is a virtual hole there. */
assert(errno != -ENXIO);
- /* Most likely EINVAL. Assume everything is allocated. */
- *pnum = nb_sectors;
- return ret;
+ return -errno;
}
- if (hole > start) {
- data = start;
+ if (*hole > start) {
+ *data = start;
} else {
/* On a hole. We need another syscall to find its end. */
- data = lseek(s->fd, start, SEEK_DATA);
- if (data == -1) {
- data = lseek(s->fd, 0, SEEK_END);
+ *data = lseek(s->fd, start, SEEK_DATA);
+ if (*data == -1) {
+ *data = lseek(s->fd, 0, SEEK_END);
}
}
+
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
#else
- data = 0;
- hole = start + nb_sectors * BDRV_SECTOR_SIZE;
+ return -ENOTSUP;
#endif
+}
+
+/*
+ * Returns true iff the specified sector is present in the disk image. Drivers
+ * not implementing the functionality are assumed to not support backing files,
+ * hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ */
+static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ off_t start, data = 0, hole = 0;
+ int64_t ret;
+
+ ret = fd_open(bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ start = sector_num * BDRV_SECTOR_SIZE;
+
+ ret = try_fiemap(bs, start, &data, &hole, nb_sectors, pnum);
+ if (ret < 0) {
+ ret = try_seek_hole(bs, start, &data, &hole, pnum);
+ if (ret < 0) {
+ /* Assume everything is allocated. */
+ data = 0;
+ hole = start + nb_sectors * BDRV_SECTOR_SIZE;
+ ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ }
+ }
if (data <= start) {
/* On a data extent, compute sectors to the end of the extent. */
@@ -1410,13 +1558,22 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return 0;
}
-static QEMUOptionParameter raw_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- { NULL }
+static QemuOptsList raw_create_opts = {
+ .name = "raw-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_NOCOW,
+ .type = QEMU_OPT_BOOL,
+ .help = "Turn off copy-on-write (valid only on btrfs)"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_file = {
@@ -1441,6 +1598,9 @@ static BlockDriver bdrv_file = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_io_plug = raw_aio_plug,
+ .bdrv_io_unplug = raw_aio_unplug,
+ .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1448,7 +1608,10 @@ static BlockDriver bdrv_file = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .create_options = raw_create_options,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
+ .create_opts = &raw_create_opts,
};
/***********************************************/
@@ -1769,7 +1932,7 @@ static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
return -ENOTSUP;
}
-static int hdev_create(const char *filename, QEMUOptionParameter *options,
+static int hdev_create(const char *filename, QemuOpts *opts,
Error **errp)
{
int fd;
@@ -1790,12 +1953,8 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options,
(void)has_prefix;
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, "size")) {
- total_size = options->value.n / BDRV_SECTOR_SIZE;
- }
- options++;
- }
+ total_size =
+ qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / BDRV_SECTOR_SIZE;
fd = qemu_open(filename, O_WRONLY | O_BINARY);
if (fd < 0) {
@@ -1832,8 +1991,8 @@ static BlockDriver bdrv_host_device = {
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
- .create_options = raw_create_options,
+ .bdrv_create = hdev_create,
+ .create_opts = &raw_create_opts,
.bdrv_co_write_zeroes = hdev_co_write_zeroes,
.bdrv_aio_readv = raw_aio_readv,
@@ -1841,6 +2000,9 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_io_plug = raw_aio_plug,
+ .bdrv_io_unplug = raw_aio_unplug,
+ .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1848,6 +2010,9 @@ static BlockDriver bdrv_host_device = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* generic scsi device */
#ifdef __linux__
.bdrv_ioctl = hdev_ioctl,
@@ -1976,13 +2141,16 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
- .create_options = raw_create_options,
+ .bdrv_create = hdev_create,
+ .create_opts = &raw_create_opts,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_io_plug = raw_aio_plug,
+ .bdrv_io_unplug = raw_aio_unplug,
+ .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1990,6 +2158,9 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* removable device support */
.bdrv_is_inserted = floppy_is_inserted,
.bdrv_media_changed = floppy_media_changed,
@@ -2101,13 +2272,16 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_prepare = raw_reopen_prepare,
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
- .create_options = raw_create_options,
+ .bdrv_create = hdev_create,
+ .create_opts = &raw_create_opts,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_io_plug = raw_aio_plug,
+ .bdrv_io_unplug = raw_aio_unplug,
+ .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2115,6 +2289,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2233,12 +2410,15 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_reopen_commit = raw_reopen_commit,
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
- .create_options = raw_create_options,
+ .create_opts = &raw_create_opts,
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
+ .bdrv_io_plug = raw_aio_plug,
+ .bdrv_io_unplug = raw_aio_unplug,
+ .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2246,6 +2426,9 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2253,40 +2436,6 @@ static BlockDriver bdrv_host_cdrom = {
};
#endif /* __FreeBSD__ */
-#ifdef CONFIG_LINUX_AIO
-/**
- * Return the file descriptor for Linux AIO
- *
- * This function is a layering violation and should be removed when it becomes
- * possible to call the block layer outside the global mutex. It allows the
- * caller to hijack the file descriptor so I/O can be performed outside the
- * block layer.
- */
-int raw_get_aio_fd(BlockDriverState *bs)
-{
- BDRVRawState *s;
-
- if (!bs->drv) {
- return -ENOMEDIUM;
- }
-
- if (bs->drv == bdrv_find_format("raw")) {
- bs = bs->file;
- }
-
- /* raw-posix has several protocols so just check for raw_aio_readv */
- if (bs->drv->bdrv_aio_readv != raw_aio_readv) {
- return -ENOTSUP;
- }
-
- s = bs->opaque;
- if (!s->use_aio) {
- return -ENOTSUP;
- }
- return s->fd;
-}
-#endif /* CONFIG_LINUX_AIO */
-
static void bdrv_file_init(void)
{
/*
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 48cb2c225..902eab610 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -36,8 +36,6 @@
#define FTYPE_CD 1
#define FTYPE_HARDDISK 2
-static QEMUWin32AIOState *aio;
-
typedef struct RawWin32AIOData {
BlockDriverState *bs;
HANDLE hfile;
@@ -202,6 +200,25 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (s->aio) {
+ win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+ }
+}
+
+static void raw_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVRawState *s = bs->opaque;
+
+ if (s->aio) {
+ win32_aio_attach_aio_context(s->aio, new_context);
+ }
+}
+
static void raw_probe_alignment(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -300,15 +317,6 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
raw_parse_flags(flags, &access_flags, &overlapped);
- if ((flags & BDRV_O_NATIVE_AIO) && aio == NULL) {
- aio = win32_aio_init();
- if (aio == NULL) {
- error_setg(errp, "Could not initialize AIO");
- ret = -EINVAL;
- goto fail;
- }
- }
-
if (filename[0] && filename[1] == ':') {
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
} else if (filename[0] == '\\' && filename[1] == '\\') {
@@ -335,13 +343,23 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
}
if (flags & BDRV_O_NATIVE_AIO) {
- ret = win32_aio_attach(aio, s->hfile);
+ s->aio = win32_aio_init();
+ if (s->aio == NULL) {
+ CloseHandle(s->hfile);
+ error_setg(errp, "Could not initialize AIO");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ ret = win32_aio_attach(s->aio, s->hfile);
if (ret < 0) {
+ win32_aio_cleanup(s->aio);
CloseHandle(s->hfile);
error_setg_errno(errp, -ret, "Could not enable AIO");
goto fail;
}
- s->aio = aio;
+
+ win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}
raw_probe_alignment(bs);
@@ -389,7 +407,17 @@ static BlockDriverAIOCB *raw_aio_flush(BlockDriverState *bs,
static void raw_close(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
+
+ if (s->aio) {
+ win32_aio_detach_aio_context(s->aio, bdrv_get_aio_context(bs));
+ win32_aio_cleanup(s->aio);
+ s->aio = NULL;
+ }
+
CloseHandle(s->hfile);
+ if (bs->open_flags & BDRV_O_TEMPORARY) {
+ unlink(bs->filename);
+ }
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -475,8 +503,7 @@ static int64_t raw_get_allocated_file_size(BlockDriverState *bs)
return st.st_size;
}
-static int raw_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
{
int fd;
int64_t total_size = 0;
@@ -484,12 +511,8 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
strstart(filename, "file:", &filename);
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n / 512;
- }
- options++;
- }
+ total_size =
+ qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0) / 512;
fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
0644);
@@ -503,13 +526,18 @@ static int raw_create(const char *filename, QEMUOptionParameter *options,
return 0;
}
-static QEMUOptionParameter raw_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- { NULL }
+
+static QemuOptsList raw_create_opts = {
+ .name = "raw-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_file = {
@@ -518,9 +546,9 @@ static BlockDriver bdrv_file = {
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = raw_parse_filename,
- .bdrv_file_open = raw_open,
- .bdrv_close = raw_close,
- .bdrv_create = raw_create,
+ .bdrv_file_open = raw_open,
+ .bdrv_close = raw_close,
+ .bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_aio_readv = raw_aio_readv,
@@ -532,7 +560,7 @@ static BlockDriver bdrv_file = {
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .create_options = raw_create_options,
+ .create_opts = &raw_create_opts,
};
/***********************************************/
@@ -681,6 +709,9 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_detach_aio_context = raw_detach_aio_context,
+ .bdrv_attach_aio_context = raw_attach_aio_context,
+
.bdrv_getlength = raw_getlength,
.has_variable_length = true,
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 01ea692a4..f82f4c25d 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -29,13 +29,17 @@
#include "block/block_int.h"
#include "qemu/option.h"
-static QEMUOptionParameter raw_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- { 0 }
+static QemuOptsList raw_create_opts = {
+ .name = "raw-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
};
static int raw_reopen_prepare(BDRVReopenState *reopen_state,
@@ -90,10 +94,9 @@ static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return bdrv_get_info(bs->file, bdi);
}
-static int raw_refresh_limits(BlockDriverState *bs)
+static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
bs->bl = bs->file->bl;
- return 0;
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
@@ -139,13 +142,12 @@ static int raw_has_zero_init(BlockDriverState *bs)
return bdrv_has_zero_init(bs->file);
}
-static int raw_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
int ret;
- ret = bdrv_create_file(filename, options, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (local_err) {
error_propagate(errp, local_err);
}
@@ -194,7 +196,7 @@ static BlockDriver bdrv_raw = {
.bdrv_lock_medium = &raw_lock_medium,
.bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
- .create_options = &raw_create_options[0],
+ .create_opts = &raw_create_opts,
.bdrv_has_zero_init = &raw_has_zero_init
};
diff --git a/block/rbd.c b/block/rbd.c
index dbc79f452..2b797d3e8 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -105,7 +105,7 @@ typedef struct BDRVRBDState {
static int qemu_rbd_next_tok(char *dst, int dst_len,
char *src, char delim,
const char *name,
- char **p)
+ char **p, Error **errp)
{
int l;
char *end;
@@ -128,10 +128,10 @@ static int qemu_rbd_next_tok(char *dst, int dst_len,
}
l = strlen(src);
if (l >= dst_len) {
- error_report("%s too long", name);
+ error_setg(errp, "%s too long", name);
return -EINVAL;
} else if (l == 0) {
- error_report("%s too short", name);
+ error_setg(errp, "%s too short", name);
return -EINVAL;
}
@@ -157,13 +157,15 @@ static int qemu_rbd_parsename(const char *filename,
char *pool, int pool_len,
char *snap, int snap_len,
char *name, int name_len,
- char *conf, int conf_len)
+ char *conf, int conf_len,
+ Error **errp)
{
const char *start;
char *p, *buf;
int ret;
if (!strstart(filename, "rbd:", &start)) {
+ error_setg(errp, "File name must start with 'rbd:'");
return -EINVAL;
}
@@ -172,7 +174,8 @@ static int qemu_rbd_parsename(const char *filename,
*snap = '\0';
*conf = '\0';
- ret = qemu_rbd_next_tok(pool, pool_len, p, '/', "pool name", &p);
+ ret = qemu_rbd_next_tok(pool, pool_len, p,
+ '/', "pool name", &p, errp);
if (ret < 0 || !p) {
ret = -EINVAL;
goto done;
@@ -180,21 +183,25 @@ static int qemu_rbd_parsename(const char *filename,
qemu_rbd_unescape(pool);
if (strchr(p, '@')) {
- ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
+ ret = qemu_rbd_next_tok(name, name_len, p,
+ '@', "object name", &p, errp);
if (ret < 0) {
goto done;
}
- ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p);
+ ret = qemu_rbd_next_tok(snap, snap_len, p,
+ ':', "snap name", &p, errp);
qemu_rbd_unescape(snap);
} else {
- ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p);
+ ret = qemu_rbd_next_tok(name, name_len, p,
+ ':', "object name", &p, errp);
}
qemu_rbd_unescape(name);
if (ret < 0 || !p) {
goto done;
}
- ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', "configuration", &p);
+ ret = qemu_rbd_next_tok(conf, conf_len, p,
+ '\0', "configuration", &p, errp);
done:
g_free(buf);
@@ -229,7 +236,7 @@ static char *qemu_rbd_parse_clientname(const char *conf, char *clientname)
return NULL;
}
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
{
char *p, *buf;
char name[RBD_MAX_CONF_NAME_SIZE];
@@ -241,20 +248,20 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
while (p) {
ret = qemu_rbd_next_tok(name, sizeof(name), p,
- '=', "conf option name", &p);
+ '=', "conf option name", &p, errp);
if (ret < 0) {
break;
}
qemu_rbd_unescape(name);
if (!p) {
- error_report("conf option %s has no value", name);
+ error_setg(errp, "conf option %s has no value", name);
ret = -EINVAL;
break;
}
ret = qemu_rbd_next_tok(value, sizeof(value), p,
- ':', "conf option value", &p);
+ ':', "conf option value", &p, errp);
if (ret < 0) {
break;
}
@@ -263,7 +270,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
if (strcmp(name, "conf") == 0) {
ret = rados_conf_read_file(cluster, value);
if (ret < 0) {
- error_report("error reading conf file %s", value);
+ error_setg(errp, "error reading conf file %s", value);
break;
}
} else if (strcmp(name, "id") == 0) {
@@ -271,7 +278,7 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
} else {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
- error_report("invalid conf option %s", name);
+ error_setg(errp, "invalid conf option %s", name);
ret = -EINVAL;
break;
}
@@ -282,9 +289,9 @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
return ret;
}
-static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
+ Error *local_err = NULL;
int64_t bytes = 0;
int64_t objsize;
int obj_order = 0;
@@ -301,34 +308,29 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
name, sizeof(name),
- conf, sizeof(conf)) < 0) {
+ conf, sizeof(conf), &local_err) < 0) {
+ error_propagate(errp, local_err);
return -EINVAL;
}
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- bytes = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- objsize = options->value.n;
- if ((objsize - 1) & objsize) { /* not a power of 2? */
- error_report("obj size needs to be power of 2");
- return -EINVAL;
- }
- if (objsize < 4096) {
- error_report("obj size too small");
- return -EINVAL;
- }
- obj_order = ffs(objsize) - 1;
- }
+ bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ objsize = qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, 0);
+ if (objsize) {
+ if ((objsize - 1) & objsize) { /* not a power of 2? */
+ error_setg(errp, "obj size needs to be power of 2");
+ return -EINVAL;
}
- options++;
+ if (objsize < 4096) {
+ error_setg(errp, "obj size too small");
+ return -EINVAL;
+ }
+ obj_order = ffs(objsize) - 1;
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
if (rados_create(&cluster, clientname) < 0) {
- error_report("error initializing");
+ error_setg(errp, "error initializing");
return -EIO;
}
@@ -338,20 +340,20 @@ static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options,
}
if (conf[0] != '\0' &&
- qemu_rbd_set_conf(cluster, conf) < 0) {
- error_report("error setting config options");
+ qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
rados_shutdown(cluster);
+ error_propagate(errp, local_err);
return -EIO;
}
if (rados_connect(cluster) < 0) {
- error_report("error connecting");
+ error_setg(errp, "error connecting");
rados_shutdown(cluster);
return -EIO;
}
if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
- error_report("error opening pool %s", pool);
+ error_setg(errp, "error opening pool %s", pool);
rados_shutdown(cluster);
return -EIO;
}
@@ -441,8 +443,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
- qerror_report_err(local_err);
- error_free(local_err);
+ error_propagate(errp, local_err);
qemu_opts_del(opts);
return -EINVAL;
}
@@ -452,7 +453,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
s->name, sizeof(s->name),
- conf, sizeof(conf)) < 0) {
+ conf, sizeof(conf), errp) < 0) {
r = -EINVAL;
goto failed_opts;
}
@@ -460,7 +461,7 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
r = rados_create(&s->cluster, clientname);
if (r < 0) {
- error_report("error initializing");
+ error_setg(&local_err, "error initializing");
goto failed_opts;
}
@@ -488,28 +489,27 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
}
if (conf[0] != '\0') {
- r = qemu_rbd_set_conf(s->cluster, conf);
+ r = qemu_rbd_set_conf(s->cluster, conf, errp);
if (r < 0) {
- error_report("error setting config options");
goto failed_shutdown;
}
}
r = rados_connect(s->cluster);
if (r < 0) {
- error_report("error connecting");
+ error_setg(&local_err, "error connecting");
goto failed_shutdown;
}
r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
if (r < 0) {
- error_report("error opening pool %s", pool);
+ error_setg(&local_err, "error opening pool %s", pool);
goto failed_shutdown;
}
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
if (r < 0) {
- error_report("error reading header from %s", s->name);
+ error_setg(&local_err, "error reading header from %s", s->name);
goto failed_open;
}
@@ -548,7 +548,7 @@ static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
acb->cancelled = 1;
while (acb->status == -EINPROGRESS) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(acb->common.bs), true);
}
qemu_aio_release(acb);
@@ -581,7 +581,8 @@ static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
rcb->ret = rbd_aio_get_return_value(c);
rbd_aio_release(c);
- acb->bh = qemu_bh_new(rbd_finish_bh, rcb);
+ acb->bh = aio_bh_new(bdrv_get_aio_context(acb->common.bs),
+ rbd_finish_bh, rcb);
qemu_bh_schedule(acb->bh);
}
@@ -677,13 +678,16 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
}
if (r < 0) {
- goto failed;
+ goto failed_completion;
}
return &acb->common;
+failed_completion:
+ rbd_aio_release(c);
failed:
g_free(rcb);
+ qemu_vfree(acb->bounce);
qemu_aio_release(acb);
return NULL;
}
@@ -900,18 +904,22 @@ static BlockDriverAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
}
#endif
-static QEMUOptionParameter qemu_rbd_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "RBD object size"
- },
- {NULL}
+static QemuOptsList qemu_rbd_create_opts = {
+ .name = "rbd-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "RBD object size"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_rbd = {
@@ -923,7 +931,7 @@ static BlockDriver bdrv_rbd = {
.bdrv_create = qemu_rbd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_get_info = qemu_rbd_getinfo,
- .create_options = qemu_rbd_create_options,
+ .create_opts = &qemu_rbd_create_opts,
.bdrv_getlength = qemu_rbd_getlength,
.bdrv_truncate = qemu_rbd_truncate,
.protocol_name = "rbd",
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 0eb33ee80..8d9350c26 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -200,6 +200,8 @@ typedef struct SheepdogInode {
uint32_t data_vdi_id[MAX_DATA_OBJS];
} SheepdogInode;
+#define SD_INODE_HEADER_SIZE offsetof(SheepdogInode, data_vdi_id)
+
/*
* 64 bit FNV-1a non-zero initial basis
*/
@@ -282,6 +284,7 @@ typedef struct AIOReq {
unsigned int data_len;
uint8_t flags;
uint32_t id;
+ bool create;
QLIST_ENTRY(AIOReq) aio_siblings;
} AIOReq;
@@ -314,6 +317,7 @@ struct SheepdogAIOCB {
typedef struct BDRVSheepdogState {
BlockDriverState *bs;
+ AioContext *aio_context;
SheepdogInode inode;
@@ -404,7 +408,7 @@ static const char * sd_strerror(int err)
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
uint64_t oid, unsigned int data_len,
- uint64_t offset, uint8_t flags,
+ uint64_t offset, uint8_t flags, bool create,
uint64_t base_oid, unsigned int iov_offset)
{
AIOReq *aio_req;
@@ -418,6 +422,7 @@ static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
aio_req->data_len = data_len;
aio_req->flags = flags;
aio_req->id = s->aioreq_seq_num++;
+ aio_req->create = create;
acb->nr_pending++;
return aio_req;
@@ -496,7 +501,7 @@ static void sd_aio_cancel(BlockDriverAIOCB *blockacb)
sd_finish_aiocb(acb);
return;
}
- qemu_aio_wait();
+ aio_poll(s->aio_context, true);
}
}
@@ -526,17 +531,16 @@ static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
return acb;
}
-static int connect_to_sdog(BDRVSheepdogState *s)
+static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
{
int fd;
- Error *err = NULL;
if (s->is_unix) {
- fd = unix_connect(s->host_spec, &err);
+ fd = unix_connect(s->host_spec, errp);
} else {
- fd = inet_connect(s->host_spec, &err);
+ fd = inet_connect(s->host_spec, errp);
- if (err == NULL) {
+ if (fd >= 0) {
int ret = socket_set_nodelay(fd);
if (ret < 0) {
error_report("%s", strerror(errno));
@@ -544,10 +548,7 @@ static int connect_to_sdog(BDRVSheepdogState *s)
}
}
- if (err != NULL) {
- qerror_report_err(err);
- error_free(err);
- } else {
+ if (fd >= 0) {
qemu_set_nonblock(fd);
}
@@ -582,6 +583,7 @@ static void restart_co_req(void *opaque)
typedef struct SheepdogReqCo {
int sockfd;
+ AioContext *aio_context;
SheepdogReq *hdr;
void *data;
unsigned int *wlen;
@@ -602,14 +604,14 @@ static coroutine_fn void do_co_req(void *opaque)
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- qemu_aio_set_fd_handler(sockfd, NULL, restart_co_req, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- qemu_aio_set_fd_handler(sockfd, restart_co_req, NULL, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -634,18 +636,19 @@ static coroutine_fn void do_co_req(void *opaque)
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
- qemu_aio_set_fd_handler(sockfd, NULL, NULL, NULL);
+ aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
}
-static int do_req(int sockfd, SheepdogReq *hdr, void *data,
- unsigned int *wlen, unsigned int *rlen)
+static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
+ void *data, unsigned int *wlen, unsigned int *rlen)
{
Coroutine *co;
SheepdogReqCo srco = {
.sockfd = sockfd,
+ .aio_context = aio_context,
.hdr = hdr,
.data = data,
.wlen = wlen,
@@ -660,7 +663,7 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
co = qemu_coroutine_create(do_co_req);
qemu_coroutine_enter(co, &srco);
while (!srco.finished) {
- qemu_aio_wait();
+ aio_poll(aio_context, true);
}
}
@@ -668,11 +671,11 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data,
}
static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
- struct iovec *iov, int niov, bool create,
- enum AIOCBState aiocb_type);
+ struct iovec *iov, int niov,
+ enum AIOCBState aiocb_type);
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req);
static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag);
-static int get_sheep_fd(BDRVSheepdogState *s);
+static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
static void co_write_request(void *opaque);
static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
@@ -702,17 +705,18 @@ static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
/* move aio_req from pending list to inflight one */
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, false,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
}
}
static coroutine_fn void reconnect_to_sdog(void *opaque)
{
+ Error *local_err = NULL;
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
close(s->fd);
s->fd = -1;
@@ -723,9 +727,11 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
/* Try to reconnect the sheepdog server every one second. */
while (s->fd < 0) {
- s->fd = get_sheep_fd(s);
+ s->fd = get_sheep_fd(s, &local_err);
if (s->fd < 0) {
DPRINTF("Wait for connection to be established\n");
+ error_report("%s", error_get_pretty(local_err));
+ error_free(local_err);
co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
1000000000ULL);
}
@@ -798,7 +804,7 @@ static void coroutine_fn aio_read_response(void *opaque)
}
idx = data_oid_to_idx(aio_req->oid);
- if (s->inode.data_vdi_id[idx] != s->inode.vdi_id) {
+ if (aio_req->create) {
/*
* If the object is newly created one, we need to update
* the vdi object (metadata object). min_dirty_data_idx
@@ -914,16 +920,16 @@ static void co_write_request(void *opaque)
* We cannot use this descriptor for other operations because
* the block driver may be on waiting response from the server.
*/
-static int get_sheep_fd(BDRVSheepdogState *s)
+static int get_sheep_fd(BDRVSheepdogState *s, Error **errp)
{
int fd;
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
return fd;
}
- qemu_aio_set_fd_handler(fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
return fd;
}
@@ -1061,7 +1067,7 @@ static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
uint32_t snapid, const char *tag, uint32_t *vid,
- bool lock)
+ bool lock, Error **errp)
{
int ret, fd;
SheepdogVdiReq hdr;
@@ -1069,7 +1075,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
return fd;
}
@@ -1093,14 +1099,15 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
hdr.snapid = snapid;
hdr.flags = SD_FLAG_CMD_WRITE;
- ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
if (ret) {
+ error_setg_errno(errp, -ret, "cannot get vdi info");
goto out;
}
if (rsp->result != SD_RES_SUCCESS) {
- error_report("cannot get vdi info, %s, %s %d %s",
- sd_strerror(rsp->result), filename, snapid, tag);
+ error_setg(errp, "cannot get vdi info, %s, %s %" PRIu32 " %s",
+ sd_strerror(rsp->result), filename, snapid, tag);
if (rsp->result == SD_RES_NO_VDI) {
ret = -ENOENT;
} else {
@@ -1117,8 +1124,8 @@ out:
}
static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
- struct iovec *iov, int niov, bool create,
- enum AIOCBState aiocb_type)
+ struct iovec *iov, int niov,
+ enum AIOCBState aiocb_type)
{
int nr_copies = s->inode.nr_copies;
SheepdogObjReq hdr;
@@ -1129,6 +1136,7 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
uint64_t offset = aio_req->offset;
uint8_t flags = aio_req->flags;
uint64_t old_oid = aio_req->base_oid;
+ bool create = aio_req->create;
if (!nr_copies) {
error_report("bug");
@@ -1173,7 +1181,8 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
- qemu_aio_set_fd_handler(s->fd, co_read_response, co_write_request, s);
+ aio_set_fd_handler(s->aio_context, s->fd,
+ co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
/* send a header */
@@ -1191,12 +1200,13 @@ static void coroutine_fn add_aio_request(BDRVSheepdogState *s, AIOReq *aio_req,
}
out:
socket_set_cork(s->fd, 0);
- qemu_aio_set_fd_handler(s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
-static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_write_object(int fd, AioContext *aio_context, char *buf,
+ uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset,
bool write, bool create, uint32_t cache_flags)
{
@@ -1229,7 +1239,7 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
hdr.offset = offset;
hdr.copies = copies;
- ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+ ret = do_req(fd, aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
if (ret) {
error_report("failed to send a request to the sheep");
return ret;
@@ -1244,49 +1254,59 @@ static int read_write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
}
}
-static int read_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int read_object(int fd, AioContext *aio_context, char *buf,
+ uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset,
uint32_t cache_flags)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, false,
+ return read_write_object(fd, aio_context, buf, oid, copies,
+ datalen, offset, false,
false, cache_flags);
}
-static int write_object(int fd, char *buf, uint64_t oid, uint8_t copies,
+static int write_object(int fd, AioContext *aio_context, char *buf,
+ uint64_t oid, uint8_t copies,
unsigned int datalen, uint64_t offset, bool create,
uint32_t cache_flags)
{
- return read_write_object(fd, buf, oid, copies, datalen, offset, true,
+ return read_write_object(fd, aio_context, buf, oid, copies,
+ datalen, offset, true,
create, cache_flags);
}
/* update inode with the latest state */
static int reload_inode(BDRVSheepdogState *s, uint32_t snapid, const char *tag)
{
+ Error *local_err = NULL;
SheepdogInode *inode;
int ret = 0, fd;
uint32_t vid = 0;
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
return -EIO;
}
- inode = g_malloc(sizeof(s->inode));
+ inode = g_malloc(SD_INODE_HEADER_SIZE);
- ret = find_vdi_name(s, s->name, snapid, tag, &vid, false);
+ ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
if (ret) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
goto out;
}
- ret = read_object(fd, (char *)inode, vid_to_vdi_oid(vid),
- s->inode.nr_copies, sizeof(*inode), 0, s->cache_flags);
+ ret = read_object(fd, s->aio_context, (char *)inode, vid_to_vdi_oid(vid),
+ s->inode.nr_copies, SD_INODE_HEADER_SIZE, 0,
+ s->cache_flags);
if (ret < 0) {
goto out;
}
if (inode->vdi_id != s->inode.vdi_id) {
- memcpy(&s->inode, inode, sizeof(s->inode));
+ memcpy(&s->inode, inode, SD_INODE_HEADER_SIZE);
}
out:
@@ -1310,6 +1330,7 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
aio_req->flags = 0;
aio_req->base_oid = 0;
+ aio_req->create = false;
QLIST_REMOVE(aio_req, aio_siblings);
QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
return true;
@@ -1322,7 +1343,8 @@ static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
- bool create = false;
+
+ aio_req->create = false;
/* check whether this request becomes a CoW one */
if (acb->aiocb_type == AIOCB_WRITE_UDATA && is_data_obj(aio_req->oid)) {
@@ -1340,20 +1362,36 @@ static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
aio_req->flags |= SD_FLAG_CMD_COW;
}
- create = true;
+ aio_req->create = true;
}
out:
if (is_data_obj(aio_req->oid)) {
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
} else {
struct iovec iov;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
- add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
}
}
+static void sd_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVSheepdogState *s = bs->opaque;
+
+ aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+}
+
+static void sd_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVSheepdogState *s = bs->opaque;
+
+ s->aio_context = new_context;
+ aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+}
+
/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "sheepdog",
@@ -1382,12 +1420,12 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
const char *filename;
s->bs = bs;
+ s->aio_context = bdrv_get_aio_context(bs);
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
- qerror_report_err(local_err);
- error_free(local_err);
+ error_propagate(errp, local_err);
ret = -EINVAL;
goto out;
}
@@ -1408,15 +1446,16 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
ret = parse_vdiname(s, filename, vdi, &snapid, tag);
}
if (ret < 0) {
+ error_setg(errp, "Can't parse filename");
goto out;
}
- s->fd = get_sheep_fd(s);
+ s->fd = get_sheep_fd(s, errp);
if (s->fd < 0) {
ret = s->fd;
goto out;
}
- ret = find_vdi_name(s, vdi, snapid, tag, &vid, true);
+ ret = find_vdi_name(s, vdi, snapid, tag, &vid, true, errp);
if (ret) {
goto out;
}
@@ -1436,19 +1475,20 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
s->is_snapshot = true;
}
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
ret = fd;
goto out;
}
buf = g_malloc(SD_INODE_SIZE);
- ret = read_object(fd, buf, vid_to_vdi_oid(vid), 0, SD_INODE_SIZE, 0,
- s->cache_flags);
+ ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+ 0, SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
if (ret) {
+ error_setg(errp, "Can't read snapshot inode");
goto out;
}
@@ -1463,7 +1503,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
g_free(buf);
return 0;
out:
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1472,7 +1512,8 @@ out:
return ret;
}
-static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
+static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
+ Error **errp)
{
SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1480,7 +1521,7 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
unsigned int wlen, rlen = 0;
char buf[SD_MAX_VDI_LEN];
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
return fd;
}
@@ -1505,16 +1546,17 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
hdr.copy_policy = s->inode.copy_policy;
hdr.copies = s->inode.nr_copies;
- ret = do_req(fd, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
closesocket(fd);
if (ret) {
+ error_setg_errno(errp, -ret, "create failed");
return ret;
}
if (rsp->result != SD_RES_SUCCESS) {
- error_report("%s, %s", sd_strerror(rsp->result), s->inode.name);
+ error_setg(errp, "%s, %s", sd_strerror(rsp->result), s->inode.name);
return -EIO;
}
@@ -1525,21 +1567,18 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot)
return 0;
}
-static int sd_prealloc(const char *filename)
+static int sd_prealloc(const char *filename, Error **errp)
{
BlockDriverState *bs = NULL;
uint32_t idx, max_idx;
int64_t vdi_size;
void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
- Error *local_err = NULL;
int ret;
ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
+ NULL, errp);
if (ret < 0) {
- qerror_report_err(local_err);
- error_free(local_err);
- goto out;
+ goto out_with_err_set;
}
vdi_size = bdrv_getlength(bs);
@@ -1563,7 +1602,12 @@ static int sd_prealloc(const char *filename)
goto out;
}
}
+
out:
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Can't pre-allocate");
+ }
+out_with_err_set:
if (bs) {
bdrv_unref(bs);
}
@@ -1626,17 +1670,17 @@ static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
return 0;
}
-static int sd_create(const char *filename, QEMUOptionParameter *options,
+static int sd_create(const char *filename, QemuOpts *opts,
Error **errp)
{
int ret = 0;
uint32_t vid = 0;
char *backing_file = NULL;
+ char *buf = NULL;
BDRVSheepdogState *s;
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
bool prealloc = false;
- Error *local_err = NULL;
s = g_malloc0(sizeof(BDRVSheepdogState));
@@ -1647,38 +1691,35 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
ret = parse_vdiname(s, filename, s->name, &snapid, tag);
}
if (ret < 0) {
+ error_setg(errp, "Can't parse filename");
goto out;
}
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- s->inode.vdi_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
- if (!options->value.s || !strcmp(options->value.s, "off")) {
- prealloc = false;
- } else if (!strcmp(options->value.s, "full")) {
- prealloc = true;
- } else {
- error_report("Invalid preallocation mode: '%s'",
- options->value.s);
- ret = -EINVAL;
- goto out;
- }
- } else if (!strcmp(options->name, BLOCK_OPT_REDUNDANCY)) {
- if (options->value.s) {
- ret = parse_redundancy(s, options->value.s);
- if (ret < 0) {
- goto out;
- }
- }
+ s->inode.vdi_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
+ if (!buf || !strcmp(buf, "off")) {
+ prealloc = false;
+ } else if (!strcmp(buf, "full")) {
+ prealloc = true;
+ } else {
+ error_setg(errp, "Invalid preallocation mode: '%s'", buf);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ g_free(buf);
+ buf = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY);
+ if (buf) {
+ ret = parse_redundancy(s, buf);
+ if (ret < 0) {
+ error_setg(errp, "Invalid redundancy mode: '%s'", buf);
+ goto out;
}
- options++;
}
if (s->inode.vdi_size > SD_MAX_VDI_SIZE) {
- error_report("too big image size");
+ error_setg(errp, "too big image size");
ret = -EINVAL;
goto out;
}
@@ -1691,24 +1732,22 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
/* Currently, only Sheepdog backing image is supported. */
drv = bdrv_find_protocol(backing_file, true);
if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
- error_report("backing_file must be a sheepdog image");
+ error_setg(errp, "backing_file must be a sheepdog image");
ret = -EINVAL;
goto out;
}
bs = NULL;
ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
- &local_err);
+ errp);
if (ret < 0) {
- qerror_report_err(local_err);
- error_free(local_err);
goto out;
}
base = bs->opaque;
if (!is_snapshot(&base->inode)) {
- error_report("cannot clone from a non snapshot vdi");
+ error_setg(errp, "cannot clone from a non snapshot vdi");
bdrv_unref(bs);
ret = -EINVAL;
goto out;
@@ -1717,19 +1756,25 @@ static int sd_create(const char *filename, QEMUOptionParameter *options,
bdrv_unref(bs);
}
- ret = do_sd_create(s, &vid, 0);
- if (!prealloc || ret) {
+ s->aio_context = qemu_get_aio_context();
+ ret = do_sd_create(s, &vid, 0, errp);
+ if (ret) {
goto out;
}
- ret = sd_prealloc(filename);
+ if (prealloc) {
+ ret = sd_prealloc(filename, errp);
+ }
out:
+ g_free(backing_file);
+ g_free(buf);
g_free(s);
return ret;
}
static void sd_close(BlockDriverState *bs)
{
+ Error *local_err = NULL;
BDRVSheepdogState *s = bs->opaque;
SheepdogVdiReq hdr;
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
@@ -1738,8 +1783,10 @@ static void sd_close(BlockDriverState *bs)
DPRINTF("%s\n", s->name);
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
return;
}
@@ -1751,7 +1798,8 @@ static void sd_close(BlockDriverState *bs)
hdr.data_length = wlen;
hdr.flags = SD_FLAG_CMD_WRITE;
- ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ s->name, &wlen, &rlen);
closesocket(fd);
@@ -1760,7 +1808,7 @@ static void sd_close(BlockDriverState *bs)
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- qemu_aio_set_fd_handler(s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}
@@ -1774,6 +1822,7 @@ static int64_t sd_getlength(BlockDriverState *bs)
static int sd_truncate(BlockDriverState *bs, int64_t offset)
{
+ Error *local_err = NULL;
BDRVSheepdogState *s = bs->opaque;
int ret, fd;
unsigned int datalen;
@@ -1786,16 +1835,19 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset)
return -EINVAL;
}
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
return fd;
}
/* we don't need to update entire object */
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
s->inode.vdi_size = offset;
- ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+ ret = write_object(fd, s->aio_context, (char *)&s->inode,
+ vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+ datalen, 0, false, s->cache_flags);
close(fd);
if (ret < 0) {
@@ -1831,9 +1883,9 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
- data_len, offset, 0, 0, offset);
+ data_len, offset, 0, false, 0, offset);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, &iov, 1, false, AIOCB_WRITE_UDATA);
+ add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
acb->aio_done_func = sd_finish_aiocb;
acb->aiocb_type = AIOCB_WRITE_UDATA;
@@ -1846,6 +1898,7 @@ static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
/* Delete current working VDI on the snapshot chain */
static bool sd_delete(BDRVSheepdogState *s)
{
+ Error *local_err = NULL;
unsigned int wlen = SD_MAX_VDI_LEN, rlen = 0;
SheepdogVdiReq hdr = {
.opcode = SD_OP_DEL_VDI,
@@ -1856,12 +1909,15 @@ static bool sd_delete(BDRVSheepdogState *s)
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
int fd, ret;
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
return false;
}
- ret = do_req(fd, (SheepdogReq *)&hdr, s->name, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ s->name, &wlen, &rlen);
closesocket(fd);
if (ret) {
return false;
@@ -1885,6 +1941,7 @@ static bool sd_delete(BDRVSheepdogState *s)
*/
static int sd_create_branch(BDRVSheepdogState *s)
{
+ Error *local_err = NULL;
int ret, fd;
uint32_t vid;
char *buf;
@@ -1900,21 +1957,25 @@ static int sd_create_branch(BDRVSheepdogState *s)
* false bail out.
*/
deleted = sd_delete(s);
- ret = do_sd_create(s, &vid, !deleted);
+ ret = do_sd_create(s, &vid, !deleted, &local_err);
if (ret) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
goto out;
}
DPRINTF("%" PRIx32 " is created.\n", vid);
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
ret = fd;
goto out;
}
- ret = read_object(fd, buf, vid_to_vdi_oid(vid), s->inode.nr_copies,
- SD_INODE_SIZE, 0, s->cache_flags);
+ ret = read_object(fd, s->aio_context, buf, vid_to_vdi_oid(vid),
+ s->inode.nr_copies, SD_INODE_SIZE, 0, s->cache_flags);
closesocket(fd);
@@ -2023,7 +2084,8 @@ static int coroutine_fn sd_co_rw_vector(void *p)
DPRINTF("new oid %" PRIx64 "\n", oid);
}
- aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, old_oid, done);
+ aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
+ old_oid, done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
if (create) {
@@ -2032,7 +2094,7 @@ static int coroutine_fn sd_co_rw_vector(void *p)
}
}
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov, create,
+ add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
done:
offset = 0;
@@ -2112,9 +2174,9 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
acb->aio_done_func = sd_finish_aiocb;
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
- 0, 0, 0, 0, 0);
+ 0, 0, 0, false, 0, 0);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, NULL, 0, false, acb->aiocb_type);
+ add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
qemu_coroutine_yield();
return acb->ret;
@@ -2122,6 +2184,7 @@ static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
+ Error *local_err = NULL;
BDRVSheepdogState *s = bs->opaque;
int ret, fd;
uint32_t new_vid;
@@ -2149,32 +2212,37 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
/* we don't need to update entire object */
datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
+ inode = g_malloc(datalen);
/* refresh inode. */
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
ret = fd;
goto cleanup;
}
- ret = write_object(fd, (char *)&s->inode, vid_to_vdi_oid(s->inode.vdi_id),
- s->inode.nr_copies, datalen, 0, false, s->cache_flags);
+ ret = write_object(fd, s->aio_context, (char *)&s->inode,
+ vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
+ datalen, 0, false, s->cache_flags);
if (ret < 0) {
error_report("failed to write snapshot's inode.");
goto cleanup;
}
- ret = do_sd_create(s, &new_vid, 1);
+ ret = do_sd_create(s, &new_vid, 1, &local_err);
if (ret < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
error_report("failed to create inode for snapshot. %s",
strerror(errno));
goto cleanup;
}
- inode = (SheepdogInode *)g_malloc(datalen);
-
- ret = read_object(fd, (char *)inode, vid_to_vdi_oid(new_vid),
- s->inode.nr_copies, datalen, 0, s->cache_flags);
+ ret = read_object(fd, s->aio_context, (char *)inode,
+ vid_to_vdi_oid(new_vid), s->inode.nr_copies, datalen, 0,
+ s->cache_flags);
if (ret < 0) {
error_report("failed to read new inode info. %s", strerror(errno));
@@ -2186,6 +2254,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
s->inode.name, s->inode.snap_id, s->inode.vdi_id);
cleanup:
+ g_free(inode);
closesocket(fd);
return ret;
}
@@ -2249,6 +2318,7 @@ static int sd_snapshot_delete(BlockDriverState *bs,
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
{
+ Error *local_err = NULL;
BDRVSheepdogState *s = bs->opaque;
SheepdogReq req;
int fd, nr = 1024, ret, max = BITS_TO_LONGS(SD_NR_VDIS) * sizeof(long);
@@ -2263,8 +2333,10 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
vdi_inuse = g_malloc(max);
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
ret = fd;
goto out;
}
@@ -2277,7 +2349,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
req.opcode = SD_OP_READ_VDIS;
req.data_length = max;
- ret = do_req(fd, (SheepdogReq *)&req, vdi_inuse, &wlen, &rlen);
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&req,
+ vdi_inuse, &wlen, &rlen);
closesocket(fd);
if (ret) {
@@ -2290,8 +2363,10 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
hval = fnv_64a_buf(s->name, strlen(s->name), FNV1A_64_INIT);
start_nr = hval & (SD_NR_VDIS - 1);
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
ret = fd;
goto out;
}
@@ -2302,7 +2377,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
}
/* we don't need to read entire object */
- ret = read_object(fd, (char *)&inode, vid_to_vdi_oid(vid),
+ ret = read_object(fd, s->aio_context, (char *)&inode,
+ vid_to_vdi_oid(vid),
0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
s->cache_flags);
@@ -2316,8 +2392,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
sn_tab[found].vm_state_size = inode.vm_state_size;
sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
- snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
- inode.snap_id);
+ snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str),
+ "%" PRIu32, inode.snap_id);
pstrcpy(sn_tab[found].name,
MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
inode.tag);
@@ -2341,6 +2417,7 @@ out:
static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
int64_t pos, int size, int load)
{
+ Error *local_err = NULL;
bool create;
int fd, ret = 0, remaining = size;
unsigned int data_len;
@@ -2349,8 +2426,10 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
uint32_t vdi_index;
uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
- fd = connect_to_sdog(s);
+ fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
+ error_report("%s", error_get_pretty(local_err));;
+ error_free(local_err);
return fd;
}
@@ -2364,11 +2443,11 @@ static int do_load_save_vmstate(BDRVSheepdogState *s, uint8_t *data,
create = (offset == 0);
if (load) {
- ret = read_object(fd, (char *)data, vmstate_oid,
+ ret = read_object(fd, s->aio_context, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset,
s->cache_flags);
} else {
- ret = write_object(fd, (char *)data, vmstate_oid,
+ ret = write_object(fd, s->aio_context, (char *)data, vmstate_oid,
s->inode.nr_copies, data_len, offset, create,
s->cache_flags);
}
@@ -2490,28 +2569,32 @@ static int64_t sd_get_allocated_file_size(BlockDriverState *bs)
return size;
}
-static QEMUOptionParameter sd_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = OPT_STRING,
- .help = "Preallocation mode (allowed values: off, full)"
- },
- {
- .name = BLOCK_OPT_REDUNDANCY,
- .type = OPT_STRING,
- .help = "Redundancy of the image"
- },
- { NULL }
+static QemuOptsList sd_create_opts = {
+ .name = "sheepdog-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(sd_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = QEMU_OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = QEMU_OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, full)"
+ },
+ {
+ .name = BLOCK_OPT_REDUNDANCY,
+ .type = QEMU_OPT_STRING,
+ .help = "Redundancy of the image"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_sheepdog = {
@@ -2541,7 +2624,10 @@ static BlockDriver bdrv_sheepdog = {
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
- .create_options = sd_create_options,
+ .bdrv_detach_aio_context = sd_detach_aio_context,
+ .bdrv_attach_aio_context = sd_attach_aio_context,
+
+ .create_opts = &sd_create_opts,
};
static BlockDriver bdrv_sheepdog_tcp = {
@@ -2571,7 +2657,10 @@ static BlockDriver bdrv_sheepdog_tcp = {
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
- .create_options = sd_create_options,
+ .bdrv_detach_aio_context = sd_detach_aio_context,
+ .bdrv_attach_aio_context = sd_attach_aio_context,
+
+ .create_opts = &sd_create_opts,
};
static BlockDriver bdrv_sheepdog_unix = {
@@ -2601,7 +2690,10 @@ static BlockDriver bdrv_sheepdog_unix = {
.bdrv_save_vmstate = sd_save_vmstate,
.bdrv_load_vmstate = sd_load_vmstate,
- .create_options = sd_create_options,
+ .bdrv_detach_aio_context = sd_detach_aio_context,
+ .bdrv_attach_aio_context = sd_attach_aio_context,
+
+ .create_opts = &sd_create_opts,
};
static void bdrv_sheepdog_init(void)
diff --git a/block/ssh.c b/block/ssh.c
index aa63c9d20..cd2fd751f 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -106,30 +106,59 @@ static void ssh_state_free(BDRVSSHState *s)
}
}
-/* Wrappers around error_report which make sure to dump as much
- * information from libssh2 as possible.
- */
-static void GCC_FMT_ATTR(2, 3)
-session_error_report(BDRVSSHState *s, const char *fs, ...)
+static void GCC_FMT_ATTR(3, 4)
+session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
{
va_list args;
+ char *msg;
va_start(args, fs);
- error_vprintf(fs, args);
+ msg = g_strdup_vprintf(fs, args);
+ va_end(args);
- if ((s)->session) {
+ if (s->session) {
char *ssh_err;
int ssh_err_code;
- libssh2_session_last_error((s)->session, &ssh_err, NULL, 0);
/* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_errno((s)->session);
-
- error_printf(": %s (libssh2 error code: %d)", ssh_err, ssh_err_code);
+ ssh_err_code = libssh2_session_last_error(s->session,
+ &ssh_err, NULL, 0);
+ error_setg(errp, "%s: %s (libssh2 error code: %d)",
+ msg, ssh_err, ssh_err_code);
+ } else {
+ error_setg(errp, "%s", msg);
}
+ g_free(msg);
+}
+
+static void GCC_FMT_ATTR(3, 4)
+sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
+{
+ va_list args;
+ char *msg;
+ va_start(args, fs);
+ msg = g_strdup_vprintf(fs, args);
va_end(args);
- error_printf("\n");
+
+ if (s->sftp) {
+ char *ssh_err;
+ int ssh_err_code;
+ unsigned long sftp_err_code;
+
+ /* This is not an errno. See <libssh2.h>. */
+ ssh_err_code = libssh2_session_last_error(s->session,
+ &ssh_err, NULL, 0);
+ /* See <libssh2_sftp.h>. */
+ sftp_err_code = libssh2_sftp_last_error((s)->sftp);
+
+ error_setg(errp,
+ "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
+ msg, ssh_err, ssh_err_code, sftp_err_code);
+ } else {
+ error_setg(errp, "%s", msg);
+ }
+ g_free(msg);
}
static void GCC_FMT_ATTR(2, 3)
@@ -145,9 +174,9 @@ sftp_error_report(BDRVSSHState *s, const char *fs, ...)
int ssh_err_code;
unsigned long sftp_err_code;
- libssh2_session_last_error((s)->session, &ssh_err, NULL, 0);
/* This is not an errno. See <libssh2.h>. */
- ssh_err_code = libssh2_session_last_errno((s)->session);
+ ssh_err_code = libssh2_session_last_error(s->session,
+ &ssh_err, NULL, 0);
/* See <libssh2_sftp.h>. */
sftp_err_code = libssh2_sftp_last_error((s)->sftp);
@@ -243,7 +272,7 @@ static void ssh_parse_filename(const char *filename, QDict *options,
}
static int check_host_key_knownhosts(BDRVSSHState *s,
- const char *host, int port)
+ const char *host, int port, Error **errp)
{
const char *home;
char *knh_file = NULL;
@@ -257,14 +286,15 @@ static int check_host_key_knownhosts(BDRVSSHState *s,
hostkey = libssh2_session_hostkey(s->session, &len, &type);
if (!hostkey) {
ret = -EINVAL;
- session_error_report(s, "failed to read remote host key");
+ session_error_setg(errp, s, "failed to read remote host key");
goto out;
}
knh = libssh2_knownhost_init(s->session);
if (!knh) {
ret = -EINVAL;
- session_error_report(s, "failed to initialize known hosts support");
+ session_error_setg(errp, s,
+ "failed to initialize known hosts support");
goto out;
}
@@ -289,21 +319,23 @@ static int check_host_key_knownhosts(BDRVSSHState *s,
break;
case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
ret = -EINVAL;
- session_error_report(s, "host key does not match the one in known_hosts (found key %s)",
- found->key);
+ session_error_setg(errp, s,
+ "host key does not match the one in known_hosts"
+ " (found key %s)", found->key);
goto out;
case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
ret = -EINVAL;
- session_error_report(s, "no host key was found in known_hosts");
+ session_error_setg(errp, s, "no host key was found in known_hosts");
goto out;
case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
ret = -EINVAL;
- session_error_report(s, "failure matching the host key with known_hosts");
+ session_error_setg(errp, s,
+ "failure matching the host key with known_hosts");
goto out;
default:
ret = -EINVAL;
- session_error_report(s, "unknown error matching the host key with known_hosts (%d)",
- r);
+ session_error_setg(errp, s, "unknown error matching the host key"
+ " with known_hosts (%d)", r);
goto out;
}
@@ -358,20 +390,20 @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
static int
check_host_key_hash(BDRVSSHState *s, const char *hash,
- int hash_type, size_t fingerprint_len)
+ int hash_type, size_t fingerprint_len, Error **errp)
{
const char *fingerprint;
fingerprint = libssh2_hostkey_hash(s->session, hash_type);
if (!fingerprint) {
- session_error_report(s, "failed to read remote host key");
+ session_error_setg(errp, s, "failed to read remote host key");
return -EINVAL;
}
if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
hash) != 0) {
- error_report("remote host key does not match host_key_check '%s'",
- hash);
+ error_setg(errp, "remote host key does not match host_key_check '%s'",
+ hash);
return -EPERM;
}
@@ -379,7 +411,7 @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
}
static int check_host_key(BDRVSSHState *s, const char *host, int port,
- const char *host_key_check)
+ const char *host_key_check, Error **errp)
{
/* host_key_check=no */
if (strcmp(host_key_check, "no") == 0) {
@@ -389,25 +421,25 @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
/* host_key_check=md5:xx:yy:zz:... */
if (strncmp(host_key_check, "md5:", 4) == 0) {
return check_host_key_hash(s, &host_key_check[4],
- LIBSSH2_HOSTKEY_HASH_MD5, 16);
+ LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
}
/* host_key_check=sha1:xx:yy:zz:... */
if (strncmp(host_key_check, "sha1:", 5) == 0) {
return check_host_key_hash(s, &host_key_check[5],
- LIBSSH2_HOSTKEY_HASH_SHA1, 20);
+ LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
}
/* host_key_check=yes */
if (strcmp(host_key_check, "yes") == 0) {
- return check_host_key_knownhosts(s, host, port);
+ return check_host_key_knownhosts(s, host, port, errp);
}
- error_report("unknown host_key_check setting (%s)", host_key_check);
+ error_setg(errp, "unknown host_key_check setting (%s)", host_key_check);
return -EINVAL;
}
-static int authenticate(BDRVSSHState *s, const char *user)
+static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
{
int r, ret;
const char *userauthlist;
@@ -418,7 +450,8 @@ static int authenticate(BDRVSSHState *s, const char *user)
userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
if (strstr(userauthlist, "publickey") == NULL) {
ret = -EPERM;
- error_report("remote server does not support \"publickey\" authentication");
+ error_setg(errp,
+ "remote server does not support \"publickey\" authentication");
goto out;
}
@@ -426,17 +459,18 @@ static int authenticate(BDRVSSHState *s, const char *user)
agent = libssh2_agent_init(s->session);
if (!agent) {
ret = -EINVAL;
- session_error_report(s, "failed to initialize ssh-agent support");
+ session_error_setg(errp, s, "failed to initialize ssh-agent support");
goto out;
}
if (libssh2_agent_connect(agent)) {
ret = -ECONNREFUSED;
- session_error_report(s, "failed to connect to ssh-agent");
+ session_error_setg(errp, s, "failed to connect to ssh-agent");
goto out;
}
if (libssh2_agent_list_identities(agent)) {
ret = -EINVAL;
- session_error_report(s, "failed requesting identities from ssh-agent");
+ session_error_setg(errp, s,
+ "failed requesting identities from ssh-agent");
goto out;
}
@@ -447,7 +481,8 @@ static int authenticate(BDRVSSHState *s, const char *user)
}
if (r < 0) {
ret = -EINVAL;
- session_error_report(s, "failed to obtain identity from ssh-agent");
+ session_error_setg(errp, s,
+ "failed to obtain identity from ssh-agent");
goto out;
}
r = libssh2_agent_userauth(agent, user, identity);
@@ -461,8 +496,8 @@ static int authenticate(BDRVSSHState *s, const char *user)
}
ret = -EPERM;
- error_report("failed to authenticate using publickey authentication "
- "and the identities held by your ssh-agent");
+ error_setg(errp, "failed to authenticate using publickey authentication "
+ "and the identities held by your ssh-agent");
out:
if (agent != NULL) {
@@ -476,10 +511,9 @@ static int authenticate(BDRVSSHState *s, const char *user)
}
static int connect_to_ssh(BDRVSSHState *s, QDict *options,
- int ssh_flags, int creat_mode)
+ int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
- Error *err = NULL;
const char *host, *user, *path, *host_key_check;
int port;
@@ -498,6 +532,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
} else {
user = g_get_user_name();
if (!user) {
+ error_setg_errno(errp, errno, "Can't get user name");
ret = -errno;
goto err;
}
@@ -514,11 +549,9 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
s->hostport = g_strdup_printf("%s:%d", host, port);
/* Open the socket and connect. */
- s->sock = inet_connect(s->hostport, &err);
- if (err != NULL) {
+ s->sock = inet_connect(s->hostport, errp);
+ if (s->sock < 0) {
ret = -errno;
- qerror_report_err(err);
- error_free(err);
goto err;
}
@@ -526,7 +559,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
s->session = libssh2_session_init();
if (!s->session) {
ret = -EINVAL;
- session_error_report(s, "failed to initialize libssh2 session");
+ session_error_setg(errp, s, "failed to initialize libssh2 session");
goto err;
}
@@ -537,18 +570,18 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
r = libssh2_session_handshake(s->session, s->sock);
if (r != 0) {
ret = -EINVAL;
- session_error_report(s, "failed to establish SSH session");
+ session_error_setg(errp, s, "failed to establish SSH session");
goto err;
}
/* Check the remote host's key against known_hosts. */
- ret = check_host_key(s, host, port, host_key_check);
+ ret = check_host_key(s, host, port, host_key_check, errp);
if (ret < 0) {
goto err;
}
/* Authenticate. */
- ret = authenticate(s, user);
+ ret = authenticate(s, user, errp);
if (ret < 0) {
goto err;
}
@@ -556,7 +589,7 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
/* Start SFTP. */
s->sftp = libssh2_sftp_init(s->session);
if (!s->sftp) {
- session_error_report(s, "failed to initialize sftp handle");
+ session_error_setg(errp, s, "failed to initialize sftp handle");
ret = -EINVAL;
goto err;
}
@@ -566,14 +599,14 @@ static int connect_to_ssh(BDRVSSHState *s, QDict *options,
path, ssh_flags, creat_mode);
s->sftp_handle = libssh2_sftp_open(s->sftp, path, ssh_flags, creat_mode);
if (!s->sftp_handle) {
- session_error_report(s, "failed to open remote file '%s'", path);
+ session_error_setg(errp, s, "failed to open remote file '%s'", path);
ret = -EINVAL;
goto err;
}
r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
if (r < 0) {
- sftp_error_report(s, "failed to read file attributes");
+ sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
@@ -623,7 +656,7 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
}
/* Start up SSH. */
- ret = connect_to_ssh(s, options, ssh_flags, 0);
+ ret = connect_to_ssh(s, options, ssh_flags, 0, errp);
if (ret < 0) {
goto err;
}
@@ -642,20 +675,22 @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
return ret;
}
-static QEMUOptionParameter ssh_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- { NULL }
+static QemuOptsList ssh_create_opts = {
+ .name = "ssh-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(ssh_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
};
-static int ssh_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int ssh_create(const char *filename, QemuOpts *opts, Error **errp)
{
int r, ret;
- Error *local_err = NULL;
int64_t total_size = 0;
QDict *uri_options = NULL;
BDRVSSHState s;
@@ -665,26 +700,20 @@ static int ssh_create(const char *filename, QEMUOptionParameter *options,
ssh_state_init(&s);
/* Get desired file size. */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n;
- }
- options++;
- }
+ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
DPRINTF("total_size=%" PRIi64, total_size);
uri_options = qdict_new();
- r = parse_uri(filename, uri_options, &local_err);
+ r = parse_uri(filename, uri_options, errp);
if (r < 0) {
- qerror_report_err(local_err);
- error_free(local_err);
ret = r;
goto out;
}
r = connect_to_ssh(&s, uri_options,
LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC, 0644);
+ LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
+ 0644, errp);
if (r < 0) {
ret = r;
goto out;
@@ -694,7 +723,7 @@ static int ssh_create(const char *filename, QEMUOptionParameter *options,
libssh2_sftp_seek64(s.sftp_handle, total_size-1);
r2 = libssh2_sftp_write(s.sftp_handle, c, 1);
if (r2 < 0) {
- sftp_error_report(&s, "truncate failed");
+ sftp_error_setg(errp, &s, "truncate failed");
ret = -EINVAL;
goto out;
}
@@ -742,7 +771,7 @@ static void restart_coroutine(void *opaque)
qemu_coroutine_enter(co, NULL);
}
-static coroutine_fn void set_fd_handler(BDRVSSHState *s)
+static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
IOHandler *rd_handler = NULL, *wr_handler = NULL;
@@ -760,24 +789,26 @@ static coroutine_fn void set_fd_handler(BDRVSSHState *s)
DPRINTF("s->sock=%d rd_handler=%p wr_handler=%p", s->sock,
rd_handler, wr_handler);
- qemu_aio_set_fd_handler(s->sock, rd_handler, wr_handler, co);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+ rd_handler, wr_handler, co);
}
-static coroutine_fn void clear_fd_handler(BDRVSSHState *s)
+static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
+ BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
- qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
* handlers are set up so that we'll be rescheduled when there is an
* interesting event on the socket.
*/
-static coroutine_fn void co_yield(BDRVSSHState *s)
+static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
{
- set_fd_handler(s);
+ set_fd_handler(s, bs);
qemu_coroutine_yield();
- clear_fd_handler(s);
+ clear_fd_handler(s, bs);
}
/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
@@ -807,7 +838,7 @@ static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
}
}
-static coroutine_fn int ssh_read(BDRVSSHState *s,
+static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
int64_t offset, size_t size,
QEMUIOVector *qiov)
{
@@ -840,7 +871,7 @@ static coroutine_fn int ssh_read(BDRVSSHState *s,
DPRINTF("sftp_read returned %zd", r);
if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
if (r < 0) {
@@ -875,14 +906,14 @@ static coroutine_fn int ssh_co_readv(BlockDriverState *bs,
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = ssh_read(s, sector_num * BDRV_SECTOR_SIZE,
+ ret = ssh_read(s, bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
-static int ssh_write(BDRVSSHState *s,
+static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
int64_t offset, size_t size,
QEMUIOVector *qiov)
{
@@ -910,7 +941,7 @@ static int ssh_write(BDRVSSHState *s,
DPRINTF("sftp_write returned %zd", r);
if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
if (r < 0) {
@@ -929,7 +960,7 @@ static int ssh_write(BDRVSSHState *s,
*/
if (r == 0) {
ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
@@ -957,7 +988,7 @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = ssh_write(s, sector_num * BDRV_SECTOR_SIZE,
+ ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
nb_sectors * BDRV_SECTOR_SIZE, qiov);
qemu_co_mutex_unlock(&s->lock);
@@ -978,7 +1009,7 @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
#ifdef HAS_LIBSSH2_SFTP_FSYNC
-static coroutine_fn int ssh_flush(BDRVSSHState *s)
+static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
{
int r;
@@ -986,7 +1017,7 @@ static coroutine_fn int ssh_flush(BDRVSSHState *s)
again:
r = libssh2_sftp_fsync(s->sftp_handle);
if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
- co_yield(s);
+ co_yield(s, bs);
goto again;
}
if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
@@ -1008,7 +1039,7 @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = ssh_flush(s);
+ ret = ssh_flush(s, bs);
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -1051,7 +1082,7 @@ static BlockDriver bdrv_ssh = {
.bdrv_co_writev = ssh_co_writev,
.bdrv_getlength = ssh_getlength,
.bdrv_co_flush_to_disk = ssh_co_flush,
- .create_options = ssh_create_options,
+ .create_opts = &ssh_create_opts,
};
static void bdrv_ssh_init(void)
diff --git a/block/stream.c b/block/stream.c
index dd0b4ac3d..cdea3e8d0 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -32,7 +32,7 @@ typedef struct StreamBlockJob {
RateLimit limit;
BlockDriverState *base;
BlockdevOnError on_error;
- char backing_file_id[1024];
+ char *backing_file_str;
} StreamBlockJob;
static int coroutine_fn stream_populate(BlockDriverState *bs,
@@ -60,7 +60,7 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
/* Must assign before bdrv_delete() to prevent traversing dangling pointer
* while we delete backing image instances.
*/
- top->backing_hd = base;
+ bdrv_set_backing_hd(top, base);
while (intermediate) {
BlockDriverState *unused;
@@ -72,11 +72,11 @@ static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
unused = intermediate;
intermediate = intermediate->backing_hd;
- unused->backing_hd = NULL;
+ bdrv_set_backing_hd(unused, NULL);
bdrv_unref(unused);
}
- bdrv_refresh_limits(top);
+ bdrv_refresh_limits(top, NULL);
}
static void coroutine_fn stream_run(void *opaque)
@@ -159,14 +159,14 @@ wait:
BlockErrorAction action =
block_job_error_action(&s->common, s->common.bs, s->on_error,
true, -ret);
- if (action == BDRV_ACTION_STOP) {
+ if (action == BLOCK_ERROR_ACTION_STOP) {
n = 0;
continue;
}
if (error == 0) {
error = ret;
}
- if (action == BDRV_ACTION_REPORT) {
+ if (action == BLOCK_ERROR_ACTION_REPORT) {
break;
}
}
@@ -186,7 +186,7 @@ wait:
if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
const char *base_id = NULL, *base_fmt = NULL;
if (base) {
- base_id = s->backing_file_id;
+ base_id = s->backing_file_str;
if (base->drv) {
base_fmt = base->drv->format_name;
}
@@ -196,6 +196,7 @@ wait:
}
qemu_vfree(buf);
+ g_free(s->backing_file_str);
block_job_completed(&s->common, ret);
}
@@ -217,7 +218,7 @@ static const BlockJobDriver stream_job_driver = {
};
void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *base_id, int64_t speed,
+ const char *backing_file_str, int64_t speed,
BlockdevOnError on_error,
BlockDriverCompletionFunc *cb,
void *opaque, Error **errp)
@@ -237,9 +238,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base,
}
s->base = base;
- if (base_id) {
- pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
- }
+ s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
s->common.co = qemu_coroutine_create(stream_run);
diff --git a/block/vdi.c b/block/vdi.c
index 820cd376b..197bd77c9 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -53,6 +53,13 @@
#include "block/block_int.h"
#include "qemu/module.h"
#include "migration/migration.h"
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#endif
+#endif
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -239,7 +246,6 @@ static void vdi_header_to_le(VdiHeader *header)
cpu_to_le32s(&header->block_extra);
cpu_to_le32s(&header->blocks_in_image);
cpu_to_le32s(&header->blocks_allocated);
- cpu_to_le32s(&header->blocks_allocated);
uuid_convert(header->uuid_image);
uuid_convert(header->uuid_last_snap);
uuid_convert(header->uuid_link);
@@ -408,34 +414,35 @@ static int vdi_open(BlockDriverState *bs, QDict *options, int flags,
}
if (header.signature != VDI_SIGNATURE) {
- error_setg(errp, "Image not in VDI format (bad signature %08x)", header.signature);
+ error_setg(errp, "Image not in VDI format (bad signature %08" PRIx32
+ ")", header.signature);
ret = -EINVAL;
goto fail;
} else if (header.version != VDI_VERSION_1_1) {
- error_setg(errp, "unsupported VDI image (version %u.%u)",
- header.version >> 16, header.version & 0xffff);
+ error_setg(errp, "unsupported VDI image (version %" PRIu32 ".%" PRIu32
+ ")", header.version >> 16, header.version & 0xffff);
ret = -ENOTSUP;
goto fail;
} else if (header.offset_bmap % SECTOR_SIZE != 0) {
/* We only support block maps which start on a sector boundary. */
error_setg(errp, "unsupported VDI image (unaligned block map offset "
- "0x%x)", header.offset_bmap);
+ "0x%" PRIx32 ")", header.offset_bmap);
ret = -ENOTSUP;
goto fail;
} else if (header.offset_data % SECTOR_SIZE != 0) {
/* We only support data blocks which start on a sector boundary. */
- error_setg(errp, "unsupported VDI image (unaligned data offset 0x%x)",
- header.offset_data);
+ error_setg(errp, "unsupported VDI image (unaligned data offset 0x%"
+ PRIx32 ")", header.offset_data);
ret = -ENOTSUP;
goto fail;
} else if (header.sector_size != SECTOR_SIZE) {
- error_setg(errp, "unsupported VDI image (sector size %u is not %u)",
- header.sector_size, SECTOR_SIZE);
+ error_setg(errp, "unsupported VDI image (sector size %" PRIu32
+ " is not %u)", header.sector_size, SECTOR_SIZE);
ret = -ENOTSUP;
goto fail;
} else if (header.block_size != DEFAULT_CLUSTER_SIZE) {
- error_setg(errp, "unsupported VDI image (block size %u is not %u)",
- header.block_size, DEFAULT_CLUSTER_SIZE);
+ error_setg(errp, "unsupported VDI image (block size %" PRIu32
+ " is not %u)", header.block_size, DEFAULT_CLUSTER_SIZE);
ret = -ENOTSUP;
goto fail;
} else if (header.disk_size >
@@ -672,8 +679,7 @@ static int vdi_co_write(BlockDriverState *bs,
return ret;
}
-static int vdi_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int vdi_create(const char *filename, QemuOpts *opts, Error **errp)
{
int fd;
int result = 0;
@@ -684,29 +690,24 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
VdiHeader header;
size_t i;
size_t bmap_size;
+ bool nocow = false;
logout("\n");
/* Read out options. */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- bytes = options->value.n;
+ bytes = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
#if defined(CONFIG_VDI_BLOCK_SIZE)
- } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
- if (options->value.n) {
- /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
- block_size = options->value.n;
- }
+ /* TODO: Additional checks (SECTOR_SIZE * 2^n, ...). */
+ block_size = qemu_opt_get_size_del(opts,
+ BLOCK_OPT_CLUSTER_SIZE,
+ DEFAULT_CLUSTER_SIZE);
#endif
#if defined(CONFIG_VDI_STATIC_IMAGE)
- } else if (!strcmp(options->name, BLOCK_OPT_STATIC)) {
- if (options->value.n) {
- image_type = VDI_TYPE_STATIC;
- }
-#endif
- }
- options++;
+ if (qemu_opt_get_bool_del(opts, BLOCK_OPT_STATIC, false)) {
+ image_type = VDI_TYPE_STATIC;
}
+#endif
+ nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
if (bytes > VDI_DISK_SIZE_MAX) {
result = -ENOTSUP;
@@ -724,6 +725,21 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
goto exit;
}
+ if (nocow) {
+#ifdef __linux__
+ /* Set NOCOW flag to solve performance issue on fs like btrfs.
+ * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+ * be ignored since any failure of this operation should not block the
+ * left work.
+ */
+ int attr;
+ if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+ attr |= FS_NOCOW_FL;
+ ioctl(fd, FS_IOC_SETFLAGS, &attr);
+ }
+#endif
+ }
+
/* We need enough blocks to store the given disk size,
so always round up. */
blocks = (bytes + block_size - 1) / block_size;
@@ -755,6 +771,7 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
vdi_header_to_le(&header);
if (write(fd, &header, sizeof(header)) < 0) {
result = -errno;
+ goto close_and_exit;
}
if (bmap_size > 0) {
@@ -768,6 +785,8 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
}
if (write(fd, bmap, bmap_size) < 0) {
result = -errno;
+ g_free(bmap);
+ goto close_and_exit;
}
g_free(bmap);
}
@@ -775,10 +794,12 @@ static int vdi_create(const char *filename, QEMUOptionParameter *options,
if (image_type == VDI_TYPE_STATIC) {
if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
result = -errno;
+ goto close_and_exit;
}
}
- if (close(fd) < 0) {
+close_and_exit:
+ if ((close(fd) < 0) && !result) {
result = -errno;
}
@@ -796,29 +817,39 @@ static void vdi_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static QEMUOptionParameter vdi_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
+static QemuOptsList vdi_create_opts = {
+ .name = "vdi-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vdi_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
#if defined(CONFIG_VDI_BLOCK_SIZE)
- {
- .name = BLOCK_OPT_CLUSTER_SIZE,
- .type = OPT_SIZE,
- .help = "VDI cluster (block) size",
- .value = { .n = DEFAULT_CLUSTER_SIZE },
- },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "VDI cluster (block) size",
+ .def_value_str = stringify(DEFAULT_CLUSTER_SIZE)
+ },
#endif
#if defined(CONFIG_VDI_STATIC_IMAGE)
- {
- .name = BLOCK_OPT_STATIC,
- .type = OPT_FLAG,
- .help = "VDI static (pre-allocated) image"
- },
+ {
+ .name = BLOCK_OPT_STATIC,
+ .type = QEMU_OPT_BOOL,
+ .help = "VDI static (pre-allocated) image",
+ .def_value_str = "off"
+ },
#endif
- /* TODO: An additional option to set UUID values might be useful. */
- { NULL }
+ {
+ .name = BLOCK_OPT_NOCOW,
+ .type = QEMU_OPT_BOOL,
+ .help = "Turn off copy-on-write (valid only on btrfs)"
+ },
+ /* TODO: An additional option to set UUID values might be useful. */
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_vdi = {
@@ -840,7 +871,7 @@ static BlockDriver bdrv_vdi = {
.bdrv_get_info = vdi_get_info,
- .create_options = vdi_create_options,
+ .create_opts = &vdi_create_opts,
.bdrv_check = vdi_check,
};
diff --git a/block/vhdx.c b/block/vhdx.c
index 509baaf48..fedcf9f9c 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -473,7 +473,14 @@ static void vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s,
} else if (h2_seq > h1_seq) {
s->curr_header = 1;
} else {
- goto fail;
+ /* The Microsoft Disk2VHD tool will create 2 identical
+ * headers, with identical sequence numbers. If the headers are
+ * identical, don't consider the file corrupt */
+ if (!memcmp(header1, header2, sizeof(VHDXHeader))) {
+ s->curr_header = 0;
+ } else {
+ goto fail;
+ }
}
}
@@ -1716,8 +1723,7 @@ exit:
* .---- ~ ----------- ~ ------------ ~ ---------------- ~ -----------.
* 1MB
*/
-static int vhdx_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int vhdx_create(const char *filename, QemuOpts *opts, Error **errp)
{
int ret = 0;
uint64_t image_size = (uint64_t) 2 * GiB;
@@ -1730,24 +1736,15 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
gunichar2 *creator = NULL;
glong creator_items;
BlockDriverState *bs;
- const char *type = NULL;
+ char *type = NULL;
VHDXImageType image_type;
Error *local_err = NULL;
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- image_size = options->value.n;
- } else if (!strcmp(options->name, VHDX_BLOCK_OPT_LOG_SIZE)) {
- log_size = options->value.n;
- } else if (!strcmp(options->name, VHDX_BLOCK_OPT_BLOCK_SIZE)) {
- block_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
- type = options->value.s;
- } else if (!strcmp(options->name, VHDX_BLOCK_OPT_ZERO)) {
- use_zero_blocks = options->value.n != 0;
- }
- options++;
- }
+ image_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ log_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_LOG_SIZE, 0);
+ block_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_BLOCK_SIZE, 0);
+ type = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
+ use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, false);
if (image_size > VHDX_MAX_IMAGE_SIZE) {
error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
@@ -1756,7 +1753,7 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
}
if (type == NULL) {
- type = "dynamic";
+ type = g_strdup("dynamic");
}
if (!strcmp(type, "dynamic")) {
@@ -1796,7 +1793,7 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
block_size = block_size > VHDX_BLOCK_SIZE_MAX ? VHDX_BLOCK_SIZE_MAX :
block_size;
- ret = bdrv_create_file(filename, options, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
@@ -1856,6 +1853,7 @@ static int vhdx_create(const char *filename, QEMUOptionParameter *options,
delete_and_exit:
bdrv_unref(bs);
exit:
+ g_free(type);
g_free(creator);
return ret;
}
@@ -1878,37 +1876,41 @@ static int vhdx_check(BlockDriverState *bs, BdrvCheckResult *result,
return 0;
}
-static QEMUOptionParameter vhdx_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size; max of 64TB."
- },
- {
- .name = VHDX_BLOCK_OPT_LOG_SIZE,
- .type = OPT_SIZE,
- .value.n = 1 * MiB,
- .help = "Log size; min 1MB."
- },
- {
- .name = VHDX_BLOCK_OPT_BLOCK_SIZE,
- .type = OPT_SIZE,
- .value.n = 0,
- .help = "Block Size; min 1MB, max 256MB. " \
- "0 means auto-calculate based on image size."
- },
- {
- .name = BLOCK_OPT_SUBFMT,
- .type = OPT_STRING,
- .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\
- "Default is 'dynamic'."
- },
- {
- .name = VHDX_BLOCK_OPT_ZERO,
- .type = OPT_FLAG,
- .help = "Force use of payload blocks of type 'ZERO'. Non-standard."
- },
- { NULL }
+static QemuOptsList vhdx_create_opts = {
+ .name = "vhdx-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vhdx_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size; max of 64TB."
+ },
+ {
+ .name = VHDX_BLOCK_OPT_LOG_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .def_value_str = stringify(DEFAULT_LOG_SIZE),
+ .help = "Log size; min 1MB."
+ },
+ {
+ .name = VHDX_BLOCK_OPT_BLOCK_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .def_value_str = stringify(0),
+ .help = "Block Size; min 1MB, max 256MB. " \
+ "0 means auto-calculate based on image size."
+ },
+ {
+ .name = BLOCK_OPT_SUBFMT,
+ .type = QEMU_OPT_STRING,
+ .help = "VHDX format type, can be either 'dynamic' or 'fixed'. "\
+ "Default is 'dynamic'."
+ },
+ {
+ .name = VHDX_BLOCK_OPT_ZERO,
+ .type = QEMU_OPT_BOOL,
+ .help = "Force use of payload blocks of type 'ZERO'. Non-standard."
+ },
+ { NULL }
+ }
};
static BlockDriver bdrv_vhdx = {
@@ -1924,7 +1926,7 @@ static BlockDriver bdrv_vhdx = {
.bdrv_get_info = vhdx_get_info,
.bdrv_check = vhdx_check,
- .create_options = vhdx_create_options,
+ .create_opts = &vhdx_create_opts,
};
static void bdrv_vhdx_init(void)
diff --git a/block/vhdx.h b/block/vhdx.h
index 8103d4c44..5370010c5 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -23,6 +23,7 @@
#define GiB (MiB * 1024)
#define TiB ((uint64_t) GiB * 1024)
+#define DEFAULT_LOG_SIZE 1048576 /* 1MiB */
/* Structures and fields present in the VHDX file */
/* The header section has the following blocks,
diff --git a/block/vmdk.c b/block/vmdk.c
index b69988d16..0517bbaf9 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -262,7 +262,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
p_name = strstr(desc, cid_str);
if (p_name != NULL) {
p_name += cid_str_size;
- sscanf(p_name, "%x", &cid);
+ sscanf(p_name, "%" SCNx32, &cid);
}
return cid;
@@ -290,7 +290,7 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
p_name = strstr(desc, "CID");
if (p_name != NULL) {
p_name += sizeof("CID");
- snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
+ snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
pstrcat(desc, sizeof(desc), tmp_desc);
}
@@ -640,7 +640,7 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
if (le32_to_cpu(header.version) > 3) {
char buf[64];
- snprintf(buf, sizeof(buf), "VMDK version %d",
+ snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
le32_to_cpu(header.version));
error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
bs->device_name, "vmdk", buf);
@@ -671,8 +671,9 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
}
if (bdrv_getlength(file) <
le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE) {
- error_setg(errp, "File truncated, expecting at least %lld bytes",
- le64_to_cpu(header.grain_offset) * BDRV_SECTOR_SIZE);
+ error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
+ (int64_t)(le64_to_cpu(header.grain_offset)
+ * BDRV_SECTOR_SIZE));
return -EINVAL;
}
@@ -937,7 +938,7 @@ fail:
}
-static int vmdk_refresh_limits(BlockDriverState *bs)
+static void vmdk_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVVmdkState *s = bs->opaque;
int i;
@@ -949,8 +950,6 @@ static int vmdk_refresh_limits(BlockDriverState *bs)
s->extents[i].cluster_sectors);
}
}
-
- return 0;
}
static int get_whole_cluster(BlockDriverState *bs,
@@ -1495,6 +1494,19 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
return ret;
}
+static int vmdk_write_compressed(BlockDriverState *bs,
+ int64_t sector_num,
+ const uint8_t *buf,
+ int nb_sectors)
+{
+ BDRVVmdkState *s = bs->opaque;
+ if (s->num_extents == 1 && s->extents[0].compressed) {
+ return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ } else {
+ return -ENOTSUP;
+ }
+}
+
static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
int64_t sector_num,
int nb_sectors,
@@ -1515,17 +1527,17 @@ static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
static int vmdk_create_extent(const char *filename, int64_t filesize,
bool flat, bool compress, bool zeroed_grain,
- Error **errp)
+ QemuOpts *opts, Error **errp)
{
int ret, i;
BlockDriverState *bs = NULL;
VMDK4Header header;
- Error *local_err;
+ Error *local_err = NULL;
uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
uint32_t *gd_buf = NULL;
int gd_buf_size;
- ret = bdrv_create_file(filename, NULL, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto exit;
@@ -1681,17 +1693,16 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
return VMDK_OK;
}
-static int vmdk_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
{
int idx = 0;
BlockDriverState *new_bs = NULL;
- Error *local_err;
+ Error *local_err = NULL;
char *desc = NULL;
int64_t total_size = 0, filesize;
- const char *adapter_type = NULL;
- const char *backing_file = NULL;
- const char *fmt = NULL;
+ char *adapter_type = NULL;
+ char *backing_file = NULL;
+ char *fmt = NULL;
int flags = 0;
int ret = 0;
bool flat, split, compress;
@@ -1707,8 +1718,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
const char desc_template[] =
"# Disk DescriptorFile\n"
"version=1\n"
- "CID=%x\n"
- "parentCID=%x\n"
+ "CID=%" PRIx32 "\n"
+ "parentCID=%" PRIx32 "\n"
"createType=\"%s\"\n"
"%s"
"\n"
@@ -1720,7 +1731,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
"\n"
"ddb.virtualHWVersion = \"%d\"\n"
"ddb.geometry.cylinders = \"%" PRId64 "\"\n"
- "ddb.geometry.heads = \"%d\"\n"
+ "ddb.geometry.heads = \"%" PRIu32 "\"\n"
"ddb.geometry.sectors = \"63\"\n"
"ddb.adapterType = \"%s\"\n";
@@ -1731,24 +1742,19 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
goto exit;
}
/* Read out options */
- while (options && options->name) {
- if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
- total_size = options->value.n;
- } else if (!strcmp(options->name, BLOCK_OPT_ADAPTER_TYPE)) {
- adapter_type = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
- backing_file = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
- flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
- } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
- fmt = options->value.s;
- } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
- zeroed_grain |= options->value.n;
- }
- options++;
+ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
+ backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
+ flags |= BLOCK_FLAG_COMPAT6;
}
+ fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
+ if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) {
+ zeroed_grain = true;
+ }
+
if (!adapter_type) {
- adapter_type = "ide";
+ adapter_type = g_strdup("ide");
} else if (strcmp(adapter_type, "ide") &&
strcmp(adapter_type, "buslogic") &&
strcmp(adapter_type, "lsilogic") &&
@@ -1764,7 +1770,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
}
if (!fmt) {
/* Default format to monolithicSparse */
- fmt = "monolithicSparse";
+ fmt = g_strdup("monolithicSparse");
} else if (strcmp(fmt, "monolithicFlat") &&
strcmp(fmt, "monolithicSparse") &&
strcmp(fmt, "twoGbMaxExtentSparse") &&
@@ -1780,9 +1786,9 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
strcmp(fmt, "twoGbMaxExtentFlat"));
compress = !strcmp(fmt, "streamOptimized");
if (flat) {
- desc_extent_line = "RW %lld FLAT \"%s\" 0\n";
+ desc_extent_line = "RW %" PRId64 " FLAT \"%s\" 0\n";
} else {
- desc_extent_line = "RW %lld SPARSE \"%s\"\n";
+ desc_extent_line = "RW %" PRId64 " SPARSE \"%s\"\n";
}
if (flat && backing_file) {
error_setg(errp, "Flat image can't have backing file");
@@ -1837,7 +1843,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
path, desc_filename);
if (vmdk_create_extent(ext_filename, size,
- flat, compress, zeroed_grain, errp)) {
+ flat, compress, zeroed_grain, opts, errp)) {
ret = -EINVAL;
goto exit;
}
@@ -1850,7 +1856,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
}
/* generate descriptor file */
desc = g_strdup_printf(desc_template,
- (unsigned int)time(NULL),
+ (uint32_t)time(NULL),
parent_cid,
fmt,
parent_desc_line,
@@ -1865,9 +1871,9 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
if (!split && !flat) {
desc_offset = 0x200;
} else {
- ret = bdrv_create_file(filename, options, &local_err);
+ ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not create image file");
+ error_propagate(errp, local_err);
goto exit;
}
}
@@ -1875,7 +1881,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options,
ret = bdrv_open(&new_bs, filename, NULL, NULL,
BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not write description");
+ error_propagate(errp, local_err);
goto exit;
}
ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
@@ -1895,6 +1901,9 @@ exit:
if (new_bs) {
bdrv_unref(new_bs);
}
+ g_free(adapter_type);
+ g_free(backing_file);
+ g_free(fmt);
g_free(desc);
g_string_free(ext_desc_lines, true);
return ret;
@@ -2062,41 +2071,88 @@ static ImageInfoSpecific *vmdk_get_specific_info(BlockDriverState *bs)
return spec_info;
}
-static QEMUOptionParameter vmdk_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_ADAPTER_TYPE,
- .type = OPT_STRING,
- .help = "Virtual adapter type, can be one of "
- "ide (default), lsilogic, buslogic or legacyESX"
- },
- {
- .name = BLOCK_OPT_BACKING_FILE,
- .type = OPT_STRING,
- .help = "File name of a base image"
- },
- {
- .name = BLOCK_OPT_COMPAT6,
- .type = OPT_FLAG,
- .help = "VMDK version 6 image"
- },
- {
- .name = BLOCK_OPT_SUBFMT,
- .type = OPT_STRING,
- .help =
- "VMDK flat extent format, can be one of "
- "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
- },
- {
- .name = BLOCK_OPT_ZEROED_GRAIN,
- .type = OPT_FLAG,
- .help = "Enable efficient zero writes using the zeroed-grain GTE feature"
- },
- { NULL }
+static int vmdk_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+ int i;
+ BDRVVmdkState *s = bs->opaque;
+ assert(s->num_extents);
+ bdi->needs_compressed_writes = s->extents[0].compressed;
+ if (!s->extents[0].flat) {
+ bdi->cluster_size = s->extents[0].cluster_sectors << BDRV_SECTOR_BITS;
+ }
+ /* See if we have multiple extents but they have different cases */
+ for (i = 1; i < s->num_extents; i++) {
+ if (bdi->needs_compressed_writes != s->extents[i].compressed ||
+ (bdi->cluster_size && bdi->cluster_size !=
+ s->extents[i].cluster_sectors << BDRV_SECTOR_BITS)) {
+ return -ENOTSUP;
+ }
+ }
+ return 0;
+}
+
+static void vmdk_detach_aio_context(BlockDriverState *bs)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_extents; i++) {
+ bdrv_detach_aio_context(s->extents[i].file);
+ }
+}
+
+static void vmdk_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ BDRVVmdkState *s = bs->opaque;
+ int i;
+
+ for (i = 0; i < s->num_extents; i++) {
+ bdrv_attach_aio_context(s->extents[i].file, new_context);
+ }
+}
+
+static QemuOptsList vmdk_create_opts = {
+ .name = "vmdk-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_ADAPTER_TYPE,
+ .type = QEMU_OPT_STRING,
+ .help = "Virtual adapter type, can be one of "
+ "ide (default), lsilogic, buslogic or legacyESX"
+ },
+ {
+ .name = BLOCK_OPT_BACKING_FILE,
+ .type = QEMU_OPT_STRING,
+ .help = "File name of a base image"
+ },
+ {
+ .name = BLOCK_OPT_COMPAT6,
+ .type = QEMU_OPT_BOOL,
+ .help = "VMDK version 6 image",
+ .def_value_str = "off"
+ },
+ {
+ .name = BLOCK_OPT_SUBFMT,
+ .type = QEMU_OPT_STRING,
+ .help =
+ "VMDK flat extent format, can be one of "
+ "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
+ },
+ {
+ .name = BLOCK_OPT_ZEROED_GRAIN,
+ .type = QEMU_OPT_BOOL,
+ .help = "Enable efficient zero writes "
+ "using the zeroed-grain GTE feature"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_vmdk = {
@@ -2108,6 +2164,7 @@ static BlockDriver bdrv_vmdk = {
.bdrv_reopen_prepare = vmdk_reopen_prepare,
.bdrv_read = vmdk_co_read,
.bdrv_write = vmdk_co_write,
+ .bdrv_write_compressed = vmdk_write_compressed,
.bdrv_co_write_zeroes = vmdk_co_write_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
@@ -2117,8 +2174,12 @@ static BlockDriver bdrv_vmdk = {
.bdrv_has_zero_init = vmdk_has_zero_init,
.bdrv_get_specific_info = vmdk_get_specific_info,
.bdrv_refresh_limits = vmdk_refresh_limits,
+ .bdrv_get_info = vmdk_get_info,
+ .bdrv_detach_aio_context = vmdk_detach_aio_context,
+ .bdrv_attach_aio_context = vmdk_attach_aio_context,
- .create_options = vmdk_create_options,
+ .supports_backing = true,
+ .create_opts = &vmdk_create_opts,
};
static void bdrv_vmdk_init(void)
diff --git a/block/vpc.c b/block/vpc.c
index 2e25f5723..8b376a40b 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -29,6 +29,13 @@
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#endif
+#ifdef __linux__
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#ifndef FS_NOCOW_FL
+#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
+#endif
+#endif
/**************************************************************/
@@ -738,12 +745,11 @@ static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
return ret;
}
-static int vpc_create(const char *filename, QEMUOptionParameter *options,
- Error **errp)
+static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
{
uint8_t buf[1024];
VHDFooter *footer = (VHDFooter *) buf;
- QEMUOptionParameter *disk_type_param;
+ char *disk_type_param;
int fd, i;
uint16_t cyls = 0;
uint8_t heads = 0;
@@ -752,27 +758,45 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options,
int64_t total_size;
int disk_type;
int ret = -EIO;
+ bool nocow = false;
/* Read out options */
- total_size = get_option_parameter(options, BLOCK_OPT_SIZE)->value.n;
-
- disk_type_param = get_option_parameter(options, BLOCK_OPT_SUBFMT);
- if (disk_type_param && disk_type_param->value.s) {
- if (!strcmp(disk_type_param->value.s, "dynamic")) {
+ total_size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
+ disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
+ if (disk_type_param) {
+ if (!strcmp(disk_type_param, "dynamic")) {
disk_type = VHD_DYNAMIC;
- } else if (!strcmp(disk_type_param->value.s, "fixed")) {
+ } else if (!strcmp(disk_type_param, "fixed")) {
disk_type = VHD_FIXED;
} else {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
} else {
disk_type = VHD_DYNAMIC;
}
+ nocow = qemu_opt_get_bool_del(opts, BLOCK_OPT_NOCOW, false);
/* Create the file */
fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
if (fd < 0) {
- return -EIO;
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nocow) {
+#ifdef __linux__
+ /* Set NOCOW flag to solve performance issue on fs like btrfs.
+ * This is an optimisation. The FS_IOC_SETFLAGS ioctl return value will
+ * be ignored since any failure of this operation should not block the
+ * left work.
+ */
+ int attr;
+ if (ioctl(fd, FS_IOC_GETFLAGS, &attr) == 0) {
+ attr |= FS_NOCOW_FL;
+ ioctl(fd, FS_IOC_SETFLAGS, &attr);
+ }
+#endif
}
/*
@@ -837,8 +861,10 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options,
ret = create_fixed_disk(fd, buf, total_size);
}
- fail:
+fail:
qemu_close(fd);
+out:
+ g_free(disk_type_param);
return ret;
}
@@ -866,20 +892,29 @@ static void vpc_close(BlockDriverState *bs)
error_free(s->migration_blocker);
}
-static QEMUOptionParameter vpc_create_options[] = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_SUBFMT,
- .type = OPT_STRING,
- .help =
- "Type of virtual hard disk format. Supported formats are "
- "{dynamic (default) | fixed} "
- },
- { NULL }
+static QemuOptsList vpc_create_opts = {
+ .name = "vpc-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_SUBFMT,
+ .type = QEMU_OPT_STRING,
+ .help =
+ "Type of virtual hard disk format. Supported formats are "
+ "{dynamic (default) | fixed} "
+ },
+ {
+ .name = BLOCK_OPT_NOCOW,
+ .type = QEMU_OPT_BOOL,
+ .help = "Turn off copy-on-write (valid only on btrfs)"
+ },
+ { /* end of list */ }
+ }
};
static BlockDriver bdrv_vpc = {
@@ -897,7 +932,7 @@ static BlockDriver bdrv_vpc = {
.bdrv_get_info = vpc_get_info,
- .create_options = vpc_create_options,
+ .create_opts = &vpc_create_opts,
.bdrv_has_zero_init = vpc_has_zero_init,
};
diff --git a/block/vvfat.c b/block/vvfat.c
index 1978c9ed6..70176b161 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -787,7 +787,9 @@ static int read_directory(BDRVVVFATState* s, int mapping_index)
s->current_mapping->path=buffer;
s->current_mapping->read_only =
(st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)) == 0;
- }
+ } else {
+ g_free(buffer);
+ }
}
closedir(dir);
@@ -831,7 +833,8 @@ static inline off_t cluster2sector(BDRVVVFATState* s, uint32_t cluster_num)
}
static int init_directories(BDRVVVFATState* s,
- const char *dirname, int heads, int secs)
+ const char *dirname, int heads, int secs,
+ Error **errp)
{
bootsector_t* bootsector;
mapping_t* mapping;
@@ -892,8 +895,8 @@ static int init_directories(BDRVVVFATState* s,
if (mapping->mode & MODE_DIRECTORY) {
mapping->begin = cluster;
if(read_directory(s, i)) {
- fprintf(stderr, "Could not read directory %s\n",
- mapping->path);
+ error_setg(errp, "Could not read directory %s",
+ mapping->path);
return -1;
}
mapping = array_get(&(s->mapping), i);
@@ -919,9 +922,10 @@ static int init_directories(BDRVVVFATState* s,
cluster = mapping->end;
if(cluster > s->cluster_count) {
- fprintf(stderr,"Directory does not fit in FAT%d (capacity %.2f MB)\n",
- s->fat_type, s->sector_count / 2000.0);
- return -EINVAL;
+ error_setg(errp,
+ "Directory does not fit in FAT%d (capacity %.2f MB)",
+ s->fat_type, s->sector_count / 2000.0);
+ return -1;
}
/* fix fat for entry */
@@ -979,7 +983,7 @@ static int init_directories(BDRVVVFATState* s,
static BDRVVVFATState *vvv = NULL;
#endif
-static int enable_write_target(BDRVVVFATState *s);
+static int enable_write_target(BDRVVVFATState *s, Error **errp);
static int is_consistent(BDRVVVFATState *s);
static void vvfat_rebind(BlockDriverState *bs)
@@ -1160,7 +1164,7 @@ DLOG(if (stderr == NULL) {
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
- ret = enable_write_target(s);
+ ret = enable_write_target(s, errp);
if (ret < 0) {
goto fail;
}
@@ -1169,7 +1173,7 @@ DLOG(if (stderr == NULL) {
bs->total_sectors = cyls * heads * secs;
- if (init_directories(s, dirname, heads, secs)) {
+ if (init_directories(s, dirname, heads, secs, errp)) {
ret = -EIO;
goto fail;
}
@@ -1864,7 +1868,7 @@ static int check_directory_consistency(BDRVVVFATState *s,
if (s->used_clusters[cluster_num] & USED_ANY) {
fprintf(stderr, "cluster %d used more than once\n", (int)cluster_num);
- return 0;
+ goto fail;
}
s->used_clusters[cluster_num] = USED_DIRECTORY;
@@ -2904,11 +2908,10 @@ static BlockDriver vvfat_write_target = {
.bdrv_close = write_target_close,
};
-static int enable_write_target(BDRVVVFATState *s)
+static int enable_write_target(BDRVVVFATState *s, Error **errp)
{
- BlockDriver *bdrv_qcow;
- QEMUOptionParameter *options;
- Error *local_err = NULL;
+ BlockDriver *bdrv_qcow = NULL;
+ QemuOpts *opts = NULL;
int ret;
int size = sector2cluster(s, s->sector_count);
s->used_clusters = calloc(size, 1);
@@ -2918,28 +2921,26 @@ static int enable_write_target(BDRVVVFATState *s)
s->qcow_filename = g_malloc(1024);
ret = get_tmp_filename(s->qcow_filename, 1024);
if (ret < 0) {
+ error_setg_errno(errp, -ret, "can't create temporary file");
goto err;
}
bdrv_qcow = bdrv_find_format("qcow");
- options = parse_option_parameters("", bdrv_qcow->create_options, NULL);
- set_option_parameter_int(options, BLOCK_OPT_SIZE, s->sector_count * 512);
- set_option_parameter(options, BLOCK_OPT_BACKING_FILE, "fat:");
+ opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512);
+ qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:");
- ret = bdrv_create(bdrv_qcow, s->qcow_filename, options, &local_err);
+ ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp);
+ qemu_opts_del(opts);
if (ret < 0) {
- qerror_report_err(local_err);
- error_free(local_err);
goto err;
}
s->qcow = NULL;
ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, bdrv_qcow,
- &local_err);
+ BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
+ bdrv_qcow, errp);
if (ret < 0) {
- qerror_report_err(local_err);
- error_free(local_err);
goto err;
}
@@ -2947,7 +2948,7 @@ static int enable_write_target(BDRVVVFATState *s)
unlink(s->qcow_filename);
#endif
- s->bs->backing_hd = bdrv_new("");
+ bdrv_set_backing_hd(s->bs, bdrv_new("", &error_abort));
s->bs->backing_hd->drv = &vvfat_write_target;
s->bs->backing_hd->opaque = g_malloc(sizeof(void*));
*(void**)s->bs->backing_hd->opaque = s;
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 5d1d199b6..8e417f70a 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -40,6 +40,7 @@ struct QEMUWin32AIOState {
HANDLE hIOCP;
EventNotifier e;
int count;
+ bool is_aio_context_attached;
};
typedef struct QEMUWin32AIOCB {
@@ -114,7 +115,7 @@ static void win32_aio_cancel(BlockDriverAIOCB *blockacb)
* wait for completion.
*/
while (!HasOverlappedIoCompleted(&waiocb->ov)) {
- qemu_aio_wait();
+ aio_poll(bdrv_get_aio_context(blockacb->bs), true);
}
}
@@ -180,6 +181,20 @@ int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile)
}
}
+void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *old_context)
+{
+ aio_set_event_notifier(old_context, &aio->e, NULL);
+ aio->is_aio_context_attached = false;
+}
+
+void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
+ AioContext *new_context)
+{
+ aio->is_aio_context_attached = true;
+ aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+}
+
QEMUWin32AIOState *win32_aio_init(void)
{
QEMUWin32AIOState *s;
@@ -194,8 +209,6 @@ QEMUWin32AIOState *win32_aio_init(void)
goto out_close_efd;
}
- qemu_aio_set_event_notifier(&s->e, win32_aio_completion_cb);
-
return s;
out_close_efd:
@@ -204,3 +217,11 @@ out_free_state:
g_free(s);
return NULL;
}
+
+void win32_aio_cleanup(QEMUWin32AIOState *aio)
+{
+ assert(!aio->is_aio_context_attached);
+ CloseHandle(aio->hIOCP);
+ event_notifier_cleanup(&aio->e);
+ g_free(aio);
+}