diff options
author | Chanho Park <chanho61.park@samsung.com> | 2014-09-05 20:35:53 +0900 |
---|---|---|
committer | Chanho Park <chanho61.park@samsung.com> | 2014-09-05 20:35:53 +0900 |
commit | 16b1353a36171ae06d63fd309f4772dbfb1da113 (patch) | |
tree | cf6c297ee81aba0d9b47f23d78a889667e7bce48 /block.c | |
parent | a15119db2ff5c2fdfdeb913b297bf8aa3399132e (diff) | |
download | qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.tar.gz qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.tar.bz2 qemu-16b1353a36171ae06d63fd309f4772dbfb1da113.zip |
Imported Upstream version 2.1.0upstream/2.1.0
Diffstat (limited to 'block.c')
-rw-r--r-- | block.c | 1130 |
1 files changed, 738 insertions, 392 deletions
@@ -24,7 +24,6 @@ #include "config-host.h" #include "qemu-common.h" #include "trace.h" -#include "monitor/monitor.h" #include "block/block_int.h" #include "block/blockjob.h" #include "qemu/module.h" @@ -35,6 +34,7 @@ #include "block/qapi.h" #include "qmp-commands.h" #include "qemu/timer.h" +#include "qapi-event.h" #ifdef CONFIG_BSD #include <sys/types.h> @@ -179,6 +179,7 @@ void bdrv_io_limits_enable(BlockDriverState *bs) { assert(!bs->io_limits_enabled); throttle_init(&bs->throttle_state, + bdrv_get_aio_context(bs), QEMU_CLOCK_VIRTUAL, bdrv_throttle_read_timer_cb, bdrv_throttle_write_timer_cb, @@ -332,9 +333,21 @@ void bdrv_register(BlockDriver *bdrv) } /* create a new block device (by default it is empty) */ -BlockDriverState *bdrv_new(const char *device_name) +BlockDriverState *bdrv_new(const char *device_name, Error **errp) { BlockDriverState *bs; + int i; + + if (bdrv_find(device_name)) { + error_setg(errp, "Device with id '%s' already exists", + device_name); + return NULL; + } + if (bdrv_find_node(device_name)) { + error_setg(errp, "Device with node-name '%s' already exists", + device_name); + return NULL; + } bs = g_malloc0(sizeof(BlockDriverState)); QLIST_INIT(&bs->dirty_bitmaps); @@ -342,12 +355,16 @@ BlockDriverState *bdrv_new(const char *device_name) if (device_name[0] != '\0') { QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list); } + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + QLIST_INIT(&bs->op_blockers[i]); + } bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); notifier_with_return_list_init(&bs->before_write_notifiers); qemu_co_queue_init(&bs->throttled_reqs[0]); qemu_co_queue_init(&bs->throttled_reqs[1]); bs->refcnt = 1; + bs->aio_context = qemu_get_aio_context(); return bs; } @@ -407,7 +424,7 @@ BlockDriver *bdrv_find_whitelisted_format(const char *format_name, typedef struct CreateCo { BlockDriver *drv; char *filename; - QEMUOptionParameter *options; + QemuOpts *opts; int ret; Error *err; } CreateCo; @@ -420,7 +437,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) CreateCo *cco = opaque; assert(cco->drv); - ret = cco->drv->bdrv_create(cco->filename, cco->options, &local_err); + ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err); if (local_err) { error_propagate(&cco->err, local_err); } @@ -428,7 +445,7 @@ static void coroutine_fn bdrv_create_co_entry(void *opaque) } int bdrv_create(BlockDriver *drv, const char* filename, - QEMUOptionParameter *options, Error **errp) + QemuOpts *opts, Error **errp) { int ret; @@ -436,7 +453,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, CreateCo cco = { .drv = drv, .filename = g_strdup(filename), - .options = options, + .opts = opts, .ret = NOT_DONE, .err = NULL, }; @@ -454,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, co = qemu_coroutine_create(bdrv_create_co_entry); qemu_coroutine_enter(co, &cco); while (cco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(qemu_get_aio_context(), true); } } @@ -472,8 +489,7 @@ out: return ret; } -int bdrv_create_file(const char* filename, QEMUOptionParameter *options, - Error **errp) +int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp) { BlockDriver *drv; Error *local_err = NULL; @@ -485,26 +501,31 @@ int bdrv_create_file(const char* filename, QEMUOptionParameter *options, return -ENOENT; } - ret = bdrv_create(drv, filename, options, &local_err); + ret = bdrv_create(drv, filename, opts, &local_err); if (local_err) { error_propagate(errp, local_err); } return ret; } -int bdrv_refresh_limits(BlockDriverState *bs) +void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) { BlockDriver *drv = bs->drv; + Error *local_err = NULL; memset(&bs->bl, 0, sizeof(bs->bl)); if (!drv) { - return 0; + return; } /* Take some limits from the children as a default */ if (bs->file) { - bdrv_refresh_limits(bs->file); + bdrv_refresh_limits(bs->file, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; } else { @@ -512,7 +533,11 @@ int bdrv_refresh_limits(BlockDriverState *bs) } if (bs->backing_hd) { - bdrv_refresh_limits(bs->backing_hd); + bdrv_refresh_limits(bs->backing_hd, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } bs->bl.opt_transfer_length = MAX(bs->bl.opt_transfer_length, bs->backing_hd->bl.opt_transfer_length); @@ -523,10 +548,8 @@ int bdrv_refresh_limits(BlockDriverState *bs) /* Then let the driver override it */ if (drv->bdrv_refresh_limits) { - return drv->bdrv_refresh_limits(bs); + drv->bdrv_refresh_limits(bs, errp); } - - return 0; } /* @@ -763,63 +786,100 @@ void bdrv_disable_copy_on_read(BlockDriverState *bs) bs->copy_on_read--; } +/* + * Returns the flags that a temporary snapshot should get, based on the + * originally requested flags (the originally requested image will have flags + * like a backing file) + */ +static int bdrv_temp_snapshot_flags(int flags) +{ + return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY; +} + +/* + * Returns the flags that bs->file should get, based on the given flags for + * the parent BDS + */ +static int bdrv_inherited_flags(int flags) +{ + /* Enable protocol handling, disable format probing for bs->file */ + flags |= BDRV_O_PROTOCOL; + + /* Our block drivers take care to send flushes and respect unmap policy, + * so we can enable both unconditionally on lower layers. */ + flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP; + + /* Clear flags that only apply to the top layer */ + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ); + + return flags; +} + +/* + * Returns the flags that bs->backing_hd should get, based on the given flags + * for the parent BDS + */ +static int bdrv_backing_flags(int flags) +{ + /* backing files always opened read-only */ + flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ); + + /* snapshot=on is handled on the top layer */ + flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY); + + return flags; +} + static int bdrv_open_flags(BlockDriverState *bs, int flags) { int open_flags = flags | BDRV_O_CACHE_WB; - /* The backing file of a temporary snapshot is read-only */ - if (flags & BDRV_O_SNAPSHOT) { - open_flags &= ~BDRV_O_RDWR; - } - /* * Clear flags that are internal to the block layer before opening the * image. */ - open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); + open_flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_PROTOCOL); /* * Snapshots should be writable. */ - if (bs->is_temporary) { + if (flags & BDRV_O_TEMPORARY) { open_flags |= BDRV_O_RDWR; } return open_flags; } -static int bdrv_assign_node_name(BlockDriverState *bs, - const char *node_name, - Error **errp) +static void bdrv_assign_node_name(BlockDriverState *bs, + const char *node_name, + Error **errp) { if (!node_name) { - return 0; + return; } /* empty string node name is invalid */ if (node_name[0] == '\0') { error_setg(errp, "Empty node name"); - return -EINVAL; + return; } /* takes care of avoiding namespaces collisions */ if (bdrv_find(node_name)) { error_setg(errp, "node-name=%s is conflicting with a device id", node_name); - return -EINVAL; + return; } /* takes care of avoiding duplicates node names */ if (bdrv_find_node(node_name)) { error_setg(errp, "Duplicate node name"); - return -EINVAL; + return; } /* copy node name into the bs and insert it into the graph list */ pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); - - return 0; } /* @@ -854,9 +914,10 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name); node_name = qdict_get_try_str(options, "node-name"); - ret = bdrv_assign_node_name(bs, node_name, errp); - if (ret < 0) { - return ret; + bdrv_assign_node_name(bs, node_name, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; } qdict_del(options, "node-name"); @@ -874,6 +935,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, bs->zero_beyond_eof = true; open_flags = bdrv_open_flags(bs, flags); bs->read_only = !(open_flags & BDRV_O_RDWR); + bs->growable = !!(flags & BDRV_O_PROTOCOL); if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) { error_setg(errp, @@ -938,16 +1000,15 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, goto free_and_fail; } - bdrv_refresh_limits(bs); + bdrv_refresh_limits(bs, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto free_and_fail; + } + assert(bdrv_opt_mem_align(bs) != 0); assert((bs->request_alignment != 0) || bs->sg); - -#ifndef _WIN32 - if (bs->is_temporary) { - assert(bs->filename[0] != '\0'); - unlink(bs->filename); - } -#endif return 0; free_and_fail: @@ -958,94 +1019,155 @@ free_and_fail: return ret; } +static QDict *parse_json_filename(const char *filename, Error **errp) +{ + QObject *options_obj; + QDict *options; + int ret; + + ret = strstart(filename, "json:", &filename); + assert(ret); + + options_obj = qobject_from_json(filename); + if (!options_obj) { + error_setg(errp, "Could not parse the JSON options"); + return NULL; + } + + if (qobject_type(options_obj) != QTYPE_QDICT) { + qobject_decref(options_obj); + error_setg(errp, "Invalid JSON object given"); + return NULL; + } + + options = qobject_to_qdict(options_obj); + qdict_flatten(options); + + return options; +} + /* - * Opens a file using a protocol (file, host_device, nbd, ...) - * - * options is an indirect pointer to a QDict of options to pass to the block - * drivers, or pointer to NULL for an empty set of options. If this function - * takes ownership of the QDict reference, it will set *options to NULL; - * otherwise, it will contain unused/unrecognized options after this function - * returns. Then, the caller is responsible for freeing it. If it intends to - * reuse the QDict, QINCREF() should be called beforehand. + * Fills in default options for opening images and converts the legacy + * filename/flags pair to option QDict entries. */ -static int bdrv_file_open(BlockDriverState *bs, const char *filename, - QDict **options, int flags, Error **errp) +static int bdrv_fill_options(QDict **options, const char **pfilename, int flags, + BlockDriver *drv, Error **errp) { - BlockDriver *drv; + const char *filename = *pfilename; const char *drvname; + bool protocol = flags & BDRV_O_PROTOCOL; bool parse_filename = false; Error *local_err = NULL; - int ret; + + /* Parse json: pseudo-protocol */ + if (filename && g_str_has_prefix(filename, "json:")) { + QDict *json_options = parse_json_filename(filename, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return -EINVAL; + } + + /* Options given in the filename have lower priority than options + * specified directly */ + qdict_join(*options, json_options, false); + QDECREF(json_options); + *pfilename = filename = NULL; + } /* Fetch the file name from the options QDict if necessary */ - if (!filename) { - filename = qdict_get_try_str(*options, "filename"); - } else if (filename && !qdict_haskey(*options, "filename")) { - qdict_put(*options, "filename", qstring_from_str(filename)); - parse_filename = true; - } else { - error_setg(errp, "Can't specify 'file' and 'filename' options at the " - "same time"); - ret = -EINVAL; - goto fail; + if (protocol && filename) { + if (!qdict_haskey(*options, "filename")) { + qdict_put(*options, "filename", qstring_from_str(filename)); + parse_filename = true; + } else { + error_setg(errp, "Can't specify 'file' and 'filename' options at " + "the same time"); + return -EINVAL; + } } /* Find the right block driver */ + filename = qdict_get_try_str(*options, "filename"); drvname = qdict_get_try_str(*options, "driver"); - if (drvname) { - drv = bdrv_find_format(drvname); - if (!drv) { - error_setg(errp, "Unknown driver '%s'", drvname); - } - qdict_del(*options, "driver"); - } else if (filename) { - drv = bdrv_find_protocol(filename, parse_filename); - if (!drv) { - error_setg(errp, "Unknown protocol"); + + if (drv) { + if (drvname) { + error_setg(errp, "Driver specified twice"); + return -EINVAL; } + drvname = drv->format_name; + qdict_put(*options, "driver", qstring_from_str(drvname)); } else { - error_setg(errp, "Must specify either driver or file"); - drv = NULL; - } + if (!drvname && protocol) { + if (filename) { + drv = bdrv_find_protocol(filename, parse_filename); + if (!drv) { + error_setg(errp, "Unknown protocol"); + return -EINVAL; + } - if (!drv) { - /* errp has been set already */ - ret = -ENOENT; - goto fail; + drvname = drv->format_name; + qdict_put(*options, "driver", qstring_from_str(drvname)); + } else { + error_setg(errp, "Must specify either driver or file"); + return -EINVAL; + } + } else if (drvname) { + drv = bdrv_find_format(drvname); + if (!drv) { + error_setg(errp, "Unknown driver '%s'", drvname); + return -ENOENT; + } + } } - /* Parse the filename and open it */ - if (drv->bdrv_parse_filename && parse_filename) { + assert(drv || !protocol); + + /* Driver-specific filename parsing */ + if (drv && drv->bdrv_parse_filename && parse_filename) { drv->bdrv_parse_filename(filename, *options, &local_err); if (local_err) { error_propagate(errp, local_err); - ret = -EINVAL; - goto fail; + return -EINVAL; } if (!drv->bdrv_needs_filename) { qdict_del(*options, "filename"); - } else { - filename = qdict_get_str(*options, "filename"); } } - if (!drv->bdrv_file_open) { - ret = bdrv_open(&bs, filename, NULL, *options, flags, drv, &local_err); - *options = NULL; - } else { - ret = bdrv_open_common(bs, NULL, *options, flags, drv, &local_err); - } - if (ret < 0) { - error_propagate(errp, local_err); - goto fail; - } - - bs->growable = 1; return 0; +} -fail: - return ret; +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd) +{ + + if (bs->backing_hd) { + assert(bs->backing_blocker); + bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker); + } else if (backing_hd) { + error_setg(&bs->backing_blocker, + "device is used as backing hd of '%s'", + bs->device_name); + } + + bs->backing_hd = backing_hd; + if (!backing_hd) { + error_free(bs->backing_blocker); + bs->backing_blocker = NULL; + goto out; + } + bs->open_flags &= ~BDRV_O_NO_BACKING; + pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename); + pstrcpy(bs->backing_format, sizeof(bs->backing_format), + backing_hd->drv ? backing_hd->drv->format_name : ""); + + bdrv_op_block_all(bs->backing_hd, bs->backing_blocker); + /* Otherwise we won't be able to commit due to check in bdrv_commit */ + bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, + bs->backing_blocker); +out: + bdrv_refresh_limits(bs, NULL); } /* @@ -1058,14 +1180,15 @@ fail: */ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) { - char backing_filename[PATH_MAX]; - int back_flags, ret; + char *backing_filename = g_malloc0(PATH_MAX); + int ret = 0; BlockDriver *back_drv = NULL; + BlockDriverState *backing_hd; Error *local_err = NULL; if (bs->backing_hd != NULL) { QDECREF(options); - return 0; + goto free_exit; } /* NULL means an empty set of options */ @@ -1078,42 +1201,42 @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp) backing_filename[0] = '\0'; } else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) { QDECREF(options); - return 0; + goto free_exit; } else { - bdrv_get_full_backing_filename(bs, backing_filename, - sizeof(backing_filename)); + bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX); + } + + if (!bs->drv || !bs->drv->supports_backing) { + ret = -EINVAL; + error_setg(errp, "Driver doesn't support backing files"); + QDECREF(options); + goto free_exit; } + backing_hd = bdrv_new("", errp); + if (bs->backing_format[0] != '\0') { back_drv = bdrv_find_format(bs->backing_format); } - /* backing files always opened read-only */ - back_flags = bs->open_flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | - BDRV_O_COPY_ON_READ); - assert(bs->backing_hd == NULL); - ret = bdrv_open(&bs->backing_hd, + ret = bdrv_open(&backing_hd, *backing_filename ? backing_filename : NULL, NULL, options, - back_flags, back_drv, &local_err); + bdrv_backing_flags(bs->open_flags), back_drv, &local_err); if (ret < 0) { - bs->backing_hd = NULL; + bdrv_unref(backing_hd); + backing_hd = NULL; bs->open_flags |= BDRV_O_NO_BACKING; error_setg(errp, "Could not open backing file: %s", error_get_pretty(local_err)); error_free(local_err); - return ret; + goto free_exit; } + bdrv_set_backing_hd(bs, backing_hd); - if (bs->backing_hd->file) { - pstrcpy(bs->backing_file, sizeof(bs->backing_file), - bs->backing_hd->file->filename); - } - - /* Recalculate the BlockLimits with the backing file */ - bdrv_refresh_limits(bs); - - return 0; +free_exit: + g_free(backing_filename); + return ret; } /* @@ -1157,6 +1280,7 @@ int bdrv_open_image(BlockDriverState **pbs, const char *filename, bdref_key); ret = -EINVAL; } + QDECREF(image_options); goto done; } @@ -1167,14 +1291,13 @@ done: return ret; } -void bdrv_append_temp_snapshot(BlockDriverState *bs, Error **errp) +int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp) { /* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */ - char tmp_filename[PATH_MAX + 1]; - + char *tmp_filename = g_malloc0(PATH_MAX + 1); int64_t total_size; BlockDriver *bdrv_qcow2; - QEMUOptionParameter *create_options; + QemuOpts *opts = NULL; QDict *snapshot_options; BlockDriverState *bs_snapshot; Error *local_err; @@ -1186,32 +1309,31 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, Error **errp) /* Get the required size from the image */ total_size = bdrv_getlength(bs); if (total_size < 0) { + ret = total_size; error_setg_errno(errp, -total_size, "Could not get image size"); - return; + goto out; } total_size &= BDRV_SECTOR_MASK; /* Create the temporary image */ - ret = get_tmp_filename(tmp_filename, sizeof(tmp_filename)); + ret = get_tmp_filename(tmp_filename, PATH_MAX + 1); if (ret < 0) { error_setg_errno(errp, -ret, "Could not get temporary filename"); - return; + goto out; } bdrv_qcow2 = bdrv_find_format("qcow2"); - create_options = parse_option_parameters("", bdrv_qcow2->create_options, - NULL); - - set_option_parameter_int(create_options, BLOCK_OPT_SIZE, total_size); - - ret = bdrv_create(bdrv_qcow2, tmp_filename, create_options, &local_err); - free_option_parameters(create_options); + opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0, + &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size); + ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err); + qemu_opts_del(opts); if (ret < 0) { error_setg_errno(errp, -ret, "Could not create temporary overlay " "'%s': %s", tmp_filename, error_get_pretty(local_err)); error_free(local_err); - return; + goto out; } /* Prepare a new options QDict for the temporary file */ @@ -1221,17 +1343,20 @@ void bdrv_append_temp_snapshot(BlockDriverState *bs, Error **errp) qdict_put(snapshot_options, "file.filename", qstring_from_str(tmp_filename)); - bs_snapshot = bdrv_new(""); - bs_snapshot->is_temporary = 1; + bs_snapshot = bdrv_new("", &error_abort); ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options, - bs->open_flags & ~BDRV_O_SNAPSHOT, bdrv_qcow2, &local_err); + flags, bdrv_qcow2, &local_err); if (ret < 0) { error_propagate(errp, local_err); - return; + goto out; } bdrv_append(bs_snapshot, bs); + +out: + g_free(tmp_filename); + return ret; } /* @@ -1257,6 +1382,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, BlockDriverState *file = NULL, *bs; const char *drvname; Error *local_err = NULL; + int snapshot_flags = 0; assert(pbs); @@ -1288,7 +1414,7 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, if (*pbs) { bs = *pbs; } else { - bs = bdrv_new(""); + bs = bdrv_new("", &error_abort); } /* NULL means an empty set of options */ @@ -1296,66 +1422,69 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, options = qdict_new(); } - bs->options = options; - options = qdict_clone_shallow(options); - - if (flags & BDRV_O_PROTOCOL) { - assert(!drv); - ret = bdrv_file_open(bs, filename, &options, flags & ~BDRV_O_PROTOCOL, - &local_err); - if (!ret) { - drv = bs->drv; - goto done; - } else if (bs->drv) { - goto close_and_fail; - } else { - goto fail; - } - } - - /* Open image file without format layer */ - if (flags & BDRV_O_RDWR) { - flags |= BDRV_O_ALLOW_RDWR; - } - - assert(file == NULL); - ret = bdrv_open_image(&file, filename, options, "file", - bdrv_open_flags(bs, flags | BDRV_O_UNMAP) | - BDRV_O_PROTOCOL, true, &local_err); - if (ret < 0) { - goto unlink_and_fail; + ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err); + if (local_err) { + goto fail; } /* Find the right image format driver */ + drv = NULL; drvname = qdict_get_try_str(options, "driver"); if (drvname) { drv = bdrv_find_format(drvname); qdict_del(options, "driver"); if (!drv) { - error_setg(errp, "Invalid driver: '%s'", drvname); + error_setg(errp, "Unknown driver: '%s'", drvname); ret = -EINVAL; - goto unlink_and_fail; + goto fail; } } - if (!drv) { - if (file) { - ret = find_image_format(file, filename, &drv, &local_err); - } else { - error_setg(errp, "Must specify either driver or file"); - ret = -EINVAL; - goto unlink_and_fail; + assert(drvname || !(flags & BDRV_O_PROTOCOL)); + if (drv && !drv->bdrv_file_open) { + /* If the user explicitly wants a format driver here, we'll need to add + * another layer for the protocol in bs->file */ + flags &= ~BDRV_O_PROTOCOL; + } + + bs->options = options; + options = qdict_clone_shallow(options); + + /* Open image file without format layer */ + if ((flags & BDRV_O_PROTOCOL) == 0) { + if (flags & BDRV_O_RDWR) { + flags |= BDRV_O_ALLOW_RDWR; + } + if (flags & BDRV_O_SNAPSHOT) { + snapshot_flags = bdrv_temp_snapshot_flags(flags); + flags = bdrv_backing_flags(flags); + } + + assert(file == NULL); + ret = bdrv_open_image(&file, filename, options, "file", + bdrv_inherited_flags(flags), + true, &local_err); + if (ret < 0) { + goto fail; } } - if (!drv) { - goto unlink_and_fail; + /* Image format probing */ + if (!drv && file) { + ret = find_image_format(file, filename, &drv, &local_err); + if (ret < 0) { + goto fail; + } + } else if (!drv) { + error_setg(errp, "Must specify either driver or file"); + ret = -EINVAL; + goto fail; } /* Open the image */ ret = bdrv_open_common(bs, file, options, flags, drv, &local_err); if (ret < 0) { - goto unlink_and_fail; + goto fail; } if (file && (bs->file != file)) { @@ -1376,16 +1505,13 @@ int bdrv_open(BlockDriverState **pbs, const char *filename, /* For snapshot=on, create a temporary qcow2 overlay. bs points to the * temporary snapshot afterwards. */ - if (flags & BDRV_O_SNAPSHOT) { - bdrv_append_temp_snapshot(bs, &local_err); + if (snapshot_flags) { + ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err); if (local_err) { - error_propagate(errp, local_err); goto close_and_fail; } } - -done: /* Check if any unknown options were used */ if (options && (qdict_size(options) != 0)) { const QDictEntry *entry = qdict_first(options); @@ -1417,14 +1543,10 @@ done: *pbs = bs; return 0; -unlink_and_fail: +fail: if (file != NULL) { bdrv_unref(file); } - if (bs->is_temporary) { - unlink(filename); - } -fail: QDECREF(bs->options); QDECREF(options); bs->options = NULL; @@ -1488,8 +1610,11 @@ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, QSIMPLEQ_INIT(bs_queue); } + /* bdrv_open() masks this flag out */ + flags &= ~BDRV_O_PROTOCOL; + if (bs->file) { - bdrv_reopen_queue(bs_queue, bs->file, flags); + bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags)); } bs_entry = g_new0(BlockReopenQueueEntry, 1); @@ -1666,7 +1791,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state) BDRV_O_CACHE_WB); reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR); - bdrv_refresh_limits(reopen_state->bs); + bdrv_refresh_limits(reopen_state->bs, NULL); } /* @@ -1699,16 +1824,12 @@ void bdrv_close(BlockDriverState *bs) if (bs->drv) { if (bs->backing_hd) { - bdrv_unref(bs->backing_hd); - bs->backing_hd = NULL; + BlockDriverState *backing_hd = bs->backing_hd; + bdrv_set_backing_hd(bs, NULL); + bdrv_unref(backing_hd); } bs->drv->bdrv_close(bs); g_free(bs->opaque); -#ifdef _WIN32 - if (bs->is_temporary) { - unlink(bs->filename); - } -#endif bs->opaque = NULL; bs->drv = NULL; bs->copy_on_read = 0; @@ -1742,7 +1863,11 @@ void bdrv_close_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); bdrv_close(bs); + aio_context_release(aio_context); } } @@ -1767,17 +1892,6 @@ static bool bdrv_requests_pending(BlockDriverState *bs) return false; } -static bool bdrv_requests_pending_all(void) -{ - BlockDriverState *bs; - QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - if (bdrv_requests_pending(bs)) { - return true; - } - } - return false; -} - /* * Wait for pending requests to complete across all BlockDriverStates * @@ -1797,12 +1911,21 @@ void bdrv_drain_all(void) BlockDriverState *bs; while (busy) { + busy = false; + QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + bool bs_busy; + + aio_context_acquire(aio_context); + bdrv_flush_io_queue(bs); bdrv_start_throttled_reqs(bs); - } + bs_busy = bdrv_requests_pending(bs); + bs_busy |= aio_poll(aio_context, bs_busy); + aio_context_release(aio_context); - busy = bdrv_requests_pending_all(); - busy |= aio_poll(qemu_get_aio_context(), busy); + busy |= bs_busy; + } } } @@ -1832,7 +1955,6 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, BlockDriverState *bs_src) { /* move some fields that need to stay attached to the device */ - bs_dest->open_flags = bs_src->open_flags; /* dev info */ bs_dest->dev_ops = bs_src->dev_ops; @@ -1866,13 +1988,14 @@ static void bdrv_move_feature_fields(BlockDriverState *bs_dest, bs_dest->refcnt = bs_src->refcnt; /* job */ - bs_dest->in_use = bs_src->in_use; bs_dest->job = bs_src->job; /* keep the same entry in bdrv_states */ pstrcpy(bs_dest->device_name, sizeof(bs_dest->device_name), bs_src->device_name); bs_dest->device_list = bs_src->device_list; + memcpy(bs_dest->op_blockers, bs_src->op_blockers, + sizeof(bs_dest->op_blockers)); } /* @@ -1907,7 +2030,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) assert(QLIST_EMPTY(&bs_new->dirty_bitmaps)); assert(bs_new->job == NULL); assert(bs_new->dev == NULL); - assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1926,7 +2048,6 @@ void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old) /* Check a few fields that should remain attached to the device */ assert(bs_new->dev == NULL); assert(bs_new->job == NULL); - assert(bs_new->in_use == 0); assert(bs_new->io_limits_enabled == false); assert(!throttle_have_timer(&bs_new->throttle_state)); @@ -1959,19 +2080,14 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top) /* The contents of 'tmp' will become bs_top, as we are * swapping bs_new and bs_top contents. */ - bs_top->backing_hd = bs_new; - bs_top->open_flags &= ~BDRV_O_NO_BACKING; - pstrcpy(bs_top->backing_file, sizeof(bs_top->backing_file), - bs_new->filename); - pstrcpy(bs_top->backing_format, sizeof(bs_top->backing_format), - bs_new->drv ? bs_new->drv->format_name : ""); + bdrv_set_backing_hd(bs_top, bs_new); } static void bdrv_delete(BlockDriverState *bs) { assert(!bs->dev); assert(!bs->job); - assert(!bs->in_use); + assert(bdrv_op_blocker_is_empty(bs)); assert(!bs->refcnt); assert(QLIST_EMPTY(&bs->dirty_bitmaps)); @@ -2025,47 +2141,6 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, bs->dev_opaque = opaque; } -void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - enum MonitorEvent ev, - BlockErrorAction action, bool is_read) -{ - QObject *data; - const char *action_str; - - switch (action) { - case BDRV_ACTION_REPORT: - action_str = "report"; - break; - case BDRV_ACTION_IGNORE: - action_str = "ignore"; - break; - case BDRV_ACTION_STOP: - action_str = "stop"; - break; - default: - abort(); - } - - data = qobject_from_jsonf("{ 'device': %s, 'action': %s, 'operation': %s }", - bdrv->device_name, - action_str, - is_read ? "read" : "write"); - monitor_protocol_event(ev, data); - - qobject_decref(data); -} - -static void bdrv_emit_qmp_eject_event(BlockDriverState *bs, bool ejected) -{ - QObject *data; - - data = qobject_from_jsonf("{ 'device': %s, 'tray-open': %i }", - bdrv_get_device_name(bs), ejected); - monitor_protocol_event(QEVENT_DEVICE_TRAY_MOVED, data); - - qobject_decref(data); -} - static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load) { if (bs->dev_ops && bs->dev_ops->change_media_cb) { @@ -2073,11 +2148,13 @@ static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load) bs->dev_ops->change_media_cb(bs->dev_opaque, load); if (tray_was_closed) { /* tray open */ - bdrv_emit_qmp_eject_event(bs, true); + qapi_event_send_device_tray_moved(bdrv_get_device_name(bs), + true, &error_abort); } if (load) { /* tray close */ - bdrv_emit_qmp_eject_event(bs, false); + qapi_event_send_device_tray_moved(bdrv_get_device_name(bs), + false, &error_abort); } } } @@ -2153,7 +2230,8 @@ int bdrv_commit(BlockDriverState *bs) return -ENOTSUP; } - if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) { + if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) || + bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) { return -EBUSY; } @@ -2244,12 +2322,17 @@ int bdrv_commit_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); if (bs->drv && bs->backing_hd) { int ret = bdrv_commit(bs); if (ret < 0) { + aio_context_release(aio_context); return ret; } } + aio_context_release(aio_context); } return 0; } @@ -2439,32 +2522,23 @@ int bdrv_change_backing_file(BlockDriverState *bs, * * Returns NULL if bs is not found in active's image chain, * or if active == bs. + * + * Returns the bottommost base image if bs == NULL. */ BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs) { - BlockDriverState *overlay = NULL; - BlockDriverState *intermediate; - - assert(active != NULL); - assert(bs != NULL); - - /* if bs is the same as active, then by definition it has no overlay - */ - if (active == bs) { - return NULL; + while (active && bs != active->backing_hd) { + active = active->backing_hd; } - intermediate = active; - while (intermediate->backing_hd) { - if (intermediate->backing_hd == bs) { - overlay = intermediate; - break; - } - intermediate = intermediate->backing_hd; - } + return active; +} - return overlay; +/* Given a BDS, searches for the base layer. */ +BlockDriverState *bdrv_find_base(BlockDriverState *bs) +{ + return bdrv_find_overlay(bs, NULL); } typedef struct BlkIntermediateStates { @@ -2495,12 +2569,15 @@ typedef struct BlkIntermediateStates { * * base <- active * + * If backing_file_str is non-NULL, it will be used when modifying top's + * overlay image metadata. + * * Error conditions: * if active == top, that is considered an error * */ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, - BlockDriverState *base) + BlockDriverState *base, const char *backing_file_str) { BlockDriverState *intermediate; BlockDriverState *base_bs = NULL; @@ -2552,18 +2629,17 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, } /* success - we can delete the intermediate states, and link top->base */ - ret = bdrv_change_backing_file(new_top_bs, base_bs->filename, + backing_file_str = backing_file_str ? backing_file_str : base_bs->filename; + ret = bdrv_change_backing_file(new_top_bs, backing_file_str, base_bs->drv ? base_bs->drv->format_name : ""); if (ret) { goto exit; } - new_top_bs->backing_hd = base_bs; - - bdrv_refresh_limits(new_top_bs); + bdrv_set_backing_hd(new_top_bs, base_bs); QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { /* so that bdrv_close() does not recursively close the chain */ - intermediate_state->bs->backing_hd = NULL; + bdrv_set_backing_hd(intermediate_state->bs, NULL); bdrv_unref(intermediate_state->bs); } ret = 0; @@ -2581,6 +2657,10 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, { int64_t len; + if (size > INT_MAX) { + return -EIO; + } + if (!bdrv_is_inserted(bs)) return -ENOMEDIUM; @@ -2601,7 +2681,7 @@ static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num, int nb_sectors) { - if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { + if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { return -EIO; } @@ -2665,10 +2745,12 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset, /* Fast-path if already in coroutine context */ bdrv_rw_co_entry(&rwco); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_rw_co_entry); qemu_coroutine_enter(co, &rwco); while (rwco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } return rwco.ret; @@ -2686,6 +2768,10 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, .iov_len = nb_sectors * BDRV_SECTOR_SIZE, }; + if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) { + return -EINVAL; + } + qemu_iovec_init_external(&qiov, &iov, 1); return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS, &qiov, is_write, flags); @@ -2741,10 +2827,16 @@ int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, */ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags) { - int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE; + int64_t target_size; int64_t ret, nb_sectors, sector_num = 0; int n; + target_size = bdrv_getlength(bs); + if (target_size < 0) { + return target_size; + } + target_size /= BDRV_SECTOR_SIZE; + for (;;) { nb_sectors = target_size - sector_num; if (nb_sectors <= 0) { @@ -2931,6 +3023,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(!qiov || bytes == qiov->size); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -2975,8 +3068,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), align >> BDRV_SECTOR_BITS); if (max_nb_sectors > 0) { - ret = drv->bdrv_co_readv(bs, sector_num, - MIN(nb_sectors, max_nb_sectors), qiov); + QEMUIOVector local_qiov; + size_t local_sectors; + + max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS); + local_sectors = MIN(max_nb_sectors, nb_sectors); + + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, 0, + local_sectors * BDRV_SECTOR_SIZE); + + ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, + &local_qiov); + + qemu_iovec_destroy(&local_qiov); } else { ret = 0; } @@ -3188,6 +3293,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(!qiov || bytes == qiov->size); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); @@ -3196,6 +3302,15 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req); + if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && + !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes && + qemu_iovec_is_zero(qiov)) { + flags |= BDRV_REQ_ZERO_WRITE; + if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { + flags |= BDRV_REQ_MAY_UNMAP; + } + } + if (ret < 0) { /* Do nothing, write notifier decided to fail this request */ } else if (flags & BDRV_REQ_ZERO_WRITE) { @@ -3392,8 +3507,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset) return -ENOTSUP; if (bs->read_only) return -EACCES; - if (bdrv_in_use(bs)) - return -EBUSY; + ret = drv->bdrv_truncate(bs, offset); if (ret == 0) { ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS); @@ -3469,13 +3583,14 @@ BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int e switch (on_err) { case BLOCKDEV_ON_ERROR_ENOSPC: - return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + return (error == ENOSPC) ? + BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_STOP: - return BDRV_ACTION_STOP; + return BLOCK_ERROR_ACTION_STOP; case BLOCKDEV_ON_ERROR_REPORT: - return BDRV_ACTION_REPORT; + return BLOCK_ERROR_ACTION_REPORT; case BLOCKDEV_ON_ERROR_IGNORE: - return BDRV_ACTION_IGNORE; + return BLOCK_ERROR_ACTION_IGNORE; default: abort(); } @@ -3489,10 +3604,33 @@ void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, bool is_read, int error) { assert(error >= 0); - bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); - if (action == BDRV_ACTION_STOP) { - vm_stop(RUN_STATE_IO_ERROR); + + if (action == BLOCK_ERROR_ACTION_STOP) { + /* First set the iostatus, so that "info block" returns an iostatus + * that matches the events raised so far (an additional error iostatus + * is fine, but not a lost one). + */ bdrv_iostatus_set_err(bs, error); + + /* Then raise the request to stop the VM and the event. + * qemu_system_vmstop_request_prepare has two effects. First, + * it ensures that the STOP event always comes after the + * BLOCK_IO_ERROR event. Second, it ensures that even if management + * can observe the STOP event and do a "cont" before the STOP + * event is issued, the VM will not stop. In this case, vm_start() + * also ensures that the STOP/RESUME pair of events is emitted. + */ + qemu_system_vmstop_request_prepare(); + qapi_event_send_block_io_error(bdrv_get_device_name(bs), + is_read ? IO_OPERATION_TYPE_READ : + IO_OPERATION_TYPE_WRITE, + action, &error_abort); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else { + qapi_event_send_block_io_error(bdrv_get_device_name(bs), + is_read ? IO_OPERATION_TYPE_READ : + IO_OPERATION_TYPE_WRITE, + action, &error_abort); } } @@ -3574,10 +3712,25 @@ void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void *opaque) { BlockDriver *drv; + int count = 0; + const char **formats = NULL; QLIST_FOREACH(drv, &bdrv_drivers, list) { - it(opaque, drv->format_name); + if (drv->format_name) { + bool found = false; + int i = count; + while (formats && i && !found) { + found = !strcmp(formats[--i], drv->format_name); + } + + if (!found) { + formats = g_realloc(formats, (count + 1) * sizeof(char *)); + formats[count++] = drv->format_name; + it(opaque, drv->format_name); + } + } } + g_free(formats); } /* This function is to find block backend bs */ @@ -3653,6 +3806,17 @@ BlockDriverState *bdrv_lookup_bs(const char *device, return NULL; } +/* If 'base' is in the same chain as 'top', return true. Otherwise, + * return false. If either argument is NULL, return false. */ +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) +{ + while (top && top != base) { + top = top->backing_hd; + } + + return top != NULL; +} + BlockDriverState *bdrv_next(BlockDriverState *bs) { if (!bs) { @@ -3686,10 +3850,15 @@ int bdrv_flush_all(void) int result = 0; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { - int ret = bdrv_flush(bs); + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_flush(bs); if (ret < 0 && !result) { result = ret; } + aio_context_release(aio_context); } return result; @@ -3797,7 +3966,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (!bs->drv->bdrv_co_get_block_status) { *pnum = nb_sectors; - ret = BDRV_BLOCK_DATA; + ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; if (bs->drv->protocol_name) { ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE); } @@ -3816,6 +3985,10 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, *pnum, pnum); } + if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { + ret |= BDRV_BLOCK_ALLOCATED; + } + if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; @@ -3876,10 +4049,12 @@ int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, /* Fast-path if already in coroutine context */ bdrv_get_block_status_co_entry(&data); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_get_block_status_co_entry); qemu_coroutine_enter(co, &data); while (!data.done) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } return data.ret; @@ -3892,9 +4067,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } - return - (ret & BDRV_BLOCK_DATA) || - ((ret & BDRV_BLOCK_ZERO) && !bdrv_has_zero_init(bs)); + return !!(ret & BDRV_BLOCK_ALLOCATED); } /* @@ -4187,22 +4360,6 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs) return 1 + bdrv_get_backing_file_depth(bs->backing_hd); } -BlockDriverState *bdrv_find_base(BlockDriverState *bs) -{ - BlockDriverState *curr_bs = NULL; - - if (!bs) { - return NULL; - } - - curr_bs = bs; - - while (curr_bs->backing_hd) { - curr_bs = curr_bs->backing_hd; - } - return curr_bs; -} - /**************************************************************/ /* async I/Os */ @@ -4474,7 +4631,7 @@ static BlockDriverAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs, acb->is_write = is_write; acb->qiov = qiov; acb->bounce = qemu_blockalign(bs, qiov->size); - acb->bh = qemu_bh_new(bdrv_aio_bh_cb, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb); if (is_write) { qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size); @@ -4513,13 +4670,14 @@ typedef struct BlockDriverAIOCBCoroutine { static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) { + AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); BlockDriverAIOCBCoroutine *acb = container_of(blockacb, BlockDriverAIOCBCoroutine, common); bool done = false; acb->done = &done; while (!done) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -4556,7 +4714,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) acb->req.nb_sectors, acb->req.qiov, acb->req.flags); } - acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } @@ -4592,7 +4750,7 @@ static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque) BlockDriverState *bs = acb->common.bs; acb->req.error = bdrv_co_flush(bs); - acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } @@ -4619,7 +4777,7 @@ static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque) BlockDriverState *bs = acb->common.bs; acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors); - acb->bh = qemu_bh_new(bdrv_co_em_bh, acb); + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); qemu_bh_schedule(acb->bh); } @@ -4830,7 +4988,11 @@ void bdrv_invalidate_cache_all(Error **errp) Error *local_err = NULL; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); bdrv_invalidate_cache(bs, &local_err); + aio_context_release(aio_context); if (local_err) { error_propagate(errp, local_err); return; @@ -4843,7 +5005,11 @@ void bdrv_clear_incoming_migration_all(void) BlockDriverState *bs; QTAILQ_FOREACH(bs, &bdrv_states, device_list) { + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING); + aio_context_release(aio_context); } } @@ -4859,10 +5025,12 @@ int bdrv_flush(BlockDriverState *bs) /* Fast-path if already in coroutine context */ bdrv_flush_co_entry(&rwco); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_flush_co_entry); qemu_coroutine_enter(co, &rwco); while (rwco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -4972,10 +5140,12 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors) /* Fast-path if already in coroutine context */ bdrv_discard_co_entry(&rwco); } else { + AioContext *aio_context = bdrv_get_aio_context(bs); + co = qemu_coroutine_create(bdrv_discard_co_entry); qemu_coroutine_enter(co, &rwco); while (rwco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(aio_context, true); } } @@ -5025,7 +5195,8 @@ void bdrv_eject(BlockDriverState *bs, bool eject_flag) } if (bs->device_name[0] != '\0') { - bdrv_emit_qmp_eject_event(bs, eject_flag); + qapi_event_send_device_tray_moved(bdrv_get_device_name(bs), + eject_flag, &error_abort); } } @@ -5096,7 +5267,8 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity) +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity, + Error **errp) { int64_t bitmap_size; BdrvDirtyBitmap *bitmap; @@ -5105,7 +5277,13 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity) granularity >>= BDRV_SECTOR_BITS; assert(granularity); - bitmap_size = (bdrv_getlength(bs) >> BDRV_SECTOR_BITS); + bitmap_size = bdrv_getlength(bs); + if (bitmap_size < 0) { + error_setg_errno(errp, -bitmap_size, "could not get length of device"); + errno = -bitmap_size; + return NULL; + } + bitmap_size >>= BDRV_SECTOR_BITS; bitmap = g_malloc0(sizeof(BdrvDirtyBitmap)); bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1); QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list); @@ -5199,15 +5377,74 @@ void bdrv_unref(BlockDriverState *bs) } } -void bdrv_set_in_use(BlockDriverState *bs, int in_use) +struct BdrvOpBlocker { + Error *reason; + QLIST_ENTRY(BdrvOpBlocker) list; +}; + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp) { - assert(bs->in_use != in_use); - bs->in_use = in_use; + BdrvOpBlocker *blocker; + assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + if (!QLIST_EMPTY(&bs->op_blockers[op])) { + blocker = QLIST_FIRST(&bs->op_blockers[op]); + if (errp) { + error_setg(errp, "Device '%s' is busy: %s", + bs->device_name, error_get_pretty(blocker->reason)); + } + return true; + } + return false; } -int bdrv_in_use(BlockDriverState *bs) +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason) { - return bs->in_use; + BdrvOpBlocker *blocker; + assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + + blocker = g_malloc0(sizeof(BdrvOpBlocker)); + blocker->reason = reason; + QLIST_INSERT_HEAD(&bs->op_blockers[op], blocker, list); +} + +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason) +{ + BdrvOpBlocker *blocker, *next; + assert((int) op >= 0 && op < BLOCK_OP_TYPE_MAX); + QLIST_FOREACH_SAFE(blocker, &bs->op_blockers[op], list, next) { + if (blocker->reason == reason) { + QLIST_REMOVE(blocker, list); + g_free(blocker); + } + } +} + +void bdrv_op_block_all(BlockDriverState *bs, Error *reason) +{ + int i; + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + bdrv_op_block(bs, i, reason); + } +} + +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason) +{ + int i; + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + bdrv_op_unblock(bs, i, reason); + } +} + +bool bdrv_op_blocker_is_empty(BlockDriverState *bs) +{ + int i; + + for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { + if (!QLIST_EMPTY(&bs->op_blockers[i])) { + return false; + } + } + return true; } void bdrv_iostatus_enable(BlockDriverState *bs) @@ -5276,8 +5513,10 @@ void bdrv_img_create(const char *filename, const char *fmt, char *options, uint64_t img_size, int flags, Error **errp, bool quiet) { - QEMUOptionParameter *param = NULL, *create_options = NULL; - QEMUOptionParameter *backing_fmt, *backing_file, *size; + QemuOptsList *create_opts = NULL; + QemuOpts *opts = NULL; + const char *backing_fmt, *backing_file; + int64_t size; BlockDriver *drv, *proto_drv; BlockDriver *backing_drv = NULL; Error *local_err = NULL; @@ -5296,28 +5535,23 @@ void bdrv_img_create(const char *filename, const char *fmt, return; } - create_options = append_option_parameters(create_options, - drv->create_options); - create_options = append_option_parameters(create_options, - proto_drv->create_options); + create_opts = qemu_opts_append(create_opts, drv->create_opts); + create_opts = qemu_opts_append(create_opts, proto_drv->create_opts); /* Create parameter list with default values */ - param = parse_option_parameters("", create_options, param); - - set_option_parameter_int(param, BLOCK_OPT_SIZE, img_size); + opts = qemu_opts_create(create_opts, NULL, 0, &error_abort); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size); /* Parse -o options */ if (options) { - param = parse_option_parameters(options, create_options, param); - if (param == NULL) { - error_setg(errp, "Invalid options for file format '%s'.", fmt); + if (qemu_opts_do_parse(opts, options, NULL) != 0) { + error_setg(errp, "Invalid options for file format '%s'", fmt); goto out; } } if (base_filename) { - if (set_option_parameter(param, BLOCK_OPT_BACKING_FILE, - base_filename)) { + if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) { error_setg(errp, "Backing file not supported for file format '%s'", fmt); goto out; @@ -5325,40 +5559,39 @@ void bdrv_img_create(const char *filename, const char *fmt, } if (base_fmt) { - if (set_option_parameter(param, BLOCK_OPT_BACKING_FMT, base_fmt)) { + if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) { error_setg(errp, "Backing file format not supported for file " "format '%s'", fmt); goto out; } } - backing_file = get_option_parameter(param, BLOCK_OPT_BACKING_FILE); - if (backing_file && backing_file->value.s) { - if (!strcmp(filename, backing_file->value.s)) { + backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE); + if (backing_file) { + if (!strcmp(filename, backing_file)) { error_setg(errp, "Error: Trying to create an image with the " "same filename as the backing file"); goto out; } } - backing_fmt = get_option_parameter(param, BLOCK_OPT_BACKING_FMT); - if (backing_fmt && backing_fmt->value.s) { - backing_drv = bdrv_find_format(backing_fmt->value.s); + backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); + if (backing_fmt) { + backing_drv = bdrv_find_format(backing_fmt); if (!backing_drv) { error_setg(errp, "Unknown backing file format '%s'", - backing_fmt->value.s); + backing_fmt); goto out; } } // The size for the image must always be specified, with one exception: // If we are using a backing file, we can obtain the size from there - size = get_option_parameter(param, BLOCK_OPT_SIZE); - if (size && size->value.n == -1) { - if (backing_file && backing_file->value.s) { + size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0); + if (size == -1) { + if (backing_file) { BlockDriverState *bs; uint64_t size; - char buf[32]; int back_flags; /* backing files always opened read-only */ @@ -5366,11 +5599,11 @@ void bdrv_img_create(const char *filename, const char *fmt, flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING); bs = NULL; - ret = bdrv_open(&bs, backing_file->value.s, NULL, NULL, back_flags, + ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags, backing_drv, &local_err); if (ret < 0) { error_setg_errno(errp, -ret, "Could not open '%s': %s", - backing_file->value.s, + backing_file, error_get_pretty(local_err)); error_free(local_err); local_err = NULL; @@ -5379,8 +5612,7 @@ void bdrv_img_create(const char *filename, const char *fmt, bdrv_get_geometry(bs, &size); size *= 512; - snprintf(buf, sizeof(buf), "%" PRId64, size); - set_option_parameter(param, BLOCK_OPT_SIZE, buf); + qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size); bdrv_unref(bs); } else { @@ -5391,16 +5623,18 @@ void bdrv_img_create(const char *filename, const char *fmt, if (!quiet) { printf("Formatting '%s', fmt=%s ", filename, fmt); - print_option_parameters(param); + qemu_opts_print(opts); puts(""); } - ret = bdrv_create(drv, filename, param, &local_err); + + ret = bdrv_create(drv, filename, opts, &local_err); + if (ret == -EFBIG) { /* This is generally a better message than whatever the driver would * deliver (especially because of the cluster_size_hint), since that * is most probably not much different from "image too large". */ const char *cluster_size_hint = ""; - if (get_option_parameter(create_options, BLOCK_OPT_CLUSTER_SIZE)) { + if (qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE, 0)) { cluster_size_hint = " (try using a larger cluster size)"; } error_setg(errp, "The image size is too large for file format '%s'" @@ -5410,9 +5644,8 @@ void bdrv_img_create(const char *filename, const char *fmt, } out: - free_option_parameters(create_options); - free_option_parameters(param); - + qemu_opts_del(opts); + qemu_opts_free(create_opts); if (local_err) { error_propagate(errp, local_err); } @@ -5420,8 +5653,66 @@ out: AioContext *bdrv_get_aio_context(BlockDriverState *bs) { - /* Currently BlockDriverState always uses the main loop AioContext */ - return qemu_get_aio_context(); + return bs->aio_context; +} + +void bdrv_detach_aio_context(BlockDriverState *bs) +{ + if (!bs->drv) { + return; + } + + if (bs->io_limits_enabled) { + throttle_detach_aio_context(&bs->throttle_state); + } + if (bs->drv->bdrv_detach_aio_context) { + bs->drv->bdrv_detach_aio_context(bs); + } + if (bs->file) { + bdrv_detach_aio_context(bs->file); + } + if (bs->backing_hd) { + bdrv_detach_aio_context(bs->backing_hd); + } + + bs->aio_context = NULL; +} + +void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + if (!bs->drv) { + return; + } + + bs->aio_context = new_context; + + if (bs->backing_hd) { + bdrv_attach_aio_context(bs->backing_hd, new_context); + } + if (bs->file) { + bdrv_attach_aio_context(bs->file, new_context); + } + if (bs->drv->bdrv_attach_aio_context) { + bs->drv->bdrv_attach_aio_context(bs, new_context); + } + if (bs->io_limits_enabled) { + throttle_attach_aio_context(&bs->throttle_state, new_context); + } +} + +void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context) +{ + bdrv_drain_all(); /* ensure there are no in-flight requests */ + + bdrv_detach_aio_context(bs); + + /* This function executes in the old AioContext so acquire the new one in + * case it runs in a different thread. + */ + aio_context_acquire(new_context); + bdrv_attach_aio_context(bs, new_context); + aio_context_release(new_context); } void bdrv_add_before_write_notifier(BlockDriverState *bs, @@ -5430,12 +5721,12 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs, notifier_with_return_list_add(&bs->before_write_notifiers, notifier); } -int bdrv_amend_options(BlockDriverState *bs, QEMUOptionParameter *options) +int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts) { - if (bs->drv->bdrv_amend_options == NULL) { + if (!bs->drv->bdrv_amend_options) { return -ENOTSUP; } - return bs->drv->bdrv_amend_options(bs, options); + return bs->drv->bdrv_amend_options(bs, opts); } /* This function will be called by the bdrv_recurse_is_first_non_filter method @@ -5494,3 +5785,58 @@ bool bdrv_is_first_non_filter(BlockDriverState *candidate) return false; } + +BlockDriverState *check_to_replace_node(const char *node_name, Error **errp) +{ + BlockDriverState *to_replace_bs = bdrv_find_node(node_name); + if (!to_replace_bs) { + error_setg(errp, "Node name '%s' not found", node_name); + return NULL; + } + + if (bdrv_op_is_blocked(to_replace_bs, BLOCK_OP_TYPE_REPLACE, errp)) { + return NULL; + } + + /* We don't want arbitrary node of the BDS chain to be replaced only the top + * most non filter in order to prevent data corruption. + * Another benefit is that this tests exclude backing files which are + * blocked by the backing blockers. + */ + if (!bdrv_is_first_non_filter(to_replace_bs)) { + error_setg(errp, "Only top most non filter can be replaced"); + return NULL; + } + + return to_replace_bs; +} + +void bdrv_io_plug(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_io_plug) { + drv->bdrv_io_plug(bs); + } else if (bs->file) { + bdrv_io_plug(bs->file); + } +} + +void bdrv_io_unplug(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_io_unplug) { + drv->bdrv_io_unplug(bs); + } else if (bs->file) { + bdrv_io_unplug(bs->file); + } +} + +void bdrv_flush_io_queue(BlockDriverState *bs) +{ + BlockDriver *drv = bs->drv; + if (drv && drv->bdrv_flush_io_queue) { + drv->bdrv_flush_io_queue(bs); + } else if (bs->file) { + bdrv_flush_io_queue(bs->file); + } +} |