summaryrefslogtreecommitdiff
path: root/block/io.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/io.c')
-rw-r--r--block/io.c1634
1 files changed, 780 insertions, 854 deletions
diff --git a/block/io.c b/block/io.c
index a7dbf85b19..420944d80d 100644
--- a/block/io.c
+++ b/block/io.c
@@ -27,118 +27,54 @@
#include "sysemu/block-backend.h"
#include "block/blockjob.h"
#include "block/block_int.h"
-#include "block/throttle-groups.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BdrvRequestFlags flags,
- BlockCompletionFunc *cb,
- void *opaque,
- bool is_write);
+static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
+ int64_t offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags);
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- ThrottleConfig *cfg)
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
{
- int i;
-
- throttle_group_config(bs, cfg);
-
- for (i = 0; i < 2; i++) {
- qemu_co_enter_next(&bs->throttled_reqs[i]);
- }
-}
-
-/* this function drain all the throttled IOs */
-static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
-{
- bool drained = false;
- bool enabled = bs->io_limits_enabled;
- int i;
-
- bs->io_limits_enabled = false;
+ BdrvChild *c;
- for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
- drained = true;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_begin) {
+ c->role->drained_begin(c);
}
}
-
- bs->io_limits_enabled = enabled;
-
- return drained;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- bs->io_limits_enabled = false;
- bdrv_start_throttled_reqs(bs);
- throttle_group_unregister_bs(bs);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
-{
- assert(!bs->io_limits_enabled);
- throttle_group_register_bs(bs, group);
- bs->io_limits_enabled = true;
}
-void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
+static void bdrv_parent_drained_end(BlockDriverState *bs)
{
- /* this bs is not part of any group */
- if (!bs->throttle_state) {
- return;
- }
+ BdrvChild *c;
- /* this bs is a part of the same group than the one we want */
- if (!g_strcmp0(throttle_group_get_name(bs), group)) {
- return;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_end) {
+ c->role->drained_end(c);
+ }
}
-
- /* need to change the group this bs belong to */
- bdrv_io_limits_disable(bs);
- bdrv_io_limits_enable(bs, group);
}
-void bdrv_setup_io_funcs(BlockDriver *bdrv)
+static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
- /* Block drivers without coroutine functions need emulation */
- if (!bdrv->bdrv_co_readv) {
- bdrv->bdrv_co_readv = bdrv_co_readv_em;
- bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
- /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
- * the block driver lacks aio we need to emulate that too.
- */
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- }
- }
+ dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
+ dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+ dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
+ src->opt_mem_alignment);
+ dst->min_mem_alignment = MAX(dst->min_mem_alignment,
+ src->min_mem_alignment);
+ dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
@@ -152,6 +88,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
return;
}
+ /* Default alignment based on whether driver has byte interface */
+ bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;
+
/* Take some limits from the children as a default */
if (bs->file) {
bdrv_refresh_limits(bs->file->bs, &local_err);
@@ -159,11 +98,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
error_propagate(errp, local_err);
return;
}
- bs->bl.opt_transfer_length = bs->file->bs->bl.opt_transfer_length;
- bs->bl.max_transfer_length = bs->file->bs->bl.max_transfer_length;
- bs->bl.min_mem_alignment = bs->file->bs->bl.min_mem_alignment;
- bs->bl.opt_mem_alignment = bs->file->bs->bl.opt_mem_alignment;
- bs->bl.max_iov = bs->file->bs->bl.max_iov;
+ bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
} else {
bs->bl.min_mem_alignment = 512;
bs->bl.opt_mem_alignment = getpagesize();
@@ -178,21 +113,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
error_propagate(errp, local_err);
return;
}
- bs->bl.opt_transfer_length =
- MAX(bs->bl.opt_transfer_length,
- bs->backing->bs->bl.opt_transfer_length);
- bs->bl.max_transfer_length =
- MIN_NON_ZERO(bs->bl.max_transfer_length,
- bs->backing->bs->bl.max_transfer_length);
- bs->bl.opt_mem_alignment =
- MAX(bs->bl.opt_mem_alignment,
- bs->backing->bs->bl.opt_mem_alignment);
- bs->bl.min_mem_alignment =
- MAX(bs->bl.min_mem_alignment,
- bs->backing->bs->bl.min_mem_alignment);
- bs->bl.max_iov =
- MIN(bs->bl.max_iov,
- bs->backing->bs->bl.max_iov);
+ bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
}
/* Then let the driver override it */
@@ -225,12 +146,6 @@ bool bdrv_requests_pending(BlockDriverState *bs)
if (!QLIST_EMPTY(&bs->tracked_requests)) {
return true;
}
- if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
- return true;
- }
- if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
- return true;
- }
QLIST_FOREACH(child, &bs->children, next) {
if (bdrv_requests_pending(child->bs)) {
@@ -260,18 +175,29 @@ typedef struct {
bool done;
} BdrvCoDrainData;
+static void bdrv_drain_poll(BlockDriverState *bs)
+{
+ bool busy = true;
+
+ while (busy) {
+ /* Keep iterating */
+ busy = bdrv_requests_pending(bs);
+ busy |= aio_poll(bdrv_get_aio_context(bs), busy);
+ }
+}
+
static void bdrv_co_drain_bh_cb(void *opaque)
{
BdrvCoDrainData *data = opaque;
Coroutine *co = data->co;
qemu_bh_delete(data->bh);
- bdrv_drain(data->bs);
+ bdrv_drain_poll(data->bs);
data->done = true;
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
}
-void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
{
BdrvCoDrainData data;
@@ -294,6 +220,34 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
assert(data.done);
}
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+ if (!bs->quiesce_counter++) {
+ aio_disable_external(bdrv_get_aio_context(bs));
+ bdrv_parent_drained_begin(bs);
+ }
+
+ bdrv_io_unplugged_begin(bs);
+ bdrv_drain_recurse(bs);
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(bs);
+ } else {
+ bdrv_drain_poll(bs);
+ }
+ bdrv_io_unplugged_end(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+ assert(bs->quiesce_counter > 0);
+ if (--bs->quiesce_counter > 0) {
+ return;
+ }
+
+ bdrv_parent_drained_end(bs);
+ aio_enable_external(bdrv_get_aio_context(bs));
+}
+
/*
* Wait for pending requests to complete on a single BlockDriverState subtree,
* and suspend block driver's internal I/O until next request arrives.
@@ -305,21 +259,17 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
* not depend on events in other AioContexts. In that case, use
* bdrv_drain_all() instead.
*/
-void bdrv_drain(BlockDriverState *bs)
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
- bool busy = true;
+ assert(qemu_in_coroutine());
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
+}
- bdrv_drain_recurse(bs);
- if (qemu_in_coroutine()) {
- bdrv_co_drain(bs);
- return;
- }
- while (busy) {
- /* Keep iterating */
- bdrv_flush_io_queue(bs);
- busy = bdrv_requests_pending(bs);
- busy |= aio_poll(bdrv_get_aio_context(bs), busy);
- }
+void bdrv_drain(BlockDriverState *bs)
+{
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
}
/*
@@ -332,16 +282,25 @@ void bdrv_drain_all(void)
{
/* Always run first iteration so any pending completion BHs run */
bool busy = true;
- BlockDriverState *bs = NULL;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
+ BlockJob *job = NULL;
GSList *aio_ctxs = NULL, *ctx;
- while ((bs = bdrv_next(bs))) {
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_pause(job);
+ aio_context_release(aio_context);
+ }
+
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_pause(bs->job);
- }
+ bdrv_parent_drained_begin(bs);
+ bdrv_io_unplugged_begin(bs);
bdrv_drain_recurse(bs);
aio_context_release(aio_context);
@@ -361,12 +320,10 @@ void bdrv_drain_all(void)
for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
AioContext *aio_context = ctx->data;
- bs = NULL;
aio_context_acquire(aio_context);
- while ((bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
if (aio_context == bdrv_get_aio_context(bs)) {
- bdrv_flush_io_queue(bs);
if (bdrv_requests_pending(bs)) {
busy = true;
aio_poll(aio_context, busy);
@@ -378,17 +335,24 @@ void bdrv_drain_all(void)
}
}
- bs = NULL;
- while ((bs = bdrv_next(bs))) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_resume(bs->job);
- }
+ bdrv_io_unplugged_end(bs);
+ bdrv_parent_drained_end(bs);
aio_context_release(aio_context);
}
g_slist_free(aio_ctxs);
+
+ job = NULL;
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_resume(job);
+ aio_context_release(aio_context);
+ }
}
/**
@@ -447,12 +411,12 @@ static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
}
/**
- * Round a region to cluster boundaries
+ * Round a region to cluster boundaries (sector-based)
*/
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
{
BlockDriverInfo bdi;
@@ -467,6 +431,26 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
}
}
+/**
+ * Round a region to cluster boundaries
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes)
+{
+ BlockDriverInfo bdi;
+
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_offset = offset;
+ *cluster_bytes = bytes;
+ } else {
+ int64_t c = bdi.cluster_size;
+ *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+ *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ }
+}
+
static int bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
@@ -474,7 +458,7 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
ret = bdrv_get_info(bs, &bdi);
if (ret < 0 || bdi.cluster_size == 0) {
- return bs->request_alignment;
+ return bs->bl.request_alignment;
} else {
return bdi.cluster_size;
}
@@ -568,7 +552,7 @@ static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
}
typedef struct RwCo {
- BlockDriverState *bs;
+ BdrvChild *child;
int64_t offset;
QEMUIOVector *qiov;
bool is_write;
@@ -581,26 +565,26 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
RwCo *rwco = opaque;
if (!rwco->is_write) {
- rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
+ rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
} else {
- rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
+ rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
}
}
/*
* Process a vectored synchronous request using coroutines
*/
-static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
+static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
QEMUIOVector *qiov, bool is_write,
BdrvRequestFlags flags)
{
Coroutine *co;
RwCo rwco = {
- .bs = bs,
+ .child = child,
.offset = offset,
.qiov = qiov,
.is_write = is_write,
@@ -608,25 +592,14 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
.flags = flags,
};
- /**
- * In sync call context, when the vcpu is blocked, this throttling timer
- * will not fire; so the I/O throttling function has to be disabled here
- * if it has been enabled.
- */
- if (bs->io_limits_enabled) {
- fprintf(stderr, "Disabling I/O throttling on '%s' due "
- "to synchronous I/O.\n", bdrv_get_device_name(bs));
- bdrv_io_limits_disable(bs);
- }
-
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
} else {
- AioContext *aio_context = bdrv_get_aio_context(bs);
+ AioContext *aio_context = bdrv_get_aio_context(child->bs);
- co = qemu_coroutine_create(bdrv_rw_co_entry);
- qemu_coroutine_enter(co, &rwco);
+ co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
+ qemu_coroutine_enter(co);
while (rwco.ret == NOT_DONE) {
aio_poll(aio_context, true);
}
@@ -637,7 +610,7 @@ static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
/*
* Process a synchronous request using coroutines
*/
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
+static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
int nb_sectors, bool is_write, BdrvRequestFlags flags)
{
QEMUIOVector qiov;
@@ -651,15 +624,15 @@ static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
}
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
+ return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
&qiov, is_write, flags);
}
/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
+int bdrv_read(BdrvChild *child, int64_t sector_num,
uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
+ return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
}
/* Return < 0 if error. Important errors are:
@@ -668,30 +641,39 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num,
-EINVAL Invalid sector number or nb_sectors
-EACCES Trying to write a read-only device
*/
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
+int bdrv_write(BdrvChild *child, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+ return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
}
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
- BDRV_REQ_ZERO_WRITE | flags);
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = NULL,
+ .iov_len = count,
+ };
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_prwv_co(child, offset, &qiov, true,
+ BDRV_REQ_ZERO_WRITE | flags);
}
/*
- * Completely zero out a block device with the help of bdrv_write_zeroes.
+ * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
* The operation is sped up by checking the block status and only writing
* zeroes to the device if they currently do not return zeroes. Optional
- * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
+ * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
+ * BDRV_REQ_FUA).
*
* Returns < 0 on error, 0 on success. For error codes see bdrv_write().
*/
-int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
{
int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+ BlockDriverState *bs = child->bs;
BlockDriverState *file;
int n;
@@ -715,7 +697,8 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
sector_num += n;
continue;
}
- ret = bdrv_write_zeroes(bs, sector_num, n, flags);
+ ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, flags);
if (ret < 0) {
error_report("error writing zeroes at sector %" PRId64 ": %s",
sector_num, strerror(-ret));
@@ -725,33 +708,39 @@ int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
}
}
-int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
+int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
+{
+ int ret;
+
+ ret = bdrv_prwv_co(child, offset, qiov, false, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return qiov->size;
+}
+
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
{
QEMUIOVector qiov;
struct iovec iov = {
.iov_base = (void *)buf,
.iov_len = bytes,
};
- int ret;
if (bytes < 0) {
return -EINVAL;
}
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
- if (ret < 0) {
- return ret;
- }
-
- return bytes;
+ return bdrv_preadv(child, offset, &qiov);
}
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
+int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
{
int ret;
- ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
+ ret = bdrv_prwv_co(child, offset, qiov, true, 0);
if (ret < 0) {
return ret;
}
@@ -759,8 +748,7 @@ int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
return qiov->size;
}
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int bytes)
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
{
QEMUIOVector qiov;
struct iovec iov = {
@@ -773,7 +761,7 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
}
qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_pwritev(bs, offset, &qiov);
+ return bdrv_pwritev(child, offset, &qiov);
}
/*
@@ -782,17 +770,17 @@ int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
*
* Returns 0 on success, -errno in error cases.
*/
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count)
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int count)
{
int ret;
- ret = bdrv_pwrite(bs, offset, buf, count);
+ ret = bdrv_pwrite(child, offset, buf, count);
if (ret < 0) {
return ret;
}
- ret = bdrv_flush(bs);
+ ret = bdrv_flush(child->bs);
if (ret < 0) {
return ret;
}
@@ -800,8 +788,117 @@ int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
return 0;
}
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine;
+ int ret;
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine);
+}
+
+static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+
+ assert(!(flags & ~BDRV_REQ_MASK));
+
+ if (drv->bdrv_co_preadv) {
+ return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
+ }
+
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_readv) {
+ return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield();
+ return co.ret;
+ }
+ }
+}
+
+static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+ int ret;
+
+ assert(!(flags & ~BDRV_REQ_MASK));
+
+ if (drv->bdrv_co_pwritev) {
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ goto emulate_flags;
+ }
+
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_writev_flags) {
+ ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ } else if (drv->bdrv_co_writev) {
+ assert(!bs->supported_write_flags);
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield();
+ ret = co.ret;
+ }
+ }
+
+emulate_flags:
+ if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+ ret = bdrv_co_flush(bs);
+ }
+
+ return ret;
+}
+
static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
{
/* Perform I/O through a temporary buffer so that users who scribble over
* their read buffer while the operation is in progress do not end up
@@ -813,21 +910,20 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
BlockDriver *drv = bs->drv;
struct iovec iov;
QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
+ int64_t cluster_offset;
+ unsigned int cluster_bytes;
size_t skip_bytes;
int ret;
/* Cover entire cluster so no additional backing file I/O is required when
* allocating cluster in the image file.
*/
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
+ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
+ trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+ cluster_offset, cluster_bytes);
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_len = cluster_bytes;
iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
if (bounce_buffer == NULL) {
ret = -ENOMEM;
@@ -836,22 +932,24 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
+ &bounce_qiov, 0);
if (ret < 0) {
goto err;
}
- if (drv->bdrv_co_write_zeroes &&
+ if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors, 0);
+ /* FIXME: Should we (perhaps conditionally) be setting
+ * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+ * that still correctly reads as zero? */
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
} else {
/* This does not change the data on the disk, it is not necessary
* to flush even in cache=writethrough mode.
*/
- ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
+ &bounce_qiov, 0);
}
if (ret < 0) {
@@ -862,9 +960,8 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
goto err;
}
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
+ skip_bytes = offset - cluster_offset;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
err:
qemu_vfree(bounce_buffer);
@@ -873,23 +970,31 @@ err:
/*
* Forwards an already correctly aligned request to the BlockDriver. This
- * handles copy on read and zeroing after EOF; any other features must be
- * implemented by the caller.
+ * handles copy on read, zeroing after EOF, and fragmentation of large
+ * reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
int64_t align, QEMUIOVector *qiov, int flags)
{
- BlockDriver *drv = bs->drv;
- int ret;
-
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t total_bytes, max_bytes;
+ int ret = 0;
+ uint64_t bytes_remaining = bytes;
+ int max_transfer;
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
+ align);
+
+ /* TODO: We would need a per-BDS .supported_read_flags and
+ * potential fallback support, if we ever implement any read flags
+ * to pass through to drivers. For now, there aren't any
+ * passthrough flags. */
+ assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
/* Handle Copy on Read and associated serialisation */
if (flags & BDRV_REQ_COPY_ON_READ) {
@@ -906,76 +1011,77 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
}
if (flags & BDRV_REQ_COPY_ON_READ) {
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ unsigned int nb_sectors = end_sector - start_sector;
int pnum;
- ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
+ ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
if (ret < 0) {
goto out;
}
if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
goto out;
}
}
- /* Forward the request to the BlockDriver */
- if (!bs->zero_beyond_eof) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
- } else {
- /* Read zeros after EOF */
- int64_t total_sectors, max_nb_sectors;
+ /* Forward the request to the BlockDriver, possibly fragmenting it */
+ total_bytes = bdrv_getlength(bs);
+ if (total_bytes < 0) {
+ ret = total_bytes;
+ goto out;
+ }
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- ret = total_sectors;
- goto out;
- }
+ max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+ if (bytes <= max_bytes && bytes <= max_transfer) {
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
+ goto out;
+ }
+
+ while (bytes_remaining) {
+ int num;
- max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
- align >> BDRV_SECTOR_BITS);
- if (nb_sectors < max_nb_sectors) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
- } else if (max_nb_sectors > 0) {
+ if (max_bytes) {
QEMUIOVector local_qiov;
+ num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
+ assert(num);
qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, 0,
- max_nb_sectors * BDRV_SECTOR_SIZE);
-
- ret = drv->bdrv_co_readv(bs, sector_num, max_nb_sectors,
- &local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
+ ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
+ num, &local_qiov, 0);
+ max_bytes -= num;
qemu_iovec_destroy(&local_qiov);
} else {
- ret = 0;
+ num = bytes_remaining;
+ ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
+ bytes_remaining);
}
-
- /* Reading beyond end of file is supposed to produce zeroes */
- if (ret == 0 && total_sectors < sector_num + nb_sectors) {
- uint64_t offset = MAX(0, total_sectors - sector_num);
- uint64_t bytes = (sector_num + nb_sectors - offset) *
- BDRV_SECTOR_SIZE;
- qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
+ if (ret < 0) {
+ goto out;
}
+ bytes_remaining -= num;
}
out:
- return ret;
+ return ret < 0 ? ret : 0;
}
/*
* Handle a read request in coroutine context
*/
-int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->bl.request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -996,11 +1102,6 @@ int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
flags |= BDRV_REQ_COPY_ON_READ;
}
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- throttle_group_co_io_limits_intercept(bs, bytes, false);
- }
-
/* Align read if necessary by padding qiov */
if (offset & (align - 1)) {
head_buf = qemu_blockalign(bs, align);
@@ -1041,7 +1142,7 @@ int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
return ret;
}
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1049,67 +1150,56 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
return -EINVAL;
}
- return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+ return bdrv_co_preadv(child, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
+ int nb_sectors, QEMUIOVector *qiov)
{
- trace_bdrv_co_readv(bs, sector_num, nb_sectors);
+ trace_bdrv_co_readv(child->bs, sector_num, nb_sectors);
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
+ return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, 0);
}
-int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_readv_no_serialising(bs, sector_num, nb_sectors);
+/* Maximum buffer for write zeroes fallback, in bytes */
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
- BDRV_REQ_NO_SERIALISING);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
- BDRV_REQ_COPY_ON_READ);
-}
-
-#define MAX_WRITE_ZEROES_BOUNCE_BUFFER 32768
-
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
struct iovec iov = {0};
int ret = 0;
+ bool need_flush = false;
+ int head = 0;
+ int tail = 0;
+
+ int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
+ int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
+ bs->bl.request_alignment);
- int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_write_zeroes,
- BDRV_REQUEST_MAX_SECTORS);
+ assert(alignment % bs->bl.request_alignment == 0);
+ head = offset % alignment;
+ tail = (offset + count) % alignment;
+ max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
+ assert(max_write_zeroes >= bs->bl.request_alignment);
- while (nb_sectors > 0 && !ret) {
- int num = nb_sectors;
+ while (count > 0 && !ret) {
+ int num = count;
/* Align request. Block drivers can expect the "bulk" of the request
- * to be aligned.
+ * to be aligned, and that unaligned requests do not cross cluster
+ * boundaries.
*/
- if (bs->bl.write_zeroes_alignment
- && num > bs->bl.write_zeroes_alignment) {
- if (sector_num % bs->bl.write_zeroes_alignment != 0) {
- /* Make a small request up to the first aligned sector. */
- num = bs->bl.write_zeroes_alignment;
- num -= sector_num % bs->bl.write_zeroes_alignment;
- } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
- /* Shorten the request to the last aligned sector. num cannot
- * underflow because num > bs->bl.write_zeroes_alignment.
- */
- num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
- }
+ if (head) {
+ /* Make a small request up to the first aligned sector. */
+ num = MIN(count, alignment - head);
+ head = 0;
+ } else if (tail && num > alignment) {
+ /* Shorten the request to the last aligned sector. */
+ num -= tail;
}
/* limit request size */
@@ -1119,64 +1209,90 @@ static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
ret = -ENOTSUP;
/* First try the efficient write zeroes operation */
- if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
+ if (drv->bdrv_co_pwrite_zeroes) {
+ ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
+ flags & bs->supported_zero_flags);
+ if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
+ need_flush = true;
+ }
+ } else {
+ assert(!bs->supported_zero_flags);
}
if (ret == -ENOTSUP) {
/* Fall back to bounce buffer if write zeroes is unsupported */
- int max_xfer_len = MIN_NON_ZERO(bs->bl.max_transfer_length,
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
MAX_WRITE_ZEROES_BOUNCE_BUFFER);
- num = MIN(num, max_xfer_len);
- iov.iov_len = num * BDRV_SECTOR_SIZE;
+ BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
+
+ if ((flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* No need for bdrv_driver_pwrite() to do a fallback
+ * flush on each chunk; use just one at the end */
+ write_flags &= ~BDRV_REQ_FUA;
+ need_flush = true;
+ }
+ num = MIN(num, max_transfer);
+ iov.iov_len = num;
if (iov.iov_base == NULL) {
- iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
+ iov.iov_base = qemu_try_blockalign(bs, num);
if (iov.iov_base == NULL) {
ret = -ENOMEM;
goto fail;
}
- memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
+ memset(iov.iov_base, 0, num);
}
qemu_iovec_init_external(&qiov, &iov, 1);
- ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
+ ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
/* Keep bounce buffer around if it is big enough for all
* all future requests.
*/
- if (num < max_xfer_len) {
+ if (num < max_transfer) {
qemu_vfree(iov.iov_base);
iov.iov_base = NULL;
}
}
- sector_num += num;
- nb_sectors -= num;
+ offset += num;
+ count -= num;
}
fail:
+ if (ret == 0 && need_flush) {
+ ret = bdrv_co_flush(bs);
+ }
qemu_vfree(iov.iov_base);
return ret;
}
/*
- * Forwards an already correctly aligned write request to the BlockDriver.
+ * Forwards an already correctly aligned write request to the BlockDriver,
+ * after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, int flags)
+ int64_t align, QEMUIOVector *qiov, int flags)
{
BlockDriver *drv = bs->drv;
bool waited;
int ret;
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ uint64_t bytes_remaining = bytes;
+ int max_transfer;
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ assert(!(flags & ~BDRV_REQ_MASK));
+ max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
+ align);
waited = wait_serialising_requests(req);
assert(!waited || !req->serialising);
@@ -1186,7 +1302,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
- !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
+ !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
qemu_iovec_is_zero(qiov)) {
flags |= BDRV_REQ_ZERO_WRITE;
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
@@ -1198,32 +1314,48 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
- } else if (drv->bdrv_co_writev_flags) {
+ ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
+ } else if (bytes <= max_transfer) {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
- flags);
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
} else {
- assert(drv->supported_write_flags == 0);
bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+ while (bytes_remaining) {
+ int num = MIN(bytes_remaining, max_transfer);
+ QEMUIOVector local_qiov;
+ int local_flags = flags;
+
+ assert(num);
+ if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* If FUA is going to be emulated by flush, we only
+ * need to flush on the last iteration */
+ local_flags &= ~BDRV_REQ_FUA;
+ }
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
- if (ret == 0 && (flags & BDRV_REQ_FUA) &&
- !(drv->supported_write_flags & BDRV_REQ_FUA))
- {
- ret = bdrv_co_flush(bs);
+ ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
+ num, &local_qiov, local_flags);
+ qemu_iovec_destroy(&local_qiov);
+ if (ret < 0) {
+ break;
+ }
+ bytes_remaining -= num;
+ }
}
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ ++bs->write_gen;
+ bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
bs->wr_highest_offset = offset + bytes;
}
if (ret >= 0) {
- bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
+ bs->total_sectors = MAX(bs->total_sectors, end_sector);
+ ret = 0;
}
return ret;
@@ -1238,7 +1370,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
uint8_t *buf = NULL;
QEMUIOVector local_qiov;
struct iovec iov;
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->bl.request_alignment;
unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
@@ -1271,7 +1403,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
memset(buf + head_padding_bytes, 0, zero_bytes);
ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
- &local_qiov,
+ align, &local_qiov,
flags & ~BDRV_REQ_ZERO_WRITE);
if (ret < 0) {
goto fail;
@@ -1284,7 +1416,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
if (bytes >= align) {
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
- ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes,
+ ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
NULL, flags);
if (ret < 0) {
goto fail;
@@ -1308,7 +1440,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
memset(buf, 0, bytes);
- ret = bdrv_aligned_pwritev(bs, req, offset, align,
+ ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
&local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
}
fail:
@@ -1320,13 +1452,13 @@ fail:
/*
* Handle a write request in coroutine context
*/
-int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
+ BlockDriverState *bs = child->bs;
BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
+ uint64_t align = bs->bl.request_alignment;
uint8_t *head_buf = NULL;
uint8_t *tail_buf = NULL;
QEMUIOVector local_qiov;
@@ -1346,11 +1478,6 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
return ret;
}
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- throttle_group_co_io_limits_intercept(bs, bytes, true);
- }
-
/*
* Align write if necessary by performing a read-modify-write cycle.
* Pad qiov with the read parts and be sure to have a tracked request not
@@ -1392,6 +1519,14 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
bytes += offset & (align - 1);
offset = offset & ~(align - 1);
+
+ /* We have read the tail already if the request is smaller
+ * than one aligned block.
+ */
+ if (bytes < align) {
+ qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
+ bytes = align;
+ }
}
if ((offset + bytes) & (align - 1)) {
@@ -1431,7 +1566,7 @@ int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
bytes = ROUND_UP(bytes, align);
}
- ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
+ ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
use_local_qiov ? &local_qiov : qiov,
flags);
@@ -1447,7 +1582,7 @@ out:
return ret;
}
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
+static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
@@ -1455,30 +1590,29 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
return -EINVAL;
}
- return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
+ return bdrv_co_pwritev(child, sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
}
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
+int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- trace_bdrv_co_writev(bs, sector_num, nb_sectors);
+ trace_bdrv_co_writev(child->bs, sector_num, nb_sectors);
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+ return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, 0);
}
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BdrvRequestFlags flags)
+int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
- trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
+ trace_bdrv_co_pwrite_zeroes(child->bs, offset, count, flags);
- if (!(bs->open_flags & BDRV_O_UNMAP)) {
+ if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
flags &= ~BDRV_REQ_MAY_UNMAP;
}
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE | flags);
+ return bdrv_co_pwritev(child, offset, count, NULL,
+ BDRV_REQ_ZERO_WRITE | flags);
}
typedef struct BdrvCoGetBlockStatusData {
@@ -1663,8 +1797,9 @@ int64_t bdrv_get_block_status_above(BlockDriverState *bs,
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
- co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry);
- qemu_coroutine_enter(co, &data);
+ co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
+ &data);
+ qemu_coroutine_enter(co);
while (!data.done) {
aio_poll(aio_context, true);
}
@@ -1766,273 +1901,134 @@ int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
}
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = size,
- };
+typedef struct BdrvVmstateCo {
+ BlockDriverState *bs;
+ QEMUIOVector *qiov;
+ int64_t pos;
+ bool is_read;
+ int ret;
+} BdrvVmstateCo;
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_writev_vmstate(bs, &qiov, pos);
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+static int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
{
BlockDriver *drv = bs->drv;
if (!drv) {
return -ENOMEDIUM;
- } else if (drv->bdrv_save_vmstate) {
- return drv->bdrv_save_vmstate(bs, qiov, pos);
+ } else if (drv->bdrv_load_vmstate) {
+ return is_read ? drv->bdrv_load_vmstate(bs, qiov, pos)
+ : drv->bdrv_save_vmstate(bs, qiov, pos);
} else if (bs->file) {
- return bdrv_writev_vmstate(bs->file->bs, qiov, pos);
+ return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
}
return -ENOTSUP;
}
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (drv->bdrv_load_vmstate)
- return drv->bdrv_load_vmstate(bs, buf, pos, size);
- if (bs->file)
- return bdrv_load_vmstate(bs->file->bs, buf, pos, size);
- return -ENOTSUP;
-}
-
-/**************************************************************/
-/* async I/Os */
-
-BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
- cb, opaque, false);
-}
-
-BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
{
- trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
- cb, opaque, true);
+ BdrvVmstateCo *co = opaque;
+ co->ret = bdrv_co_rw_vmstate(co->bs, co->qiov, co->pos, co->is_read);
}
-BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
+static inline int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+ bool is_read)
{
- trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
- BDRV_REQ_ZERO_WRITE | flags,
- cb, opaque, true);
-}
-
-
-typedef struct MultiwriteCB {
- int error;
- int num_requests;
- int num_callbacks;
- struct {
- BlockCompletionFunc *cb;
- void *opaque;
- QEMUIOVector *free_qiov;
- } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
- int i;
+ if (qemu_in_coroutine()) {
+ return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+ } else {
+ BdrvVmstateCo data = {
+ .bs = bs,
+ .qiov = qiov,
+ .pos = pos,
+ .is_read = is_read,
+ .ret = -EINPROGRESS,
+ };
+ Coroutine *co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &data);
- for (i = 0; i < mcb->num_callbacks; i++) {
- mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
- if (mcb->callbacks[i].free_qiov) {
- qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(bdrv_get_aio_context(bs), true);
}
- g_free(mcb->callbacks[i].free_qiov);
+ return data.ret;
}
}
-static void multiwrite_cb(void *opaque, int ret)
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+ int64_t pos, int size)
{
- MultiwriteCB *mcb = opaque;
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf,
+ .iov_len = size,
+ };
+ int ret;
- trace_multiwrite_cb(mcb, ret);
+ qemu_iovec_init_external(&qiov, &iov, 1);
- if (ret < 0 && !mcb->error) {
- mcb->error = ret;
+ ret = bdrv_writev_vmstate(bs, &qiov, pos);
+ if (ret < 0) {
+ return ret;
}
- mcb->num_requests--;
- if (mcb->num_requests == 0) {
- multiwrite_user_cb(mcb);
- g_free(mcb);
- }
+ return size;
}
-static int multiwrite_req_compare(const void *a, const void *b)
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- const BlockRequest *req1 = a, *req2 = b;
-
- /*
- * Note that we can't simply subtract req2->sector from req1->sector
- * here as that could overflow the return value.
- */
- if (req1->sector > req2->sector) {
- return 1;
- } else if (req1->sector < req2->sector) {
- return -1;
- } else {
- return 0;
- }
+ return bdrv_rw_vmstate(bs, qiov, pos, false);
}
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs, MultiwriteCB *mcb)
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+ int64_t pos, int size)
{
- int i, outidx;
-
- // Sort requests by start sector
- qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
- // Check if adjacent requests touch the same clusters. If so, combine them,
- // filling up gaps with zero sectors.
- outidx = 0;
- for (i = 1; i < num_reqs; i++) {
- int merge = 0;
- int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
- // Handle exactly sequential writes and overlapping writes.
- if (reqs[i].sector <= oldreq_last) {
- merge = 1;
- }
-
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 >
- bs->bl.max_iov) {
- merge = 0;
- }
-
- if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
- reqs[i].nb_sectors > bs->bl.max_transfer_length) {
- merge = 0;
- }
-
- if (merge) {
- size_t size;
- QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
- qemu_iovec_init(qiov,
- reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
- // Add the first request to the merged one. If the requests are
- // overlapping, drop the last sectors of the first request.
- size = (reqs[i].sector - reqs[outidx].sector) << 9;
- qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
- // We should need to add any zeros between the two requests
- assert (reqs[i].sector <= oldreq_last);
-
- // Add the second request
- qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
- // Add tail of first request, if necessary
- if (qiov->size < reqs[outidx].qiov->size) {
- qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
- reqs[outidx].qiov->size - qiov->size);
- }
-
- reqs[outidx].nb_sectors = qiov->size >> 9;
- reqs[outidx].qiov = qiov;
-
- mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
- } else {
- outidx++;
- reqs[outidx].sector = reqs[i].sector;
- reqs[outidx].nb_sectors = reqs[i].nb_sectors;
- reqs[outidx].qiov = reqs[i].qiov;
- }
- }
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = size,
+ };
+ int ret;
- if (bs->blk) {
- block_acct_merge_done(blk_get_stats(bs->blk), BLOCK_ACCT_WRITE,
- num_reqs - outidx - 1);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ ret = bdrv_readv_vmstate(bs, &qiov, pos);
+ if (ret < 0) {
+ return ret;
}
- return outidx + 1;
+ return size;
}
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
{
- MultiwriteCB *mcb;
- int i;
-
- /* don't submit writes if we don't have a medium */
- if (bs->drv == NULL) {
- for (i = 0; i < num_reqs; i++) {
- reqs[i].error = -ENOMEDIUM;
- }
- return -1;
- }
-
- if (num_reqs == 0) {
- return 0;
- }
-
- // Create MultiwriteCB structure
- mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
- mcb->num_requests = 0;
- mcb->num_callbacks = num_reqs;
+ return bdrv_rw_vmstate(bs, qiov, pos, true);
+}
- for (i = 0; i < num_reqs; i++) {
- mcb->callbacks[i].cb = reqs[i].cb;
- mcb->callbacks[i].opaque = reqs[i].opaque;
- }
+/**************************************************************/
+/* async I/Os */
- // Check for mergable requests
- num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
+BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
- trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
+ assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
+ return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
+ 0, cb, opaque, false);
+}
- /* Run the aio requests. */
- mcb->num_requests = num_reqs;
- for (i = 0; i < num_reqs; i++) {
- bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
- reqs[i].nb_sectors, reqs[i].flags,
- multiwrite_cb, mcb,
- true);
- }
+BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
+ QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
- return 0;
+ assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
+ return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS, qiov,
+ 0, cb, opaque, true);
}
void bdrv_aio_cancel(BlockAIOCB *acb)
@@ -2064,82 +2060,30 @@ void bdrv_aio_cancel_async(BlockAIOCB *acb)
/**************************************************************/
/* async block device emulation */
-typedef struct BlockAIOCBSync {
- BlockAIOCB common;
- QEMUBH *bh;
- int ret;
- /* vector translation state */
- QEMUIOVector *qiov;
- uint8_t *bounce;
- int is_write;
-} BlockAIOCBSync;
-
-static const AIOCBInfo bdrv_em_aiocb_info = {
- .aiocb_size = sizeof(BlockAIOCBSync),
-};
-
-static void bdrv_aio_bh_cb(void *opaque)
-{
- BlockAIOCBSync *acb = opaque;
-
- if (!acb->is_write && acb->ret >= 0) {
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
- }
- qemu_vfree(acb->bounce);
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- BlockAIOCBSync *acb;
-
- acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
- acb->is_write = is_write;
- acb->qiov = qiov;
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
-
- if (acb->bounce == NULL) {
- acb->ret = -ENOMEM;
- } else if (is_write) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
- } else {
- acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
- }
-
- qemu_bh_schedule(acb->bh);
-
- return &acb->common;
-}
-
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-}
+typedef struct BlockRequest {
+ union {
+ /* Used during read, write, trim */
+ struct {
+ int64_t offset;
+ int bytes;
+ int flags;
+ QEMUIOVector *qiov;
+ };
+ /* Used during ioctl */
+ struct {
+ int req;
+ void *buf;
+ };
+ };
+ BlockCompletionFunc *cb;
+ void *opaque;
+ int error;
+} BlockRequest;
typedef struct BlockAIOCBCoroutine {
BlockAIOCB common;
+ BdrvChild *child;
BlockRequest req;
bool is_write;
bool need_bh;
@@ -2183,42 +2127,40 @@ static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
static void coroutine_fn bdrv_co_do_rw(void *opaque)
{
BlockAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
if (!acb->is_write) {
- acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+ acb->req.error = bdrv_co_preadv(acb->child, acb->req.offset,
+ acb->req.qiov->size, acb->req.qiov, acb->req.flags);
} else {
- acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
+ acb->req.error = bdrv_co_pwritev(acb->child, acb->req.offset,
+ acb->req.qiov->size, acb->req.qiov, acb->req.flags);
}
bdrv_co_complete(acb);
}
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BdrvRequestFlags flags,
- BlockCompletionFunc *cb,
- void *opaque,
- bool is_write)
+static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
+ int64_t offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ bool is_write)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+ acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
+ acb->child = child;
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
+ acb->req.offset = offset;
acb->req.qiov = qiov;
acb->req.flags = flags;
acb->is_write = is_write;
- co = qemu_coroutine_create(bdrv_co_do_rw);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(bdrv_co_do_rw, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
@@ -2245,38 +2187,37 @@ BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
- co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
}
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
+static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque)
{
BlockAIOCBCoroutine *acb = opaque;
BlockDriverState *bs = acb->common.bs;
- acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
+ acb->req.error = bdrv_co_pdiscard(bs, acb->req.offset, acb->req.bytes);
bdrv_co_complete(acb);
}
-BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count,
+ BlockCompletionFunc *cb, void *opaque)
{
Coroutine *co;
BlockAIOCBCoroutine *acb;
- trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
+ trace_bdrv_aio_pdiscard(bs, offset, count, opaque);
acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
acb->need_bh = true;
acb->req.error = -EINPROGRESS;
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
- co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
- qemu_coroutine_enter(co, acb);
+ acb->req.offset = offset;
+ acb->req.bytes = count;
+ co = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
@@ -2314,62 +2255,15 @@ void qemu_aio_unref(void *p)
/**************************************************************/
/* Coroutine block device emulation */
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine;
+typedef struct FlushCo {
+ BlockDriverState *bs;
int ret;
-} CoroutineIOCompletion;
-
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
- CoroutineIOCompletion *co = opaque;
-
- co->ret = ret;
- qemu_coroutine_enter(co->coroutine, NULL);
-}
-
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *iov,
- bool is_write)
-{
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
- BlockAIOCB *acb;
-
- if (is_write) {
- acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- } else {
- acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- }
-
- trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
- if (!acb) {
- return -EIO;
- }
- qemu_coroutine_yield();
-
- return co.ret;
-}
-
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
-}
+} FlushCo;
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
-}
static void coroutine_fn bdrv_flush_co_entry(void *opaque)
{
- RwCo *rwco = opaque;
+ FlushCo *rwco = opaque;
rwco->ret = bdrv_co_flush(rwco->bs);
}
@@ -2386,6 +2280,15 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+ int current_gen = bs->write_gen;
+
+ /* Wait until any previous flushes are completed */
+ while (bs->active_flush_req != NULL) {
+ qemu_co_queue_wait(&bs->flush_queue);
+ }
+
+ bs->active_flush_req = &req;
+
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
ret = bs->drv->bdrv_co_flush(bs);
@@ -2406,6 +2309,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto flush_parent;
}
+ /* Check if we really need to flush anything */
+ if (bs->flushed_gen == current_gen) {
+ goto flush_parent;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
if (bs->drv->bdrv_co_flush_to_disk) {
ret = bs->drv->bdrv_co_flush_to_disk(bs);
@@ -2436,6 +2344,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
*/
ret = 0;
}
+
if (ret < 0) {
goto out;
}
@@ -2446,6 +2355,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
flush_parent:
ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
+ /* Notify any pending flushes that we have completed */
+ bs->flushed_gen = current_gen;
+ bs->active_flush_req = NULL;
+ /* Return value is ignored - it's ok if wait queue is empty */
+ qemu_co_queue_next(&bs->flush_queue);
+
tracked_request_end(&req);
return ret;
}
@@ -2453,51 +2368,52 @@ out:
int bdrv_flush(BlockDriverState *bs)
{
Coroutine *co;
- RwCo rwco = {
+ FlushCo flush_co = {
.bs = bs,
.ret = NOT_DONE,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
- bdrv_flush_co_entry(&rwco);
+ bdrv_flush_co_entry(&flush_co);
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
- co = qemu_coroutine_create(bdrv_flush_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
+ co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
+ qemu_coroutine_enter(co);
+ while (flush_co.ret == NOT_DONE) {
aio_poll(aio_context, true);
}
}
- return rwco.ret;
+ return flush_co.ret;
}
typedef struct DiscardCo {
BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
+ int64_t offset;
+ int count;
int ret;
} DiscardCo;
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
+static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
{
DiscardCo *rwco = opaque;
- rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
+ rwco->ret = bdrv_co_pdiscard(rwco->bs, rwco->offset, rwco->count);
}
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
+ int count)
{
BdrvTrackedRequest req;
- int max_discard, ret;
+ int max_pdiscard, ret;
+ int head, align;
if (!bs->drv) {
return -ENOMEDIUM;
}
- ret = bdrv_check_request(bs, sector_num, nb_sectors);
+ ret = bdrv_check_byte_request(bs, offset, count);
if (ret < 0) {
return ret;
} else if (bs->read_only) {
@@ -2510,44 +2426,49 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
return 0;
}
- if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
+ if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
return 0;
}
- tracked_request_begin(&req, bs, sector_num, nb_sectors,
- BDRV_TRACKED_DISCARD);
- bdrv_set_dirty(bs, sector_num, nb_sectors);
+ /* Discard is advisory, so ignore any unaligned head or tail */
+ align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
+ assert(align % bs->bl.request_alignment == 0);
+ head = offset % align;
+ if (head) {
+ head = MIN(count, align - head);
+ count -= head;
+ offset += head;
+ }
+ count = QEMU_ALIGN_DOWN(count, align);
+ if (!count) {
+ return 0;
+ }
- max_discard = MIN_NON_ZERO(bs->bl.max_discard, BDRV_REQUEST_MAX_SECTORS);
- while (nb_sectors > 0) {
- int ret;
- int num = nb_sectors;
-
- /* align request */
- if (bs->bl.discard_alignment &&
- num >= bs->bl.discard_alignment &&
- sector_num % bs->bl.discard_alignment) {
- if (num > bs->bl.discard_alignment) {
- num = bs->bl.discard_alignment;
- }
- num -= sector_num % bs->bl.discard_alignment;
- }
+ tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
- /* limit request size */
- if (num > max_discard) {
- num = max_discard;
- }
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+ if (ret < 0) {
+ goto out;
+ }
- if (bs->drv->bdrv_co_discard) {
- ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
+ max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
+ align);
+ assert(max_pdiscard);
+
+ while (count > 0) {
+ int ret;
+ int num = MIN(count, max_pdiscard);
+
+ if (bs->drv->bdrv_co_pdiscard) {
+ ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
} else {
BlockAIOCB *acb;
CoroutineIOCompletion co = {
.coroutine = qemu_coroutine_self(),
};
- acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
- bdrv_co_io_em_complete, &co);
+ acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
+ bdrv_co_io_em_complete, &co);
if (acb == NULL) {
ret = -EIO;
goto out;
@@ -2560,33 +2481,36 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
goto out;
}
- sector_num += num;
- nb_sectors -= num;
+ offset += num;
+ count -= num;
}
ret = 0;
out:
+ ++bs->write_gen;
+ bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
+ req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
return ret;
}
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
Coroutine *co;
DiscardCo rwco = {
.bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
+ .offset = offset,
+ .count = count,
.ret = NOT_DONE,
};
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
- bdrv_discard_co_entry(&rwco);
+ bdrv_pdiscard_co_entry(&rwco);
} else {
AioContext *aio_context = bdrv_get_aio_context(bs);
- co = qemu_coroutine_create(bdrv_discard_co_entry);
- qemu_coroutine_enter(co, &rwco);
+ co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
+ qemu_coroutine_enter(co);
while (rwco.ret == NOT_DONE) {
aio_poll(aio_context, true);
}
@@ -2595,19 +2519,6 @@ int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
return rwco.ret;
}
-typedef struct {
- CoroutineIOCompletion *co;
- QEMUBH *bh;
-} BdrvIoctlCompletionData;
-
-static void bdrv_ioctl_bh_cb(void *opaque)
-{
- BdrvIoctlCompletionData *data = opaque;
-
- bdrv_co_io_em_complete(data->co, -ENOTSUP);
- qemu_bh_delete(data->bh);
-}
-
static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
{
BlockDriver *drv = bs->drv;
@@ -2625,11 +2536,8 @@ static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
acb = drv->bdrv_aio_ioctl(bs, req, buf, bdrv_co_io_em_complete, &co);
if (!acb) {
- BdrvIoctlCompletionData *data = g_new(BdrvIoctlCompletionData, 1);
- data->bh = aio_bh_new(bdrv_get_aio_context(bs),
- bdrv_ioctl_bh_cb, data);
- data->co = &co;
- qemu_bh_schedule(data->bh);
+ co.ret = -ENOTSUP;
+ goto out;
}
qemu_coroutine_yield();
out:
@@ -2664,9 +2572,9 @@ int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
/* Fast-path if already in coroutine context */
bdrv_co_ioctl_entry(&data);
} else {
- Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry);
+ Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry, &data);
- qemu_coroutine_enter(co, &data);
+ qemu_coroutine_enter(co);
while (data.ret == -EINPROGRESS) {
aio_poll(bdrv_get_aio_context(bs), true);
}
@@ -2694,8 +2602,8 @@ BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
acb->req.error = -EINPROGRESS;
acb->req.req = req;
acb->req.buf = buf;
- co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry);
- qemu_coroutine_enter(co, acb);
+ co = qemu_coroutine_create(bdrv_co_aio_ioctl_entry, acb);
+ qemu_coroutine_enter(co);
bdrv_co_maybe_schedule_bh(acb);
return &acb->common;
@@ -2763,48 +2671,66 @@ void bdrv_add_before_write_notifier(BlockDriverState *bs,
void bdrv_io_plug(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_plug) {
- drv->bdrv_io_plug(bs);
- } else if (bs->file) {
- bdrv_io_plug(bs->file->bs);
+ BdrvChild *child;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_plug(child->bs);
+ }
+
+ if (bs->io_plugged++ == 0 && bs->io_plug_disabled == 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ }
}
}
void bdrv_io_unplug(BlockDriverState *bs)
{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_unplug) {
- drv->bdrv_io_unplug(bs);
- } else if (bs->file) {
- bdrv_io_unplug(bs->file->bs);
+ BdrvChild *child;
+
+ assert(bs->io_plugged);
+ if (--bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ }
}
-}
-void bdrv_flush_io_queue(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_flush_io_queue) {
- drv->bdrv_flush_io_queue(bs);
- } else if (bs->file) {
- bdrv_flush_io_queue(bs->file->bs);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplug(child->bs);
}
- bdrv_start_throttled_reqs(bs);
}
-void bdrv_drained_begin(BlockDriverState *bs)
+void bdrv_io_unplugged_begin(BlockDriverState *bs)
{
- if (!bs->quiesce_counter++) {
- aio_disable_external(bdrv_get_aio_context(bs));
+ BdrvChild *child;
+
+ if (bs->io_plug_disabled++ == 0 && bs->io_plugged > 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_unplug) {
+ drv->bdrv_io_unplug(bs);
+ }
+ }
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplugged_begin(child->bs);
}
- bdrv_drain(bs);
}
-void bdrv_drained_end(BlockDriverState *bs)
+void bdrv_io_unplugged_end(BlockDriverState *bs)
{
- assert(bs->quiesce_counter > 0);
- if (--bs->quiesce_counter > 0) {
- return;
+ BdrvChild *child;
+
+ assert(bs->io_plug_disabled);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_io_unplugged_end(child->bs);
+ }
+
+ if (--bs->io_plug_disabled == 0 && bs->io_plugged > 0) {
+ BlockDriver *drv = bs->drv;
+ if (drv && drv->bdrv_io_plug) {
+ drv->bdrv_io_plug(bs);
+ }
}
- aio_enable_external(bdrv_get_aio_context(bs));
}