diff options
-rw-r--r-- | block/Makefile.objs | 1 | ||||
-rw-r--r-- | block/backup.c | 341 | ||||
-rw-r--r-- | include/block/block_int.h | 19 | ||||
-rw-r--r-- | trace-events | 8 |
4 files changed, 369 insertions, 0 deletions
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 2981654846..4cf9aa499f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -21,5 +21,6 @@ endif
 common-obj-y += stream.o
 common-obj-y += commit.o
 common-obj-y += mirror.o
+common-obj-y += backup.o
 
 $(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS)
diff --git a/block/backup.c b/block/backup.c
new file mode 100644
index 0000000000..16105d40b1
--- /dev/null
+++ b/block/backup.c
@@ -0,0 +1,341 @@
+/*
+ * QEMU backup
+ *
+ * Copyright (C) 2013 Proxmox Server Solutions
+ *
+ * Authors:
+ *  Dietmar Maurer (dietmar@proxmox.com)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "trace.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "qemu/ratelimit.h"
+
+#define BACKUP_CLUSTER_BITS 16
+#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
+#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct CowRequest {
+    int64_t start; /* first cluster covered by the request */
+    int64_t end; /* one past the last cluster covered */
+    QLIST_ENTRY(CowRequest) list;
+    CoQueue wait_queue; /* coroutines blocked on this request */
+} CowRequest;
+
+typedef struct BackupBlockJob {
+    BlockJob common;
+    BlockDriverState *target; /* device the copied clusters are written to */
+    RateLimit limit; /* paces the copy loop in backup_run() */
+    BlockdevOnError on_source_error; /* policy for source read errors */
+    BlockdevOnError on_target_error; /* policy for target write errors */
+    CoRwlock flush_rwlock; /* read-held by backup_do_cow(), drained on exit */
+    uint64_t sectors_read; /* sectors copied since backup_run() last reset it */
+    HBitmap *bitmap; /* indexed by cluster; set once a cluster is copied */
+    QLIST_HEAD(, CowRequest) inflight_reqs; /* requests being processed */
+} BackupBlockJob;
+
+/* Wait until no in-flight request overlaps the clusters [start, end) */
+static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
+                                                       int64_t start,
+                                                       int64_t end)
+{
+    CowRequest *req;
+    bool retry;
+
+    do {
+        retry = false;
+        QLIST_FOREACH(req, &job->inflight_reqs, list) {
+            if (end > req->start && start < req->end) {
+                qemu_co_queue_wait(&req->wait_queue);
+                retry = true; /* the list may have changed: rescan it */
+                break;
+            }
+        }
+    } while (retry);
+}
+
+/* Record req as an in-flight request covering clusters [start, end) */
+static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
+                              int64_t start, int64_t end)
+{
+    req->start = start;
+    req->end = end;
+    qemu_co_queue_init(&req->wait_queue);
+    QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
+}
+
+/* Drop a completed request and wake the coroutines queued behind it */
+static void cow_request_end(CowRequest *req)
+{
+    QLIST_REMOVE(req, list);
+    qemu_co_queue_restart_all(&req->wait_queue);
+}
+/* Copy the not-yet-copied clusters of a sector range; < 0 on I/O error */
+static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+                                      int64_t sector_num, int nb_sectors,
+                                      bool *error_is_read)
+{
+    BackupBlockJob *job = (BackupBlockJob *)bs->job;
+    CowRequest cow_request;
+    struct iovec iov;
+    QEMUIOVector bounce_qiov;
+    void *bounce_buffer = NULL;
+    int ret = 0;
+    int64_t start, end;
+    int n;
+
+    qemu_co_rwlock_rdlock(&job->flush_rwlock);
+    /* Widen the sector range to whole clusters: [start, end) */
+    start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
+    end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
+
+    trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
+
+    wait_for_overlapping_requests(job, start, end);
+    cow_request_begin(&cow_request, job, start, end);
+
+    for (; start < end; start++) {
+        if (hbitmap_get(job->bitmap, start)) {
+            trace_backup_do_cow_skip(job, start);
+            continue; /* already copied */
+        }
+
+        trace_backup_do_cow_process(job, start);
+        /* Clamp to the device length: the last cluster may be short */
+        n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+                job->common.len / BDRV_SECTOR_SIZE -
+                start * BACKUP_SECTORS_PER_CLUSTER);
+
+        if (!bounce_buffer) {
+            bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+        }
+        iov.iov_base = bounce_buffer;
+        iov.iov_len = n * BDRV_SECTOR_SIZE;
+        qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+        ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
+                            &bounce_qiov);
+        if (ret < 0) {
+            trace_backup_do_cow_read_fail(job, start, ret);
+            if (error_is_read) { /* callers may pass NULL */
+                *error_is_read = true;
+            }
+            goto out;
+        }
+
+        if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
+            ret = bdrv_co_write_zeroes(job->target,
+                                       start * BACKUP_SECTORS_PER_CLUSTER, n);
+        } else {
+            ret = bdrv_co_writev(job->target,
+                                 start * BACKUP_SECTORS_PER_CLUSTER, n,
+                                 &bounce_qiov);
+        }
+        if (ret < 0) {
+            trace_backup_do_cow_write_fail(job, start, ret);
+            if (error_is_read) {
+                *error_is_read = false;
+            }
+            goto out;
+        }
+
+        hbitmap_set(job->bitmap, start, 1);
+
+        /* Publish progress, guest I/O counts as progress too. Note that the
+         * offset field is an opaque progress value, it is not a disk offset.
+         */
+        job->sectors_read += n;
+        job->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+out:
+    if (bounce_buffer) {
+        qemu_vfree(bounce_buffer);
+    }
+
+    cow_request_end(&cow_request);
+
+    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);
+
+    qemu_co_rwlock_unlock(&job->flush_rwlock);
+
+    return ret;
+}
+/* Before-write notifier: run backup_do_cow() on the request being tracked */
+static int coroutine_fn backup_before_write_notify(
+        NotifierWithReturn *notifier,
+        void *opaque)
+{
+    BdrvTrackedRequest *req = opaque;
+
+    return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL);
+}
+/* Reject a negative speed; otherwise program the limiter in sector units */
+static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+/* iostatus_reset hook: reset the iostatus of the target device */
+static void backup_iostatus_reset(BlockJob *job)
+{
+    BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+
+    bdrv_iostatus_reset(s->target);
+}
+/* Block job type descriptor for backup jobs */
+static const BlockJobType backup_job_type = {
+    .instance_size = sizeof(BackupBlockJob),
+    .job_type = "backup",
+    .set_speed = backup_set_speed,
+    .iostatus_reset = backup_iostatus_reset,
+};
+/* Apply the source (read) or target (write) error policy to an I/O error */
+static BlockErrorAction backup_error_action(BackupBlockJob *job,
+                                            bool read, int error)
+{
+    if (read) {
+        return block_job_error_action(&job->common, job->common.bs,
+                                      job->on_source_error, true, error);
+    } else {
+        return block_job_error_action(&job->common, job->target,
+                                      job->on_target_error, false, error);
+    }
+}
+/* Job coroutine: copy every cluster in order, then clean up and complete */
+static void coroutine_fn backup_run(void *opaque)
+{
+    BackupBlockJob *job = opaque;
+    BlockDriverState *bs = job->common.bs;
+    BlockDriverState *target = job->target;
+    BlockdevOnError on_target_error = job->on_target_error;
+    NotifierWithReturn before_write = {
+        .notify = backup_before_write_notify,
+    };
+    int64_t start, end;
+    int ret = 0;
+
+    QLIST_INIT(&job->inflight_reqs);
+    qemu_co_rwlock_init(&job->flush_rwlock);
+
+    start = 0;
+    end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
+                       BACKUP_SECTORS_PER_CLUSTER);
+
+    job->bitmap = hbitmap_alloc(end, 0);
+
+    bdrv_set_enable_write_cache(target, true);
+    bdrv_set_on_error(target, on_target_error, on_target_error);
+    bdrv_iostatus_enable(target);
+
+    bdrv_add_before_write_notifier(bs, &before_write);
+
+    for (; start < end; start++) {
+        bool error_is_read; /* set by backup_do_cow() whenever it fails */
+
+        if (block_job_is_cancelled(&job->common)) {
+            break;
+        }
+
+        /* We need to yield so that qemu_aio_flush() returns.
+         * (Without this, the VM does not reboot.)
+         */
+        if (job->common.speed) {
+            uint64_t delay_ns = ratelimit_calculate_delay(
+                &job->limit, job->sectors_read);
+            job->sectors_read = 0;
+            block_job_sleep_ns(&job->common, rt_clock, delay_ns);
+        } else {
+            block_job_sleep_ns(&job->common, rt_clock, 0);
+        }
+
+        if (block_job_is_cancelled(&job->common)) {
+            break;
+        }
+
+        ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
+                            BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+        if (ret < 0) {
+            /* Depending on error action, fail now or retry cluster */
+            BlockErrorAction action =
+                backup_error_action(job, error_is_read, -ret);
+            if (action == BDRV_ACTION_REPORT) {
+                break;
+            } else {
+                start--; /* offsets the loop increment: retry this cluster */
+                continue;
+            }
+        }
+    }
+
+    notifier_with_return_remove(&before_write);
+
+    /* wait until pending backup_do_cow() calls have completed */
+    qemu_co_rwlock_wrlock(&job->flush_rwlock);
+    qemu_co_rwlock_unlock(&job->flush_rwlock);
+
+    hbitmap_free(job->bitmap);
+
+    bdrv_iostatus_disable(target);
+    bdrv_delete(target);
+
+    block_job_completed(&job->common, ret);
+}
+/* Validate the arguments, create the backup job and enter its coroutine */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed,
+                  BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb, void *opaque,
+                  Error **errp)
+{
+    int64_t len;
+
+    assert(bs);
+    assert(target);
+    assert(cb);
+
+    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+        return;
+    }
+
+    len = bdrv_getlength(bs);
+    if (len < 0) {
+        error_setg_errno(errp, -len, "unable to get length for '%s'",
+                         bdrv_get_device_name(bs));
+        return;
+    }
+
+    BackupBlockJob *job = block_job_create(&backup_job_type, bs, speed,
+                                           cb, opaque, errp);
+    if (!job) {
+        return;
+    }
+
+    job->on_source_error = on_source_error;
+    job->on_target_error = on_target_error;
+    job->target = target;
+    job->common.len = len;
+    job->common.co = qemu_coroutine_create(backup_run);
+    qemu_coroutine_enter(job->common.co, job);
+}
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2d009556b0..c6ac871e21 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -399,4 +399,23 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
 
+/*
+ * backup_start:
+ * @bs: Block device to operate on.
+ * @target: Block device to write to.
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_source_error: The action to take upon error reading from the source.
+ * @on_target_error: The action to take upon error writing to the target.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Set when the arguments are rejected or the job cannot be created.
+ * Start a backup operation on @bs. Clusters in @bs are written to @target
+ * until every cluster is copied, an error is reported, or it is cancelled.
+ */
+void backup_start(BlockDriverState *bs, BlockDriverState *target,
+                  int64_t speed, BlockdevOnError on_source_error,
+                  BlockdevOnError on_target_error,
+                  BlockDriverCompletionFunc *cb, void *opaque,
+                  Error **errp);
+
 #endif /* BLOCK_INT_H */
diff --git a/trace-events b/trace-events
index c5f1ccb96d..0acce7b350 100644
--- a/trace-events
+++ b/trace-events
@@ -92,6 +92,14 @@ mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_
 mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
 mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
 
+# block/backup.c
+backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
+backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d"
+backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
 qmp_block_job_pause(void *job) "job %p"
|