diff options
author | Junfeng Dong <junfeng.dong@intel.com> | 2013-11-19 17:45:23 +0800 |
---|---|---|
committer | Junfeng Dong <junfeng.dong@intel.com> | 2013-11-19 17:45:23 +0800 |
commit | 340f06c9eaee097e626c251bf7a013350649c091 (patch) | |
tree | 107e5705050a12da68fc80a56ae37afd50a2cc94 /block/qcow2-cluster.c | |
parent | 42bf3037d458a330856a0be584200c1e41c3f417 (diff) | |
download | qemu-340f06c9eaee097e626c251bf7a013350649c091.tar.gz qemu-340f06c9eaee097e626c251bf7a013350649c091.tar.bz2 qemu-340f06c9eaee097e626c251bf7a013350649c091.zip |
Import upstream 1.6.0.upstream/1.6.0
Change-Id: Icf52b556470cac8677297f2ef14ded16684f7887
Signed-off-by: Junfeng Dong <junfeng.dong@intel.com>
Diffstat (limited to 'block/qcow2-cluster.c')
-rw-r--r-- | block/qcow2-cluster.c | 687 |
1 files changed, 483 insertions, 204 deletions
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c index e179211c5..cca76d4fc 100644 --- a/block/qcow2-cluster.c +++ b/block/qcow2-cluster.c @@ -25,16 +25,17 @@ #include <zlib.h> #include "qemu-common.h" -#include "block_int.h" +#include "block/block_int.h" #include "block/qcow2.h" #include "trace.h" -int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) +int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size, + bool exact_size) { BDRVQcowState *s = bs->opaque; - int new_l1_size, new_l1_size2, ret, i; + int new_l1_size2, ret, i; uint64_t *new_l1_table; - int64_t new_l1_table_offset; + int64_t new_l1_table_offset, new_l1_size; uint8_t data[12]; if (min_size <= s->l1_size) @@ -53,8 +54,13 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) } } + if (new_l1_size > INT_MAX) { + return -EFBIG; + } + #ifdef DEBUG_ALLOC2 - fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size); + fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n", + s->l1_size, new_l1_size); #endif new_l1_size2 = sizeof(uint64_t) * new_l1_size; @@ -92,14 +98,16 @@ int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size) goto fail; } g_free(s->l1_table); - qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t)); + qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t), + QCOW2_DISCARD_OTHER); s->l1_table_offset = new_l1_table_offset; s->l1_table = new_l1_table; s->l1_size = new_l1_size; return 0; fail: g_free(new_l1_table); - qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2); + qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2, + QCOW2_DISCARD_OTHER); return ret; } @@ -391,8 +399,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, int *num, uint64_t *cluster_offset) { BDRVQcowState *s = bs->opaque; - unsigned int l1_index, l2_index; - uint64_t l2_offset, *l2_table; + unsigned int l2_index; + uint64_t l1_index, l2_offset, *l2_table; int l1_bits, c; unsigned int index_in_cluster, nb_clusters; uint64_t nb_available, nb_needed; @@ -454,6 +462,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset, *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK; break; case QCOW2_CLUSTER_ZERO: + if (s->qcow_version < 3) { + return -EIO; + } c = count_contiguous_clusters(nb_clusters, s->cluster_size, &l2_table[l2_index], 0, QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO); @@ -504,8 +515,8 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, int *new_l2_index) { BDRVQcowState *s = bs->opaque; - unsigned int l1_index, l2_index; - uint64_t l2_offset; + unsigned int l2_index; + uint64_t l1_index, l2_offset; uint64_t *l2_table = NULL; int ret; @@ -519,6 +530,7 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, } } + assert(l1_index < s->l1_size); l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK; /* seek the l2 table of the given l2 offset */ @@ -538,7 +550,8 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset, /* Then decrease the refcount of the old table */ if (l2_offset) { - qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t)); + qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t), + QCOW2_DISCARD_OTHER); } } @@ -615,57 +628,67 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, return cluster_offset; } -int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r) { BDRVQcowState *s = bs->opaque; - int i, j = 0, l2_index, ret; - uint64_t *old_cluster, start_sect, *l2_table; - uint64_t cluster_offset = m->alloc_offset; - bool cow = false; - - trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + int ret; - if (m->nb_clusters == 0) + if (r->nb_sectors == 0) { return 0; + } - old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + qemu_co_mutex_unlock(&s->lock); + ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset, + r->offset / BDRV_SECTOR_SIZE, + r->offset / BDRV_SECTOR_SIZE + r->nb_sectors); + qemu_co_mutex_lock(&s->lock); - /* copy content of unmodified sectors */ - start_sect = (m->offset & ~(s->cluster_size - 1)) >> 9; - if (m->n_start) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, 0, m->n_start); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; - } - - if (m->nb_available & (s->cluster_sectors - 1)) { - cow = true; - qemu_co_mutex_unlock(&s->lock); - ret = copy_sectors(bs, start_sect, cluster_offset, m->nb_available, - align_offset(m->nb_available, s->cluster_sectors)); - qemu_co_mutex_lock(&s->lock); - if (ret < 0) - goto err; + if (ret < 0) { + return ret; } /* - * Update L2 table. - * * Before we update the L2 table to actually point to the new cluster, we * need to be sure that the refcounts have been increased and COW was * handled. */ - if (cow) { - qcow2_cache_depends_on_flush(s->l2_table_cache); + qcow2_cache_depends_on_flush(s->l2_table_cache); + + return 0; +} + +int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) +{ + BDRVQcowState *s = bs->opaque; + int i, j = 0, l2_index, ret; + uint64_t *old_cluster, *l2_table; + uint64_t cluster_offset = m->alloc_offset; + + trace_qcow2_cluster_link_l2(qemu_coroutine_self(), m->nb_clusters); + assert(m->nb_clusters > 0); + + old_cluster = g_malloc(m->nb_clusters * sizeof(uint64_t)); + + /* copy content of unmodified sectors */ + ret = perform_cow(bs, m, &m->cow_start); + if (ret < 0) { + goto err; } + ret = perform_cow(bs, m, &m->cow_end); + if (ret < 0) { + goto err; + } + + /* Update L2 table. */ + if (s->use_lazy_refcounts) { + qcow2_mark_dirty(bs); + } if (qcow2_need_accurate_refcounts(s)) { qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache); } + ret = get_cluster_table(bs, m->offset, &l2_table, &l2_index); if (ret < 0) { goto err; @@ -695,10 +718,14 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m) /* * If this was a COW, we need to decrease the refcount of the old cluster. * Also flush bs->file to get the right order for L2 and refcount update. + * + * Don't discard clusters that reach a refcount of 0 (e.g. compressed + * clusters), the next write will reuse them anyway. */ if (j != 0) { for (i = 0; i < j; i++) { - qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1); + qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1, + QCOW2_DISCARD_NEVER); } } @@ -743,56 +770,53 @@ out: } /* - * Allocates new clusters for the given guest_offset. - * - * At most *nb_clusters are allocated, and on return *nb_clusters is updated to - * contain the number of clusters that have been allocated and are contiguous - * in the image file. + * Check if there already is an AIO write request in flight which allocates + * the same cluster. In this case we need to wait until the previous + * request has completed and updated the L2 table accordingly. * - * If *host_offset is non-zero, it specifies the offset in the image file at - * which the new clusters must start. *nb_clusters can be 0 on return in this - * case if the cluster at host_offset is already in use. If *host_offset is - * zero, the clusters can be allocated anywhere in the image file. + * Returns: + * 0 if there was no dependency. *cur_bytes indicates the number of + * bytes from guest_offset that can be read before the next + * dependency must be processed (or the request is complete) * - * *host_offset is updated to contain the offset into the image file at which - * the first allocated cluster starts. - * - * Return 0 on success and -errno in error cases. -EAGAIN means that the - * function has been waiting for another request and the allocation must be - * restarted, but the whole request should not be failed. + * -EAGAIN if we had to wait for another request, previously gathered + * information on cluster allocation may be invalid now. The caller + * must start over anyway, so consider *cur_bytes undefined. */ -static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, - uint64_t *host_offset, unsigned int *nb_clusters) +static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *cur_bytes, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; QCowL2Meta *old_alloc; + uint64_t bytes = *cur_bytes; - trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, - *host_offset, *nb_clusters); - - /* - * Check if there already is an AIO write request in flight which allocates - * the same cluster. In this case we need to wait until the previous - * request has completed and updated the L2 table accordingly. - */ QLIST_FOREACH(old_alloc, &s->cluster_allocs, next_in_flight) { - uint64_t start = guest_offset >> s->cluster_bits; - uint64_t end = start + *nb_clusters; - uint64_t old_start = old_alloc->offset >> s->cluster_bits; - uint64_t old_end = old_start + old_alloc->nb_clusters; + uint64_t start = guest_offset; + uint64_t end = start + bytes; + uint64_t old_start = l2meta_cow_start(old_alloc); + uint64_t old_end = l2meta_cow_end(old_alloc); - if (end < old_start || start > old_end) { + if (end <= old_start || start >= old_end) { /* No intersection */ } else { if (start < old_start) { /* Stop at the start of a running allocation */ - *nb_clusters = old_start - start; + bytes = old_start - start; } else { - *nb_clusters = 0; + bytes = 0; } - if (*nb_clusters == 0) { + /* Stop if already an l2meta exists. After yielding, it wouldn't + * be valid any more, so we'd have to clean up the old L2Metas + * and deal with requests depending on them before starting to + * gather new ones. Not worth the trouble. */ + if (bytes == 0 && *m) { + *cur_bytes = 0; + return 0; + } + + if (bytes == 0) { /* Wait for the dependency to complete. We need to recheck * the free/allocated clusters when we continue. */ qemu_co_mutex_unlock(&s->lock); @@ -803,10 +827,144 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, } } - if (!*nb_clusters) { - abort(); + /* Make sure that existing clusters and new allocations are only used up to + * the next dependency if we shortened the request above */ + *cur_bytes = bytes; + + return 0; +} + +/* + * Checks how many already allocated clusters that don't require a copy on + * write there are at the given guest_offset (up to *bytes). If + * *host_offset is not zero, only physically contiguous clusters beginning at + * this host offset are counted. + * + * Note that guest_offset may not be cluster aligned. In this case, the + * returned *host_offset points to exact byte referenced by guest_offset and + * therefore isn't cluster aligned as well. + * + * Returns: + * 0: if no allocated clusters are available at the given offset. + * *bytes is normally unchanged. It is set to 0 if the cluster + * is allocated and doesn't need COW, but doesn't have the right + * physical offset. + * + * 1: if allocated clusters that don't require a COW are available at + * the requested offset. *bytes may have decreased and describes + * the length of the area that can be written to. + * + * -errno: in error cases + */ +static int handle_copied(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index; + uint64_t cluster_offset; + uint64_t *l2_table; + unsigned int nb_clusters; + unsigned int keep_clusters; + int ret, pret; + + trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset, + *bytes); + + assert(*host_offset == 0 || offset_into_cluster(s, guest_offset) + == offset_into_cluster(s, *host_offset)); + + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ + nb_clusters = + size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); + + l2_index = offset_to_l2_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + if (ret < 0) { + return ret; + } + + cluster_offset = be64_to_cpu(l2_table[l2_index]); + + /* Check how many clusters are already allocated and don't need COW */ + if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL + && (cluster_offset & QCOW_OFLAG_COPIED)) + { + /* If a specific host_offset is required, check it */ + bool offset_matches = + (cluster_offset & L2E_OFFSET_MASK) == *host_offset; + + if (*host_offset != 0 && !offset_matches) { + *bytes = 0; + ret = 0; + goto out; + } + + /* We keep all QCOW_OFLAG_COPIED clusters */ + keep_clusters = + count_contiguous_clusters(nb_clusters, s->cluster_size, + &l2_table[l2_index], 0, + QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); + assert(keep_clusters <= nb_clusters); + + *bytes = MIN(*bytes, + keep_clusters * s->cluster_size + - offset_into_cluster(s, guest_offset)); + + ret = 1; + } else { + ret = 0; + } + + /* Cleanup */ +out: + pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (pret < 0) { + return pret; } + /* Only return a host offset if we actually made progress. Otherwise we + * would make requirements for handle_alloc() that it can't fulfill */ + if (ret) { + *host_offset = (cluster_offset & L2E_OFFSET_MASK) + + offset_into_cluster(s, guest_offset); + } + + return ret; +} + +/* + * Allocates new clusters for the given guest_offset. + * + * At most *nb_clusters are allocated, and on return *nb_clusters is updated to + * contain the number of clusters that have been allocated and are contiguous + * in the image file. + * + * If *host_offset is non-zero, it specifies the offset in the image file at + * which the new clusters must start. *nb_clusters can be 0 on return in this + * case if the cluster at host_offset is already in use. If *host_offset is + * zero, the clusters can be allocated anywhere in the image file. + * + * *host_offset is updated to contain the offset into the image file at which + * the first allocated cluster starts. + * + * Return 0 on success and -errno in error cases. -EAGAIN means that the + * function has been waiting for another request and the allocation must be + * restarted, but the whole request should not be failed. + */ +static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, unsigned int *nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + + trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset, + *host_offset, *nb_clusters); + /* Allocate new clusters */ trace_qcow2_cluster_alloc_phys(qemu_coroutine_self()); if (*host_offset == 0) { @@ -828,6 +986,151 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, } /* + * Allocates new clusters for an area that either is yet unallocated or needs a + * copy on write. If *host_offset is non-zero, clusters are only allocated if + * the new allocation can match the specified host offset. + * + * Note that guest_offset may not be cluster aligned. In this case, the + * returned *host_offset points to exact byte referenced by guest_offset and + * therefore isn't cluster aligned as well. + * + * Returns: + * 0: if no clusters could be allocated. *bytes is set to 0, + * *host_offset is left unchanged. + * + * 1: if new clusters were allocated. *bytes may be decreased if the + * new allocation doesn't cover all of the requested area. + * *host_offset is updated to contain the host offset of the first + * newly allocated cluster. + * + * -errno: in error cases + */ +static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset, + uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m) +{ + BDRVQcowState *s = bs->opaque; + int l2_index; + uint64_t *l2_table; + uint64_t entry; + unsigned int nb_clusters; + int ret; + + uint64_t alloc_cluster_offset; + + trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset, + *bytes); + assert(*bytes > 0); + + /* + * Calculate the number of clusters to look for. We stop at L2 table + * boundaries to keep things simple. + */ + nb_clusters = + size_to_clusters(s, offset_into_cluster(s, guest_offset) + *bytes); + + l2_index = offset_to_l2_index(s, guest_offset); + nb_clusters = MIN(nb_clusters, s->l2_size - l2_index); + + /* Find L2 entry for the first involved cluster */ + ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index); + if (ret < 0) { + return ret; + } + + entry = be64_to_cpu(l2_table[l2_index]); + + /* For the moment, overwrite compressed clusters one by one */ + if (entry & QCOW_OFLAG_COMPRESSED) { + nb_clusters = 1; + } else { + nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index); + } + + /* This function is only called when there were no non-COW clusters, so if + * we can't find any unallocated or COW clusters either, something is + * wrong with our code. */ + assert(nb_clusters > 0); + + ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); + if (ret < 0) { + return ret; + } + + /* Allocate, if necessary at a given offset in the image file */ + alloc_cluster_offset = start_of_cluster(s, *host_offset); + ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset, + &nb_clusters); + if (ret < 0) { + goto fail; + } + + /* Can't extend contiguous allocation */ + if (nb_clusters == 0) { + *bytes = 0; + return 0; + } + + /* + * Save info needed for meta data update. + * + * requested_sectors: Number of sectors from the start of the first + * newly allocated cluster to the end of the (possibly shortened + * before) write request. + * + * avail_sectors: Number of sectors from the start of the first + * newly allocated to the end of the last newly allocated cluster. + * + * nb_sectors: The number of sectors from the start of the first + * newly allocated cluster to the end of the area that the write + * request actually writes to (excluding COW at the end) + */ + int requested_sectors = + (*bytes + offset_into_cluster(s, guest_offset)) + >> BDRV_SECTOR_BITS; + int avail_sectors = nb_clusters + << (s->cluster_bits - BDRV_SECTOR_BITS); + int alloc_n_start = offset_into_cluster(s, guest_offset) + >> BDRV_SECTOR_BITS; + int nb_sectors = MIN(requested_sectors, avail_sectors); + QCowL2Meta *old_m = *m; + + *m = g_malloc0(sizeof(**m)); + + **m = (QCowL2Meta) { + .next = old_m, + + .alloc_offset = alloc_cluster_offset, + .offset = start_of_cluster(s, guest_offset), + .nb_clusters = nb_clusters, + .nb_available = nb_sectors, + + .cow_start = { + .offset = 0, + .nb_sectors = alloc_n_start, + }, + .cow_end = { + .offset = nb_sectors * BDRV_SECTOR_SIZE, + .nb_sectors = avail_sectors - nb_sectors, + }, + }; + qemu_co_queue_init(&(*m)->dependent_requests); + QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight); + + *host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset); + *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE) + - offset_into_cluster(s, guest_offset)); + assert(*bytes != 0); + + return 1; + +fail: + if (*m && (*m)->nb_clusters > 0) { + QLIST_REMOVE(*m, next_in_flight); + } + return ret; +} + +/* * alloc_cluster_offset * * For a given offset on the virtual disk, find the cluster offset in qcow2 @@ -847,151 +1150,113 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset, * Return 0 on success and -errno in error cases */ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset, - int n_start, int n_end, int *num, QCowL2Meta *m) + int n_start, int n_end, int *num, uint64_t *host_offset, QCowL2Meta **m) { BDRVQcowState *s = bs->opaque; - int l2_index, ret, sectors; - uint64_t *l2_table; - unsigned int nb_clusters, keep_clusters; + uint64_t start, remaining; uint64_t cluster_offset; + uint64_t cur_bytes; + int ret; trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, n_start, n_end); - /* Find L2 entry for the first involved cluster */ -again: - ret = get_cluster_table(bs, offset, &l2_table, &l2_index); - if (ret < 0) { - return ret; - } + assert(n_start * BDRV_SECTOR_SIZE == offset_into_cluster(s, offset)); + offset = start_of_cluster(s, offset); - /* - * Calculate the number of clusters to look for. We stop at L2 table - * boundaries to keep things simple. - */ - nb_clusters = MIN(size_to_clusters(s, n_end << BDRV_SECTOR_BITS), - s->l2_size - l2_index); +again: + start = offset + (n_start << BDRV_SECTOR_BITS); + remaining = (n_end - n_start) << BDRV_SECTOR_BITS; + cluster_offset = 0; + *host_offset = 0; + cur_bytes = 0; + *m = NULL; - cluster_offset = be64_to_cpu(l2_table[l2_index]); + while (true) { - /* - * Check how many clusters are already allocated and don't need COW, and how - * many need a new allocation. - */ - if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL - && (cluster_offset & QCOW_OFLAG_COPIED)) - { - /* We keep all QCOW_OFLAG_COPIED clusters */ - keep_clusters = - count_contiguous_clusters(nb_clusters, s->cluster_size, - &l2_table[l2_index], 0, - QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO); - assert(keep_clusters <= nb_clusters); - nb_clusters -= keep_clusters; - } else { - keep_clusters = 0; - cluster_offset = 0; - } - - if (nb_clusters > 0) { - /* For the moment, overwrite compressed clusters one by one */ - uint64_t entry = be64_to_cpu(l2_table[l2_index + keep_clusters]); - if (entry & QCOW_OFLAG_COMPRESSED) { - nb_clusters = 1; - } else { - nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, - l2_index + keep_clusters); + if (!*host_offset) { + *host_offset = start_of_cluster(s, cluster_offset); } - } - cluster_offset &= L2E_OFFSET_MASK; + assert(remaining >= cur_bytes); - /* - * The L2 table isn't used any more after this. As long as the cache works - * synchronously, it's important to release it before calling - * do_alloc_cluster_offset, which may yield if we need to wait for another - * request to complete. If we still had the reference, we could use up the - * whole cache with sleeping requests. - */ - ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); - if (ret < 0) { - return ret; - } - - /* If there is something left to allocate, do that now */ - *m = (QCowL2Meta) { - .cluster_offset = cluster_offset, - .nb_clusters = 0, - }; - qemu_co_queue_init(&m->dependent_requests); + start += cur_bytes; + remaining -= cur_bytes; + cluster_offset += cur_bytes; - if (nb_clusters > 0) { - uint64_t alloc_offset; - uint64_t alloc_cluster_offset; - uint64_t keep_bytes = keep_clusters * s->cluster_size; - - /* Calculate start and size of allocation */ - alloc_offset = offset + keep_bytes; - - if (keep_clusters == 0) { - alloc_cluster_offset = 0; - } else { - alloc_cluster_offset = cluster_offset + keep_bytes; + if (remaining == 0) { + break; } - /* Allocate, if necessary at a given offset in the image file */ - ret = do_alloc_cluster_offset(bs, alloc_offset, &alloc_cluster_offset, - &nb_clusters); + cur_bytes = remaining; + + /* + * Now start gathering as many contiguous clusters as possible: + * + * 1. Check for overlaps with in-flight allocations + * + * a) Overlap not in the first cluster -> shorten this request and + * let the caller handle the rest in its next loop iteration. + * + * b) Real overlaps of two requests. Yield and restart the search + * for contiguous clusters (the situation could have changed + * while we were sleeping) + * + * c) TODO: Request starts in the same cluster as the in-flight + * allocation ends. Shorten the COW of the in-fight allocation, + * set cluster_offset to write to the same cluster and set up + * the right synchronisation between the in-flight request and + * the new one. + */ + ret = handle_dependencies(bs, start, &cur_bytes, m); if (ret == -EAGAIN) { + /* Currently handle_dependencies() doesn't yield if we already had + * an allocation. If it did, we would have to clean up the L2Meta + * structs before starting over. */ + assert(*m == NULL); goto again; } else if (ret < 0) { - goto fail; + return ret; + } else if (cur_bytes == 0) { + break; + } else { + /* handle_dependencies() may have decreased cur_bytes (shortened + * the allocations below) so that the next dependency is processed + * correctly during the next loop iteration. */ } - /* save info needed for meta data update */ - if (nb_clusters > 0) { - /* - * requested_sectors: Number of sectors from the start of the first - * newly allocated cluster to the end of the (possibly shortened - * before) write request. - * - * avail_sectors: Number of sectors from the start of the first - * newly allocated to the end of the last newly allocated cluster. - */ - int requested_sectors = n_end - keep_clusters * s->cluster_sectors; - int avail_sectors = nb_clusters - << (s->cluster_bits - BDRV_SECTOR_BITS); - - *m = (QCowL2Meta) { - .cluster_offset = keep_clusters == 0 ? - alloc_cluster_offset : cluster_offset, - .alloc_offset = alloc_cluster_offset, - .offset = alloc_offset, - .n_start = keep_clusters == 0 ? n_start : 0, - .nb_clusters = nb_clusters, - .nb_available = MIN(requested_sectors, avail_sectors), - }; - qemu_co_queue_init(&m->dependent_requests); - QLIST_INSERT_HEAD(&s->cluster_allocs, m, next_in_flight); + /* + * 2. Count contiguous COPIED clusters. + */ + ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m); + if (ret < 0) { + return ret; + } else if (ret) { + continue; + } else if (cur_bytes == 0) { + break; } - } - /* Some cleanup work */ - sectors = (keep_clusters + nb_clusters) << (s->cluster_bits - 9); - if (sectors > n_end) { - sectors = n_end; + /* + * 3. If the request still hasn't completed, allocate new clusters, + * considering any cluster_offset of steps 1c or 2. + */ + ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m); + if (ret < 0) { + return ret; + } else if (ret) { + continue; + } else { + assert(cur_bytes == 0); + break; + } } - assert(sectors > n_start); - *num = sectors - n_start; + *num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS); + assert(*num > 0); + assert(*host_offset != 0); return 0; - -fail: - if (m->nb_clusters > 0) { - QLIST_REMOVE(m, next_in_flight); - } - return ret; } static int decompress_buffer(uint8_t *out_buf, int out_buf_size, @@ -1081,7 +1346,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset, l2_table[l2_index + i] = cpu_to_be64(0); /* Then decrease the refcount */ - qcow2_free_any_clusters(bs, old_offset, 1); + qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table); @@ -1112,18 +1377,25 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset, nb_clusters = size_to_clusters(s, end_offset - offset); + s->cache_discards = true; + /* Each L2 table is handled by its own loop iteration */ while (nb_clusters > 0) { ret = discard_single_l2(bs, offset, nb_clusters); if (ret < 0) { - return ret; + goto fail; } nb_clusters -= ret; offset += (ret * s->cluster_size); } - return 0; + ret = 0; +fail: + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + return ret; } /* @@ -1157,7 +1429,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset, qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table); if (old_offset & QCOW_OFLAG_COMPRESSED) { l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO); - qcow2_free_any_clusters(bs, old_offset, 1); + qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST); } else { l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO); } @@ -1185,15 +1457,22 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors) /* Each L2 table is handled by its own loop iteration */ nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS); + s->cache_discards = true; + while (nb_clusters > 0) { ret = zero_single_l2(bs, offset, nb_clusters); if (ret < 0) { - return ret; + goto fail; } nb_clusters -= ret; offset += (ret * s->cluster_size); } - return 0; + ret = 0; +fail: + s->cache_discards = false; + qcow2_process_discards(bs, ret); + + return ret; } |