summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block.c67
-rw-r--r--block.h8
-rw-r--r--block/iscsi.c139
-rw-r--r--block/qcow2.c194
-rw-r--r--block/qcow2.h9
-rw-r--r--block/qed.c125
-rw-r--r--block/qed.h7
-rw-r--r--block/sheepdog.c3
-rw-r--r--block/vpc.c282
-rw-r--r--block_int.h8
-rw-r--r--compiler.h4
-rw-r--r--cutils.c35
-rw-r--r--hw/ide/ahci.c7
-rw-r--r--qemu-common.h2
-rw-r--r--qemu-config.c27
-rw-r--r--qemu-doc.texi54
-rw-r--r--qemu-img.c46
-rw-r--r--qemu-io.c77
-rw-r--r--qemu-options.hx16
-rw-r--r--trace-events3
-rw-r--r--vl.c8
21 files changed, 881 insertions, 240 deletions
diff --git a/block.c b/block.c
index 3621d11de0..ae297bb847 100644
--- a/block.c
+++ b/block.c
@@ -50,6 +50,7 @@
typedef enum {
BDRV_REQ_COPY_ON_READ = 0x1,
+ BDRV_REQ_ZERO_WRITE = 0x2,
} BdrvRequestFlags;
static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load);
@@ -69,7 +70,8 @@ static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
BdrvRequestFlags flags);
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags);
static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov,
@@ -1300,7 +1302,7 @@ static void coroutine_fn bdrv_rw_co_entry(void *opaque)
rwco->nb_sectors, rwco->qiov, 0);
} else {
rwco->ret = bdrv_co_do_writev(rwco->bs, rwco->sector_num,
- rwco->nb_sectors, rwco->qiov);
+ rwco->nb_sectors, rwco->qiov, 0);
}
}
@@ -1515,6 +1517,7 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
*/
void *bounce_buffer;
+ BlockDriver *drv = bs->drv;
struct iovec iov;
QEMUIOVector bounce_qiov;
int64_t cluster_sector_num;
@@ -1535,14 +1538,21 @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
iov.iov_base = bounce_buffer = qemu_blockalign(bs, iov.iov_len);
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bs->drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
+ ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
+ &bounce_qiov);
if (ret < 0) {
goto err;
}
- ret = bs->drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
+ if (drv->bdrv_co_write_zeroes &&
+ buffer_is_zero(bounce_buffer, iov.iov_len)) {
+ ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
+ cluster_nb_sectors);
+ } else {
+ ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
&bounce_qiov);
+ }
+
if (ret < 0) {
/* It might be okay to ignore write errors for guest requests. If this
* is a deliberate copy-on-read then we don't want to ignore the error.
@@ -1639,11 +1649,37 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
BDRV_REQ_COPY_ON_READ);
}
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ BlockDriver *drv = bs->drv;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ int ret;
+
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_write_zeroes) {
+ return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ }
+
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE;
+ iov.iov_base = qemu_blockalign(bs, iov.iov_len);
+ memset(iov.iov_base, 0, iov.iov_len);
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, &qiov);
+
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
/*
* Handle a write request in coroutine context
*/
static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
@@ -1670,7 +1706,11 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
tracked_request_begin(&req, bs, sector_num, nb_sectors, true);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ if (flags & BDRV_REQ_ZERO_WRITE) {
+ ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors);
+ } else {
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ }
if (bs->dirty_bitmap) {
set_dirty_bitmap(bs, sector_num, nb_sectors, 1);
@@ -1690,7 +1730,16 @@ int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
{
trace_bdrv_co_writev(bs, sector_num, nb_sectors);
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov);
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
+}
+
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors)
+{
+ trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+
+ return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
+ BDRV_REQ_ZERO_WRITE);
}
/**
@@ -3192,7 +3241,7 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque)
acb->req.nb_sectors, acb->req.qiov, 0);
} else {
acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov);
+ acb->req.nb_sectors, acb->req.qiov, 0);
}
acb->bh = qemu_bh_new(bdrv_co_em_bh, acb);
diff --git a/block.h b/block.h
index cae289b2fb..60ea7308c6 100644
--- a/block.h
+++ b/block.h
@@ -146,6 +146,14 @@ int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov);
+/*
+ * Efficiently zero a region of the disk image. Note that this is a regular
+ * I/O request like read or write and should have a reasonable size. This
+ * function is not suitable for zeroing the entire image in a single request
+ * because it may allocate memory for the entire region.
+ */
+int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors);
int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, int *pnum);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
diff --git a/block/iscsi.c b/block/iscsi.c
index 938c568071..bd3ca11b2e 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -455,6 +455,109 @@ iscsi_connect_cb(struct iscsi_context *iscsi, int status, void *command_data,
}
}
+static int parse_chap(struct iscsi_context *iscsi, const char *target)
+{
+ QemuOptsList *list;
+ QemuOpts *opts;
+ const char *user = NULL;
+ const char *password = NULL;
+
+ list = qemu_find_opts("iscsi");
+ if (!list) {
+ return 0;
+ }
+
+ opts = qemu_opts_find(list, target);
+ if (opts == NULL) {
+ opts = QTAILQ_FIRST(&list->head);
+ if (!opts) {
+ return 0;
+ }
+ }
+
+ user = qemu_opt_get(opts, "user");
+ if (!user) {
+ return 0;
+ }
+
+ password = qemu_opt_get(opts, "password");
+ if (!password) {
+ error_report("CHAP username specified but no password was given");
+ return -1;
+ }
+
+ if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
+ error_report("Failed to set initiator username and password");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void parse_header_digest(struct iscsi_context *iscsi, const char *target)
+{
+ QemuOptsList *list;
+ QemuOpts *opts;
+ const char *digest = NULL;
+
+ list = qemu_find_opts("iscsi");
+ if (!list) {
+ return;
+ }
+
+ opts = qemu_opts_find(list, target);
+ if (opts == NULL) {
+ opts = QTAILQ_FIRST(&list->head);
+ if (!opts) {
+ return;
+ }
+ }
+
+ digest = qemu_opt_get(opts, "header-digest");
+ if (!digest) {
+ return;
+ }
+
+ if (!strcmp(digest, "CRC32C")) {
+ iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
+ } else if (!strcmp(digest, "NONE")) {
+ iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
+ } else if (!strcmp(digest, "CRC32C-NONE")) {
+ iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
+ } else if (!strcmp(digest, "NONE-CRC32C")) {
+ iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+ } else {
+ error_report("Invalid header-digest setting : %s", digest);
+ }
+}
+
+static char *parse_initiator_name(const char *target)
+{
+ QemuOptsList *list;
+ QemuOpts *opts;
+ const char *name = NULL;
+
+ list = qemu_find_opts("iscsi");
+ if (!list) {
+ return g_strdup("iqn.2008-11.org.linux-kvm");
+ }
+
+ opts = qemu_opts_find(list, target);
+ if (opts == NULL) {
+ opts = QTAILQ_FIRST(&list->head);
+ if (!opts) {
+ return g_strdup("iqn.2008-11.org.linux-kvm");
+ }
+ }
+
+ name = qemu_opt_get(opts, "initiator-name");
+ if (!name) {
+ return g_strdup("iqn.2008-11.org.linux-kvm");
+ }
+
+ return g_strdup(name);
+}
+
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
@@ -465,6 +568,7 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
struct iscsi_context *iscsi = NULL;
struct iscsi_url *iscsi_url = NULL;
struct IscsiTask task;
+ char *initiator_name = NULL;
int ret;
if ((BDRV_SECTOR_SIZE % 512) != 0) {
@@ -474,16 +578,6 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
return -EINVAL;
}
- memset(iscsilun, 0, sizeof(IscsiLun));
-
- /* Should really append the KVM name after the ':' here */
- iscsi = iscsi_create_context("iqn.2008-11.org.linux-kvm:");
- if (iscsi == NULL) {
- error_report("iSCSI: Failed to create iSCSI context.");
- ret = -ENOMEM;
- goto failed;
- }
-
iscsi_url = iscsi_parse_full_url(iscsi, filename);
if (iscsi_url == NULL) {
error_report("Failed to parse URL : %s %s", filename,
@@ -492,6 +586,17 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
goto failed;
}
+ memset(iscsilun, 0, sizeof(IscsiLun));
+
+ initiator_name = parse_initiator_name(iscsi_url->target);
+
+ iscsi = iscsi_create_context(initiator_name);
+ if (iscsi == NULL) {
+ error_report("iSCSI: Failed to create iSCSI context.");
+ ret = -ENOMEM;
+ goto failed;
+ }
+
if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
error_report("iSCSI: Failed to set target name.");
ret = -EINVAL;
@@ -507,6 +612,14 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
goto failed;
}
}
+
+ /* check if we got CHAP username/password via the options */
+ if (parse_chap(iscsi, iscsi_url->target) != 0) {
+ error_report("iSCSI: Failed to set CHAP user/password");
+ ret = -EINVAL;
+ goto failed;
+ }
+
if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
error_report("iSCSI: Failed to set session type to normal.");
ret = -EINVAL;
@@ -515,6 +628,9 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
+ /* check if we got HEADER_DIGEST via the options */
+ parse_header_digest(iscsi, iscsi_url->target);
+
task.iscsilun = iscsilun;
task.status = 0;
task.complete = 0;
@@ -548,6 +664,9 @@ static int iscsi_open(BlockDriverState *bs, const char *filename, int flags)
return 0;
failed:
+ if (initiator_name != NULL) {
+ g_free(initiator_name);
+ }
if (iscsi_url != NULL) {
iscsi_destroy_url(iscsi_url);
}
diff --git a/block/qcow2.c b/block/qcow2.c
index aa32e8d01a..3692b4523b 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -77,8 +77,10 @@ static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
uint64_t end_offset)
{
+ BDRVQcowState *s = bs->opaque;
QCowExtension ext;
uint64_t offset;
+ int ret;
#ifdef DEBUG_EXT
printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
@@ -129,8 +131,22 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
break;
default:
- /* unknown magic -- just skip it */
- offset = ((offset + ext.len + 7) & ~7);
+ /* unknown magic - save it in case we need to rewrite the header */
+ {
+ Qcow2UnknownHeaderExtension *uext;
+
+ uext = g_malloc0(sizeof(*uext) + ext.len);
+ uext->magic = ext.magic;
+ uext->len = ext.len;
+ QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
+
+ ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
+ if (ret < 0) {
+ return ret;
+ }
+
+ offset = ((offset + ext.len + 7) & ~7);
+ }
break;
}
}
@@ -138,6 +154,16 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
return 0;
}
+static void cleanup_unknown_header_ext(BlockDriverState *bs)
+{
+ BDRVQcowState *s = bs->opaque;
+ Qcow2UnknownHeaderExtension *uext, *next;
+
+ QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
+ QLIST_REMOVE(uext, next);
+ g_free(uext);
+ }
+}
static int qcow2_open(BlockDriverState *bs, int flags)
{
@@ -291,6 +317,7 @@ static int qcow2_open(BlockDriverState *bs, int flags)
return ret;
fail:
+ cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
qcow2_refcount_close(bs);
g_free(s->l1_table);
@@ -632,6 +659,7 @@ static void qcow2_close(BlockDriverState *bs)
qcow2_cache_destroy(bs, s->l2_table_cache);
qcow2_cache_destroy(bs, s->refcount_block_cache);
+ cleanup_unknown_header_ext(bs);
g_free(s->cluster_cache);
qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs);
@@ -669,103 +697,149 @@ static void qcow2_invalidate_cache(BlockDriverState *bs)
}
}
+static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
+ size_t len, size_t buflen)
+{
+ QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
+ size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
+
+ if (buflen < ext_len) {
+ return -ENOSPC;
+ }
+
+ *ext_backing_fmt = (QCowExtension) {
+ .magic = cpu_to_be32(magic),
+ .len = cpu_to_be32(len),
+ };
+ memcpy(buf + sizeof(QCowExtension), s, len);
+
+ return ext_len;
+}
+
/*
- * Updates the variable length parts of the qcow2 header, i.e. the backing file
- * name and all extensions. qcow2 was not designed to allow such changes, so if
- * we run out of space (we can only use the first cluster) this function may
- * fail.
+ * Updates the qcow2 header, including the variable length parts of it, i.e.
+ * the backing file name and all extensions. qcow2 was not designed to allow
+ * such changes, so if we run out of space (we can only use the first cluster)
+ * this function may fail.
*
* Returns 0 on success, -errno in error cases.
*/
-static int qcow2_update_ext_header(BlockDriverState *bs,
- const char *backing_file, const char *backing_fmt)
+int qcow2_update_header(BlockDriverState *bs)
{
- size_t backing_file_len = 0;
- size_t backing_fmt_len = 0;
BDRVQcowState *s = bs->opaque;
- QCowExtension ext_backing_fmt = {0, 0};
+ QCowHeader *header;
+ char *buf;
+ size_t buflen = s->cluster_size;
int ret;
+ uint64_t total_size;
+ uint32_t refcount_table_clusters;
+ Qcow2UnknownHeaderExtension *uext;
- /* Backing file format doesn't make sense without a backing file */
- if (backing_fmt && !backing_file) {
- return -EINVAL;
- }
+ buf = qemu_blockalign(bs, buflen);
+ memset(buf, 0, s->cluster_size);
- /* Prepare the backing file format extension if needed */
- if (backing_fmt) {
- ext_backing_fmt.len = cpu_to_be32(strlen(backing_fmt));
- ext_backing_fmt.magic = cpu_to_be32(QCOW2_EXT_MAGIC_BACKING_FORMAT);
- backing_fmt_len = ((sizeof(ext_backing_fmt)
- + strlen(backing_fmt) + 7) & ~7);
- }
+ /* Header structure */
+ header = (QCowHeader*) buf;
- /* Check if we can fit the new header into the first cluster */
- if (backing_file) {
- backing_file_len = strlen(backing_file);
- }
-
- size_t header_size = sizeof(QCowHeader) + backing_file_len
- + backing_fmt_len;
-
- if (header_size > s->cluster_size) {
- return -ENOSPC;
+ if (buflen < sizeof(*header)) {
+ ret = -ENOSPC;
+ goto fail;
}
- /* Rewrite backing file name and qcow2 extensions */
- size_t ext_size = header_size - sizeof(QCowHeader);
- uint8_t buf[ext_size];
- size_t offset = 0;
- size_t backing_file_offset = 0;
+ total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
+ refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);
+
+ *header = (QCowHeader) {
+ .magic = cpu_to_be32(QCOW_MAGIC),
+ .version = cpu_to_be32(QCOW_VERSION),
+ .backing_file_offset = 0,
+ .backing_file_size = 0,
+ .cluster_bits = cpu_to_be32(s->cluster_bits),
+ .size = cpu_to_be64(total_size),
+ .crypt_method = cpu_to_be32(s->crypt_method_header),
+ .l1_size = cpu_to_be32(s->l1_size),
+ .l1_table_offset = cpu_to_be64(s->l1_table_offset),
+ .refcount_table_offset = cpu_to_be64(s->refcount_table_offset),
+ .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
+ .nb_snapshots = cpu_to_be32(s->nb_snapshots),
+ .snapshots_offset = cpu_to_be64(s->snapshots_offset),
+ };
- if (backing_file) {
- if (backing_fmt) {
- int padding = backing_fmt_len -
- (sizeof(ext_backing_fmt) + strlen(backing_fmt));
+ buf += sizeof(*header);
+ buflen -= sizeof(*header);
- memcpy(buf + offset, &ext_backing_fmt, sizeof(ext_backing_fmt));
- offset += sizeof(ext_backing_fmt);
+ /* Backing file format header extension */
+ if (*bs->backing_format) {
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
+ bs->backing_format, strlen(bs->backing_format),
+ buflen);
+ if (ret < 0) {
+ goto fail;
+ }
- memcpy(buf + offset, backing_fmt, strlen(backing_fmt));
- offset += strlen(backing_fmt);
+ buf += ret;
+ buflen -= ret;
+ }
- memset(buf + offset, 0, padding);
- offset += padding;
+ /* Keep unknown header extensions */
+ QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
+ ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
+ if (ret < 0) {
+ goto fail;
}
- memcpy(buf + offset, backing_file, backing_file_len);
- backing_file_offset = sizeof(QCowHeader) + offset;
+ buf += ret;
+ buflen -= ret;
}
- ret = bdrv_pwrite_sync(bs->file, sizeof(QCowHeader), buf, ext_size);
+ /* End of header extensions */
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
if (ret < 0) {
goto fail;
}
- /* Update header fields */
- uint64_t be_backing_file_offset = cpu_to_be64(backing_file_offset);
- uint32_t be_backing_file_size = cpu_to_be32(backing_file_len);
+ buf += ret;
+ buflen -= ret;
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_offset),
- &be_backing_file_offset, sizeof(uint64_t));
- if (ret < 0) {
- goto fail;
+ /* Backing file name */
+ if (*bs->backing_file) {
+ size_t backing_file_len = strlen(bs->backing_file);
+
+ if (buflen < backing_file_len) {
+ ret = -ENOSPC;
+ goto fail;
+ }
+
+ strncpy(buf, bs->backing_file, buflen);
+
+ header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
+ header->backing_file_size = cpu_to_be32(backing_file_len);
}
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, backing_file_size),
- &be_backing_file_size, sizeof(uint32_t));
+ /* Write the new header */
+ ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
if (ret < 0) {
goto fail;
}
ret = 0;
fail:
+ qemu_vfree(header);
return ret;
}
static int qcow2_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt)
{
- return qcow2_update_ext_header(bs, backing_file, backing_fmt);
+ /* Backing file format doesn't make sense without a backing file */
+ if (backing_fmt && !backing_file) {
+ return -EINVAL;
+ }
+
+ pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
+ pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+
+ return qcow2_update_header(bs);
}
static int preallocate(BlockDriverState *bs)
diff --git a/block/qcow2.h b/block/qcow2.h
index 99e45361f5..fc35838175 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -87,6 +87,13 @@ typedef struct QCowSnapshot {
struct Qcow2Cache;
typedef struct Qcow2Cache Qcow2Cache;
+typedef struct Qcow2UnknownHeaderExtension {
+ uint32_t magic;
+ uint32_t len;
+ QLIST_ENTRY(Qcow2UnknownHeaderExtension) next;
+ uint8_t data[];
+} Qcow2UnknownHeaderExtension;
+
typedef struct BDRVQcowState {
int cluster_bits;
int cluster_size;
@@ -127,6 +134,7 @@ typedef struct BDRVQcowState {
QCowSnapshot *snapshots;
int flags;
+ QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
} BDRVQcowState;
/* XXX: use std qcow open function ? */
@@ -178,6 +186,7 @@ static inline int64_t align_offset(int64_t offset, int n)
/* qcow2.c functions */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t sector_num, int nb_sectors);
+int qcow2_update_header(BlockDriverState *bs);
/* qcow2-refcount.c functions */
int qcow2_refcount_init(BlockDriverState *bs);
diff --git a/block/qed.c b/block/qed.c
index 8da3ebe9d4..a041d31e66 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -875,6 +875,12 @@ static void qed_aio_complete(QEDAIOCB *acb, int ret)
qemu_iovec_destroy(&acb->cur_qiov);
qed_unref_l2_cache_entry(acb->request.l2_table);
+ /* Free the buffer we may have allocated for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ qemu_vfree(acb->qiov->iov[0].iov_base);
+ acb->qiov->iov[0].iov_base = NULL;
+ }
+
/* Arrange for a bh to invoke the completion function */
acb->bh_ret = ret;
acb->bh = qemu_bh_new(qed_aio_complete_bh, acb);
@@ -941,9 +947,8 @@ static void qed_aio_write_l1_update(void *opaque, int ret)
/**
* Update L2 table with new cluster offsets and write them out
*/
-static void qed_aio_write_l2_update(void *opaque, int ret)
+static void qed_aio_write_l2_update(QEDAIOCB *acb, int ret, uint64_t offset)
{
- QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
bool need_alloc = acb->find_cluster_ret == QED_CLUSTER_L1;
int index;
@@ -959,7 +964,7 @@ static void qed_aio_write_l2_update(void *opaque, int ret)
index = qed_l2_index(s, acb->cur_pos);
qed_update_l2_table(s, acb->request.l2_table->table, index, acb->cur_nclusters,
- acb->cur_cluster);
+ offset);
if (need_alloc) {
/* Write out the whole new L2 table */
@@ -976,6 +981,12 @@ err:
qed_aio_complete(acb, ret);
}
+static void qed_aio_write_l2_update_cb(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+ qed_aio_write_l2_update(acb, ret, acb->cur_cluster);
+}
+
/**
* Flush new data clusters before updating the L2 table
*
@@ -990,7 +1001,7 @@ static void qed_aio_write_flush_before_l2_update(void *opaque, int ret)
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update, opaque)) {
+ if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
qed_aio_complete(acb, -EIO);
}
}
@@ -1019,7 +1030,7 @@ static void qed_aio_write_main(void *opaque, int ret)
if (s->bs->backing_hd) {
next_fn = qed_aio_write_flush_before_l2_update;
} else {
- next_fn = qed_aio_write_l2_update;
+ next_fn = qed_aio_write_l2_update_cb;
}
}
@@ -1081,6 +1092,18 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
return !(s->header.features & QED_F_NEED_CHECK);
}
+static void qed_aio_write_zero_cluster(void *opaque, int ret)
+{
+ QEDAIOCB *acb = opaque;
+
+ if (ret) {
+ qed_aio_complete(acb, ret);
+ return;
+ }
+
+ qed_aio_write_l2_update(acb, 0, 1);
+}
+
/**
* Write new data cluster
*
@@ -1092,6 +1115,7 @@ static bool qed_should_set_need_check(BDRVQEDState *s)
static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
{
BDRVQEDState *s = acb_to_s(acb);
+ BlockDriverCompletionFunc *cb;
/* Cancel timer when the first allocating request comes in */
if (QSIMPLEQ_EMPTY(&s->allocating_write_reqs)) {
@@ -1109,14 +1133,26 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
acb->cur_nclusters = qed_bytes_to_clusters(s,
qed_offset_into_cluster(s, acb->cur_pos) + len);
- acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
+ if (acb->flags & QED_AIOCB_ZERO) {
+ /* Skip ahead if the clusters are already zero */
+ if (acb->find_cluster_ret == QED_CLUSTER_ZERO) {
+ qed_aio_next_io(acb, 0);
+ return;
+ }
+
+ cb = qed_aio_write_zero_cluster;
+ } else {
+ cb = qed_aio_write_prefill;
+ acb->cur_cluster = qed_alloc_clusters(s, acb->cur_nclusters);
+ }
+
if (qed_should_set_need_check(s)) {
s->header.features |= QED_F_NEED_CHECK;
- qed_write_header(s, qed_aio_write_prefill, acb);
+ qed_write_header(s, cb, acb);
} else {
- qed_aio_write_prefill(acb, 0);
+ cb(acb, 0);
}
}
@@ -1131,6 +1167,16 @@ static void qed_aio_write_alloc(QEDAIOCB *acb, size_t len)
*/
static void qed_aio_write_inplace(QEDAIOCB *acb, uint64_t offset, size_t len)
{
+ /* Allocate buffer for zero writes */
+ if (acb->flags & QED_AIOCB_ZERO) {
+ struct iovec *iov = acb->qiov->iov;
+
+ if (!iov->iov_base) {
+ iov->iov_base = qemu_blockalign(acb->common.bs, iov->iov_len);
+ memset(iov->iov_base, 0, iov->iov_len);
+ }
+ }
+
/* Calculate the I/O vector */
acb->cur_cluster = offset;
qemu_iovec_copy(&acb->cur_qiov, acb->qiov, acb->qiov_offset, len);
@@ -1233,8 +1279,8 @@ static void qed_aio_next_io(void *opaque, int ret)
{
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- QEDFindClusterFunc *io_fn =
- acb->is_write ? qed_aio_write_data : qed_aio_read_data;
+ QEDFindClusterFunc *io_fn = (acb->flags & QED_AIOCB_WRITE) ?
+ qed_aio_write_data : qed_aio_read_data;
trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size);
@@ -1264,14 +1310,14 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs,
int64_t sector_num,
QEMUIOVector *qiov, int nb_sectors,
BlockDriverCompletionFunc *cb,
- void *opaque, bool is_write)
+ void *opaque, int flags)
{
QEDAIOCB *acb = qemu_aio_get(&qed_aio_pool, bs, cb, opaque);
trace_qed_aio_setup(bs->opaque, acb, sector_num, nb_sectors,
- opaque, is_write);
+ opaque, flags);
- acb->is_write = is_write;
+ acb->flags = flags;
acb->finished = NULL;
acb->qiov = qiov;
acb->qiov_offset = 0;
@@ -1291,7 +1337,7 @@ static BlockDriverAIOCB *bdrv_qed_aio_readv(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, false);
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}
static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
@@ -1300,7 +1346,8 @@ static BlockDriverAIOCB *bdrv_qed_aio_writev(BlockDriverState *bs,
BlockDriverCompletionFunc *cb,
void *opaque)
{
- return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb, opaque, true);
+ return qed_aio_setup(bs, sector_num, qiov, nb_sectors, cb,
+ opaque, QED_AIOCB_WRITE);
}
static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
@@ -1310,6 +1357,53 @@ static BlockDriverAIOCB *bdrv_qed_aio_flush(BlockDriverState *bs,
return bdrv_aio_flush(bs->file, cb, opaque);
}
+typedef struct {
+ Coroutine *co;
+ int ret;
+ bool done;
+} QEDWriteZeroesCB;
+
+static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
+{
+ QEDWriteZeroesCB *cb = opaque;
+
+ cb->done = true;
+ cb->ret = ret;
+ if (cb->co) {
+ qemu_coroutine_enter(cb->co, NULL);
+ }
+}
+
+static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors)
+{
+ BlockDriverAIOCB *blockacb;
+ QEDWriteZeroesCB cb = { .done = false };
+ QEMUIOVector qiov;
+ struct iovec iov;
+
+ /* Zero writes start without an I/O buffer. If a buffer becomes necessary
+ * then it will be allocated during request processing.
+ */
+ iov.iov_base = NULL,
+ iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
+ qed_co_write_zeroes_cb, &cb,
+ QED_AIOCB_WRITE | QED_AIOCB_ZERO);
+ if (!blockacb) {
+ return -EIO;
+ }
+ if (!cb.done) {
+ cb.co = qemu_coroutine_self();
+ qemu_coroutine_yield();
+ }
+ assert(cb.done);
+ return cb.ret;
+}
+
static int bdrv_qed_truncate(BlockDriverState *bs, int64_t offset)
{
BDRVQEDState *s = bs->opaque;
@@ -1469,6 +1563,7 @@ static BlockDriver bdrv_qed = {
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
.bdrv_aio_flush = bdrv_qed_aio_flush,
+ .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
diff --git a/block/qed.h b/block/qed.h
index 62cbd3b899..62624a1f34 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -123,12 +123,17 @@ typedef struct QEDRequest {
CachedL2Table *l2_table;
} QEDRequest;
+enum {
+ QED_AIOCB_WRITE = 0x0001, /* read or write? */
+ QED_AIOCB_ZERO = 0x0002, /* zero write, used with QED_AIOCB_WRITE */
+};
+
typedef struct QEDAIOCB {
BlockDriverAIOCB common;
QEMUBH *bh;
int bh_ret; /* final return status for completion bh */
QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
- bool is_write; /* false - read, true - write */
+ int flags; /* QED_AIOCB_* bits ORed together */
bool *finished; /* signal for cancel completion */
uint64_t end_pos; /* request end on block device, in bytes */
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 9416400165..00276f6f46 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -629,6 +629,9 @@ static void coroutine_fn aio_read_response(void *opaque)
switch (acb->aiocb_type) {
case AIOCB_WRITE_UDATA:
+ /* this coroutine context is no longer suitable for co_recv
+ * because we may send data to update vdi objects */
+ s->co_recv = NULL;
if (!is_data_obj(aio_req->oid)) {
break;
}
diff --git a/block/vpc.c b/block/vpc.c
index 89a5ee2668..6b4816f563 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -161,13 +161,27 @@ static int vpc_open(BlockDriverState *bs, int flags)
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
int err = -1;
+ int disk_type = VHD_DYNAMIC;
if (bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE) != HEADER_SIZE)
goto fail;
footer = (struct vhd_footer*) s->footer_buf;
- if (strncmp(footer->creator, "conectix", 8))
- goto fail;
+ if (strncmp(footer->creator, "conectix", 8)) {
+ int64_t offset = bdrv_getlength(bs->file);
+ if (offset < HEADER_SIZE) {
+ goto fail;
+ }
+ /* If a fixed disk, the footer is found only at the end of the file */
+ if (bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf, HEADER_SIZE)
+ != HEADER_SIZE) {
+ goto fail;
+ }
+ if (strncmp(footer->creator, "conectix", 8)) {
+ goto fail;
+ }
+ disk_type = VHD_FIXED;
+ }
checksum = be32_to_cpu(footer->checksum);
footer->checksum = 0;
@@ -186,49 +200,54 @@ static int vpc_open(BlockDriverState *bs, int flags)
goto fail;
}
- if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf, HEADER_SIZE)
- != HEADER_SIZE)
- goto fail;
-
- dyndisk_header = (struct vhd_dyndisk_header*) buf;
+ if (disk_type == VHD_DYNAMIC) {
+ if (bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
+ HEADER_SIZE) != HEADER_SIZE) {
+ goto fail;
+ }
- if (strncmp(dyndisk_header->magic, "cxsparse", 8))
- goto fail;
+ dyndisk_header = (struct vhd_dyndisk_header *) buf;
+ if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
+ goto fail;
+ }
- s->block_size = be32_to_cpu(dyndisk_header->block_size);
- s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
+ s->block_size = be32_to_cpu(dyndisk_header->block_size);
+ s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
- s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
- s->pagetable = g_malloc(s->max_table_entries * 4);
+ s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
+ s->pagetable = g_malloc(s->max_table_entries * 4);
- s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
- if (bdrv_pread(bs->file, s->bat_offset, s->pagetable,
- s->max_table_entries * 4) != s->max_table_entries * 4)
- goto fail;
+ s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
+ if (bdrv_pread(bs->file, s->bat_offset, s->pagetable,
+ s->max_table_entries * 4) != s->max_table_entries * 4) {
+ goto fail;
+ }
- s->free_data_block_offset =
- (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
+ s->free_data_block_offset =
+ (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
- for (i = 0; i < s->max_table_entries; i++) {
- be32_to_cpus(&s->pagetable[i]);
- if (s->pagetable[i] != 0xFFFFFFFF) {
- int64_t next = (512 * (int64_t) s->pagetable[i]) +
- s->bitmap_size + s->block_size;
+ for (i = 0; i < s->max_table_entries; i++) {
+ be32_to_cpus(&s->pagetable[i]);
+ if (s->pagetable[i] != 0xFFFFFFFF) {
+ int64_t next = (512 * (int64_t) s->pagetable[i]) +
+ s->bitmap_size + s->block_size;
- if (next> s->free_data_block_offset)
- s->free_data_block_offset = next;
+ if (next > s->free_data_block_offset) {
+ s->free_data_block_offset = next;
+ }
+ }
}
- }
- s->last_bitmap_offset = (int64_t) -1;
+ s->last_bitmap_offset = (int64_t) -1;
#ifdef CACHE
- s->pageentry_u8 = g_malloc(512);
- s->pageentry_u32 = s->pageentry_u8;
- s->pageentry_u16 = s->pageentry_u8;
- s->last_pagetable = -1;
+ s->pageentry_u8 = g_malloc(512);
+ s->pageentry_u32 = s->pageentry_u8;
+ s->pageentry_u16 = s->pageentry_u8;
+ s->last_pagetable = -1;
#endif
+ }
qemu_co_mutex_init(&s->lock);
@@ -395,7 +414,11 @@ static int vpc_read(BlockDriverState *bs, int64_t sector_num,
int ret;
int64_t offset;
int64_t sectors, sectors_per_block;
+ struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
+ if (cpu_to_be32(footer->type) == VHD_FIXED) {
+ return bdrv_read(bs->file, sector_num, buf, nb_sectors);
+ }
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 0);
@@ -440,7 +463,11 @@ static int vpc_write(BlockDriverState *bs, int64_t sector_num,
int64_t offset;
int64_t sectors, sectors_per_block;
int ret;
+ struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf;
+ if (cpu_to_be32(footer->type) == VHD_FIXED) {
+ return bdrv_write(bs->file, sector_num, buf, nb_sectors);
+ }
while (nb_sectors > 0) {
offset = get_sector_offset(bs, sector_num, 1);
@@ -533,70 +560,14 @@ static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
return 0;
}
-static int vpc_create(const char *filename, QEMUOptionParameter *options)
+static int create_dynamic_disk(int fd, uint8_t *buf, int64_t total_sectors)
{
- uint8_t buf[1024];
- struct vhd_footer* footer = (struct vhd_footer*) buf;
struct vhd_dyndisk_header* dyndisk_header =
(struct vhd_dyndisk_header*) buf;
- int fd, i;
- uint16_t cyls = 0;
- uint8_t heads = 0;
- uint8_t secs_per_cyl = 0;
size_t block_size, num_bat_entries;
- int64_t total_sectors = 0;
+ int i;
int ret = -EIO;
- // Read out options
- total_sectors = get_option_parameter(options, BLOCK_OPT_SIZE)->value.n /
- BDRV_SECTOR_SIZE;
-
- // Create the file
- fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
- if (fd < 0)
- return -EIO;
-
- /* Calculate matching total_size and geometry. Increase the number of
- sectors requested until we get enough (or fail). */
- for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
- if (calculate_geometry(total_sectors + i,
- &cyls, &heads, &secs_per_cyl)) {
- ret = -EFBIG;
- goto fail;
- }
- }
- total_sectors = (int64_t) cyls * heads * secs_per_cyl;
-
- // Prepare the Hard Disk Footer
- memset(buf, 0, 1024);
-
- memcpy(footer->creator, "conectix", 8);
- // TODO Check if "qemu" creator_app is ok for VPC
- memcpy(footer->creator_app, "qemu", 4);
- memcpy(footer->creator_os, "Wi2k", 4);
-
- footer->features = be32_to_cpu(0x02);
- footer->version = be32_to_cpu(0x00010000);
- footer->data_offset = be64_to_cpu(HEADER_SIZE);
- footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE);
-
- // Version of Virtual PC 2007
- footer->major = be16_to_cpu(0x0005);
- footer->minor =be16_to_cpu(0x0003);
-
- footer->orig_size = be64_to_cpu(total_sectors * 512);
- footer->size = be64_to_cpu(total_sectors * 512);
-
- footer->cyls = be16_to_cpu(cyls);
- footer->heads = heads;
- footer->secs_per_cyl = secs_per_cyl;
-
- footer->type = be32_to_cpu(VHD_DYNAMIC);
-
- // TODO uuid is missing
-
- footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
-
// Write the footer (twice: at the beginning and at the end)
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
@@ -624,7 +595,6 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
}
}
-
// Prepare the Dynamic Disk Header
memset(buf, 0, 1024);
@@ -653,6 +623,129 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options)
ret = 0;
fail:
+ return ret;
+}
+
+static int create_fixed_disk(int fd, uint8_t *buf, int64_t total_size)
+{
+ int ret = -EIO;
+
+ /* Add footer to total size */
+ total_size += 512;
+ if (ftruncate(fd, total_size) != 0) {
+ ret = -errno;
+ goto fail;
+ }
+ if (lseek(fd, -512, SEEK_END) < 0) {
+ goto fail;
+ }
+ if (write(fd, buf, HEADER_SIZE) != HEADER_SIZE) {
+ goto fail;
+ }
+
+ ret = 0;
+
+ fail:
+ return ret;
+}
+
+static int vpc_create(const char *filename, QEMUOptionParameter *options)
+{
+ uint8_t buf[1024];
+ struct vhd_footer *footer = (struct vhd_footer *) buf;
+ QEMUOptionParameter *disk_type_param;
+ int fd, i;
+ uint16_t cyls = 0;
+ uint8_t heads = 0;
+ uint8_t secs_per_cyl = 0;
+ int64_t total_sectors;
+ int64_t total_size;
+ int disk_type;
+ int ret = -EIO;
+
+ /* Read out options */
+ total_size = get_option_parameter(options, BLOCK_OPT_SIZE)->value.n;
+
+ disk_type_param = get_option_parameter(options, BLOCK_OPT_SUBFMT);
+ if (disk_type_param && disk_type_param->value.s) {
+ if (!strcmp(disk_type_param->value.s, "dynamic")) {
+ disk_type = VHD_DYNAMIC;
+ } else if (!strcmp(disk_type_param->value.s, "fixed")) {
+ disk_type = VHD_FIXED;
+ } else {
+ return -EINVAL;
+ }
+ } else {
+ disk_type = VHD_DYNAMIC;
+ }
+
+ /* Create the file */
+ fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+ if (fd < 0) {
+ return -EIO;
+ }
+
+ /*
+ * Calculate matching total_size and geometry. Increase the number of
+ * sectors requested until we get enough (or fail). This ensures that
+ * qemu-img convert doesn't truncate images, but rather rounds up.
+ */
+ total_sectors = total_size / BDRV_SECTOR_SIZE;
+ for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+ if (calculate_geometry(total_sectors + i, &cyls, &heads,
+ &secs_per_cyl))
+ {
+ ret = -EFBIG;
+ goto fail;
+ }
+ }
+
+ total_sectors = (int64_t) cyls * heads * secs_per_cyl;
+
+ /* Prepare the Hard Disk Footer */
+ memset(buf, 0, 1024);
+
+ memcpy(footer->creator, "conectix", 8);
+ /* TODO Check if "qemu" creator_app is ok for VPC */
+ memcpy(footer->creator_app, "qemu", 4);
+ memcpy(footer->creator_os, "Wi2k", 4);
+
+ footer->features = be32_to_cpu(0x02);
+ footer->version = be32_to_cpu(0x00010000);
+ if (disk_type == VHD_DYNAMIC) {
+ footer->data_offset = be64_to_cpu(HEADER_SIZE);
+ } else {
+ footer->data_offset = be64_to_cpu(0xFFFFFFFFFFFFFFFFULL);
+ }
+ footer->timestamp = be32_to_cpu(time(NULL) - VHD_TIMESTAMP_BASE);
+
+ /* Version of Virtual PC 2007 */
+ footer->major = be16_to_cpu(0x0005);
+ footer->minor = be16_to_cpu(0x0003);
+ if (disk_type == VHD_DYNAMIC) {
+ footer->orig_size = be64_to_cpu(total_sectors * 512);
+ footer->size = be64_to_cpu(total_sectors * 512);
+ } else {
+ footer->orig_size = be64_to_cpu(total_size);
+ footer->size = be64_to_cpu(total_size);
+ }
+ footer->cyls = be16_to_cpu(cyls);
+ footer->heads = heads;
+ footer->secs_per_cyl = secs_per_cyl;
+
+ footer->type = be32_to_cpu(disk_type);
+
+ /* TODO uuid is missing */
+
+ footer->checksum = be32_to_cpu(vpc_checksum(buf, HEADER_SIZE));
+
+ if (disk_type == VHD_DYNAMIC) {
+ ret = create_dynamic_disk(fd, buf, total_sectors);
+ } else {
+ ret = create_fixed_disk(fd, buf, total_size);
+ }
+
+ fail:
close(fd);
return ret;
}
@@ -675,6 +768,13 @@ static QEMUOptionParameter vpc_create_options[] = {
.type = OPT_SIZE,
.help = "Virtual disk size"
},
+ {
+ .name = BLOCK_OPT_SUBFMT,
+ .type = OPT_STRING,
+ .help =
+ "Type of virtual hard disk format. Supported formats are "
+ "{dynamic (default) | fixed} "
+ },
{ NULL }
};
diff --git a/block_int.h b/block_int.h
index 7be2988ca7..7946cf66a1 100644
--- a/block_int.h
+++ b/block_int.h
@@ -131,6 +131,14 @@ struct BlockDriver {
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ /*
+ * Efficiently zero a region of the disk image. Typically an image format
+ * would use a compact metadata representation to implement this. This
+ * function pointer may be NULL and .bdrv_co_writev() will be called
+ * instead.
+ */
+ int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors);
int coroutine_fn (*bdrv_co_is_allocated)(BlockDriverState *bs,
diff --git a/compiler.h b/compiler.h
index a1c0794947..736e77075a 100644
--- a/compiler.h
+++ b/compiler.h
@@ -30,8 +30,10 @@
# define QEMU_PACKED __attribute__((packed))
#endif
+#define cat(x,y) x ## y
+#define cat2(x,y) cat(x,y)
#define QEMU_BUILD_BUG_ON(x) \
- typedef char qemu_build_bug_on__##__LINE__[(x)?-1:1];
+ typedef char cat2(qemu_build_bug_on__,__LINE__)[(x)?-1:1];
#if defined __GNUC__
# if !QEMU_GNUC_PREREQ(4, 4)
diff --git a/cutils.c b/cutils.c
index a6ffd46445..af308cd7b9 100644
--- a/cutils.c
+++ b/cutils.c
@@ -303,6 +303,41 @@ void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count,
}
}
+/*
+ * Checks if a buffer is all zeroes
+ *
+ * Attention! The len must be a multiple of 4 * sizeof(long) due to
+ * restriction of optimizations in this function.
+ */
+bool buffer_is_zero(const void *buf, size_t len)
+{
+ /*
+ * Use long as the biggest available internal data type that fits into the
+ * CPU register and unroll the loop to smooth out the effect of memory
+ * latency.
+ */
+
+ size_t i;
+ long d0, d1, d2, d3;
+ const long * const data = buf;
+
+ assert(len % (4 * sizeof(long)) == 0);
+ len /= sizeof(long);
+
+ for (i = 0; i < len; i += 4) {
+ d0 = data[i + 0];
+ d1 = data[i + 1];
+ d2 = data[i + 2];
+ d3 = data[i + 3];
+
+ if (d0 || d1 || d2 || d3) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
#ifndef _WIN32
/* Sets a specific flag */
int fcntl_setfl(int fd, int flag)
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index c3603999d1..b515f417b1 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -146,6 +146,7 @@ static void ahci_check_irq(AHCIState *s)
DPRINTF(-1, "check irq %#x\n", s->control_regs.irqstatus);
+ s->control_regs.irqstatus = 0;
for (i = 0; i < s->ports; i++) {
AHCIPortRegs *pr = &s->dev[i].port_regs;
if (pr->irq_stat & pr->irq_mask) {
@@ -216,6 +217,7 @@ static void ahci_port_write(AHCIState *s, int port, int offset, uint32_t val)
break;
case PORT_IRQ_STAT:
pr->irq_stat &= ~val;
+ ahci_check_irq(s);
break;
case PORT_IRQ_MASK:
pr->irq_mask = val & 0xfdc000ff;
@@ -560,6 +562,11 @@ static void ahci_reset_port(AHCIState *s, int port)
ncq_tfs->aiocb = NULL;
}
+ /* Maybe we just finished the request thanks to bdrv_aio_cancel() */
+ if (!ncq_tfs->used) {
+ continue;
+ }
+
qemu_sglist_destroy(&ncq_tfs->sglist);
ncq_tfs->used = 0;
}
diff --git a/qemu-common.h b/qemu-common.h
index 9b997f8838..c5e9cad35e 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -335,6 +335,8 @@ void qemu_iovec_memset(QEMUIOVector *qiov, int c, size_t count);
void qemu_iovec_memset_skip(QEMUIOVector *qiov, int c, size_t count,
size_t skip);
+bool buffer_is_zero(const void *buf, size_t len);
+
void qemu_progress_init(int enabled, float min_skip);
void qemu_progress_end(void);
void qemu_progress_print(float delta, int max);
diff --git a/qemu-config.c b/qemu-config.c
index b030205e23..e980fd68d6 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -118,6 +118,32 @@ static QemuOptsList qemu_drive_opts = {
},
};
+static QemuOptsList qemu_iscsi_opts = {
+ .name = "iscsi",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
+ .desc = {
+ {
+ .name = "user",
+ .type = QEMU_OPT_STRING,
+ .help = "username for CHAP authentication to target",
+ },{
+ .name = "password",
+ .type = QEMU_OPT_STRING,
+ .help = "password for CHAP authentication to target",
+ },{
+ .name = "header-digest",
+ .type = QEMU_OPT_STRING,
+ .help = "HeaderDigest setting. "
+ "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
+ },{
+ .name = "initiator-name",
+ .type = QEMU_OPT_STRING,
+ .help = "Initiator iqn name to use when connecting",
+ },
+ { /* end of list */ }
+ },
+};
+
static QemuOptsList qemu_chardev_opts = {
.name = "chardev",
.implied_opt_name = "backend",
@@ -580,6 +606,7 @@ static QemuOptsList *vm_config_groups[32] = {
&qemu_option_rom_opts,
&qemu_machine_opts,
&qemu_boot_opts,
+ &qemu_iscsi_opts,
NULL,
};
diff --git a/qemu-doc.texi b/qemu-doc.texi
index 11f4166995..83b2ad5237 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -730,6 +730,57 @@ export LIBISCSI_CHAP_PASSWORD=<password>
iscsi://<host>/<target-iqn-name>/<lun>
@end example
+Various session related parameters can be set via special options, either
+in a configuration file provided via '-readconfig' or directly on the
+command line.
+
+@example
+Setting a specific initiator name to use when logging in to the target
+-iscsi initiator-name=iqn.qemu.test:my-initiator
+@end example
+
+@example
+Controlling which type of header digest to negotiate with the target
+-iscsi header-digest=CRC32C|CRC32C-NONE|NONE-CRC32C|NONE
+@end example
+
+These can also be set via a configuration file
+@example
+[iscsi]
+ user = "CHAP username"
+ password = "CHAP password"
+ initiator-name = "iqn.qemu.test:my-initiator"
+ # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE
+ header-digest = "CRC32C"
+@end example
+
+
+Setting the target name allows different options for different targets
+@example
+[iscsi "iqn.target.name"]
+ user = "CHAP username"
+ password = "CHAP password"
+ initiator-name = "iqn.qemu.test:my-initiator"
+ # header digest is one of CRC32C|CRC32C-NONE|NONE-CRC32C|NONE
+ header-digest = "CRC32C"
+@end example
+
+
+Howto use a configuration file to set iSCSI configuration options:
+@example
+cat >iscsi.conf <<EOF
+[iscsi]
+ user = "me"
+ password = "my password"
+ initiator-name = "iqn.qemu.test:my-initiator"
+ header-digest = "CRC32C"
+EOF
+
+qemu-system-i386 -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \
+ -readconfig iscsi.conf
+@end example
+
+
Howto set up a simple iSCSI target on loopback and accessing it via QEMU:
@example
This example shows how to set up an iSCSI target with one CDROM and one DISK
@@ -744,7 +795,8 @@ tgtadm --lld iscsi --mode logicalunit --op new --tid 1 --lun 2 \
-b /IMAGES/cd.iso --device-type=cd
tgtadm --lld iscsi --op bind --mode target --tid 1 -I ALL
-qemu-system-i386 -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \
+qemu-system-i386 -iscsi initiator-name=iqn.qemu.test:my-initiator \
+ -boot d -drive file=iscsi://127.0.0.1/iqn.qemu.test/1 \
-cdrom iscsi://127.0.0.1/iqn.qemu.test/2
@end example
diff --git a/qemu-img.c b/qemu-img.c
index 01cc0d35ad..c4bcf41e15 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -515,40 +515,6 @@ static int img_commit(int argc, char **argv)
}
/*
- * Checks whether the sector is not a zero sector.
- *
- * Attention! The len must be a multiple of 4 * sizeof(long) due to
- * restriction of optimizations in this function.
- */
-static int is_not_zero(const uint8_t *sector, int len)
-{
- /*
- * Use long as the biggest available internal data type that fits into the
- * CPU register and unroll the loop to smooth out the effect of memory
- * latency.
- */
-
- int i;
- long d0, d1, d2, d3;
- const long * const data = (const long *) sector;
-
- len /= sizeof(long);
-
- for(i = 0; i < len; i += 4) {
- d0 = data[i + 0];
- d1 = data[i + 1];
- d2 = data[i + 2];
- d3 = data[i + 3];
-
- if (d0 || d1 || d2 || d3) {
- return 1;
- }
- }
-
- return 0;
-}
-
-/*
* Returns true iff the first sector pointed to by 'buf' contains at least
* a non-NUL byte.
*
@@ -557,20 +523,22 @@ static int is_not_zero(const uint8_t *sector, int len)
*/
static int is_allocated_sectors(const uint8_t *buf, int n, int *pnum)
{
- int v, i;
+ bool is_zero;
+ int i;
if (n <= 0) {
*pnum = 0;
return 0;
}
- v = is_not_zero(buf, 512);
+ is_zero = buffer_is_zero(buf, 512);
for(i = 1; i < n; i++) {
buf += 512;
- if (v != is_not_zero(buf, 512))
+ if (is_zero != buffer_is_zero(buf, 512)) {
break;
+ }
}
*pnum = i;
- return v;
+ return !is_zero;
}
/*
@@ -955,7 +923,7 @@ static int img_convert(int argc, char **argv)
if (n < cluster_sectors) {
memset(buf + n * 512, 0, cluster_size - n * 512);
}
- if (is_not_zero(buf, cluster_size)) {
+ if (!buffer_is_zero(buf, cluster_size)) {
ret = bdrv_write_compressed(out_bs, sector_num, buf,
cluster_sectors);
if (ret != 0) {
diff --git a/qemu-io.c b/qemu-io.c
index d4a5fcbcbc..0249be4e71 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -218,6 +218,51 @@ static int do_pwrite(char *buf, int64_t offset, int count, int *total)
return 1;
}
+typedef struct {
+ int64_t offset;
+ int count;
+ int *total;
+ int ret;
+ bool done;
+} CoWriteZeroes;
+
+static void coroutine_fn co_write_zeroes_entry(void *opaque)
+{
+ CoWriteZeroes *data = opaque;
+
+ data->ret = bdrv_co_write_zeroes(bs, data->offset / BDRV_SECTOR_SIZE,
+ data->count / BDRV_SECTOR_SIZE);
+ data->done = true;
+ if (data->ret < 0) {
+ *data->total = data->ret;
+ return;
+ }
+
+ *data->total = data->count;
+}
+
+static int do_co_write_zeroes(int64_t offset, int count, int *total)
+{
+ Coroutine *co;
+ CoWriteZeroes data = {
+ .offset = offset,
+ .count = count,
+ .total = total,
+ .done = false,
+ };
+
+ co = qemu_coroutine_create(co_write_zeroes_entry);
+ qemu_coroutine_enter(co, &data);
+ while (!data.done) {
+ qemu_aio_wait();
+ }
+ if (data.ret < 0) {
+ return data.ret;
+ } else {
+ return 1;
+ }
+}
+
static int do_load_vmstate(char *buf, int64_t offset, int count, int *total)
{
*total = bdrv_load_vmstate(bs, (uint8_t *)buf, offset, count);
@@ -643,6 +688,7 @@ static void write_help(void)
" -P, -- use different pattern to fill file\n"
" -C, -- report statistics in a machine parsable format\n"
" -q, -- quiet mode, do not show I/O statistics\n"
+" -z, -- write zeroes using bdrv_co_write_zeroes\n"
"\n");
}
@@ -654,7 +700,7 @@ static const cmdinfo_t write_cmd = {
.cfunc = write_f,
.argmin = 2,
.argmax = -1,
- .args = "[-abCpq] [-P pattern ] off len",
+ .args = "[-bCpqz] [-P pattern ] off len",
.oneline = "writes a number of bytes at a specified offset",
.help = write_help,
};
@@ -662,16 +708,16 @@ static const cmdinfo_t write_cmd = {
static int write_f(int argc, char **argv)
{
struct timeval t1, t2;
- int Cflag = 0, pflag = 0, qflag = 0, bflag = 0;
+ int Cflag = 0, pflag = 0, qflag = 0, bflag = 0, Pflag = 0, zflag = 0;
int c, cnt;
- char *buf;
+ char *buf = NULL;
int64_t offset;
int count;
/* Some compilers get confused and warn if this is not initialized. */
int total = 0;
int pattern = 0xcd;
- while ((c = getopt(argc, argv, "bCpP:q")) != EOF) {
+ while ((c = getopt(argc, argv, "bCpP:qz")) != EOF) {
switch (c) {
case 'b':
bflag = 1;
@@ -683,6 +729,7 @@ static int write_f(int argc, char **argv)
pflag = 1;
break;
case 'P':
+ Pflag = 1;
pattern = parse_pattern(optarg);
if (pattern < 0) {
return 0;
@@ -691,6 +738,9 @@ static int write_f(int argc, char **argv)
case 'q':
qflag = 1;
break;
+ case 'z':
+ zflag = 1;
+ break;
default:
return command_usage(&write_cmd);
}
@@ -700,8 +750,13 @@ static int write_f(int argc, char **argv)
return command_usage(&write_cmd);
}
- if (bflag && pflag) {
- printf("-b and -p cannot be specified at the same time\n");
+ if (bflag + pflag + zflag > 1) {
+ printf("-b, -p, or -z cannot be specified at the same time\n");
+ return 0;
+ }
+
+ if (zflag && Pflag) {
+ printf("-z and -P cannot be specified at the same time\n");
return 0;
}
@@ -732,13 +787,17 @@ static int write_f(int argc, char **argv)
}
}
- buf = qemu_io_alloc(count, pattern);
+ if (!zflag) {
+ buf = qemu_io_alloc(count, pattern);
+ }
gettimeofday(&t1, NULL);
if (pflag) {
cnt = do_pwrite(buf, offset, count, &total);
} else if (bflag) {
cnt = do_save_vmstate(buf, offset, count, &total);
+ } else if (zflag) {
+ cnt = do_co_write_zeroes(offset, count, &total);
} else {
cnt = do_write(buf, offset, count, &total);
}
@@ -758,7 +817,9 @@ static int write_f(int argc, char **argv)
print_report("wrote", &t2, offset, count, total, cnt, Cflag);
out:
- qemu_io_free(buf);
+ if (!zflag) {
+ qemu_io_free(buf);
+ }
return 0;
}
diff --git a/qemu-options.hx b/qemu-options.hx
index f577cc8470..42d22aebf9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1829,24 +1829,32 @@ Syntax for specifying iSCSI LUNs is
Example (without authentication):
@example
-qemu -cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \
---drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
+qemu -iscsi initiator-name=iqn.2001-04.com.example:my-initiator \
+-cdrom iscsi://192.0.2.1/iqn.2001-04.com.example/2 \
+-drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
@end example
Example (CHAP username/password via URL):
@example
-qemu --drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1
+qemu -drive file=iscsi://user%password@@192.0.2.1/iqn.2001-04.com.example/1
@end example
Example (CHAP username/password via environment variables):
@example
LIBISCSI_CHAP_USERNAME="user" \
LIBISCSI_CHAP_PASSWORD="password" \
-qemu --drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
+qemu -drive file=iscsi://192.0.2.1/iqn.2001-04.com.example/1
@end example
iSCSI support is an optional feature of QEMU and only available when
compiled and linked against libiscsi.
+ETEXI
+DEF("iscsi", HAS_ARG, QEMU_OPTION_iscsi,
+ "-iscsi [user=user][,password=password]\n"
+ " [,header-digest=CRC32C|CR32C-NONE|NONE-CRC32C|NONE\n"
+ " [,initiator-name=iqn]\n"
+ " iSCSI session parameters\n", QEMU_ARCH_ALL)
+STEXI
@item NBD
QEMU supports NBD (Network Block Devices) both using TCP protocol as well
diff --git a/trace-events b/trace-events
index 5d05749643..9b26ce2304 100644
--- a/trace-events
+++ b/trace-events
@@ -67,6 +67,7 @@ bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_copy_on_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+bdrv_co_write_zeroes(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
bdrv_co_io_em(void *bs, int64_t sector_num, int nb_sectors, int is_write, void *acb) "bs %p sector_num %"PRId64" nb_sectors %d is_write %d acb %p"
bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t cluster_sector_num, int cluster_nb_sectors) "bs %p sector_num %"PRId64" nb_sectors %d cluster_sector_num %"PRId64" cluster_nb_sectors %d"
@@ -320,7 +321,7 @@ qed_need_check_timer_cb(void *s) "s %p"
qed_start_need_check_timer(void *s) "s %p"
qed_cancel_need_check_timer(void *s) "s %p"
qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d"
-qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int is_write) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p is_write %d"
+qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int flags) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p flags %#x"
qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64
qed_aio_read_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
diff --git a/vl.c b/vl.c
index 9b7ddfe413..0e8425e35e 100644
--- a/vl.c
+++ b/vl.c
@@ -2529,6 +2529,14 @@ int main(int argc, char **argv, char **envp)
exit(1);
}
break;
+#ifdef CONFIG_LIBISCSI
+ case QEMU_OPTION_iscsi:
+ opts = qemu_opts_parse(qemu_find_opts("iscsi"), optarg, 0);
+ if (!opts) {
+ exit(1);
+ }
+ break;
+#endif
#ifdef CONFIG_SLIRP
case QEMU_OPTION_tftp:
legacy_tftp_prefix = optarg;