summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoraliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>2008-10-14 14:42:54 +0000
committeraliguori <aliguori@c046a42c-6fe2-441c-8c8c-71466251a162>2008-10-14 14:42:54 +0000
commit9f7965c7e965c8b80da27048017a360b3c57c4af (patch)
tree9a7b218b7a7b590fe926f1c282dfa0916f19092c
parenteeb438c1b84468e8faa7e69bec86b78b45f2347f (diff)
downloadqemu-9f7965c7e965c8b80da27048017a360b3c57c4af.tar.gz
qemu-9f7965c7e965c8b80da27048017a360b3c57c4af.tar.bz2
qemu-9f7965c7e965c8b80da27048017a360b3c57c4af.zip
Expand cache= option and use write-through caching by default
This patch changes the cache= option to accept none, writeback, or writethrough to control the host page cache behavior. By default, writethrough caching is now used which internally is implemented by using O_DSYNC to open the disk images. When using -snapshot, writeback is used by default since data integrity is not at all an issue. cache=none has the same behavior as cache=off previously. The latter syntax is still supported but is now deprecated. I also cleaned up the O_DIRECT implementation to avoid many of the #ifdefs. Signed-off-by: Anthony Liguori <aliguori@us.ibm.com> git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5485 c046a42c-6fe2-441c-8c8c-71466251a162
-rw-r--r--block-raw-posix.c41
-rw-r--r--block-raw-win32.c8
-rw-r--r--block.c6
-rw-r--r--block.h5
-rw-r--r--qemu-doc.texi17
-rw-r--r--qemu-nbd.c2
-rw-r--r--vl.c18
7 files changed, 59 insertions, 38 deletions
diff --git a/block-raw-posix.c b/block-raw-posix.c
index 83a358cd4d..4c04dbf8da 100644
--- a/block-raw-posix.c
+++ b/block-raw-posix.c
@@ -73,6 +73,11 @@
#define DEBUG_BLOCK_PRINT(formatCstr, args...)
#endif
+/* Approximate O_DIRECT with O_DSYNC if O_DIRECT isn't available */
+#ifndef O_DIRECT
+#define O_DIRECT O_DSYNC
+#endif
+
#define FTYPE_FILE 0
#define FTYPE_CD 1
#define FTYPE_FD 2
@@ -101,9 +106,7 @@ typedef struct BDRVRawState {
int fd_got_error;
int fd_media_changed;
#endif
-#if defined(O_DIRECT)
uint8_t* aligned_buf;
-#endif
} BDRVRawState;
static int posix_aio_init(void);
@@ -129,10 +132,13 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
}
if (flags & BDRV_O_CREAT)
open_flags |= O_CREAT | O_TRUNC;
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
@@ -146,9 +152,8 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
s->fd = fd;
for (i = 0; i < RAW_FD_POOL_SIZE; i++)
s->fd_pool[i] = -1;
-#if defined(O_DIRECT)
s->aligned_buf = NULL;
- if (flags & BDRV_O_DIRECT) {
+ if ((flags & BDRV_O_NOCACHE)) {
s->aligned_buf = qemu_memalign(512, ALIGNED_BUFFER_SIZE);
if (s->aligned_buf == NULL) {
ret = -errno;
@@ -156,7 +161,6 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
return ret;
}
}
-#endif
return 0;
}
@@ -281,7 +285,6 @@ label__raw_write__success:
}
-#if defined(O_DIRECT)
/*
* offset and count are in bytes and possibly not aligned. For files opened
* with O_DIRECT, necessary alignments are ensured before calling
@@ -432,12 +435,6 @@ static int raw_pwrite(BlockDriverState *bs, int64_t offset,
return raw_pwrite_aligned(bs, offset, buf, count) + sum;
}
-#else
-#define raw_pread raw_pread_aligned
-#define raw_pwrite raw_pwrite_aligned
-#endif
-
-
#ifdef CONFIG_AIO
/***********************************************************/
/* Unix AIO using POSIX AIO */
@@ -661,7 +658,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@@ -672,7 +668,6 @@ static BlockDriverAIOCB *raw_aio_read(BlockDriverState *bs,
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, buf, nb_sectors, cb, opaque);
if (!acb)
@@ -694,7 +689,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
* If O_DIRECT is used and the buffer is not aligned fall back
* to synchronous IO.
*/
-#if defined(O_DIRECT)
BDRVRawState *s = bs->opaque;
if (unlikely(s->aligned_buf != NULL && ((uintptr_t) buf % 512))) {
@@ -705,7 +699,6 @@ static BlockDriverAIOCB *raw_aio_write(BlockDriverState *bs,
qemu_bh_schedule(bh);
return &acb->common;
}
-#endif
acb = raw_aio_setup(bs, sector_num, (uint8_t*)buf, nb_sectors, cb, opaque);
if (!acb)
@@ -770,10 +763,8 @@ static void raw_close(BlockDriverState *bs)
if (s->fd >= 0) {
close(s->fd);
s->fd = -1;
-#if defined(O_DIRECT)
if (s->aligned_buf != NULL)
qemu_free(s->aligned_buf);
-#endif
}
raw_close_fd_pool(s);
}
@@ -1003,10 +994,12 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
open_flags |= O_RDONLY;
bs->read_only = 1;
}
-#ifdef O_DIRECT
- if (flags & BDRV_O_DIRECT)
+ /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+ * and O_DIRECT for no caching. */
+ if ((flags & BDRV_O_NOCACHE))
open_flags |= O_DIRECT;
-#endif
+ else if (!(flags & BDRV_O_CACHE_WB))
+ open_flags |= O_DSYNC;
s->type = FTYPE_FILE;
#if defined(__linux__)
diff --git a/block-raw-win32.c b/block-raw-win32.c
index fd4a9e3a4c..892f2d1e5a 100644
--- a/block-raw-win32.c
+++ b/block-raw-win32.c
@@ -104,8 +104,10 @@ static int raw_open(BlockDriverState *bs, const char *filename, int flags)
#else
overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
- if (flags & BDRV_O_DIRECT)
+ if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+ else if (!(flags & BDRV_O_CACHE_WB))
+ overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
@@ -440,8 +442,10 @@ static int hdev_open(BlockDriverState *bs, const char *filename, int flags)
#else
overlapped = FILE_ATTRIBUTE_NORMAL;
#endif
- if (flags & BDRV_O_DIRECT)
+ if ((flags & BDRV_O_NOCACHE))
overlapped |= FILE_FLAG_NO_BUFFERING | FILE_FLAG_WRITE_THROUGH;
+ else if (!(flags & BDRV_O_CACHE_WB))
+ overlapped |= FILE_FLAG_WRITE_THROUGH;
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
diff --git a/block.c b/block.c
index 5d708baada..48229cde7b 100644
--- a/block.c
+++ b/block.c
@@ -395,12 +395,12 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
/* Note: for compatibility, we open disk image files as RDWR, and
RDONLY as fallback */
if (!(flags & BDRV_O_FILE))
- open_flags = BDRV_O_RDWR | (flags & BDRV_O_DIRECT);
+ open_flags = BDRV_O_RDWR | (flags & BDRV_O_CACHE_MASK);
else
open_flags = flags & ~(BDRV_O_FILE | BDRV_O_SNAPSHOT);
ret = drv->bdrv_open(bs, filename, open_flags);
if ((ret == -EACCES || ret == -EPERM) && !(flags & BDRV_O_FILE)) {
- ret = drv->bdrv_open(bs, filename, BDRV_O_RDONLY);
+ ret = drv->bdrv_open(bs, filename, open_flags & ~BDRV_O_RDWR);
bs->read_only = 1;
}
if (ret < 0) {
@@ -427,7 +427,7 @@ int bdrv_open2(BlockDriverState *bs, const char *filename, int flags,
}
path_combine(backing_filename, sizeof(backing_filename),
filename, bs->backing_file);
- if (bdrv_open(bs->backing_hd, backing_filename, 0) < 0)
+ if (bdrv_open(bs->backing_hd, backing_filename, open_flags) < 0)
goto fail;
}
diff --git a/block.h b/block.h
index f0129130be..72c1c24bb4 100644
--- a/block.h
+++ b/block.h
@@ -47,7 +47,10 @@ typedef struct QEMUSnapshotInfo {
use a disk image format on top of
it (default for
bdrv_file_open()) */
-#define BDRV_O_DIRECT 0x0020
+#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
+#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */
+
+#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB)
void bdrv_info(void);
void bdrv_info_stats(void);
diff --git a/qemu-doc.texi b/qemu-doc.texi
index adf270b470..84021fb816 100644
--- a/qemu-doc.texi
+++ b/qemu-doc.texi
@@ -267,13 +267,28 @@ These options have the same definition as they have in @option{-hdachs}.
@item snapshot=@var{snapshot}
@var{snapshot} is "on" or "off" and allows to enable snapshot for given drive (see @option{-snapshot}).
@item cache=@var{cache}
-@var{cache} is "on" or "off" and allows to disable host cache to access data.
+@var{cache} is "none", "writeback", or "writethrough" and controls how the host cache is used to access block data.
@item format=@var{format}
Specify which disk @var{format} will be used rather than detecting
the format. Can be used to specify format=raw to avoid interpreting
an untrusted format header.
@end table
+By default, writethrough caching is used for all block devices. This means that
+the host page cache will be used to read and write data but write notification
+will be sent to the guest only when the data has been reported as written by
+the storage subsystem.
+
+Writeback caching will report data writes as completed as soon as the data is
+present in the host page cache. This is safe as long as you trust your host.
+If your host crashes or loses power, then the guest may experience data
+corruption. When using the @option{-snapshot} option, writeback caching is
+used by default.
+
+The host page cache can be avoided entirely with @option{cache=none}. This will
+attempt to do disk IO directly to the guest's memory. QEMU may still perform
+an internal copy of the data.
+
Instead of @option{-cdrom} you can use:
@example
qemu -drive file=file,index=2,media=cdrom
diff --git a/qemu-nbd.c b/qemu-nbd.c
index d5d5db73a0..fa618165c8 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -232,7 +232,7 @@ int main(int argc, char **argv)
flags |= BDRV_O_SNAPSHOT;
break;
case 'n':
- flags |= BDRV_O_DIRECT;
+ flags |= BDRV_O_NOCACHE;
break;
case 'b':
bindto = optarg;
diff --git a/vl.c b/vl.c
index 97aca75179..c0e43ac030 100644
--- a/vl.c
+++ b/vl.c
@@ -5648,10 +5648,12 @@ static int drive_init(struct drive_opt *arg, int snapshot,
}
if (get_param_value(buf, sizeof(buf), "cache", str)) {
- if (!strcmp(buf, "off"))
+ if (!strcmp(buf, "off") || !strcmp(buf, "none"))
cache = 0;
- else if (!strcmp(buf, "on"))
+ else if (!strcmp(buf, "writethrough"))
cache = 1;
+ else if (!strcmp(buf, "writeback"))
+ cache = 2;
else {
fprintf(stderr, "qemu: invalid cache option\n");
return -1;
@@ -5770,10 +5772,14 @@ static int drive_init(struct drive_opt *arg, int snapshot,
if (!file[0])
return 0;
bdrv_flags = 0;
- if (snapshot)
+ if (snapshot) {
bdrv_flags |= BDRV_O_SNAPSHOT;
- if (!cache)
- bdrv_flags |= BDRV_O_DIRECT;
+ cache = 2; /* always use write-back with snapshot */
+ }
+ if (cache == 0) /* no caching */
+ bdrv_flags |= BDRV_O_NOCACHE;
+ else if (cache == 2) /* write-back */
+ bdrv_flags |= BDRV_O_CACHE_WB;
if (bdrv_open2(bdrv, file, bdrv_flags, drv) < 0 || qemu_key_check(bdrv, file)) {
fprintf(stderr, "qemu: could not open disk image %s\n",
file);
@@ -8145,7 +8151,7 @@ static void help(int exitcode)
"-cdrom file use 'file' as IDE cdrom image (cdrom is ide1 master)\n"
"-drive [file=file][,if=type][,bus=n][,unit=m][,media=d][,index=i]\n"
" [,cyls=c,heads=h,secs=s[,trans=t]][,snapshot=on|off]\n"
- " [,cache=on|off][,format=f]\n"
+ " [,cache=writethrough|writeback|none][,format=f]\n"
" use 'file' as a drive image\n"
"-mtdblock file use 'file' as on-board Flash memory image\n"
"-sd file use 'file' as SecureDigital card image\n"