diff options
-rw-r--r-- | cpus.c | 4 | ||||
-rw-r--r-- | fsdev/virtfs-proxy-helper.texi | 2 | ||||
-rw-r--r-- | nbd/server.c | 2 | ||||
-rw-r--r-- | numa.c | 11 | ||||
-rw-r--r-- | qapi-schema.json | 2 | ||||
-rw-r--r-- | qemu-char.c | 926 | ||||
-rw-r--r-- | qemu-doc.texi | 8 | ||||
-rw-r--r-- | qemu-ga.texi | 2 | ||||
-rw-r--r-- | qemu-img.texi | 2 | ||||
-rw-r--r-- | qemu-options.hx | 9 | ||||
-rw-r--r-- | scripts/dump-guest-memory.py | 762 | ||||
-rwxr-xr-x | scripts/kvm/kvm_stat | 1199 | ||||
-rw-r--r-- | tests/Makefile | 2 |
13 files changed, 1663 insertions, 1268 deletions
@@ -986,7 +986,7 @@ static void qemu_wait_io_event_common(CPUState *cpu) if (cpu->stop) { cpu->stop = false; cpu->stopped = true; - qemu_cond_signal(&qemu_pause_cond); + qemu_cond_broadcast(&qemu_pause_cond); } flush_queued_work(cpu); cpu->thread_kicked = false; @@ -1396,7 +1396,7 @@ void cpu_stop_current(void) current_cpu->stop = false; current_cpu->stopped = true; cpu_exit(current_cpu); - qemu_cond_signal(&qemu_pause_cond); + qemu_cond_broadcast(&qemu_pause_cond); } } diff --git a/fsdev/virtfs-proxy-helper.texi b/fsdev/virtfs-proxy-helper.texi index e60e3b9465..9a25d7ecf4 100644 --- a/fsdev/virtfs-proxy-helper.texi +++ b/fsdev/virtfs-proxy-helper.texi @@ -1,6 +1,6 @@ @example @c man begin SYNOPSIS -usage: virtfs-proxy-helper options +@command{virtfs-proxy-helper} @var{options} @c man end @end example diff --git a/nbd/server.c b/nbd/server.c index 2265cb0680..256feafcec 100644 --- a/nbd/server.c +++ b/nbd/server.c @@ -671,7 +671,9 @@ NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, * that BDRV_O_INACTIVE is cleared and the image is ready for write * access since the export could be available before migration handover. */ + aio_context_acquire(exp->ctx); blk_invalidate_cache(blk, NULL); + aio_context_release(exp->ctx); return exp; fail: @@ -418,12 +418,15 @@ static void allocate_system_memory_nonnuma(MemoryRegion *mr, Object *owner, Error *err = NULL; memory_region_init_ram_from_file(mr, owner, name, ram_size, false, mem_path, &err); - - /* Legacy behavior: if allocation failed, fall back to - * regular RAM allocation. - */ if (err) { error_report_err(err); + if (mem_prealloc) { + exit(1); + } + + /* Legacy behavior: if allocation failed, fall back to + * regular RAM allocation. + */ memory_region_init_ram(mr, owner, name, ram_size, &error_fatal); } #else diff --git a/qapi-schema.json b/qapi-schema.json index b3038b215a..8d04897922 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -3146,6 +3146,7 @@ # # @addr: socket address to listen on (server=true) # or connect to (server=false) +# @tls-creds: #optional the ID of the TLS credentials object (since 2.6) # @server: #optional create server socket (default: true) # @wait: #optional wait for incoming connection on server # sockets (default: false). @@ -3160,6 +3161,7 @@ # Since: 1.4 ## { 'struct': 'ChardevSocket', 'data': { 'addr' : 'SocketAddress', + '*tls-creds' : 'str', '*server' : 'bool', '*wait' : 'bool', '*nodelay' : 'bool', diff --git a/qemu-char.c b/qemu-char.c index e133f4fc35..ca53e8c376 100644 --- a/qemu-char.c +++ b/qemu-char.c @@ -33,6 +33,9 @@ #include "qapi/qmp-output-visitor.h" #include "qapi-visit.h" #include "qemu/base64.h" +#include "io/channel-socket.h" +#include "io/channel-file.h" +#include "io/channel-tls.h" #include <unistd.h> #include <fcntl.h> @@ -88,39 +91,37 @@ #define READ_BUF_LEN 4096 #define READ_RETRIES 10 -#define CHR_MAX_FILENAME_SIZE 256 #define TCP_MAX_FDS 16 /***********************************************************/ /* Socket address helpers */ -static int SocketAddress_to_str(char *dest, int max_len, - const char *prefix, SocketAddress *addr, - bool is_listen, bool is_telnet) +static char *SocketAddress_to_str(const char *prefix, SocketAddress *addr, + bool is_listen, bool is_telnet) { switch (addr->type) { case SOCKET_ADDRESS_KIND_INET: - return snprintf(dest, max_len, "%s%s:%s:%s%s", prefix, - is_telnet ? "telnet" : "tcp", addr->u.inet->host, - addr->u.inet->port, is_listen ? ",server" : ""); + return g_strdup_printf("%s%s:%s:%s%s", prefix, + is_telnet ? "telnet" : "tcp", addr->u.inet->host, + addr->u.inet->port, is_listen ? ",server" : ""); break; case SOCKET_ADDRESS_KIND_UNIX: - return snprintf(dest, max_len, "%sunix:%s%s", prefix, - addr->u.q_unix->path, is_listen ? ",server" : ""); + return g_strdup_printf("%sunix:%s%s", prefix, + addr->u.q_unix->path, + is_listen ? ",server" : ""); break; case SOCKET_ADDRESS_KIND_FD: - return snprintf(dest, max_len, "%sfd:%s%s", prefix, addr->u.fd->str, - is_listen ? ",server" : ""); + return g_strdup_printf("%sfd:%s%s", prefix, addr->u.fd->str, + is_listen ? ",server" : ""); break; default: abort(); } } -static int sockaddr_to_str(char *dest, int max_len, - struct sockaddr_storage *ss, socklen_t ss_len, - struct sockaddr_storage *ps, socklen_t ps_len, - bool is_listen, bool is_telnet) +static char *sockaddr_to_str(struct sockaddr_storage *ss, socklen_t ss_len, + struct sockaddr_storage *ps, socklen_t ps_len, + bool is_listen, bool is_telnet) { char shost[NI_MAXHOST], sserv[NI_MAXSERV]; char phost[NI_MAXHOST], pserv[NI_MAXSERV]; @@ -129,9 +130,9 @@ static int sockaddr_to_str(char *dest, int max_len, switch (ss->ss_family) { #ifndef _WIN32 case AF_UNIX: - return snprintf(dest, max_len, "unix:%s%s", - ((struct sockaddr_un *)(ss))->sun_path, - is_listen ? ",server" : ""); + return g_strdup_printf("unix:%s%s", + ((struct sockaddr_un *)(ss))->sun_path, + is_listen ? ",server" : ""); #endif case AF_INET6: left = "["; @@ -142,14 +143,14 @@ static int sockaddr_to_str(char *dest, int max_len, sserv, sizeof(sserv), NI_NUMERICHOST | NI_NUMERICSERV); getnameinfo((struct sockaddr *) ps, ps_len, phost, sizeof(phost), pserv, sizeof(pserv), NI_NUMERICHOST | NI_NUMERICSERV); - return snprintf(dest, max_len, "%s:%s%s%s:%s%s <-> %s%s%s:%s", - is_telnet ? "telnet" : "tcp", - left, shost, right, sserv, - is_listen ? ",server" : "", - left, phost, right, pserv); + return g_strdup_printf("%s:%s%s%s:%s%s <-> %s%s%s:%s", + is_telnet ? "telnet" : "tcp", + left, shost, right, sserv, + is_listen ? ",server" : "", + left, phost, right, pserv); default: - return snprintf(dest, max_len, "unknown"); + return g_strdup_printf("unknown"); } } @@ -768,7 +769,7 @@ typedef struct IOWatchPoll { GSource parent; - GIOChannel *channel; + QIOChannel *ioc; GSource *src; IOCanReadHandler *fd_can_read; @@ -791,8 +792,8 @@ static gboolean io_watch_poll_prepare(GSource *source, gint *timeout_) } if (now_active) { - iwp->src = g_io_create_watch(iwp->channel, - G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); + iwp->src = qio_channel_create_watch( + iwp->ioc, G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL); g_source_set_callback(iwp->src, iwp->fd_read, iwp->opaque, NULL); g_source_attach(iwp->src, NULL); } else { @@ -838,9 +839,9 @@ static GSourceFuncs io_watch_poll_funcs = { }; /* Can only be used for read */ -static guint io_add_watch_poll(GIOChannel *channel, +static guint io_add_watch_poll(QIOChannel *ioc, IOCanReadHandler *fd_can_read, - GIOFunc fd_read, + QIOChannelFunc fd_read, gpointer user_data) { IOWatchPoll *iwp; @@ -849,7 +850,7 @@ static guint io_add_watch_poll(GIOChannel *channel, iwp = (IOWatchPoll *) g_source_new(&io_watch_poll_funcs, sizeof(IOWatchPoll)); iwp->fd_can_read = fd_can_read; iwp->opaque = user_data; - iwp->channel = channel; + iwp->ioc = ioc; iwp->fd_read = (GSourceFunc) fd_read; iwp->src = NULL; @@ -885,79 +886,50 @@ static void remove_fd_in_watch(CharDriverState *chr) } } -#ifndef _WIN32 -static GIOChannel *io_channel_from_fd(int fd) -{ - GIOChannel *chan; - - if (fd == -1) { - return NULL; - } - - chan = g_io_channel_unix_new(fd); - - g_io_channel_set_encoding(chan, NULL, NULL); - g_io_channel_set_buffered(chan, FALSE); - - return chan; -} -#endif -static GIOChannel *io_channel_from_socket(int fd) +static int io_channel_send_full(QIOChannel *ioc, + const void *buf, size_t len, + int *fds, size_t nfds) { - GIOChannel *chan; + size_t offset = 0; - if (fd == -1) { - return NULL; - } + while (offset < len) { + ssize_t ret = 0; + struct iovec iov = { .iov_base = (char *)buf + offset, + .iov_len = len - offset }; + + ret = qio_channel_writev_full( + ioc, &iov, 1, + fds, nfds, NULL); + if (ret == QIO_CHANNEL_ERR_BLOCK) { + errno = EAGAIN; + return -1; + } else if (ret < 0) { + if (offset) { + return offset; + } -#ifdef _WIN32 - chan = g_io_channel_win32_new_socket(fd); -#else - chan = g_io_channel_unix_new(fd); -#endif + errno = EINVAL; + return -1; + } - g_io_channel_set_encoding(chan, NULL, NULL); - g_io_channel_set_buffered(chan, FALSE); + offset += ret; + } - return chan; + return offset; } -static int io_channel_send(GIOChannel *fd, const void *buf, size_t len) -{ - size_t offset = 0; - GIOStatus status = G_IO_STATUS_NORMAL; - while (offset < len && status == G_IO_STATUS_NORMAL) { - gsize bytes_written = 0; - - status = g_io_channel_write_chars(fd, buf + offset, len - offset, - &bytes_written, NULL); - offset += bytes_written; - } - - if (offset > 0) { - return offset; - } - switch (status) { - case G_IO_STATUS_NORMAL: - g_assert(len == 0); - return 0; - case G_IO_STATUS_AGAIN: - errno = EAGAIN; - return -1; - default: - break; - } - errno = EINVAL; - return -1; +#ifndef _WIN32 +static int io_channel_send(QIOChannel *ioc, const void *buf, size_t len) +{ + return io_channel_send_full(ioc, buf, len, NULL, 0); } -#ifndef _WIN32 typedef struct FDCharDriver { CharDriverState *chr; - GIOChannel *fd_in, *fd_out; + QIOChannel *ioc_in, *ioc_out; int max_size; } FDCharDriver; @@ -966,17 +938,16 @@ static int fd_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { FDCharDriver *s = chr->opaque; - return io_channel_send(s->fd_out, buf, len); + return io_channel_send(s->ioc_out, buf, len); } -static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; FDCharDriver *s = chr->opaque; int len; uint8_t buf[READ_BUF_LEN]; - GIOStatus status; - gsize bytes_read; + ssize_t ret; len = sizeof(buf); if (len > s->max_size) { @@ -986,15 +957,15 @@ static gboolean fd_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) return TRUE; } - status = g_io_channel_read_chars(chan, (gchar *)buf, - len, &bytes_read, NULL); - if (status == G_IO_STATUS_EOF) { + ret = qio_channel_read( + chan, (gchar *)buf, len, NULL); + if (ret == 0) { remove_fd_in_watch(chr); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); return FALSE; } - if (status == G_IO_STATUS_NORMAL) { - qemu_chr_be_write(chr, buf, bytes_read); + if (ret > 0) { + qemu_chr_be_write(chr, buf, ret); } return TRUE; @@ -1012,7 +983,7 @@ static int fd_chr_read_poll(void *opaque) static GSource *fd_chr_add_watch(CharDriverState *chr, GIOCondition cond) { FDCharDriver *s = chr->opaque; - return g_io_create_watch(s->fd_out, cond); + return qio_channel_create_watch(s->ioc_out, cond); } static void fd_chr_update_read_handler(CharDriverState *chr) @@ -1020,8 +991,9 @@ static void fd_chr_update_read_handler(CharDriverState *chr) FDCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->fd_in) { - chr->fd_in_tag = io_add_watch_poll(s->fd_in, fd_chr_read_poll, + if (s->ioc_in) { + chr->fd_in_tag = io_add_watch_poll(s->ioc_in, + fd_chr_read_poll, fd_chr_read, chr); } } @@ -1031,11 +1003,11 @@ static void fd_chr_close(struct CharDriverState *chr) FDCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->fd_in) { - g_io_channel_unref(s->fd_in); + if (s->ioc_in) { + object_unref(OBJECT(s->ioc_in)); } - if (s->fd_out) { - g_io_channel_unref(s->fd_out); + if (s->ioc_out) { + object_unref(OBJECT(s->ioc_out)); } g_free(s); @@ -1054,8 +1026,8 @@ static CharDriverState *qemu_chr_open_fd(int fd_in, int fd_out, return NULL; } s = g_new0(FDCharDriver, 1); - s->fd_in = io_channel_from_fd(fd_in); - s->fd_out = io_channel_from_fd(fd_out); + s->ioc_in = QIO_CHANNEL(qio_channel_file_new_fd(fd_in)); + s->ioc_out = QIO_CHANNEL(qio_channel_file_new_fd(fd_out)); qemu_set_nonblock(fd_out); s->chr = chr; chr->opaque = s; @@ -1074,15 +1046,18 @@ static CharDriverState *qemu_chr_open_pipe(const char *id, { ChardevHostdev *opts = backend->u.pipe; int fd_in, fd_out; - char filename_in[CHR_MAX_FILENAME_SIZE]; - char filename_out[CHR_MAX_FILENAME_SIZE]; + char *filename_in; + char *filename_out; const char *filename = opts->device; ChardevCommon *common = qapi_ChardevHostdev_base(backend->u.pipe); - snprintf(filename_in, CHR_MAX_FILENAME_SIZE, "%s.in", filename); - snprintf(filename_out, CHR_MAX_FILENAME_SIZE, "%s.out", filename); + + filename_in = g_strdup_printf("%s.in", filename); + filename_out = g_strdup_printf("%s.out", filename); TFR(fd_in = qemu_open(filename_in, O_RDWR | O_BINARY)); TFR(fd_out = qemu_open(filename_out, O_RDWR | O_BINARY)); + g_free(filename_in); + g_free(filename_out); if (fd_in < 0 || fd_out < 0) { if (fd_in >= 0) close(fd_in); @@ -1195,7 +1170,7 @@ static CharDriverState *qemu_chr_open_stdio(const char *id, #define HAVE_CHARDEV_PTY 1 typedef struct { - GIOChannel *fd; + QIOChannel *ioc; int read_bytes; /* Protected by the CharDriverState chr_write_lock. */ @@ -1246,8 +1221,9 @@ static void pty_chr_update_read_handler_locked(CharDriverState *chr) PtyCharDriver *s = chr->opaque; GPollFD pfd; int rc; + QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc); - pfd.fd = g_io_channel_unix_get_fd(s->fd); + pfd.fd = fioc->fd; pfd.events = G_IO_OUT; pfd.revents = 0; do { @@ -1281,7 +1257,7 @@ static int pty_chr_write(CharDriverState *chr, const uint8_t *buf, int len) return 0; } } - return io_channel_send(s->fd, buf, len); + return io_channel_send(s->ioc, buf, len); } static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond) @@ -1290,7 +1266,7 @@ static GSource *pty_chr_add_watch(CharDriverState *chr, GIOCondition cond) if (!s->connected) { return NULL; } - return g_io_create_watch(s->fd, cond); + return qio_channel_create_watch(s->ioc, cond); } static int pty_chr_read_poll(void *opaque) @@ -1302,13 +1278,13 @@ static int pty_chr_read_poll(void *opaque) return s->read_bytes; } -static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean pty_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; PtyCharDriver *s = chr->opaque; - gsize size, len; + gsize len; uint8_t buf[READ_BUF_LEN]; - GIOStatus status; + ssize_t ret; len = sizeof(buf); if (len > s->read_bytes) @@ -1316,13 +1292,13 @@ static gboolean pty_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) if (len == 0) { return TRUE; } - status = g_io_channel_read_chars(s->fd, (gchar *)buf, len, &size, NULL); - if (status != G_IO_STATUS_NORMAL) { + ret = qio_channel_read(s->ioc, (char *)buf, len, NULL); + if (ret <= 0) { pty_chr_state(chr, 0); return FALSE; } else { pty_chr_state(chr, 1); - qemu_chr_be_write(chr, buf, size); + qemu_chr_be_write(chr, buf, ret); } return TRUE; } @@ -1364,7 +1340,8 @@ static void pty_chr_state(CharDriverState *chr, int connected) s->open_tag = g_idle_add(qemu_chr_be_generic_open_func, chr); } if (!chr->fd_in_tag) { - chr->fd_in_tag = io_add_watch_poll(s->fd, pty_chr_read_poll, + chr->fd_in_tag = io_add_watch_poll(s->ioc, + pty_chr_read_poll, pty_chr_read, chr); } } @@ -1373,13 +1350,10 @@ static void pty_chr_state(CharDriverState *chr, int connected) static void pty_chr_close(struct CharDriverState *chr) { PtyCharDriver *s = chr->opaque; - int fd; qemu_mutex_lock(&chr->chr_write_lock); pty_chr_state(chr, 0); - fd = g_io_channel_unix_get_fd(s->fd); - g_io_channel_unref(s->fd); - close(fd); + object_unref(OBJECT(s->ioc)); if (s->timer_tag) { g_source_remove(s->timer_tag); s->timer_tag = 0; @@ -1430,7 +1404,7 @@ static CharDriverState *qemu_chr_open_pty(const char *id, chr->chr_add_watch = pty_chr_add_watch; chr->explicit_be_open = true; - s->fd = io_channel_from_fd(master_fd); + s->ioc = QIO_CHANNEL(qio_channel_file_new_fd(master_fd)); s->timer_tag = 0; return chr; @@ -1554,12 +1528,13 @@ static void tty_serial_init(int fd, int speed, static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { FDCharDriver *s = chr->opaque; + QIOChannelFile *fioc = QIO_CHANNEL_FILE(s->ioc_in); switch(cmd) { case CHR_IOCTL_SERIAL_SET_PARAMS: { QEMUSerialSetParams *ssp = arg; - tty_serial_init(g_io_channel_unix_get_fd(s->fd_in), + tty_serial_init(fioc->fd, ssp->speed, ssp->parity, ssp->data_bits, ssp->stop_bits); } @@ -1568,7 +1543,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { int enable = *(int *)arg; if (enable) { - tcsendbreak(g_io_channel_unix_get_fd(s->fd_in), 1); + tcsendbreak(fioc->fd, 1); } } break; @@ -1576,7 +1551,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { int sarg = 0; int *targ = (int *)arg; - ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &sarg); + ioctl(fioc->fd, TIOCMGET, &sarg); *targ = 0; if (sarg & TIOCM_CTS) *targ |= CHR_TIOCM_CTS; @@ -1596,7 +1571,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) { int sarg = *(int *)arg; int targ = 0; - ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMGET, &targ); + ioctl(fioc->fd, TIOCMGET, &targ); targ &= ~(CHR_TIOCM_CTS | CHR_TIOCM_CAR | CHR_TIOCM_DSR | CHR_TIOCM_RI | CHR_TIOCM_DTR | CHR_TIOCM_RTS); if (sarg & CHR_TIOCM_CTS) @@ -1611,7 +1586,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) targ |= TIOCM_DTR; if (sarg & CHR_TIOCM_RTS) targ |= TIOCM_RTS; - ioctl(g_io_channel_unix_get_fd(s->fd_in), TIOCMSET, &targ); + ioctl(fioc->fd, TIOCMSET, &targ); } break; default: @@ -1622,18 +1597,7 @@ static int tty_serial_ioctl(CharDriverState *chr, int cmd, void *arg) static void qemu_chr_close_tty(CharDriverState *chr) { - FDCharDriver *s = chr->opaque; - int fd = -1; - - if (s) { - fd = g_io_channel_unix_get_fd(s->fd_in); - } - fd_chr_close(chr); - - if (fd >= 0) { - close(fd); - } } static CharDriverState *qemu_chr_open_tty_fd(int fd, @@ -1776,18 +1740,19 @@ static CharDriverState *qemu_chr_open_pp_fd(int fd, return NULL; } - drv = g_new0(ParallelCharDriver, 1); - drv->fd = fd; - drv->mode = IEEE1284_MODE_COMPAT; - chr = qemu_chr_alloc(backend, errp); if (!chr) { return NULL; } + + drv = g_new0(ParallelCharDriver, 1); + chr->opaque = drv; chr->chr_write = null_chr_write; chr->chr_ioctl = pp_ioctl; chr->chr_close = pp_close; - chr->opaque = drv; + + drv->fd = fd; + drv->mode = IEEE1284_MODE_COMPAT; return chr; } @@ -2115,7 +2080,7 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename, OVERLAPPED ov; int ret; DWORD size; - char openname[CHR_MAX_FILENAME_SIZE]; + char *openname; s->fpipe = TRUE; @@ -2130,11 +2095,12 @@ static int win_chr_pipe_init(CharDriverState *chr, const char *filename, goto fail; } - snprintf(openname, sizeof(openname), "\\\\.\\pipe\\%s", filename); + openname = g_strdup_printf("\\\\.\\pipe\\%s", filename); s->hcom = CreateNamedPipe(openname, PIPE_ACCESS_DUPLEX | FILE_FLAG_OVERLAPPED, PIPE_TYPE_BYTE | PIPE_READMODE_BYTE | PIPE_WAIT, MAXCONNECT, NSENDBUF, NRECVBUF, NTIMEOUT, NULL); + g_free(openname); if (s->hcom == INVALID_HANDLE_VALUE) { error_setg(errp, "Failed CreateNamedPipe (%lu)", GetLastError()); s->hcom = NULL; @@ -2454,8 +2420,7 @@ err1: /* UDP Net console */ typedef struct { - int fd; - GIOChannel *chan; + QIOChannel *ioc; uint8_t buf[READ_BUF_LEN]; int bufcnt; int bufptr; @@ -2466,17 +2431,9 @@ typedef struct { static int udp_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { NetCharDriver *s = chr->opaque; - gsize bytes_written; - GIOStatus status; - - status = g_io_channel_write_chars(s->chan, (const gchar *)buf, len, &bytes_written, NULL); - if (status == G_IO_STATUS_EOF) { - return 0; - } else if (status != G_IO_STATUS_NORMAL) { - return -1; - } - return bytes_written; + return qio_channel_write( + s->ioc, (const char *)buf, len, NULL); } static int udp_chr_read_poll(void *opaque) @@ -2497,24 +2454,22 @@ static int udp_chr_read_poll(void *opaque) return s->max_size; } -static gboolean udp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; NetCharDriver *s = chr->opaque; - gsize bytes_read = 0; - GIOStatus status; + ssize_t ret; if (s->max_size == 0) { return TRUE; } - status = g_io_channel_read_chars(s->chan, (gchar *)s->buf, sizeof(s->buf), - &bytes_read, NULL); - s->bufcnt = bytes_read; - s->bufptr = s->bufcnt; - if (status != G_IO_STATUS_NORMAL) { + ret = qio_channel_read( + s->ioc, (char *)s->buf, sizeof(s->buf), NULL); + if (ret <= 0) { remove_fd_in_watch(chr); return FALSE; } + s->bufcnt = ret; s->bufptr = 0; while (s->max_size > 0 && s->bufptr < s->bufcnt) { @@ -2531,8 +2486,9 @@ static void udp_chr_update_read_handler(CharDriverState *chr) NetCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->chan) { - chr->fd_in_tag = io_add_watch_poll(s->chan, udp_chr_read_poll, + if (s->ioc) { + chr->fd_in_tag = io_add_watch_poll(s->ioc, + udp_chr_read_poll, udp_chr_read, chr); } } @@ -2542,17 +2498,16 @@ static void udp_chr_close(CharDriverState *chr) NetCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->chan) { - g_io_channel_unref(s->chan); - closesocket(s->fd); + if (s->ioc) { + object_unref(OBJECT(s->ioc)); } g_free(s); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static CharDriverState *qemu_chr_open_udp_fd(int fd, - ChardevCommon *backend, - Error **errp) +static CharDriverState *qemu_chr_open_udp(QIOChannelSocket *sioc, + ChardevCommon *backend, + Error **errp) { CharDriverState *chr = NULL; NetCharDriver *s = NULL; @@ -2563,8 +2518,7 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd, } s = g_new0(NetCharDriver, 1); - s->fd = fd; - s->chan = io_channel_from_socket(s->fd); + s->ioc = QIO_CHANNEL(sioc); s->bufcnt = 0; s->bufptr = 0; chr->opaque = s; @@ -2580,19 +2534,20 @@ static CharDriverState *qemu_chr_open_udp_fd(int fd, /* TCP Net console */ typedef struct { - - GIOChannel *chan, *listen_chan; + QIOChannel *ioc; /* Client I/O channel */ + QIOChannelSocket *sioc; /* Client master channel */ + QIOChannelSocket *listen_ioc; guint listen_tag; - int fd, listen_fd; + QCryptoTLSCreds *tls_creds; int connected; int max_size; int do_telnetopt; int do_nodelay; int is_unix; int *read_msgfds; - int read_msgfds_num; + size_t read_msgfds_num; int *write_msgfds; - int write_msgfds_num; + size_t write_msgfds_num; SocketAddress *addr; bool is_listen; @@ -2626,68 +2581,27 @@ static void check_report_connect_error(CharDriverState *chr, qemu_chr_socket_restart_timer(chr); } -static gboolean tcp_chr_accept(GIOChannel *chan, GIOCondition cond, void *opaque); - -#ifndef _WIN32 -static int unix_send_msgfds(CharDriverState *chr, const uint8_t *buf, int len) -{ - TCPCharDriver *s = chr->opaque; - struct msghdr msgh; - struct iovec iov; - int r; - - size_t fd_size = s->write_msgfds_num * sizeof(int); - char control[CMSG_SPACE(fd_size)]; - struct cmsghdr *cmsg; - - memset(&msgh, 0, sizeof(msgh)); - memset(control, 0, sizeof(control)); - - /* set the payload */ - iov.iov_base = (uint8_t *) buf; - iov.iov_len = len; - - msgh.msg_iov = &iov; - msgh.msg_iovlen = 1; - - msgh.msg_control = control; - msgh.msg_controllen = sizeof(control); - - cmsg = CMSG_FIRSTHDR(&msgh); - - cmsg->cmsg_len = CMSG_LEN(fd_size); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SCM_RIGHTS; - memcpy(CMSG_DATA(cmsg), s->write_msgfds, fd_size); - - do { - r = sendmsg(s->fd, &msgh, 0); - } while (r < 0 && errno == EINTR); - - /* free the written msgfds, no matter what */ - if (s->write_msgfds_num) { - g_free(s->write_msgfds); - s->write_msgfds = 0; - s->write_msgfds_num = 0; - } - - return r; -} -#endif +static gboolean tcp_chr_accept(QIOChannel *chan, + GIOCondition cond, + void *opaque); /* Called with chr_write_lock held. */ static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len) { TCPCharDriver *s = chr->opaque; if (s->connected) { -#ifndef _WIN32 - if (s->is_unix && s->write_msgfds_num) { - return unix_send_msgfds(chr, buf, len); - } else -#endif - { - return io_channel_send(s->chan, buf, len); + int ret = io_channel_send_full(s->ioc, buf, len, + s->write_msgfds, + s->write_msgfds_num); + + /* free the written msgfds, no matter what */ + if (s->write_msgfds_num) { + g_free(s->write_msgfds); + s->write_msgfds = 0; + s->write_msgfds_num = 0; } + + return ret; } else { /* XXX: indicate an error ? */ return len; @@ -2783,6 +2697,10 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) { TCPCharDriver *s = chr->opaque; + if (!qio_channel_has_feature(s->ioc, + QIO_CHANNEL_FEATURE_FD_PASS)) { + return -1; + } /* clear old pending fd array */ g_free(s->write_msgfds); @@ -2796,27 +2714,26 @@ static int tcp_set_msgfds(CharDriverState *chr, int *fds, int num) return 0; } -#ifndef _WIN32 -static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg) +static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len) { TCPCharDriver *s = chr->opaque; - struct cmsghdr *cmsg; - - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { - int fd_size, i; - - if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) || - cmsg->cmsg_level != SOL_SOCKET || - cmsg->cmsg_type != SCM_RIGHTS) { - continue; - } - - fd_size = cmsg->cmsg_len - CMSG_LEN(0); - - if (!fd_size) { - continue; - } + struct iovec iov = { .iov_base = buf, .iov_len = len }; + int ret; + size_t i; + int *msgfds = NULL; + size_t msgfds_num = 0; + + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, + NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, + NULL); + } + if (msgfds_num) { /* close and clean read_msgfds */ for (i = 0; i < s->read_msgfds_num; i++) { close(s->read_msgfds[i]); @@ -2826,77 +2743,31 @@ static void unix_process_msgfd(CharDriverState *chr, struct msghdr *msg) g_free(s->read_msgfds); } - s->read_msgfds_num = fd_size / sizeof(int); - s->read_msgfds = g_malloc(fd_size); - memcpy(s->read_msgfds, CMSG_DATA(cmsg), fd_size); - - for (i = 0; i < s->read_msgfds_num; i++) { - int fd = s->read_msgfds[i]; - if (fd < 0) { - continue; - } - - /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ - qemu_set_block(fd); - - #ifndef MSG_CMSG_CLOEXEC - qemu_set_cloexec(fd); - #endif - } + s->read_msgfds = msgfds; + s->read_msgfds_num = msgfds_num; } -} - -static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len) -{ - TCPCharDriver *s = chr->opaque; - struct msghdr msg = { NULL, }; - struct iovec iov[1]; - union { - struct cmsghdr cmsg; - char control[CMSG_SPACE(sizeof(int) * TCP_MAX_FDS)]; - } msg_control; - int flags = 0; - ssize_t ret; - iov[0].iov_base = buf; - iov[0].iov_len = len; + for (i = 0; i < s->read_msgfds_num; i++) { + int fd = s->read_msgfds[i]; + if (fd < 0) { + continue; + } - msg.msg_iov = iov; - msg.msg_iovlen = 1; - msg.msg_control = &msg_control; - msg.msg_controllen = sizeof(msg_control); + /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */ + qemu_set_block(fd); -#ifdef MSG_CMSG_CLOEXEC - flags |= MSG_CMSG_CLOEXEC; +#ifndef MSG_CMSG_CLOEXEC + qemu_set_cloexec(fd); #endif - do { - ret = recvmsg(s->fd, &msg, flags); - } while (ret == -1 && errno == EINTR); - - if (ret > 0 && s->is_unix) { - unix_process_msgfd(chr, &msg); } return ret; } -#else -static ssize_t tcp_chr_recv(CharDriverState *chr, char *buf, size_t len) -{ - TCPCharDriver *s = chr->opaque; - ssize_t ret; - - do { - ret = qemu_recv(s->fd, buf, len, 0); - } while (ret == -1 && socket_error() == EINTR); - - return ret; -} -#endif static GSource *tcp_chr_add_watch(CharDriverState *chr, GIOCondition cond) { TCPCharDriver *s = chr->opaque; - return g_io_create_watch(s->chan, cond); + return qio_channel_create_watch(s->ioc, cond); } static void tcp_chr_disconnect(CharDriverState *chr) @@ -2904,24 +2775,25 @@ static void tcp_chr_disconnect(CharDriverState *chr) TCPCharDriver *s = chr->opaque; s->connected = 0; - if (s->listen_chan) { - s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, - tcp_chr_accept, chr); + if (s->listen_ioc) { + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); } remove_fd_in_watch(chr); - g_io_channel_unref(s->chan); - s->chan = NULL; - closesocket(s->fd); - s->fd = -1; - SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, - "disconnected:", s->addr, s->is_listen, s->is_telnet); + object_unref(OBJECT(s->sioc)); + s->sioc = NULL; + object_unref(OBJECT(s->ioc)); + s->ioc = NULL; + g_free(chr->filename); + chr->filename = SocketAddress_to_str("disconnected:", s->addr, + s->is_listen, s->is_telnet); qemu_chr_be_event(chr, CHR_EVENT_CLOSED); if (s->reconnect_time) { qemu_chr_socket_restart_timer(chr); } } -static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) +static gboolean tcp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; @@ -2935,9 +2807,7 @@ static gboolean tcp_chr_read(GIOChannel *chan, GIOCondition cond, void *opaque) if (len > s->max_size) len = s->max_size; size = tcp_chr_recv(chr, (void *)buf, len); - if (size == 0 || - (size < 0 && - socket_error() != EAGAIN && socket_error() != EWOULDBLOCK)) { + if (size == 0 || size == -1) { /* connection closed */ tcp_chr_disconnect(chr); } else if (size > 0) { @@ -2985,25 +2855,17 @@ static void tcp_chr_connect(void *opaque) { CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; - struct sockaddr_storage ss, ps; - socklen_t ss_len = sizeof(ss), ps_len = sizeof(ps); - - memset(&ss, 0, ss_len); - if (getsockname(s->fd, (struct sockaddr *) &ss, &ss_len) != 0) { - snprintf(chr->filename, CHR_MAX_FILENAME_SIZE, - "Error in getsockname: %s\n", strerror(errno)); - } else if (getpeername(s->fd, (struct sockaddr *) &ps, &ps_len) != 0) { - snprintf(chr->filename, CHR_MAX_FILENAME_SIZE, - "Error in getpeername: %s\n", strerror(errno)); - } else { - sockaddr_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, - &ss, ss_len, &ps, ps_len, - s->is_listen, s->is_telnet); - } + + g_free(chr->filename); + chr->filename = sockaddr_to_str( + &s->sioc->localAddr, s->sioc->localAddrLen, + &s->sioc->remoteAddr, s->sioc->remoteAddrLen, + s->is_listen, s->is_telnet); s->connected = 1; - if (s->chan) { - chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll, + if (s->ioc) { + chr->fd_in_tag = io_add_watch_poll(s->ioc, + tcp_chr_read_poll, tcp_chr_read, chr); } qemu_chr_be_generic_open(chr); @@ -3014,82 +2876,195 @@ static void tcp_chr_update_read_handler(CharDriverState *chr) TCPCharDriver *s = chr->opaque; remove_fd_in_watch(chr); - if (s->chan) { - chr->fd_in_tag = io_add_watch_poll(s->chan, tcp_chr_read_poll, + if (s->ioc) { + chr->fd_in_tag = io_add_watch_poll(s->ioc, + tcp_chr_read_poll, tcp_chr_read, chr); } } -#define IACSET(x,a,b,c) x[0] = a; x[1] = b; x[2] = c; -static void tcp_chr_telnet_init(int fd) +typedef struct { + CharDriverState *chr; + char buf[12]; + size_t buflen; +} TCPCharDriverTelnetInit; + +static gboolean tcp_chr_telnet_init_io(QIOChannel *ioc, + GIOCondition cond G_GNUC_UNUSED, + gpointer user_data) { - char buf[3]; - /* Send the telnet negotion to put telnet in binary, no echo, single char mode */ - IACSET(buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */ - send(fd, (char *)buf, 3, 0); - IACSET(buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */ - send(fd, (char *)buf, 3, 0); - IACSET(buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */ - send(fd, (char *)buf, 3, 0); - IACSET(buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */ - send(fd, (char *)buf, 3, 0); + TCPCharDriverTelnetInit *init = user_data; + ssize_t ret; + + ret = qio_channel_write(ioc, init->buf, init->buflen, NULL); + if (ret < 0) { + if (ret == QIO_CHANNEL_ERR_BLOCK) { + ret = 0; + } else { + tcp_chr_disconnect(init->chr); + return FALSE; + } + } + init->buflen -= ret; + + if (init->buflen == 0) { + tcp_chr_connect(init->chr); + return FALSE; + } + + memmove(init->buf, init->buf + ret, init->buflen); + + return TRUE; } -static int tcp_chr_add_client(CharDriverState *chr, int fd) +static void tcp_chr_telnet_init(CharDriverState *chr) +{ + TCPCharDriver *s = chr->opaque; + TCPCharDriverTelnetInit *init = + g_new0(TCPCharDriverTelnetInit, 1); + size_t n = 0; + + init->chr = chr; + init->buflen = 12; + +#define IACSET(x, a, b, c) \ + do { \ + x[n++] = a; \ + x[n++] = b; \ + x[n++] = c; \ + } while (0) + + /* Prep the telnet negotion to put telnet in binary, + * no echo, single char mode */ + IACSET(init->buf, 0xff, 0xfb, 0x01); /* IAC WILL ECHO */ + IACSET(init->buf, 0xff, 0xfb, 0x03); /* IAC WILL Suppress go ahead */ + IACSET(init->buf, 0xff, 0xfb, 0x00); /* IAC WILL Binary */ + IACSET(init->buf, 0xff, 0xfd, 0x00); /* IAC DO Binary */ + +#undef IACSET + + qio_channel_add_watch( + s->ioc, G_IO_OUT, + tcp_chr_telnet_init_io, + init, NULL); +} + + +static void tcp_chr_tls_handshake(Object *source, + Error *err, + gpointer user_data) { + CharDriverState *chr = user_data; TCPCharDriver *s = chr->opaque; - if (s->fd != -1) + + if (err) { + tcp_chr_disconnect(chr); + } else { + if (s->do_telnetopt) { + tcp_chr_telnet_init(chr); + } else { + tcp_chr_connect(chr); + } + } +} + + +static void tcp_chr_tls_init(CharDriverState *chr) +{ + TCPCharDriver *s = chr->opaque; + QIOChannelTLS *tioc; + Error *err = NULL; + + if (s->is_listen) { + tioc = qio_channel_tls_new_server( + s->ioc, s->tls_creds, + NULL, /* XXX Use an ACL */ + &err); + } else { + tioc = qio_channel_tls_new_client( + s->ioc, s->tls_creds, + s->addr->u.inet->host, + &err); + } + if (tioc == NULL) { + error_free(err); + tcp_chr_disconnect(chr); + } + object_unref(OBJECT(s->ioc)); + s->ioc = QIO_CHANNEL(tioc); + + qio_channel_tls_handshake(tioc, + tcp_chr_tls_handshake, + chr, + NULL); +} + + +static int tcp_chr_new_client(CharDriverState *chr, QIOChannelSocket *sioc) +{ + TCPCharDriver *s = chr->opaque; + if (s->ioc != NULL) { return -1; + } - qemu_set_nonblock(fd); - if (s->do_nodelay) - socket_set_nodelay(fd); - s->fd = fd; - s->chan = io_channel_from_socket(fd); + s->ioc = QIO_CHANNEL(sioc); + object_ref(OBJECT(sioc)); + s->sioc = sioc; + object_ref(OBJECT(sioc)); + + if (s->do_nodelay) { + qio_channel_set_delay(s->ioc, false); + } if (s->listen_tag) { g_source_remove(s->listen_tag); s->listen_tag = 0; } - tcp_chr_connect(chr); + + if (s->tls_creds) { + tcp_chr_tls_init(chr); + } else { + if (s->do_telnetopt) { + tcp_chr_telnet_init(chr); + } else { + tcp_chr_connect(chr); + } + } return 0; } -static gboolean tcp_chr_accept(GIOChannel *channel, GIOCondition cond, void *opaque) + +static int tcp_chr_add_client(CharDriverState *chr, int fd) +{ + int ret; + QIOChannelSocket *sioc; + + sioc = qio_channel_socket_new_fd(fd, NULL); + if (!sioc) { + return -1; + } + qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL); + ret = tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); + return ret; +} + +static gboolean tcp_chr_accept(QIOChannel *channel, + GIOCondition cond, + void *opaque) { CharDriverState *chr = opaque; - TCPCharDriver *s = chr->opaque; - struct sockaddr_in saddr; -#ifndef _WIN32 - struct sockaddr_un uaddr; -#endif - struct sockaddr *addr; - socklen_t len; - int fd; + QIOChannelSocket *sioc; - for(;;) { -#ifndef _WIN32 - if (s->is_unix) { - len = sizeof(uaddr); - addr = (struct sockaddr *)&uaddr; - } else -#endif - { - len = sizeof(saddr); - addr = (struct sockaddr *)&saddr; - } - fd = qemu_accept(s->listen_fd, addr, &len); - if (fd < 0 && errno != EINTR) { - s->listen_tag = 0; - return FALSE; - } else if (fd >= 0) { - if (s->do_telnetopt) - tcp_chr_telnet_init(fd); - break; - } + sioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(channel), + NULL); + if (!sioc) { + return TRUE; } - if (tcp_chr_add_client(chr, fd) < 0) - close(fd); + + tcp_chr_new_client(chr, sioc); + + object_unref(OBJECT(sioc)); return TRUE; } @@ -3104,22 +3079,16 @@ static void tcp_chr_close(CharDriverState *chr) s->reconnect_timer = 0; } qapi_free_SocketAddress(s->addr); - if (s->fd >= 0) { - remove_fd_in_watch(chr); - if (s->chan) { - g_io_channel_unref(s->chan); - } - closesocket(s->fd); + remove_fd_in_watch(chr); + if (s->ioc) { + object_unref(OBJECT(s->ioc)); } - if (s->listen_fd >= 0) { - if (s->listen_tag) { - g_source_remove(s->listen_tag); - s->listen_tag = 0; - } - if (s->listen_chan) { - g_io_channel_unref(s->listen_chan); - } - closesocket(s->listen_fd); + if (s->listen_tag) { + g_source_remove(s->listen_tag); + s->listen_tag = 0; + } + if (s->listen_ioc) { + object_unref(OBJECT(s->listen_ioc)); } if (s->read_msgfds_num) { for (i = 0; i < s->read_msgfds_num; i++) { @@ -3127,6 +3096,9 @@ static void tcp_chr_close(CharDriverState *chr) } g_free(s->read_msgfds); } + if (s->tls_creds) { + object_unref(OBJECT(s->tls_creds)); + } if (s->write_msgfds_num) { g_free(s->write_msgfds); } @@ -3134,57 +3106,63 @@ static void tcp_chr_close(CharDriverState *chr) qemu_chr_be_event(chr, CHR_EVENT_CLOSED); } -static void qemu_chr_finish_socket_connection(CharDriverState *chr, int fd) +static void qemu_chr_finish_socket_connection(CharDriverState *chr, + QIOChannelSocket *sioc) { TCPCharDriver *s = chr->opaque; if (s->is_listen) { - s->listen_fd = fd; - s->listen_chan = io_channel_from_socket(s->listen_fd); - s->listen_tag = g_io_add_watch(s->listen_chan, G_IO_IN, - tcp_chr_accept, chr); + s->listen_ioc = sioc; + s->listen_tag = qio_channel_add_watch( + QIO_CHANNEL(s->listen_ioc), G_IO_IN, tcp_chr_accept, chr, NULL); } else { - s->connected = 1; - s->fd = fd; - socket_set_nodelay(fd); - s->chan = io_channel_from_socket(s->fd); - tcp_chr_connect(chr); + tcp_chr_new_client(chr, sioc); + object_unref(OBJECT(sioc)); } } -static void qemu_chr_socket_connected(int fd, Error *err, void *opaque) +static void qemu_chr_socket_connected(Object *src, Error *err, void *opaque) { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(src); CharDriverState *chr = opaque; TCPCharDriver *s = chr->opaque; - if (fd < 0) { + if (err) { check_report_connect_error(chr, err); + object_unref(src); return; } s->connect_err_reported = false; - qemu_chr_finish_socket_connection(chr, fd); + qemu_chr_finish_socket_connection(chr, sioc); } static bool qemu_chr_open_socket_fd(CharDriverState *chr, Error **errp) { TCPCharDriver *s = chr->opaque; - int fd; + QIOChannelSocket *sioc = qio_channel_socket_new(); if (s->is_listen) { - fd = socket_listen(s->addr, errp); + if (qio_channel_socket_listen_sync(sioc, s->addr, errp) < 0) { + goto fail; + } + qemu_chr_finish_socket_connection(chr, sioc); } else if (s->reconnect_time) { - fd = socket_connect(s->addr, errp, qemu_chr_socket_connected, chr); - return fd >= 0; + qio_channel_socket_connect_async(sioc, s->addr, + qemu_chr_socket_connected, + chr, NULL); } else { - fd = socket_connect(s->addr, errp, NULL, NULL); - } - if (fd < 0) { - return false; + if (qio_channel_socket_connect_sync(sioc, s->addr, errp) < 0) { + goto fail; + } + qemu_chr_finish_socket_connection(chr, sioc); } - qemu_chr_finish_socket_connection(chr, fd); return true; + + fail: + object_unref(OBJECT(sioc)); + return false; } /*********************************************************/ @@ -3651,6 +3629,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, const char *path = qemu_opt_get(opts, "path"); const char *host = qemu_opt_get(opts, "host"); const char *port = qemu_opt_get(opts, "port"); + const char *tls_creds = qemu_opt_get(opts, "tls-creds"); SocketAddress *addr; if (!path) { @@ -3662,6 +3641,11 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, error_setg(errp, "chardev: socket: no port given"); return; } + } else { + if (tls_creds) { + error_setg(errp, "TLS can only be used over TCP socket"); + return; + } } backend->u.socket = g_new0(ChardevSocket, 1); @@ -3677,6 +3661,7 @@ static void qemu_chr_parse_socket(QemuOpts *opts, ChardevBackend *backend, backend->u.socket->wait = is_waitconnect; backend->u.socket->has_reconnect = true; backend->u.socket->reconnect = reconnect; + backend->u.socket->tls_creds = g_strdup(tls_creds); addr = g_new0(SocketAddress, 1); if (path) { @@ -4104,6 +4089,9 @@ QemuOptsList qemu_chardev_opts = { .name = "telnet", .type = QEMU_OPT_BOOL, },{ + .name = "tls-creds", + .type = QEMU_OPT_STRING, + },{ .name = "width", .type = QEMU_OPT_NUMBER, },{ @@ -4315,12 +4303,43 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, } s = g_new0(TCPCharDriver, 1); - s->fd = -1; - s->listen_fd = -1; s->is_unix = addr->type == SOCKET_ADDRESS_KIND_UNIX; s->is_listen = is_listen; s->is_telnet = is_telnet; s->do_nodelay = do_nodelay; + if (sock->tls_creds) { + Object *creds; + creds = object_resolve_path_component( + object_get_objects_root(), sock->tls_creds); + if (!creds) { + error_setg(errp, "No TLS credentials with id '%s'", + sock->tls_creds); + goto error; + } + s->tls_creds = (QCryptoTLSCreds *) + object_dynamic_cast(creds, + TYPE_QCRYPTO_TLS_CREDS); + if (!s->tls_creds) { + error_setg(errp, "Object with id '%s' is not TLS credentials", + sock->tls_creds); + goto error; + } + object_ref(OBJECT(s->tls_creds)); + if (is_listen) { + if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) { + error_setg(errp, "%s", + "Expected TLS credentials for server endpoint"); + goto error; + } + } else { + if (s->tls_creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) { + error_setg(errp, "%s", + "Expected TLS credentials for client endpoint"); + goto error; + } + } + } + qapi_copy_SocketAddress(&s->addr, sock->addr); chr->opaque = s; @@ -4335,9 +4354,8 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, /* be isn't opened until we get a connection */ chr->explicit_be_open = true; - chr->filename = g_malloc(CHR_MAX_FILENAME_SIZE); - SocketAddress_to_str(chr->filename, CHR_MAX_FILENAME_SIZE, "disconnected:", - addr, is_listen, is_telnet); + chr->filename = SocketAddress_to_str("disconnected:", + addr, is_listen, is_telnet); if (is_listen) { if (is_telnet) { @@ -4350,19 +4368,25 @@ static CharDriverState *qmp_chardev_open_socket(const char *id, if (s->reconnect_time) { socket_try_connect(chr); } else if (!qemu_chr_open_socket_fd(chr, errp)) { - g_free(s); - qemu_chr_free_common(chr); - return NULL; + goto error; } if (is_listen && is_waitconnect) { fprintf(stderr, "QEMU waiting for connection on: %s\n", chr->filename); - tcp_chr_accept(s->listen_chan, G_IO_IN, chr); - qemu_set_nonblock(s->listen_fd); + tcp_chr_accept(QIO_CHANNEL(s->listen_ioc), G_IO_IN, chr); + qio_channel_set_blocking(QIO_CHANNEL(s->listen_ioc), false, NULL); } return chr; + + error: + if (s->tls_creds) { + object_unref(OBJECT(s->tls_creds)); + } + g_free(s); + qemu_chr_free_common(chr); + return NULL; } static CharDriverState *qmp_chardev_open_udp(const char *id, @@ -4372,13 +4396,15 @@ static CharDriverState *qmp_chardev_open_udp(const char *id, { ChardevUdp *udp = backend->u.udp; ChardevCommon *common = qapi_ChardevUdp_base(backend->u.udp); - int fd; + QIOChannelSocket *sioc = qio_channel_socket_new(); - fd = socket_dgram(udp->remote, udp->local, errp); - if (fd < 0) { + if (qio_channel_socket_dgram_sync(sioc, + udp->remote, udp->local, + errp) < 0) { + object_unref(OBJECT(sioc)); return NULL; } - return qemu_chr_open_udp_fd(fd, common, errp); + return qemu_chr_open_udp(sioc, common, errp); } ChardevReturn *qmp_chardev_add(const char *id, ChardevBackend *backend, diff --git a/qemu-doc.texi b/qemu-doc.texi index 7bc388231f..ca4d9de15e 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -259,7 +259,7 @@ Linux should boot and give you a prompt. @example @c man begin SYNOPSIS -usage: qemu-system-i386 [options] [@var{disk_image}] +@command{qemu-system-i386} [@var{options}] [@var{disk_image}] @c man end @end example @@ -1406,7 +1406,7 @@ no type is given, the HCI logic corresponds to @code{-bt hci,vlan=0}. This USB device implements the USB Transport Layer of HCI. Example usage: @example -qemu-system-i386 [...OPTIONS...] -usbdevice bt:hci,vlan=3 -bt device:keyboard,vlan=3 +@command{qemu-system-i386} [...@var{OPTIONS}...] @option{-usbdevice} bt:hci,vlan=3 @option{-bt} device:keyboard,vlan=3 @end example @end table @@ -2755,7 +2755,7 @@ qemu-i386 /usr/local/qemu-i386/wine/bin/wine \ @subsection Command line options @example -usage: qemu-i386 [-h] [-d] [-L path] [-s size] [-cpu model] [-g port] [-B offset] [-R size] program [arguments...] +@command{qemu-i386} [@option{-h]} [@option{-d]} [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-cpu} @var{model}] [@option{-g} @var{port}] [@option{-B} @var{offset}] [@option{-R} @var{size}] @var{program} [@var{arguments}...] @end example @table @option @@ -2897,7 +2897,7 @@ qemu-sparc64 /bin/ls @subsection Command line options @example -usage: qemu-sparc64 [-h] [-d] [-L path] [-s size] [-bsd type] program [arguments...] +@command{qemu-sparc64} [@option{-h]} [@option{-d]} [@option{-L} @var{path}] [@option{-s} @var{size}] [@option{-bsd} @var{type}] @var{program} [@var{arguments}...] @end example @table @option diff --git a/qemu-ga.texi b/qemu-ga.texi index 536a9b5241..0e53bf6b2c 100644 --- a/qemu-ga.texi +++ b/qemu-ga.texi @@ -1,6 +1,6 @@ @example @c man begin SYNOPSIS -usage: qemu-ga [OPTIONS] +@command{qemu-ga} [@var{OPTIONS}] @c man end @end example diff --git a/qemu-img.texi b/qemu-img.texi index 55c6be391d..7163a108e2 100644 --- a/qemu-img.texi +++ b/qemu-img.texi @@ -1,6 +1,6 @@ @example @c man begin SYNOPSIS -usage: qemu-img command [command options] +@command{qemu-img} @var{command} [@var{command} @var{options}] @c man end @end example diff --git a/qemu-options.hx b/qemu-options.hx index b4763ba226..f31a240bed 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2092,7 +2092,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev, "-chardev null,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n" "-chardev socket,id=id[,host=host],port=port[,to=to][,ipv4][,ipv6][,nodelay][,reconnect=seconds]\n" " [,server][,nowait][,telnet][,reconnect=seconds][,mux=on|off]\n" - " [,logfile=PATH][,logappend=on|off] (tcp)\n" + " [,logfile=PATH][,logappend=on|off][,tls-creds=ID] (tcp)\n" "-chardev socket,id=id,path=path[,server][,nowait][,telnet][,reconnect=seconds]\n" " [,mux=on|off][,logfile=PATH][,logappend=on|off] (unix)\n" "-chardev udp,id=id[,host=host],port=port[,localaddr=localaddr]\n" @@ -2172,7 +2172,7 @@ Further options to each backend are described below. A void device. This device will not emit any data, and will drop any data it receives. The null backend does not take any options. -@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}] +@item -chardev socket ,id=@var{id} [@var{TCP options} or @var{unix options}] [,server] [,nowait] [,telnet] [,reconnect=@var{seconds}] [,tls-creds=@var{id}] Create a two-way stream socket, which can be either a TCP or a unix socket. A unix socket will be created if @option{path} is specified. Behaviour is @@ -2190,6 +2190,11 @@ escape sequences. the remote end goes away. qemu will delay this many seconds and then attempt to reconnect. Zero disables reconnecting, and is the default. +@option{tls-creds} requests enablement of the TLS protocol for encryption, +and specifies the id of the TLS credentials to use for the handshake. The +credentials must be previously created with the @option{-object tls-creds} +argument. + TCP and unix socket options are given below: @table @option diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py index 08796fff8c..f274bf80fa 100644 --- a/scripts/dump-guest-memory.py +++ b/scripts/dump-guest-memory.py @@ -1,39 +1,456 @@ -# This python script adds a new gdb command, "dump-guest-memory". It -# should be loaded with "source dump-guest-memory.py" at the (gdb) -# prompt. -# -# Copyright (C) 2013, Red Hat, Inc. -# -# Authors: -# Laszlo Ersek <lersek@redhat.com> -# -# This work is licensed under the terms of the GNU GPL, version 2 or later. See -# the COPYING file in the top-level directory. -# +""" +This python script adds a new gdb command, "dump-guest-memory". It +should be loaded with "source dump-guest-memory.py" at the (gdb) +prompt. + +Copyright (C) 2013, Red Hat, Inc. + +Authors: + Laszlo Ersek <lersek@redhat.com> + Janosch Frank <frankja@linux.vnet.ibm.com> + +This work is licensed under the terms of the GNU GPL, version 2 or later. See +the COPYING file in the top-level directory. +""" + +import ctypes + +UINTPTR_T = gdb.lookup_type("uintptr_t") + +TARGET_PAGE_SIZE = 0x1000 +TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 + +# Special value for e_phnum. This indicates that the real number of +# program headers is too large to fit into e_phnum. Instead the real +# value is in the field sh_info of section 0. +PN_XNUM = 0xFFFF + +EV_CURRENT = 1 + +ELFCLASS32 = 1 +ELFCLASS64 = 2 + +ELFDATA2LSB = 1 +ELFDATA2MSB = 2 + +ET_CORE = 4 + +PT_LOAD = 1 +PT_NOTE = 4 + +EM_386 = 3 +EM_PPC = 20 +EM_PPC64 = 21 +EM_S390 = 22 +EM_AARCH = 183 +EM_X86_64 = 62 + +class ELF(object): + """Representation of a ELF file.""" + + def __init__(self, arch): + self.ehdr = None + self.notes = [] + self.segments = [] + self.notes_size = 0 + self.endianess = None + self.elfclass = ELFCLASS64 + + if arch == 'aarch64-le': + self.endianess = ELFDATA2LSB + self.elfclass = ELFCLASS64 + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'aarch64-be': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_AARCH + + elif arch == 'X86_64': + self.endianess = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_X86_64 + + elif arch == '386': + self.endianess = ELFDATA2LSB + self.elfclass = ELFCLASS32 + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_386 + + elif arch == 's390': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_S390 + + elif arch == 'ppc64-le': + self.endianess = ELFDATA2LSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + elif arch == 'ppc64-be': + self.endianess = ELFDATA2MSB + self.ehdr = get_arch_ehdr(self.endianess, self.elfclass) + self.ehdr.e_machine = EM_PPC64 + + else: + raise gdb.GdbError("No valid arch type specified.\n" + "Currently supported types:\n" + "aarch64-be, aarch64-le, X86_64, 386, s390, " + "ppc64-be, ppc64-le") + + self.add_segment(PT_NOTE, 0, 0) + + def add_note(self, n_name, n_desc, n_type): + """Adds a note to the ELF.""" + + note = get_arch_note(self.endianess, len(n_name), len(n_desc)) + note.n_namesz = len(n_name) + 1 + note.n_descsz = len(n_desc) + note.n_name = n_name.encode() + note.n_type = n_type + + # Desc needs to be 4 byte aligned (although the 64bit spec + # specifies 8 byte). When defining n_desc as uint32 it will be + # automatically aligned but we need the memmove to copy the + # string into it. + ctypes.memmove(note.n_desc, n_desc.encode(), len(n_desc)) + + self.notes.append(note) + self.segments[0].p_filesz += ctypes.sizeof(note) + self.segments[0].p_memsz += ctypes.sizeof(note) + + def add_segment(self, p_type, p_paddr, p_size): + """Adds a segment to the elf.""" + + phdr = get_arch_phdr(self.endianess, self.elfclass) + phdr.p_type = p_type + phdr.p_paddr = p_paddr + phdr.p_filesz = p_size + phdr.p_memsz = p_size + self.segments.append(phdr) + self.ehdr.e_phnum += 1 + + def to_file(self, elf_file): + """Writes all ELF structures to the the passed file. + + Structure: + Ehdr + Segment 0:PT_NOTE + Segment 1:PT_LOAD + Segment N:PT_LOAD + Note 0..N + Dump contents + """ + elf_file.write(self.ehdr) + off = ctypes.sizeof(self.ehdr) + \ + len(self.segments) * ctypes.sizeof(self.segments[0]) + + for phdr in self.segments: + phdr.p_offset = off + elf_file.write(phdr) + off += phdr.p_filesz + + for note in self.notes: + elf_file.write(note) + + +def get_arch_note(endianess, len_name, len_desc): + """Returns a Note class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + len_name = len_name + 1 + + class Note(superclass): + """Represents an ELF note, includes the content.""" + + _fields_ = [("n_namesz", ctypes.c_uint32), + ("n_descsz", ctypes.c_uint32), + ("n_type", ctypes.c_uint32), + ("n_name", ctypes.c_char * len_name), + ("n_desc", ctypes.c_uint32 * ((len_desc + 3) // 4))] + return Note() + + +class Ident(ctypes.Structure): + """Represents the ELF ident array in the ehdr structure.""" + + _fields_ = [('ei_mag0', ctypes.c_ubyte), + ('ei_mag1', ctypes.c_ubyte), + ('ei_mag2', ctypes.c_ubyte), + ('ei_mag3', ctypes.c_ubyte), + ('ei_class', ctypes.c_ubyte), + ('ei_data', ctypes.c_ubyte), + ('ei_version', ctypes.c_ubyte), + ('ei_osabi', ctypes.c_ubyte), + ('ei_abiversion', ctypes.c_ubyte), + ('ei_pad', ctypes.c_ubyte * 7)] + + def __init__(self, endianess, elfclass): + self.ei_mag0 = 0x7F + self.ei_mag1 = ord('E') + self.ei_mag2 = ord('L') + self.ei_mag3 = ord('F') + self.ei_class = elfclass + self.ei_data = endianess + self.ei_version = EV_CURRENT + + +def get_arch_ehdr(endianess, elfclass): + """Returns a EHDR64 class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class EHDR64(superclass): + """Represents the 64 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint64), + ('e_phoff', ctypes.c_uint64), + ('e_shoff', ctypes.c_uint64), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianess, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass)) + self.e_phnum = 0 + + + class EHDR32(superclass): + """Represents the 32 bit ELF header struct.""" + + _fields_ = [('e_ident', Ident), + ('e_type', ctypes.c_uint16), + ('e_machine', ctypes.c_uint16), + ('e_version', ctypes.c_uint32), + ('e_entry', ctypes.c_uint32), + ('e_phoff', ctypes.c_uint32), + ('e_shoff', ctypes.c_uint32), + ('e_flags', ctypes.c_uint32), + ('e_ehsize', ctypes.c_uint16), + ('e_phentsize', ctypes.c_uint16), + ('e_phnum', ctypes.c_uint16), + ('e_shentsize', ctypes.c_uint16), + ('e_shnum', ctypes.c_uint16), + ('e_shstrndx', ctypes.c_uint16)] + + def __init__(self): + super(superclass, self).__init__() + self.e_ident = Ident(endianess, elfclass) + self.e_type = ET_CORE + self.e_version = EV_CURRENT + self.e_ehsize = ctypes.sizeof(self) + self.e_phoff = ctypes.sizeof(self) + self.e_phentsize = ctypes.sizeof(get_arch_phdr(endianess, elfclass)) + self.e_phnum = 0 + + # End get_arch_ehdr + if elfclass == ELFCLASS64: + return EHDR64() + else: + return EHDR32() + + +def get_arch_phdr(endianess, elfclass): + """Returns a 32 or 64 bit PHDR class with the specified endianess.""" + + if endianess == ELFDATA2LSB: + superclass = ctypes.LittleEndianStructure + else: + superclass = ctypes.BigEndianStructure + + class PHDR64(superclass): + """Represents the 64 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_offset', ctypes.c_uint64), + ('p_vaddr', ctypes.c_uint64), + ('p_paddr', ctypes.c_uint64), + ('p_filesz', ctypes.c_uint64), + ('p_memsz', ctypes.c_uint64), + ('p_align', ctypes.c_uint64)] + + class PHDR32(superclass): + """Represents the 32 bit ELF program header struct.""" + + _fields_ = [('p_type', ctypes.c_uint32), + ('p_offset', ctypes.c_uint32), + ('p_vaddr', ctypes.c_uint32), + ('p_paddr', ctypes.c_uint32), + ('p_filesz', ctypes.c_uint32), + ('p_memsz', ctypes.c_uint32), + ('p_flags', ctypes.c_uint32), + ('p_align', ctypes.c_uint32)] + + # End get_arch_phdr + if elfclass == ELFCLASS64: + return PHDR64() + else: + return PHDR32() + + +def int128_get64(val): + """Returns low 64bit part of Int128 struct.""" + + assert val["hi"] == 0 + return val["lo"] + + +def qlist_foreach(head, field_str): + """Generator for qlists.""" + + var_p = head["lh_first"] + while var_p != 0: + var = var_p.dereference() + var_p = var[field_str]["le_next"] + yield var + + +def qemu_get_ram_block(ram_addr): + """Returns the RAMBlock struct to which the given address belongs.""" + + ram_blocks = gdb.parse_and_eval("ram_list.blocks") + + for block in qlist_foreach(ram_blocks, "next"): + if (ram_addr - block["offset"]) < block["used_length"]: + return block + + raise gdb.GdbError("Bad ram offset %x" % ram_addr) + + +def qemu_get_ram_ptr(ram_addr): + """Returns qemu vaddr for given guest physical address.""" + + block = qemu_get_ram_block(ram_addr) + return block["host"] + (ram_addr - block["offset"]) + + +def memory_region_get_ram_ptr(memory_region): + if memory_region["alias"] != 0: + return (memory_region_get_ram_ptr(memory_region["alias"].dereference()) + + memory_region["alias_offset"]) + + return qemu_get_ram_ptr(memory_region["ram_addr"] & TARGET_PAGE_MASK) + + +def get_guest_phys_blocks(): + """Returns a list of ram blocks. + + Each block entry contains: + 'target_start': guest block phys start address + 'target_end': guest block phys end address + 'host_addr': qemu vaddr of the block's start + """ + + guest_phys_blocks = [] + + print("guest RAM blocks:") + print("target_start target_end host_addr message " + "count") + print("---------------- ---------------- ---------------- ------- " + "-----") + + current_map_p = gdb.parse_and_eval("address_space_memory.current_map") + current_map = current_map_p.dereference() + + # Conversion to int is needed for python 3 + # compatibility. Otherwise range doesn't cast the value itself and + # breaks. + for cur in range(int(current_map["nr"])): + flat_range = (current_map["ranges"] + cur).dereference() + memory_region = flat_range["mr"].dereference() + + # we only care about RAM + if not memory_region["ram"]: + continue + + section_size = int128_get64(flat_range["addr"]["size"]) + target_start = int128_get64(flat_range["addr"]["start"]) + target_end = target_start + section_size + host_addr = (memory_region_get_ram_ptr(memory_region) + + flat_range["offset_in_region"]) + predecessor = None + + # find continuity in guest physical address space + if len(guest_phys_blocks) > 0: + predecessor = guest_phys_blocks[-1] + predecessor_size = (predecessor["target_end"] - + predecessor["target_start"]) + + # the memory API guarantees monotonically increasing + # traversal + assert predecessor["target_end"] <= target_start + + # we want continuity in both guest-physical and + # host-virtual memory + if (predecessor["target_end"] < target_start or + predecessor["host_addr"] + predecessor_size != host_addr): + predecessor = None + + if predecessor is None: + # isolated mapping, add it to the list + guest_phys_blocks.append({"target_start": target_start, + "target_end": target_end, + "host_addr": host_addr}) + message = "added" + else: + # expand predecessor until @target_end; predecessor's + # start doesn't change + predecessor["target_end"] = target_end + message = "joined" + + print("%016x %016x %016x %-7s %5u" % + (target_start, target_end, host_addr.cast(UINTPTR_T), + message, len(guest_phys_blocks))) + + return guest_phys_blocks + + # The leading docstring doesn't have idiomatic Python formatting. It is # printed by gdb's "help" command (the first line is printed in the # "help data" summary), and it should match how other help texts look in # gdb. - -import struct - class DumpGuestMemory(gdb.Command): """Extract guest vmcore from qemu process coredump. -The sole argument is FILE, identifying the target file to write the -guest vmcore to. +The two required arguments are FILE and ARCH: +FILE identifies the target file to write the guest vmcore to. +ARCH specifies the architecture for which the core will be generated. This GDB command reimplements the dump-guest-memory QMP command in python, using the representation of guest memory as captured in the qemu coredump. The qemu process that has been dumped must have had the -command line option "-machine dump-guest-core=on". +command line option "-machine dump-guest-core=on" which is the default. For simplicity, the "paging", "begin" and "end" parameters of the QMP command are not supported -- no attempt is made to get the guest's internal paging structures (ie. paging=false is hard-wired), and guest memory is always fully dumped. -Only x86_64 guests are supported. +Currently aarch64-be, aarch64-le, X86_64, 386, s390, ppc64-be, +ppc64-le guests are supported. The CORE/NT_PRSTATUS and QEMU notes (that is, the VCPUs' statuses) are not written to the vmcore. Preparing these would require context that is @@ -47,293 +464,66 @@ deliberately called abort(), or it was dumped in response to a signal at a halfway fortunate point, then its coredump should be in reasonable shape and this command should mostly work.""" - TARGET_PAGE_SIZE = 0x1000 - TARGET_PAGE_MASK = 0xFFFFFFFFFFFFF000 - - # Various ELF constants - EM_X86_64 = 62 # AMD x86-64 target machine - ELFDATA2LSB = 1 # little endian - ELFCLASS64 = 2 - ELFMAG = "\x7FELF" - EV_CURRENT = 1 - ET_CORE = 4 - PT_LOAD = 1 - PT_NOTE = 4 - - # Special value for e_phnum. This indicates that the real number of - # program headers is too large to fit into e_phnum. Instead the real - # value is in the field sh_info of section 0. - PN_XNUM = 0xFFFF - - # Format strings for packing and header size calculation. - ELF64_EHDR = ("4s" # e_ident/magic - "B" # e_ident/class - "B" # e_ident/data - "B" # e_ident/version - "B" # e_ident/osabi - "8s" # e_ident/pad - "H" # e_type - "H" # e_machine - "I" # e_version - "Q" # e_entry - "Q" # e_phoff - "Q" # e_shoff - "I" # e_flags - "H" # e_ehsize - "H" # e_phentsize - "H" # e_phnum - "H" # e_shentsize - "H" # e_shnum - "H" # e_shstrndx - ) - ELF64_PHDR = ("I" # p_type - "I" # p_flags - "Q" # p_offset - "Q" # p_vaddr - "Q" # p_paddr - "Q" # p_filesz - "Q" # p_memsz - "Q" # p_align - ) - def __init__(self): super(DumpGuestMemory, self).__init__("dump-guest-memory", gdb.COMMAND_DATA, gdb.COMPLETE_FILENAME) - self.uintptr_t = gdb.lookup_type("uintptr_t") - self.elf64_ehdr_le = struct.Struct("<%s" % self.ELF64_EHDR) - self.elf64_phdr_le = struct.Struct("<%s" % self.ELF64_PHDR) - - def int128_get64(self, val): - assert (val["hi"] == 0) - return val["lo"] - - def qlist_foreach(self, head, field_str): - var_p = head["lh_first"] - while (var_p != 0): - var = var_p.dereference() - yield var - var_p = var[field_str]["le_next"] - - def qemu_get_ram_block(self, ram_addr): - ram_blocks = gdb.parse_and_eval("ram_list.blocks") - for block in self.qlist_foreach(ram_blocks, "next"): - if (ram_addr - block["offset"] < block["used_length"]): - return block - raise gdb.GdbError("Bad ram offset %x" % ram_addr) - - def qemu_get_ram_ptr(self, ram_addr): - block = self.qemu_get_ram_block(ram_addr) - return block["host"] + (ram_addr - block["offset"]) - - def memory_region_get_ram_ptr(self, mr): - if (mr["alias"] != 0): - return (self.memory_region_get_ram_ptr(mr["alias"].dereference()) + - mr["alias_offset"]) - return self.qemu_get_ram_ptr(mr["ram_addr"] & self.TARGET_PAGE_MASK) - - def guest_phys_blocks_init(self): - self.guest_phys_blocks = [] - - def guest_phys_blocks_append(self): - print "guest RAM blocks:" - print ("target_start target_end host_addr message " - "count") - print ("---------------- ---------------- ---------------- ------- " - "-----") - - current_map_p = gdb.parse_and_eval("address_space_memory.current_map") - current_map = current_map_p.dereference() - for cur in range(current_map["nr"]): - flat_range = (current_map["ranges"] + cur).dereference() - mr = flat_range["mr"].dereference() - - # we only care about RAM - if (not mr["ram"]): - continue - - section_size = self.int128_get64(flat_range["addr"]["size"]) - target_start = self.int128_get64(flat_range["addr"]["start"]) - target_end = target_start + section_size - host_addr = (self.memory_region_get_ram_ptr(mr) + - flat_range["offset_in_region"]) - predecessor = None - - # find continuity in guest physical address space - if (len(self.guest_phys_blocks) > 0): - predecessor = self.guest_phys_blocks[-1] - predecessor_size = (predecessor["target_end"] - - predecessor["target_start"]) - - # the memory API guarantees monotonically increasing - # traversal - assert (predecessor["target_end"] <= target_start) - - # we want continuity in both guest-physical and - # host-virtual memory - if (predecessor["target_end"] < target_start or - predecessor["host_addr"] + predecessor_size != host_addr): - predecessor = None - - if (predecessor is None): - # isolated mapping, add it to the list - self.guest_phys_blocks.append({"target_start": target_start, - "target_end" : target_end, - "host_addr" : host_addr}) - message = "added" - else: - # expand predecessor until @target_end; predecessor's - # start doesn't change - predecessor["target_end"] = target_end - message = "joined" - - print ("%016x %016x %016x %-7s %5u" % - (target_start, target_end, host_addr.cast(self.uintptr_t), - message, len(self.guest_phys_blocks))) - - def cpu_get_dump_info(self): - # We can't synchronize the registers with KVM post-mortem, and - # the bits in (first_x86_cpu->env.hflags) seem to be stale; they - # may not reflect long mode for example. Hence just assume the - # most common values. This also means that instruction pointer - # etc. will be bogus in the dump, but at least the RAM contents - # should be valid. - self.dump_info = {"d_machine": self.EM_X86_64, - "d_endian" : self.ELFDATA2LSB, - "d_class" : self.ELFCLASS64} - - def encode_elf64_ehdr_le(self): - return self.elf64_ehdr_le.pack( - self.ELFMAG, # e_ident/magic - self.dump_info["d_class"], # e_ident/class - self.dump_info["d_endian"], # e_ident/data - self.EV_CURRENT, # e_ident/version - 0, # e_ident/osabi - "", # e_ident/pad - self.ET_CORE, # e_type - self.dump_info["d_machine"], # e_machine - self.EV_CURRENT, # e_version - 0, # e_entry - self.elf64_ehdr_le.size, # e_phoff - 0, # e_shoff - 0, # e_flags - self.elf64_ehdr_le.size, # e_ehsize - self.elf64_phdr_le.size, # e_phentsize - self.phdr_num, # e_phnum - 0, # e_shentsize - 0, # e_shnum - 0 # e_shstrndx - ) - - def encode_elf64_note_le(self): - return self.elf64_phdr_le.pack(self.PT_NOTE, # p_type - 0, # p_flags - (self.memory_offset - - len(self.note)), # p_offset - 0, # p_vaddr - 0, # p_paddr - len(self.note), # p_filesz - len(self.note), # p_memsz - 0 # p_align - ) - - def encode_elf64_load_le(self, offset, start_hwaddr, range_size): - return self.elf64_phdr_le.pack(self.PT_LOAD, # p_type - 0, # p_flags - offset, # p_offset - 0, # p_vaddr - start_hwaddr, # p_paddr - range_size, # p_filesz - range_size, # p_memsz - 0 # p_align - ) - - def note_init(self, name, desc, type): - # name must include a trailing NUL - namesz = (len(name) + 1 + 3) / 4 * 4 - descsz = (len(desc) + 3) / 4 * 4 - fmt = ("<" # little endian - "I" # n_namesz - "I" # n_descsz - "I" # n_type - "%us" # name - "%us" # desc - % (namesz, descsz)) - self.note = struct.pack(fmt, - len(name) + 1, len(desc), type, name, desc) - - def dump_init(self): - self.guest_phys_blocks_init() - self.guest_phys_blocks_append() - self.cpu_get_dump_info() - # we have no way to retrieve the VCPU status from KVM - # post-mortem - self.note_init("NONE", "EMPTY", 0) - - # Account for PT_NOTE. - self.phdr_num = 1 - - # We should never reach PN_XNUM for paging=false dumps: there's - # just a handful of discontiguous ranges after merging. - self.phdr_num += len(self.guest_phys_blocks) - assert (self.phdr_num < self.PN_XNUM) - - # Calculate the ELF file offset where the memory dump commences: - # - # ELF header - # PT_NOTE - # PT_LOAD: 1 - # PT_LOAD: 2 - # ... - # PT_LOAD: len(self.guest_phys_blocks) - # ELF note - # memory dump - self.memory_offset = (self.elf64_ehdr_le.size + - self.elf64_phdr_le.size * self.phdr_num + - len(self.note)) - - def dump_begin(self, vmcore): - vmcore.write(self.encode_elf64_ehdr_le()) - vmcore.write(self.encode_elf64_note_le()) - running = self.memory_offset + self.elf = None + self.guest_phys_blocks = None + + def dump_init(self, vmcore): + """Prepares and writes ELF structures to core file.""" + + # Needed to make crash happy, data for more useful notes is + # not available in a qemu core. + self.elf.add_note("NONE", "EMPTY", 0) + + # We should never reach PN_XNUM for paging=false dumps, + # there's just a handful of discontiguous ranges after + # merging. + # The constant is needed to account for the PT_NOTE segment. + phdr_num = len(self.guest_phys_blocks) + 1 + assert phdr_num < PN_XNUM + for block in self.guest_phys_blocks: - range_size = block["target_end"] - block["target_start"] - vmcore.write(self.encode_elf64_load_le(running, - block["target_start"], - range_size)) - running += range_size - vmcore.write(self.note) + block_size = block["target_end"] - block["target_start"] + self.elf.add_segment(PT_LOAD, block["target_start"], block_size) + + self.elf.to_file(vmcore) def dump_iterate(self, vmcore): + """Writes guest core to file.""" + qemu_core = gdb.inferiors()[0] for block in self.guest_phys_blocks: - cur = block["host_addr"] + cur = block["host_addr"] left = block["target_end"] - block["target_start"] - print ("dumping range at %016x for length %016x" % - (cur.cast(self.uintptr_t), left)) - while (left > 0): - chunk_size = min(self.TARGET_PAGE_SIZE, left) + print("dumping range at %016x for length %016x" % + (cur.cast(UINTPTR_T), left)) + + while left > 0: + chunk_size = min(TARGET_PAGE_SIZE, left) chunk = qemu_core.read_memory(cur, chunk_size) vmcore.write(chunk) - cur += chunk_size + cur += chunk_size left -= chunk_size - def create_vmcore(self, filename): - vmcore = open(filename, "wb") - self.dump_begin(vmcore) - self.dump_iterate(vmcore) - vmcore.close() - def invoke(self, args, from_tty): + """Handles command invocation from gdb.""" + # Unwittingly pressing the Enter key after the command should # not dump the same multi-gig coredump to the same file. self.dont_repeat() argv = gdb.string_to_argv(args) - if (len(argv) != 1): - raise gdb.GdbError("usage: dump-guest-memory FILE") + if len(argv) != 2: + raise gdb.GdbError("usage: dump-guest-memory FILE ARCH") + + self.elf = ELF(argv[1]) + self.guest_phys_blocks = get_guest_phys_blocks() - self.dump_init() - self.create_vmcore(argv[0]) + with open(argv[0], "wb") as vmcore: + self.dump_init(vmcore) + self.dump_iterate(vmcore) DumpGuestMemory() diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat index 7e5d25612b..d43e8f3e85 100755 --- a/scripts/kvm/kvm_stat +++ b/scripts/kvm/kvm_stat @@ -12,285 +12,311 @@ # the COPYING file in the top-level directory. import curses -import sys, os, time, optparse, ctypes -from ctypes import * - -class DebugfsProvider(object): - def __init__(self): - self.base = '/sys/kernel/debug/kvm' - self._fields = os.listdir(self.base) - def fields(self): - return self._fields - def select(self, fields): - self._fields = fields - def read(self): - def val(key): - return int(file(self.base + '/' + key).read()) - return dict([(key, val(key)) for key in self._fields]) - -vmx_exit_reasons = { - 0: 'EXCEPTION_NMI', - 1: 'EXTERNAL_INTERRUPT', - 2: 'TRIPLE_FAULT', - 7: 'PENDING_INTERRUPT', - 8: 'NMI_WINDOW', - 9: 'TASK_SWITCH', - 10: 'CPUID', - 12: 'HLT', - 14: 'INVLPG', - 15: 'RDPMC', - 16: 'RDTSC', - 18: 'VMCALL', - 19: 'VMCLEAR', - 20: 'VMLAUNCH', - 21: 'VMPTRLD', - 22: 'VMPTRST', - 23: 'VMREAD', - 24: 'VMRESUME', - 25: 'VMWRITE', - 26: 'VMOFF', - 27: 'VMON', - 28: 'CR_ACCESS', - 29: 'DR_ACCESS', - 30: 'IO_INSTRUCTION', - 31: 'MSR_READ', - 32: 'MSR_WRITE', - 33: 'INVALID_STATE', - 36: 'MWAIT_INSTRUCTION', - 39: 'MONITOR_INSTRUCTION', - 40: 'PAUSE_INSTRUCTION', - 41: 'MCE_DURING_VMENTRY', - 43: 'TPR_BELOW_THRESHOLD', - 44: 'APIC_ACCESS', - 48: 'EPT_VIOLATION', - 49: 'EPT_MISCONFIG', - 54: 'WBINVD', - 55: 'XSETBV', - 56: 'APIC_WRITE', - 58: 'INVPCID', +import sys +import os +import time +import optparse +import ctypes +import fcntl +import resource +import struct +import re +from collections import defaultdict + +VMX_EXIT_REASONS = { + 'EXCEPTION_NMI': 0, + 'EXTERNAL_INTERRUPT': 1, + 'TRIPLE_FAULT': 2, + 'PENDING_INTERRUPT': 7, + 'NMI_WINDOW': 8, + 'TASK_SWITCH': 9, + 'CPUID': 10, + 'HLT': 12, + 'INVLPG': 14, + 'RDPMC': 15, + 'RDTSC': 16, + 'VMCALL': 18, + 'VMCLEAR': 19, + 'VMLAUNCH': 20, + 'VMPTRLD': 21, + 'VMPTRST': 22, + 'VMREAD': 23, + 'VMRESUME': 24, + 'VMWRITE': 25, + 'VMOFF': 26, + 'VMON': 27, + 'CR_ACCESS': 28, + 'DR_ACCESS': 29, + 'IO_INSTRUCTION': 30, + 'MSR_READ': 31, + 'MSR_WRITE': 32, + 'INVALID_STATE': 33, + 'MWAIT_INSTRUCTION': 36, + 'MONITOR_INSTRUCTION': 39, + 'PAUSE_INSTRUCTION': 40, + 'MCE_DURING_VMENTRY': 41, + 'TPR_BELOW_THRESHOLD': 43, + 'APIC_ACCESS': 44, + 'EPT_VIOLATION': 48, + 'EPT_MISCONFIG': 49, + 'WBINVD': 54, + 'XSETBV': 55, + 'APIC_WRITE': 56, + 'INVPCID': 58, } -svm_exit_reasons = { - 0x000: 'READ_CR0', - 0x003: 'READ_CR3', - 0x004: 'READ_CR4', - 0x008: 'READ_CR8', - 0x010: 'WRITE_CR0', - 0x013: 'WRITE_CR3', - 0x014: 'WRITE_CR4', - 0x018: 'WRITE_CR8', - 0x020: 'READ_DR0', - 0x021: 'READ_DR1', - 0x022: 'READ_DR2', - 0x023: 'READ_DR3', - 0x024: 'READ_DR4', - 0x025: 'READ_DR5', - 0x026: 'READ_DR6', - 0x027: 'READ_DR7', - 0x030: 'WRITE_DR0', - 0x031: 'WRITE_DR1', - 0x032: 'WRITE_DR2', - 0x033: 'WRITE_DR3', - 0x034: 'WRITE_DR4', - 0x035: 'WRITE_DR5', - 0x036: 'WRITE_DR6', - 0x037: 'WRITE_DR7', - 0x040: 'EXCP_BASE', - 0x060: 'INTR', - 0x061: 'NMI', - 0x062: 'SMI', - 0x063: 'INIT', - 0x064: 'VINTR', - 0x065: 'CR0_SEL_WRITE', - 0x066: 'IDTR_READ', - 0x067: 'GDTR_READ', - 0x068: 'LDTR_READ', - 0x069: 'TR_READ', - 0x06a: 'IDTR_WRITE', - 0x06b: 'GDTR_WRITE', - 0x06c: 'LDTR_WRITE', - 0x06d: 'TR_WRITE', - 0x06e: 'RDTSC', - 0x06f: 'RDPMC', - 0x070: 'PUSHF', - 0x071: 'POPF', - 0x072: 'CPUID', - 0x073: 'RSM', - 0x074: 'IRET', - 0x075: 'SWINT', - 0x076: 'INVD', - 0x077: 'PAUSE', - 0x078: 'HLT', - 0x079: 'INVLPG', - 0x07a: 'INVLPGA', - 0x07b: 'IOIO', - 0x07c: 'MSR', - 0x07d: 'TASK_SWITCH', - 0x07e: 'FERR_FREEZE', - 0x07f: 'SHUTDOWN', - 0x080: 'VMRUN', - 0x081: 'VMMCALL', - 0x082: 'VMLOAD', - 0x083: 'VMSAVE', - 0x084: 'STGI', - 0x085: 'CLGI', - 0x086: 'SKINIT', - 0x087: 'RDTSCP', - 0x088: 'ICEBP', - 0x089: 'WBINVD', - 0x08a: 'MONITOR', - 0x08b: 'MWAIT', - 0x08c: 'MWAIT_COND', - 0x08d: 'XSETBV', - 0x400: 'NPF', +SVM_EXIT_REASONS = { + 'READ_CR0': 0x000, + 'READ_CR3': 0x003, + 'READ_CR4': 0x004, + 'READ_CR8': 0x008, + 'WRITE_CR0': 0x010, + 'WRITE_CR3': 0x013, + 'WRITE_CR4': 0x014, + 'WRITE_CR8': 0x018, + 'READ_DR0': 0x020, + 'READ_DR1': 0x021, + 'READ_DR2': 0x022, + 'READ_DR3': 0x023, + 'READ_DR4': 0x024, + 'READ_DR5': 0x025, + 'READ_DR6': 0x026, + 'READ_DR7': 0x027, + 'WRITE_DR0': 0x030, + 'WRITE_DR1': 0x031, + 'WRITE_DR2': 0x032, + 'WRITE_DR3': 0x033, + 'WRITE_DR4': 0x034, + 'WRITE_DR5': 0x035, + 'WRITE_DR6': 0x036, + 'WRITE_DR7': 0x037, + 'EXCP_BASE': 0x040, + 'INTR': 0x060, + 'NMI': 0x061, + 'SMI': 0x062, + 'INIT': 0x063, + 'VINTR': 0x064, + 'CR0_SEL_WRITE': 0x065, + 'IDTR_READ': 0x066, + 'GDTR_READ': 0x067, + 'LDTR_READ': 0x068, + 'TR_READ': 0x069, + 'IDTR_WRITE': 0x06a, + 'GDTR_WRITE': 0x06b, + 'LDTR_WRITE': 0x06c, + 'TR_WRITE': 0x06d, + 'RDTSC': 0x06e, + 'RDPMC': 0x06f, + 'PUSHF': 0x070, + 'POPF': 0x071, + 'CPUID': 0x072, + 'RSM': 0x073, + 'IRET': 0x074, + 'SWINT': 0x075, + 'INVD': 0x076, + 'PAUSE': 0x077, + 'HLT': 0x078, + 'INVLPG': 0x079, + 'INVLPGA': 0x07a, + 'IOIO': 0x07b, + 'MSR': 0x07c, + 'TASK_SWITCH': 0x07d, + 'FERR_FREEZE': 0x07e, + 'SHUTDOWN': 0x07f, + 'VMRUN': 0x080, + 'VMMCALL': 0x081, + 'VMLOAD': 0x082, + 'VMSAVE': 0x083, + 'STGI': 0x084, + 'CLGI': 0x085, + 'SKINIT': 0x086, + 'RDTSCP': 0x087, + 'ICEBP': 0x088, + 'WBINVD': 0x089, + 'MONITOR': 0x08a, + 'MWAIT': 0x08b, + 'MWAIT_COND': 0x08c, + 'XSETBV': 0x08d, + 'NPF': 0x400, } # EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h) -aarch64_exit_reasons = { - 0x00: 'UNKNOWN', - 0x01: 'WFI', - 0x03: 'CP15_32', - 0x04: 'CP15_64', - 0x05: 'CP14_MR', - 0x06: 'CP14_LS', - 0x07: 'FP_ASIMD', - 0x08: 'CP10_ID', - 0x0C: 'CP14_64', - 0x0E: 'ILL_ISS', - 0x11: 'SVC32', - 0x12: 'HVC32', - 0x13: 'SMC32', - 0x15: 'SVC64', - 0x16: 'HVC64', - 0x17: 'SMC64', - 0x18: 'SYS64', - 0x20: 'IABT', - 0x21: 'IABT_HYP', - 0x22: 'PC_ALIGN', - 0x24: 'DABT', - 0x25: 'DABT_HYP', - 0x26: 'SP_ALIGN', - 0x28: 'FP_EXC32', - 0x2C: 'FP_EXC64', - 0x2F: 'SERROR', - 0x30: 'BREAKPT', - 0x31: 'BREAKPT_HYP', - 0x32: 'SOFTSTP', - 0x33: 'SOFTSTP_HYP', - 0x34: 'WATCHPT', - 0x35: 'WATCHPT_HYP', - 0x38: 'BKPT32', - 0x3A: 'VECTOR32', - 0x3C: 'BRK64', +AARCH64_EXIT_REASONS = { + 'UNKNOWN': 0x00, + 'WFI': 0x01, + 'CP15_32': 0x03, + 'CP15_64': 0x04, + 'CP14_MR': 0x05, + 'CP14_LS': 0x06, + 'FP_ASIMD': 0x07, + 'CP10_ID': 0x08, + 'CP14_64': 0x0C, + 'ILL_ISS': 0x0E, + 'SVC32': 0x11, + 'HVC32': 0x12, + 'SMC32': 0x13, + 'SVC64': 0x15, + 'HVC64': 0x16, + 'SMC64': 0x17, + 'SYS64': 0x18, + 'IABT': 0x20, + 'IABT_HYP': 0x21, + 'PC_ALIGN': 0x22, + 'DABT': 0x24, + 'DABT_HYP': 0x25, + 'SP_ALIGN': 0x26, + 'FP_EXC32': 0x28, + 'FP_EXC64': 0x2C, + 'SERROR': 0x2F, + 'BREAKPT': 0x30, + 'BREAKPT_HYP': 0x31, + 'SOFTSTP': 0x32, + 'SOFTSTP_HYP': 0x33, + 'WATCHPT': 0x34, + 'WATCHPT_HYP': 0x35, + 'BKPT32': 0x38, + 'VECTOR32': 0x3A, + 'BRK64': 0x3C, } # From include/uapi/linux/kvm.h, KVM_EXIT_xxx -userspace_exit_reasons = { - 0: 'UNKNOWN', - 1: 'EXCEPTION', - 2: 'IO', - 3: 'HYPERCALL', - 4: 'DEBUG', - 5: 'HLT', - 6: 'MMIO', - 7: 'IRQ_WINDOW_OPEN', - 8: 'SHUTDOWN', - 9: 'FAIL_ENTRY', - 10: 'INTR', - 11: 'SET_TPR', - 12: 'TPR_ACCESS', - 13: 'S390_SIEIC', - 14: 'S390_RESET', - 15: 'DCR', - 16: 'NMI', - 17: 'INTERNAL_ERROR', - 18: 'OSI', - 19: 'PAPR_HCALL', - 20: 'S390_UCONTROL', - 21: 'WATCHDOG', - 22: 'S390_TSCH', - 23: 'EPR', - 24: 'SYSTEM_EVENT', +USERSPACE_EXIT_REASONS = { + 'UNKNOWN': 0, + 'EXCEPTION': 1, + 'IO': 2, + 'HYPERCALL': 3, + 'DEBUG': 4, + 'HLT': 5, + 'MMIO': 6, + 'IRQ_WINDOW_OPEN': 7, + 'SHUTDOWN': 8, + 'FAIL_ENTRY': 9, + 'INTR': 10, + 'SET_TPR': 11, + 'TPR_ACCESS': 12, + 'S390_SIEIC': 13, + 'S390_RESET': 14, + 'DCR': 15, + 'NMI': 16, + 'INTERNAL_ERROR': 17, + 'OSI': 18, + 'PAPR_HCALL': 19, + 'S390_UCONTROL': 20, + 'WATCHDOG': 21, + 'S390_TSCH': 22, + 'EPR': 23, + 'SYSTEM_EVENT': 24, } -x86_exit_reasons = { - 'vmx': vmx_exit_reasons, - 'svm': svm_exit_reasons, +IOCTL_NUMBERS = { + 'SET_FILTER': 0x40082406, + 'ENABLE': 0x00002400, + 'DISABLE': 0x00002401, + 'RESET': 0x00002403, } -sc_perf_evt_open = None -exit_reasons = None +class Arch(object): + """Class that encapsulates global architecture specific data like + syscall and ioctl numbers. + + """ + @staticmethod + def get_arch(): + machine = os.uname()[4] + + if machine.startswith('ppc'): + return ArchPPC() + elif machine.startswith('aarch64'): + return ArchA64() + elif machine.startswith('s390'): + return ArchS390() + else: + # X86_64 + for line in open('/proc/cpuinfo'): + if not line.startswith('flags'): + continue + + flags = line.split() + if 'vmx' in flags: + return ArchX86(VMX_EXIT_REASONS) + if 'svm' in flags: + return ArchX86(SVM_EXIT_REASONS) + return + +class ArchX86(Arch): + def __init__(self, exit_reasons): + self.sc_perf_evt_open = 298 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = exit_reasons + +class ArchPPC(Arch): + def __init__(self): + self.sc_perf_evt_open = 319 + self.ioctl_numbers = IOCTL_NUMBERS + self.ioctl_numbers['ENABLE'] = 0x20002400 + self.ioctl_numbers['DISABLE'] = 0x20002401 -ioctl_numbers = { - 'SET_FILTER' : 0x40082406, - 'ENABLE' : 0x00002400, - 'DISABLE' : 0x00002401, - 'RESET' : 0x00002403, -} + # PPC comes in 32 and 64 bit and some generated ioctl + # numbers depend on the wordsize. + char_ptr_size = ctypes.sizeof(ctypes.c_char_p) + self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16 + +class ArchA64(Arch): + def __init__(self): + self.sc_perf_evt_open = 241 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = AARCH64_EXIT_REASONS + +class ArchS390(Arch): + def __init__(self): + self.sc_perf_evt_open = 331 + self.ioctl_numbers = IOCTL_NUMBERS + self.exit_reasons = None + +ARCH = Arch.get_arch() + + +def walkdir(path): + """Returns os.walk() data for specified directory. + + As it is only a wrapper it returns the same 3-tuple of (dirpath, + dirnames, filenames). + """ + return next(os.walk(path)) + + +def parse_int_list(list_string): + """Returns an int list from a string of comma separated integers and + integer ranges.""" + integers = [] + members = list_string.split(',') -def x86_init(flag): - globals().update({ - 'sc_perf_evt_open' : 298, - 'exit_reasons' : x86_exit_reasons[flag], - }) - -def s390_init(): - globals().update({ - 'sc_perf_evt_open' : 331 - }) - -def ppc_init(): - globals().update({ - 'sc_perf_evt_open' : 319, - 'ioctl_numbers' : { - 'SET_FILTER' : 0x80002406 | (ctypes.sizeof(ctypes.c_char_p) << 16), - 'ENABLE' : 0x20002400, - 'DISABLE' : 0x20002401, - } - }) - -def aarch64_init(): - globals().update({ - 'sc_perf_evt_open' : 241, - 'exit_reasons' : aarch64_exit_reasons, - }) - -def detect_platform(): - if os.uname()[4].startswith('ppc'): - ppc_init() - return - elif os.uname()[4].startswith('aarch64'): - aarch64_init() - return - - for line in file('/proc/cpuinfo').readlines(): - if line.startswith('flags'): - for flag in line.split(): - if flag in x86_exit_reasons: - x86_init(flag) - return - elif line.startswith('vendor_id'): - for flag in line.split(): - if flag == 'IBM/S390': - s390_init() - return - -detect_platform() - -def invert(d): - return dict((x[1], x[0]) for x in d.iteritems()) - -filters = {} -filters['kvm_userspace_exit'] = ('reason', invert(userspace_exit_reasons)) -if exit_reasons: - filters['kvm_exit'] = ('exit_reason', invert(exit_reasons)) - -import struct, array - -libc = ctypes.CDLL('libc.so.6') + for member in members: + if '-' not in member: + integers.append(int(member)) + else: + int_range = member.split('-') + integers.extend(range(int(int_range[0]), + int(int_range[1]) + 1)) + + return integers + + +def get_online_cpus(): + with open('/sys/devices/system/cpu/online') as cpu_list: + cpu_string = cpu_list.readline() + return parse_int_list(cpu_string) + + +def get_filters(): + filters = {} + filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS) + if ARCH.exit_reasons: + filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons) + return filters + +libc = ctypes.CDLL('libc.so.6', use_errno=True) syscall = libc.syscall -get_errno = libc.__errno_location -get_errno.restype = POINTER(c_int) class perf_event_attr(ctypes.Structure): _fields_ = [('type', ctypes.c_uint32), @@ -305,262 +331,350 @@ class perf_event_attr(ctypes.Structure): ('bp_addr', ctypes.c_uint64), ('bp_len', ctypes.c_uint64), ] -def _perf_event_open(attr, pid, cpu, group_fd, flags): - return syscall(sc_perf_evt_open, ctypes.pointer(attr), ctypes.c_int(pid), - ctypes.c_int(cpu), ctypes.c_int(group_fd), - ctypes.c_long(flags)) - -PERF_TYPE_HARDWARE = 0 -PERF_TYPE_SOFTWARE = 1 -PERF_TYPE_TRACEPOINT = 2 -PERF_TYPE_HW_CACHE = 3 -PERF_TYPE_RAW = 4 -PERF_TYPE_BREAKPOINT = 5 - -PERF_SAMPLE_IP = 1 << 0 -PERF_SAMPLE_TID = 1 << 1 -PERF_SAMPLE_TIME = 1 << 2 -PERF_SAMPLE_ADDR = 1 << 3 -PERF_SAMPLE_READ = 1 << 4 -PERF_SAMPLE_CALLCHAIN = 1 << 5 -PERF_SAMPLE_ID = 1 << 6 -PERF_SAMPLE_CPU = 1 << 7 -PERF_SAMPLE_PERIOD = 1 << 8 -PERF_SAMPLE_STREAM_ID = 1 << 9 -PERF_SAMPLE_RAW = 1 << 10 - -PERF_FORMAT_TOTAL_TIME_ENABLED = 1 << 0 -PERF_FORMAT_TOTAL_TIME_RUNNING = 1 << 1 -PERF_FORMAT_ID = 1 << 2 -PERF_FORMAT_GROUP = 1 << 3 -import re + def __init__(self): + super(self.__class__, self).__init__() + self.type = PERF_TYPE_TRACEPOINT + self.size = ctypes.sizeof(self) + self.read_format = PERF_FORMAT_GROUP + +def perf_event_open(attr, pid, cpu, group_fd, flags): + return syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr), + ctypes.c_int(pid), ctypes.c_int(cpu), + ctypes.c_int(group_fd), ctypes.c_long(flags)) -sys_tracing = '/sys/kernel/debug/tracing' +PERF_TYPE_TRACEPOINT = 2 +PERF_FORMAT_GROUP = 1 << 3 + +PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing' +PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm' class Group(object): - def __init__(self, cpu): + def __init__(self): self.events = [] - self.group_leader = None - self.cpu = cpu - def add_event(self, name, event_set, tracepoint, filter = None): - self.events.append(Event(group = self, - name = name, event_set = event_set, - tracepoint = tracepoint, filter = filter)) - if len(self.events) == 1: - self.file = os.fdopen(self.events[0].fd) + + def add_event(self, event): + self.events.append(event) + def read(self): - bytes = 8 * (1 + len(self.events)) - fmt = 'xxxxxxxx' + 'q' * len(self.events) + length = 8 * (1 + len(self.events)) + read_format = 'xxxxxxxx' + 'Q' * len(self.events) return dict(zip([event.name for event in self.events], - struct.unpack(fmt, self.file.read(bytes)))) + struct.unpack(read_format, + os.read(self.events[0].fd, length)))) class Event(object): - def __init__(self, group, name, event_set, tracepoint, filter = None): + def __init__(self, name, group, trace_cpu, trace_point, trace_filter, + trace_set='kvm'): self.name = name - attr = perf_event_attr() - attr.type = PERF_TYPE_TRACEPOINT - attr.size = ctypes.sizeof(attr) - id_path = os.path.join(sys_tracing, 'events', event_set, - tracepoint, 'id') - id = int(file(id_path).read()) - attr.config = id - attr.sample_type = (PERF_SAMPLE_RAW - | PERF_SAMPLE_TIME - | PERF_SAMPLE_CPU) - attr.sample_period = 1 - attr.read_format = PERF_FORMAT_GROUP + self.fd = None + self.setup_event(group, trace_cpu, trace_point, trace_filter, + trace_set) + + def setup_event_attribute(self, trace_set, trace_point): + id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set, + trace_point, 'id') + + event_attr = perf_event_attr() + event_attr.config = int(open(id_path).read()) + return event_attr + + def setup_event(self, group, trace_cpu, trace_point, trace_filter, + trace_set): + event_attr = self.setup_event_attribute(trace_set, trace_point) + group_leader = -1 if group.events: group_leader = group.events[0].fd - fd = _perf_event_open(attr, -1, group.cpu, group_leader, 0) + + fd = perf_event_open(event_attr, -1, trace_cpu, + group_leader, 0) if fd == -1: - err = get_errno()[0] - raise Exception('perf_event_open failed, errno = ' + err.__str__()) - if filter: - import fcntl - fcntl.ioctl(fd, ioctl_numbers['SET_FILTER'], filter) + err = ctypes.get_errno() + raise OSError(err, os.strerror(err), + 'while calling sys_perf_event_open().') + + if trace_filter: + fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'], + trace_filter) + self.fd = fd + def enable(self): - import fcntl - fcntl.ioctl(self.fd, ioctl_numbers['ENABLE'], 0) + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0) + def disable(self): - import fcntl - fcntl.ioctl(self.fd, ioctl_numbers['DISABLE'], 0) + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0) + def reset(self): - import fcntl - fcntl.ioctl(self.fd, ioctl_numbers['RESET'], 0) + fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0) class TracepointProvider(object): def __init__(self): - path = os.path.join(sys_tracing, 'events', 'kvm') - fields = [f - for f in os.listdir(path) - if os.path.isdir(os.path.join(path, f))] + self.group_leaders = [] + self.filters = get_filters() + self._fields = self.get_available_fields() + self.setup_traces() + self.fields = self._fields + + def get_available_fields(self): + path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm') + fields = walkdir(path)[1] extra = [] - for f in fields: - if f in filters: - subfield, values = filters[f] - for name, number in values.iteritems(): - extra.append(f + '(' + name + ')') + for field in fields: + if field in self.filters: + filter_name_, filter_dicts = self.filters[field] + for name in filter_dicts: + extra.append(field + '(' + name + ')') fields += extra - self._setup(fields) - self.select(fields) - def fields(self): - return self._fields + return fields + + def setup_traces(self): + cpus = get_online_cpus() + + # The constant is needed as a buffer for python libs, std + # streams and other files that the script opens. + newlim = len(cpus) * len(self._fields) + 50 + try: + softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE) + + if hardlim < newlim: + # Now we need CAP_SYS_RESOURCE, to increase the hard limit. + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim)) + else: + # Raising the soft limit is sufficient. + resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim)) + + except ValueError: + sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim)) - def _online_cpus(self): - l = [] - pattern = r'cpu([0-9]+)' - basedir = '/sys/devices/system/cpu' - for entry in os.listdir(basedir): - match = re.match(pattern, entry) - if not match: - continue - path = os.path.join(basedir, entry, 'online') - if os.path.exists(path) and open(path).read().strip() != '1': - continue - l.append(int(match.group(1))) - return l - - def _setup(self, _fields): - self._fields = _fields - cpus = self._online_cpus() - import resource - nfiles = len(cpus) * 1000 - resource.setrlimit(resource.RLIMIT_NOFILE, (nfiles, nfiles)) - events = [] - self.group_leaders = [] for cpu in cpus: - group = Group(cpu) - for name in _fields: + group = Group() + for name in self._fields: tracepoint = name - filter = None - m = re.match(r'(.*)\((.*)\)', name) - if m: - tracepoint, sub = m.groups() - filter = '%s==%d\0' % (filters[tracepoint][0], - filters[tracepoint][1][sub]) - event = group.add_event(name, event_set = 'kvm', - tracepoint = tracepoint, - filter = filter) + tracefilter = None + match = re.match(r'(.*)\((.*)\)', name) + if match: + tracepoint, sub = match.groups() + tracefilter = ('%s==%d\0' % + (self.filters[tracepoint][0], + self.filters[tracepoint][1][sub])) + + group.add_event(Event(name=name, + group=group, + trace_cpu=cpu, + trace_point=tracepoint, + trace_filter=tracefilter)) self.group_leaders.append(group) - def select(self, fields): + + def available_fields(self): + return self.get_available_fields() + + @property + def fields(self): + return self._fields + + @fields.setter + def fields(self, fields): + self._fields = fields for group in self.group_leaders: - for event in group.events: + for index, event in enumerate(group.events): if event.name in fields: event.reset() event.enable() else: - event.disable() + # Do not disable the group leader. + # It would disable all of its events. + if index != 0: + event.disable() + def read(self): - from collections import defaultdict ret = defaultdict(int) for group in self.group_leaders: for name, val in group.read().iteritems(): - ret[name] += val + if name in self._fields: + ret[name] += val return ret -class Stats: - def __init__(self, providers, fields = None): +class DebugfsProvider(object): + def __init__(self): + self._fields = self.get_available_fields() + + def get_available_fields(self): + return walkdir(PATH_DEBUGFS_KVM)[2] + + @property + def fields(self): + return self._fields + + @fields.setter + def fields(self, fields): + self._fields = fields + + def read(self): + def val(key): + return int(file(PATH_DEBUGFS_KVM + '/' + key).read()) + return dict([(key, val(key)) for key in self._fields]) + +class Stats(object): + def __init__(self, providers, fields=None): self.providers = providers - self.fields_filter = fields - self._update() - def _update(self): + self._fields_filter = fields + self.values = {} + self.update_provider_filters() + + def update_provider_filters(self): def wanted(key): - import re - if not self.fields_filter: + if not self._fields_filter: return True - return re.match(self.fields_filter, key) is not None - self.values = dict() - for d in providers: - provider_fields = [key for key in d.fields() if wanted(key)] - for key in provider_fields: - self.values[key] = None - d.select(provider_fields) - def set_fields_filter(self, fields_filter): - self.fields_filter = fields_filter - self._update() + return re.match(self._fields_filter, key) is not None + + # As we reset the counters when updating the fields we can + # also clear the cache of old values. + self.values = {} + for provider in self.providers: + provider_fields = [key for key in provider.get_available_fields() + if wanted(key)] + provider.fields = provider_fields + + @property + def fields_filter(self): + return self._fields_filter + + @fields_filter.setter + def fields_filter(self, fields_filter): + self._fields_filter = fields_filter + self.update_provider_filters() + def get(self): - for d in providers: - new = d.read() - for key in d.fields(): + for provider in self.providers: + new = provider.read() + for key in provider.fields: oldval = self.values.get(key, (0, 0)) - newval = new[key] + newval = new.get(key, 0) newdelta = None if oldval is not None: newdelta = newval - oldval[0] self.values[key] = (newval, newdelta) return self.values -if not os.access('/sys/kernel/debug', os.F_OK): - print 'Please enable CONFIG_DEBUG_FS in your kernel' - sys.exit(1) -if not os.access('/sys/kernel/debug/kvm', os.F_OK): - print "Please mount debugfs ('mount -t debugfs debugfs /sys/kernel/debug')" - print "and ensure the kvm modules are loaded" - sys.exit(1) - -label_width = 40 -number_width = 10 - -def tui(screen, stats): - curses.use_default_colors() - curses.noecho() - drilldown = False - fields_filter = stats.fields_filter - def update_drilldown(): - if not fields_filter: - if drilldown: - stats.set_fields_filter(None) - else: - stats.set_fields_filter(r'^[^\(]*$') - update_drilldown() - def refresh(sleeptime): - screen.erase() - screen.addstr(0, 0, 'kvm statistics') - screen.addstr(2, 1, 'Event') - screen.addstr(2, 1 + label_width + number_width - len('Total'), 'Total') - screen.addstr(2, 1 + label_width + number_width + 8 - len('Current'), 'Current') +LABEL_WIDTH = 40 +NUMBER_WIDTH = 10 + +class Tui(object): + def __init__(self, stats): + self.stats = stats + self.screen = None + self.drilldown = False + self.update_drilldown() + + def __enter__(self): + """Initialises curses for later use. Based on curses.wrapper + implementation from the Python standard library.""" + self.screen = curses.initscr() + curses.noecho() + curses.cbreak() + + # The try/catch works around a minor bit of + # over-conscientiousness in the curses module, the error + # return from C start_color() is ignorable. + try: + curses.start_color() + except: + pass + + curses.use_default_colors() + return self + + def __exit__(self, *exception): + """Resets the terminal to its normal state. Based on curses.wrappre + implementation from the Python standard library.""" + if self.screen: + self.screen.keypad(0) + curses.echo() + curses.nocbreak() + curses.endwin() + + def update_drilldown(self): + if not self.stats.fields_filter: + self.stats.fields_filter = r'^[^\(]*$' + + elif self.stats.fields_filter == r'^[^\(]*$': + self.stats.fields_filter = None + + def refresh(self, sleeptime): + self.screen.erase() + self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD) + self.screen.addstr(2, 1, 'Event') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH - + len('Total'), 'Total') + self.screen.addstr(2, 1 + LABEL_WIDTH + NUMBER_WIDTH + 8 - + len('Current'), 'Current') row = 3 - s = stats.get() + stats = self.stats.get() def sortkey(x): - if s[x][1]: - return (-s[x][1], -s[x][0]) + if stats[x][1]: + return (-stats[x][1], -stats[x][0]) else: - return (0, -s[x][0]) - for key in sorted(s.keys(), key = sortkey): - if row >= screen.getmaxyx()[0]: + return (0, -stats[x][0]) + for key in sorted(stats.keys(), key=sortkey): + + if row >= self.screen.getmaxyx()[0]: break - values = s[key] + values = stats[key] if not values[0] and not values[1]: break col = 1 - screen.addstr(row, col, key) - col += label_width - screen.addstr(row, col, '%10d' % (values[0],)) - col += number_width + self.screen.addstr(row, col, key) + col += LABEL_WIDTH + self.screen.addstr(row, col, '%10d' % (values[0],)) + col += NUMBER_WIDTH if values[1] is not None: - screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) + self.screen.addstr(row, col, '%8d' % (values[1] / sleeptime,)) row += 1 - screen.refresh() + self.screen.refresh() + + def show_filter_selection(self): + while True: + self.screen.erase() + self.screen.addstr(0, 0, + "Show statistics for events matching a regex.", + curses.A_BOLD) + self.screen.addstr(2, 0, + "Current regex: {0}" + .format(self.stats.fields_filter)) + self.screen.addstr(3, 0, "New regex: ") + curses.echo() + regex = self.screen.getstr() + curses.noecho() + if len(regex) == 0: + return + try: + re.compile(regex) + self.stats.fields_filter = regex + return + except re.error: + continue - sleeptime = 0.25 - while True: - refresh(sleeptime) - curses.halfdelay(int(sleeptime * 10)) - sleeptime = 3 - try: - c = screen.getkey() - if c == 'x': - drilldown = not drilldown - update_drilldown() - if c == 'q': + def show_stats(self): + sleeptime = 0.25 + while True: + self.refresh(sleeptime) + curses.halfdelay(int(sleeptime * 10)) + sleeptime = 3 + try: + char = self.screen.getkey() + if char == 'x': + self.drilldown = not self.drilldown + self.update_drilldown() + if char == 'q': + break + if char == 'f': + self.show_filter_selection() + except KeyboardInterrupt: break - except KeyboardInterrupt: - break - except curses.error: - continue + except curses.error: + continue def batch(stats): s = stats.get() @@ -568,13 +682,13 @@ def batch(stats): s = stats.get() for key in sorted(s.keys()): values = s[key] - print '%-22s%10d%10d' % (key, values[0], values[1]) + print '%-42s%10d%10d' % (key, values[0], values[1]) def log(stats): keys = sorted(stats.get().iterkeys()) def banner(): for k in keys: - print '%10s' % k[0:9], + print '%s' % k, print def statline(): s = stats.get() @@ -590,57 +704,110 @@ def log(stats): statline() line += 1 -options = optparse.OptionParser() -options.add_option('-1', '--once', '--batch', - action = 'store_true', - default = False, - dest = 'once', - help = 'run in batch mode for one second', - ) -options.add_option('-l', '--log', - action = 'store_true', - default = False, - dest = 'log', - help = 'run in logging mode (like vmstat)', - ) -options.add_option('-t', '--tracepoints', - action = 'store_true', - default = False, - dest = 'tracepoints', - help = 'retrieve statistics from tracepoints', - ) -options.add_option('-d', '--debugfs', - action = 'store_true', - default = False, - dest = 'debugfs', - help = 'retrieve statistics from debugfs', - ) -options.add_option('-f', '--fields', - action = 'store', - default = None, - dest = 'fields', - help = 'fields to display (regex)', - ) -(options, args) = options.parse_args(sys.argv) - -providers = [] -if options.tracepoints: - providers.append(TracepointProvider()) -if options.debugfs: - providers.append(DebugfsProvider()) - -if len(providers) == 0: - try: - providers = [TracepointProvider()] - except: - providers = [DebugfsProvider()] - -stats = Stats(providers, fields = options.fields) - -if options.log: - log(stats) -elif not options.once: - import curses.wrapper - curses.wrapper(tui, stats) -else: - batch(stats) +def get_options(): + description_text = """ +This script displays various statistics about VMs running under KVM. +The statistics are gathered from the KVM debugfs entries and / or the +currently available perf traces. + +The monitoring takes additional cpu cycles and might affect the VM's +performance. + +Requirements: +- Access to: + /sys/kernel/debug/kvm + /sys/kernel/debug/trace/events/* + /proc/pid/task +- /proc/sys/kernel/perf_event_paranoid < 1 if user has no + CAP_SYS_ADMIN and perf events are used. +- CAP_SYS_RESOURCE if the hard limit is not high enough to allow + the large number of files that are possibly opened. +""" + + class PlainHelpFormatter(optparse.IndentedHelpFormatter): + def format_description(self, description): + if description: + return description + "\n" + else: + return "" + + optparser = optparse.OptionParser(description=description_text, + formatter=PlainHelpFormatter()) + optparser.add_option('-1', '--once', '--batch', + action='store_true', + default=False, + dest='once', + help='run in batch mode for one second', + ) + optparser.add_option('-l', '--log', + action='store_true', + default=False, + dest='log', + help='run in logging mode (like vmstat)', + ) + optparser.add_option('-t', '--tracepoints', + action='store_true', + default=False, + dest='tracepoints', + help='retrieve statistics from tracepoints', + ) + optparser.add_option('-d', '--debugfs', + action='store_true', + default=False, + dest='debugfs', + help='retrieve statistics from debugfs', + ) + optparser.add_option('-f', '--fields', + action='store', + default=None, + dest='fields', + help='fields to display (regex)', + ) + (options, _) = optparser.parse_args(sys.argv) + return options + +def get_providers(options): + providers = [] + + if options.tracepoints: + providers.append(TracepointProvider()) + if options.debugfs: + providers.append(DebugfsProvider()) + if len(providers) == 0: + providers.append(TracepointProvider()) + + return providers + +def check_access(): + if not os.path.exists('/sys/kernel/debug'): + sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.') + sys.exit(1) + + if not os.path.exists(PATH_DEBUGFS_KVM): + sys.stderr.write("Please make sure, that debugfs is mounted and " + "readable by the current user:\n" + "('mount -t debugfs debugfs /sys/kernel/debug')\n" + "Also ensure, that the kvm modules are loaded.\n") + sys.exit(1) + + if not os.path.exists(PATH_DEBUGFS_TRACING): + sys.stderr.write("Please make {0} readable by the current user.\n" + .format(PATH_DEBUGFS_TRACING)) + sys.exit(1) + +def main(): + check_access() + options = get_options() + providers = get_providers(options) + stats = Stats(providers, fields=options.fields) + + if options.log: + log(stats) + elif not options.once: + with Tui(stats) as tui: + tui.show_stats() + else: + batch(stats) + +if __name__ == "__main__": + main() diff --git a/tests/Makefile b/tests/Makefile index b7352f1a35..650e654ec2 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -558,7 +558,7 @@ tests/usb-hcd-uhci-test$(EXESUF): tests/usb-hcd-uhci-test.o $(libqos-usb-obj-y) tests/usb-hcd-ehci-test$(EXESUF): tests/usb-hcd-ehci-test.o $(libqos-usb-obj-y) tests/usb-hcd-xhci-test$(EXESUF): tests/usb-hcd-xhci-test.o $(libqos-usb-obj-y) tests/pc-cpu-test$(EXESUF): tests/pc-cpu-test.o -tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) +tests/vhost-user-test$(EXESUF): tests/vhost-user-test.o qemu-char.o qemu-timer.o $(qtest-obj-y) $(test-io-obj-y) tests/qemu-iotests/socket_scm_helper$(EXESUF): tests/qemu-iotests/socket_scm_helper.o tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y) tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o $(test-block-obj-y) |