Diffstat (limited to 'arch_init.c')
-rw-r--r--  arch_init.c | 178
1 file changed, 156 insertions, 22 deletions
diff --git a/arch_init.c b/arch_init.c
index acb9ef3777..4b30a4ffff 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -65,7 +65,7 @@ int graphic_depth = 8;
#else
int graphic_width = 800;
int graphic_height = 600;
-int graphic_depth = 15;
+int graphic_depth = 32;
#endif
@@ -104,6 +104,9 @@ int graphic_depth = 15;
#endif
const uint32_t arch_type = QEMU_ARCH;
+static bool mig_throttle_on;
+static int dirty_rate_high_cnt;
+static void check_guest_throttling(void);
/***********************************************************/
/* ram save/restore */
@@ -115,6 +118,7 @@ const uint32_t arch_type = QEMU_ARCH;
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
#define RAM_SAVE_FLAG_XBZRLE 0x40
+/* 0x80 is reserved in migration.h; start with 0x100 next */
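The reserved 0x80 bit is the hook flag consumed by the new RAM_SAVE_FLAG_HOOK branch added to ram_load() further down. Presumably migration.h carries something like the following definition (assumed here for illustration, not part of this patch):

    #define RAM_SAVE_FLAG_HOOK     0x80   /* page is handled by a transport hook (e.g. RDMA) */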
static struct defconfig_file {
@@ -123,7 +127,7 @@ static struct defconfig_file {
bool userconfig;
} default_config_files[] = {
{ CONFIG_QEMU_CONFDIR "/qemu.conf", true },
- { CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
+ { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
{ NULL }, /* end of list */
};
@@ -378,15 +382,21 @@ static void migration_bitmap_sync(void)
uint64_t num_dirty_pages_init = migration_dirty_pages;
MigrationState *s = migrate_get_current();
static int64_t start_time;
+ static int64_t bytes_xfer_prev;
static int64_t num_dirty_pages_period;
int64_t end_time;
+ int64_t bytes_xfer_now;
+
+ if (!bytes_xfer_prev) {
+ bytes_xfer_prev = ram_bytes_transferred();
+ }
if (!start_time) {
start_time = qemu_get_clock_ms(rt_clock);
}
trace_migration_bitmap_sync_start();
- memory_global_sync_dirty_bitmap(get_system_memory());
+ address_space_sync_dirty_bitmap(&address_space_memory);
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
@@ -404,6 +414,25 @@ static void migration_bitmap_sync(void)
/* more than 1 second = 1000 milliseconds */
if (end_time > start_time + 1000) {
+ if (migrate_auto_converge()) {
+ /* The following detection logic can be refined later. For now:
+ Check to see if the dirtied bytes are 50% more than the approx.
+ amount of bytes that just got transferred since the last time we
+ were in this routine. If that happens >N times (for now N==4)
+ we turn on the throttle down logic */
+ bytes_xfer_now = ram_bytes_transferred();
+ if (s->dirty_pages_rate &&
+ (num_dirty_pages_period * TARGET_PAGE_SIZE >
+ (bytes_xfer_now - bytes_xfer_prev)/2) &&
+ (dirty_rate_high_cnt++ > 4)) {
+ trace_migration_throttle();
+ mig_throttle_on = true;
+ dirty_rate_high_cnt = 0;
+ }
+ bytes_xfer_prev = bytes_xfer_now;
+ } else {
+ mig_throttle_on = false;
+ }
s->dirty_pages_rate = num_dirty_pages_period * 1000
/ (end_time - start_time);
s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
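A note on the new trigger, not part of the patch: migrate_auto_converge() reflects the auto-converge migration capability, which is off by default, and the comparison as written fires on half of the transferred bytes rather than 1.5 times them:

    /* Illustrative restatement of the condition above:
     *   num_dirty_pages_period * TARGET_PAGE_SIZE > (bytes_xfer_now - bytes_xfer_prev) / 2
     * i.e. the guest dirtied more than half of what migration transferred since
     * the last sync.  Once dirty_rate_high_cnt exceeds 4, mig_throttle_on is set
     * and the counter is reset.
     */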
@@ -447,6 +476,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
ram_bulk_stage = false;
}
} else {
+ int ret;
uint8_t *p;
int cont = (block == last_sent_block) ?
RAM_SAVE_FLAG_CONTINUE : 0;
@@ -455,17 +485,23 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
/* When in doubt, send the page as normal */
bytes_sent = -1;
- if (is_zero_page(p)) {
- acct_info.dup_pages++;
- if (!ram_bulk_stage) {
- bytes_sent = save_block_hdr(f, block, offset, cont,
- RAM_SAVE_FLAG_COMPRESS);
- qemu_put_byte(f, 0);
- bytes_sent++;
- } else {
- acct_info.skipped_pages++;
- bytes_sent = 0;
+ ret = ram_control_save_page(f, block->offset,
+ offset, TARGET_PAGE_SIZE, &bytes_sent);
+
+ if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
+ if (ret != RAM_SAVE_CONTROL_DELAYED) {
+ if (bytes_sent > 0) {
+ acct_info.norm_pages++;
+ } else if (bytes_sent == 0) {
+ acct_info.dup_pages++;
+ }
}
+ } else if (is_zero_page(p)) {
+ acct_info.dup_pages++;
+ bytes_sent = save_block_hdr(f, block, offset, cont,
+ RAM_SAVE_FLAG_COMPRESS);
+ qemu_put_byte(f, 0);
+ bytes_sent++;
} else if (!ram_bulk_stage && migrate_use_xbzrle()) {
current_addr = block->offset + offset;
bytes_sent = save_xbzrle_page(f, p, current_addr, block,
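For readers following the new control path, a summary of how the three outcomes of ram_control_save_page() are handled above (an illustrative reading, not part of the patch):

    /* RAM_SAVE_CONTROL_NOT_SUPP -> no transport hook registered; fall through to
     *                              the zero-page / XBZRLE / normal-page path.
     * RAM_SAVE_CONTROL_DELAYED  -> the transport queued the page and accounts for
     *                              it later (see acct_update_position() below).
     * other non-error results   -> the transport sent the page itself; dup_pages
     *                              or norm_pages is updated here from bytes_sent.
     */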
@@ -498,6 +534,18 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
static uint64_t bytes_transferred;
+void acct_update_position(QEMUFile *f, size_t size, bool zero)
+{
+ uint64_t pages = size / TARGET_PAGE_SIZE;
+ if (zero) {
+ acct_info.dup_pages += pages;
+ } else {
+ acct_info.norm_pages += pages;
+ bytes_transferred += size;
+ qemu_update_position(f, size);
+ }
+}
+
static ram_addr_t ram_save_remaining(void)
{
return migration_dirty_pages;
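A minimal caller sketch for the new helper, assuming a transport (such as the RDMA code this series prepares for) that moves pages outside the QEMUFile byte stream; the function below is hypothetical:

    /* Hypothetical example, not part of this patch. */
    static void example_transport_chunk_done(QEMUFile *f, size_t len, bool was_zero)
    {
        /* Updates dup_pages or norm_pages and, for non-zero data, bumps
         * bytes_transferred and the QEMUFile position for data that
         * bypassed the stream. */
        acct_update_position(f, len, was_zero);
    }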
@@ -566,6 +614,8 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
migration_bitmap = bitmap_new(ram_pages);
bitmap_set(migration_bitmap, 0, ram_pages);
migration_dirty_pages = ram_pages;
+ mig_throttle_on = false;
+ dirty_rate_high_cnt = 0;
if (migrate_use_xbzrle()) {
XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -598,6 +648,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
}
qemu_mutex_unlock_ramlist();
+
+ ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+ ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+
qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
return 0;
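The SETUP pairing above gives a transport a place for one-time work (for RDMA, presumably registering or pinning RAM blocks) before any page is sent; for transports that install no hooks, both calls are expected to be no-ops. The phase pairing used throughout this file after the patch:

    /* ram_save_setup():    before/after iterate with RAM_CONTROL_SETUP
     * ram_save_iterate():  before/after iterate with RAM_CONTROL_ROUND
     * ram_save_complete(): before/after iterate with RAM_CONTROL_FINISH
     */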
@@ -616,6 +670,8 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
reset_ram_globals();
}
+ ram_control_before_iterate(f, RAM_CONTROL_ROUND);
+
t0 = qemu_get_clock_ns(rt_clock);
i = 0;
while ((ret = qemu_file_rate_limit(f)) == 0) {
@@ -628,6 +684,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
}
total_sent += bytes_sent;
acct_info.iterations++;
+ check_guest_throttling();
/* we want to check in the 1st loop, just in case it was the 1st time
and we had to sync the dirty bitmap.
qemu_get_clock_ns() is a bit expensive, so we only check each some
@@ -646,6 +703,12 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
qemu_mutex_unlock_ramlist();
+ /*
+ * Must occur before EOS (or any QEMUFile operation)
+ * because of RDMA protocol.
+ */
+ ram_control_after_iterate(f, RAM_CONTROL_ROUND);
+
if (ret < 0) {
bytes_transferred += total_sent;
return ret;
@@ -663,6 +726,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
qemu_mutex_lock_ramlist();
migration_bitmap_sync();
+ ram_control_before_iterate(f, RAM_CONTROL_FINISH);
+
/* try transferring iterative blocks of memory */
/* flush all remaining blocks regardless of rate limiting */
@@ -676,6 +741,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
}
bytes_transferred += bytes_sent;
}
+
+ ram_control_after_iterate(f, RAM_CONTROL_FINISH);
migration_end();
qemu_mutex_unlock_ramlist();
@@ -770,6 +837,24 @@ static inline void *host_from_stream_offset(QEMUFile *f,
return NULL;
}
+/*
+ * If a page (or a whole RDMA chunk) has been
+ * determined to be zero, then zap it.
+ */
+void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
+{
+ if (ch != 0 || !is_zero_page(host)) {
+ memset(host, ch, size);
+#ifndef _WIN32
+ if (ch == 0 &&
+ (!kvm_enabled() || kvm_has_sync_mmu()) &&
+ getpagesize() <= TARGET_PAGE_SIZE) {
+ qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
+ }
+#endif
+ }
+}
+
static int ram_load(QEMUFile *f, void *opaque, int version_id)
{
ram_addr_t addr;
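How the zero-page check in the new helper plays out, as a reading of the code above (not part of the patch):

    /* ch != 0                    -> the page must be filled; memset is unavoidable.
     * ch == 0, page already zero -> nothing is written, so an untouched destination
     *                               page stays unallocated.
     * ch == 0, page has data     -> memset to zero, then (on non-Windows hosts, when
     *                               the host page size fits and KVM's MMU allows it)
     *                               QEMU_MADV_DONTNEED hands the page back to the kernel.
     */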
@@ -808,6 +893,10 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
QTAILQ_FOREACH(block, &ram_list.blocks, next) {
if (!strncmp(id, block->idstr, sizeof(id))) {
if (block->length != length) {
+ fprintf(stderr,
+ "Length mismatch: %s: " RAM_ADDR_FMT
+ " in != " RAM_ADDR_FMT "\n", id, length,
+ block->length);
ret = -EINVAL;
goto done;
}
@@ -837,14 +926,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
}
ch = qemu_get_byte(f);
- memset(host, ch, TARGET_PAGE_SIZE);
-#ifndef _WIN32
- if (ch == 0 &&
- (!kvm_enabled() || kvm_has_sync_mmu()) &&
- getpagesize() <= TARGET_PAGE_SIZE) {
- qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
- }
-#endif
+ ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
} else if (flags & RAM_SAVE_FLAG_PAGE) {
void *host;
@@ -864,6 +946,8 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
ret = -EINVAL;
goto done;
}
+ } else if (flags & RAM_SAVE_FLAG_HOOK) {
+ ram_control_load_hook(f, flags);
}
error = qemu_file_get_error(f);
if (error) {
@@ -1093,11 +1177,61 @@ TargetInfo *qmp_query_target(Error **errp)
{
TargetInfo *info = g_malloc0(sizeof(*info));
- info->arch = TARGET_TYPE;
+ info->arch = g_strdup(TARGET_NAME);
return info;
}
+/* Stub function that gets run on the vcpu when it is brought out of the
+ VM to run inside qemu via async_run_on_cpu() */
+static void mig_sleep_cpu(void *opq)
+{
+ qemu_mutex_unlock_iothread();
+ g_usleep(30*1000);
+ qemu_mutex_lock_iothread();
+}
+
+/* To reduce the dirty rate, explicitly disallow the VCPUs from spending
+ much time in the VM. The migration thread will try to catch up.
+ The workload will experience a performance drop.
+*/
+static void mig_throttle_cpu_down(CPUState *cpu, void *data)
+{
+ async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+}
+
+static void mig_throttle_guest_down(void)
+{
+ qemu_mutex_lock_iothread();
+ qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+ qemu_mutex_unlock_iothread();
+}
+
+static void check_guest_throttling(void)
+{
+ static int64_t t0;
+ int64_t t1;
+
+ if (!mig_throttle_on) {
+ return;
+ }
+
+ if (!t0) {
+ t0 = qemu_get_clock_ns(rt_clock);
+ return;
+ }
+
+ t1 = qemu_get_clock_ns(rt_clock);
+
+ /* If it has been more than 40 ms since the last time the guest
+ * was throttled then do it again.
+ */
+ if (40 < (t1-t0)/1000000) {
+ mig_throttle_guest_down();
+ t0 = t1;
+ }
+}
+
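Rough arithmetic on the throttle's effect, an estimate rather than anything stated in the patch:

    /* Assumes the 30 ms sleep and the 40 ms re-check interval above:
     *   sleep per window   ~ 30 ms
     *   window length      >= 40 ms (check_guest_throttling() is called from the
     *                         save loop, so it can be longer)
     *   vcpu run time      <= ~10 ms per 40 ms window, i.e. roughly a quarter of
     *                         normal, while mig_throttle_on stays set.
     */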
int hax_available(void)
{
#ifdef CONFIG_HAX