Diffstat (limited to 'arch_init.c')
-rw-r--r-- | arch_init.c | 285 |
1 file changed, 206 insertions, 79 deletions
diff --git a/arch_init.c b/arch_init.c
index 68a7ab784..60c975db2 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -48,7 +48,9 @@
 #include "qmp-commands.h"
 #include "trace.h"
 #include "exec/cpu-all.h"
+#include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
+#include "qemu/host-utils.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -120,7 +122,6 @@ static void check_guest_throttling(void);
 #define RAM_SAVE_FLAG_XBZRLE   0x40
 /* 0x80 is reserved in migration.h start with 0x100 next */
 
-
 static struct defconfig_file {
     const char *filename;
     /* Indicates it is a user config file (disabled by -no-user-config) */
@@ -131,6 +132,7 @@ static struct defconfig_file {
     { NULL }, /* end of list */
 };
 
+static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
 
 int qemu_read_default_config_files(bool userconfig)
 {
@@ -150,10 +152,9 @@ int qemu_read_default_config_files(bool userconfig)
     return 0;
 }
 
-static inline bool is_zero_page(uint8_t *p)
+static inline bool is_zero_range(uint8_t *p, uint64_t size)
 {
-    return buffer_find_nonzero_offset(p, TARGET_PAGE_SIZE) ==
-        TARGET_PAGE_SIZE;
+    return buffer_find_nonzero_offset(p, size) == size;
 }
 
 /* struct contains XBZRLE cache and a static page
@@ -163,24 +164,63 @@ static struct {
     uint8_t *encoded_buf;
     /* buffer for storing page content */
     uint8_t *current_buf;
-    /* buffer used for XBZRLE decoding */
-    uint8_t *decoded_buf;
-    /* Cache for XBZRLE */
+    /* Cache for XBZRLE, protected by lock. */
     PageCache *cache;
+    QemuMutex lock;
 } XBZRLE = {
     .encoded_buf = NULL,
     .current_buf = NULL,
-    .decoded_buf = NULL,
     .cache = NULL,
 };
 
+/* buffer used for XBZRLE decoding */
+static uint8_t *xbzrle_decoded_buf;
+static void XBZRLE_cache_lock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_lock(&XBZRLE.lock);
+}
+
+static void XBZRLE_cache_unlock(void)
+{
+    if (migrate_use_xbzrle())
+        qemu_mutex_unlock(&XBZRLE.lock);
+}
 
 int64_t xbzrle_cache_resize(int64_t new_size)
 {
+    PageCache *new_cache, *cache_to_free;
+
+    if (new_size < TARGET_PAGE_SIZE) {
+        return -1;
+    }
+
+    /* no need to lock, the current thread holds qemu big lock */
     if (XBZRLE.cache != NULL) {
-        return cache_resize(XBZRLE.cache, new_size / TARGET_PAGE_SIZE) *
-            TARGET_PAGE_SIZE;
+        /* check XBZRLE.cache again later */
+        if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
+            return pow2floor(new_size);
+        }
+        new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
+                               TARGET_PAGE_SIZE);
+        if (!new_cache) {
+            DPRINTF("Error creating cache\n");
+            return -1;
+        }
+
+        XBZRLE_cache_lock();
+        /* the XBZRLE.cache may have been destroyed, check it again */
+        if (XBZRLE.cache != NULL) {
+            cache_to_free = XBZRLE.cache;
+            XBZRLE.cache = new_cache;
+        } else {
+            cache_to_free = new_cache;
+        }
+        XBZRLE_cache_unlock();
+
+        cache_fini(cache_to_free);
     }
 
+    return pow2floor(new_size);
 }
@@ -270,6 +310,34 @@ static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
     return size;
 }
 
+/* This is the last block that we have visited searching for dirty pages
+ */
+static RAMBlock *last_seen_block;
+/* This is the last block from where we have sent data */
+static RAMBlock *last_sent_block;
+static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+static uint32_t last_version;
+static bool ram_bulk_stage;
+
+/* Update the xbzrle cache to reflect a page that's been sent as all 0.
+ * The important thing is that a stale (not-yet-0'd) page be replaced
+ * by the new data.
+ * As a bonus, if the page wasn't in the cache it gets added so that
+ * when a small write is made into the 0'd page it gets XBZRLE sent.
+ */
+static void xbzrle_cache_zero_page(ram_addr_t current_addr)
+{
+    if (ram_bulk_stage || !migrate_use_xbzrle()) {
+        return;
+    }
+
+    /* We don't care if this fails to allocate a new cache page
+     * as long as it updated an old one */
+    cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
+}
+
 #define ENCODING_FLAG_XBZRLE 0x1
 
 static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
@@ -281,7 +349,9 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
 
     if (!cache_is_cached(XBZRLE.cache, current_addr)) {
         if (!last_stage) {
-            cache_insert(XBZRLE.cache, current_addr, current_data);
+            if (cache_insert(XBZRLE.cache, current_addr, current_data) == -1) {
+                return -1;
+            }
         }
         acct_info.xbzrle_cache_miss++;
         return -1;
@@ -324,25 +394,14 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data,
     return bytes_sent;
 }
 
-
-/* This is the last block that we have visited searching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
 static inline
 ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
                                                  ram_addr_t start)
 {
     unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
     unsigned long nr = base + (start >> TARGET_PAGE_BITS);
-    unsigned long size = base + (int128_get64(mr->size) >> TARGET_PAGE_BITS);
+    uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
+    unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
 
     unsigned long next;
@@ -359,11 +418,10 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
     return (next - base) << TARGET_PAGE_BITS;
 }
 
-static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
-                                              ram_addr_t offset)
+static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
 {
     bool ret;
-    int nr = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    int nr = addr >> TARGET_PAGE_BITS;
 
     ret = test_and_set_bit(nr, migration_bitmap);
@@ -373,12 +431,47 @@ static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
     return ret;
 }
 
+static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
+{
+    ram_addr_t addr;
+    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
+
+    /* start address is aligned at the start of a word? */
+    if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
+        int k;
+        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
+        unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
+
+        for (k = page; k < page + nr; k++) {
+            if (src[k]) {
+                unsigned long new_dirty;
+                new_dirty = ~migration_bitmap[k];
+                migration_bitmap[k] |= src[k];
+                new_dirty &= src[k];
+                migration_dirty_pages += ctpopl(new_dirty);
+                src[k] = 0;
+            }
+        }
+    } else {
+        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
+            if (cpu_physical_memory_get_dirty(start + addr,
+                                              TARGET_PAGE_SIZE,
+                                              DIRTY_MEMORY_MIGRATION)) {
+                cpu_physical_memory_reset_dirty(start + addr,
+                                                TARGET_PAGE_SIZE,
+                                                DIRTY_MEMORY_MIGRATION);
+                migration_bitmap_set_dirty(start + addr);
+            }
+        }
+    }
+}
+
+
 /* Needs iothread lock! */
 static void migration_bitmap_sync(void)
 {
     RAMBlock *block;
-    ram_addr_t addr;
     uint64_t num_dirty_pages_init = migration_dirty_pages;
     MigrationState *s = migrate_get_current();
     static int64_t start_time;
@@ -392,25 +485,19 @@ static void migration_bitmap_sync(void)
 
     if (!start_time) {
-        start_time = qemu_get_clock_ms(rt_clock);
+        start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     }
 
     trace_migration_bitmap_sync_start();
     address_space_sync_dirty_bitmap(&address_space_memory);
 
     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (memory_region_test_and_clear_dirty(block->mr,
-                                                   addr, TARGET_PAGE_SIZE,
-                                                   DIRTY_MEMORY_MIGRATION)) {
-                migration_bitmap_set_dirty(block->mr, addr);
-            }
-        }
+        migration_bitmap_sync_range(block->mr->ram_addr, block->length);
     }
 
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
     num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
-    end_time = qemu_get_clock_ms(rt_clock);
+    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 
     /* more than 1 second = 1000 milliseconds */
     if (end_time > start_time + 1000) {
@@ -478,6 +565,7 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
         } else {
             int ret;
             uint8_t *p;
+            bool send_async = true;
             int cont = (block == last_sent_block) ?
                 RAM_SAVE_FLAG_CONTINUE : 0;
 
@@ -488,6 +576,9 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
             ret = ram_control_save_page(f, block->offset,
                                offset, TARGET_PAGE_SIZE, &bytes_sent);
 
+            XBZRLE_cache_lock();
+
+            current_addr = block->offset + offset;
             if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
                 if (ret != RAM_SAVE_CONTROL_DELAYED) {
                     if (bytes_sent > 0) {
@@ -496,29 +587,46 @@ static int ram_save_block(QEMUFile *f, bool last_stage)
                     }
                 }
-            } else if (is_zero_page(p)) {
+            } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
                 acct_info.dup_pages++;
                 bytes_sent = save_block_hdr(f, block, offset, cont,
                                             RAM_SAVE_FLAG_COMPRESS);
                 qemu_put_byte(f, 0);
                 bytes_sent++;
+                /* Must let xbzrle know, otherwise a previous (now 0'd) cached
+                 * page would be stale
+                 */
+                xbzrle_cache_zero_page(current_addr);
             } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
-                current_addr = block->offset + offset;
                 bytes_sent = save_xbzrle_page(f, p, current_addr, block,
                                               offset, cont, last_stage);
                 if (!last_stage) {
+                    /* We must send exactly what's in the xbzrle cache
+                     * even if the page wasn't xbzrle compressed, so that
+                     * it's right next time.
+                     */
                     p = get_cached_data(XBZRLE.cache, current_addr);
+
+                    /* Can't send this cached data async, since the cache page
+                     * might get updated before it gets to the wire
+                     */
+                    send_async = false;
                 }
             }
 
             /* XBZRLE overflow or normal page */
             if (bytes_sent == -1) {
                 bytes_sent = save_block_hdr(f, block, offset, cont,
                                             RAM_SAVE_FLAG_PAGE);
-                qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                if (send_async) {
+                    qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
+                } else {
+                    qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
+                }
                 bytes_sent += TARGET_PAGE_SIZE;
                 acct_info.norm_pages++;
             }
+            XBZRLE_cache_unlock();
 
             /* if page is unmodified, continue to the next */
             if (bytes_sent > 0) {
                 last_sent_block = block;
@@ -572,6 +680,12 @@ uint64_t ram_bytes_total(void)
     return total;
 }
 
+void free_xbzrle_decoded_buf(void)
+{
+    g_free(xbzrle_decoded_buf);
+    xbzrle_decoded_buf = NULL;
+}
+
 static void migration_end(void)
 {
     if (migration_bitmap) {
@@ -580,14 +694,17 @@ static void migration_end(void)
         migration_bitmap = NULL;
     }
 
+    XBZRLE_cache_lock();
     if (XBZRLE.cache) {
         cache_fini(XBZRLE.cache);
         g_free(XBZRLE.cache);
         g_free(XBZRLE.encoded_buf);
         g_free(XBZRLE.current_buf);
-        g_free(XBZRLE.decoded_buf);
         XBZRLE.cache = NULL;
+        XBZRLE.encoded_buf = NULL;
+        XBZRLE.current_buf = NULL;
     }
+    XBZRLE_cache_unlock();
 }
 
 static void ram_migration_cancel(void *opaque)
@@ -618,15 +735,33 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
     dirty_rate_high_cnt = 0;
 
     if (migrate_use_xbzrle()) {
+        qemu_mutex_lock_iothread();
        XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
                                  TARGET_PAGE_SIZE,
                                  TARGET_PAGE_SIZE);
        if (!XBZRLE.cache) {
+            qemu_mutex_unlock_iothread();
            DPRINTF("Error creating cache\n");
            return -1;
        }
-        XBZRLE.encoded_buf = g_malloc0(TARGET_PAGE_SIZE);
-        XBZRLE.current_buf = g_malloc(TARGET_PAGE_SIZE);
+        qemu_mutex_init(&XBZRLE.lock);
+        qemu_mutex_unlock_iothread();
+
+        /* We prefer not to abort if there is no memory */
+        XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
+        if (!XBZRLE.encoded_buf) {
+            DPRINTF("Error allocating encoded_buf\n");
+            return -1;
+        }
+
+        XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
+        if (!XBZRLE.current_buf) {
+            DPRINTF("Error allocating current_buf\n");
+            g_free(XBZRLE.encoded_buf);
+            XBZRLE.encoded_buf = NULL;
+            return -1;
+        }
+
        acct_clear();
    }
 
@@ -672,7 +807,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
 
     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 
-    t0 = qemu_get_clock_ns(rt_clock);
+    t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
     i = 0;
     while ((ret = qemu_file_rate_limit(f)) == 0) {
         int bytes_sent;
@@ -691,7 +826,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
            iterations
         */
         if ((i & 63) == 0) {
-            uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000;
+            uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
             if (t1 > MAX_WAIT) {
                 DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                         t1, i);
@@ -709,15 +844,20 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
      */
     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
 
+    bytes_transferred += total_sent;
+
+    /*
+     * Do not count these 8 bytes into total_sent, so that we can
+     * return 0 if no page had been dirtied.
+     */
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    bytes_transferred += 8;
+
+    ret = qemu_file_get_error(f);
     if (ret < 0) {
-        bytes_transferred += total_sent;
         return ret;
     }
 
-    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-    total_sent += 8;
-    bytes_transferred += total_sent;
-
     return total_sent;
 }
@@ -772,8 +912,8 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
     unsigned int xh_len;
     int xh_flags;
 
-    if (!XBZRLE.decoded_buf) {
-        XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
+    if (!xbzrle_decoded_buf) {
+        xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
     }
 
     /* extract RLE header */
@@ -790,10 +930,10 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
         return -1;
     }
     /* load data and decode */
-    qemu_get_buffer(f, XBZRLE.decoded_buf, xh_len);
+    qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
 
     /* decode RLE */
-    ret = xbzrle_decode_buffer(XBZRLE.decoded_buf, xh_len, host,
+    ret = xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
                                TARGET_PAGE_SIZE);
     if (ret == -1) {
         fprintf(stderr, "Failed to load XBZRLE page - decode error!\n");
@@ -843,15 +983,8 @@ static inline void *host_from_stream_offset(QEMUFile *f,
  */
 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
 {
-    if (ch != 0 || !is_zero_page(host)) {
+    if (ch != 0 || !is_zero_range(host, size)) {
         memset(host, ch, size);
-#ifndef _WIN32
-        if (ch == 0 &&
-            (!kvm_enabled() || kvm_has_sync_mmu()) &&
-            getpagesize() <= TARGET_PAGE_SIZE) {
-            qemu_madvise(host, TARGET_PAGE_SIZE, QEMU_MADV_DONTNEED);
-        }
-#endif
     }
 }
@@ -1112,9 +1245,6 @@ int qemu_uuid_parse(const char *str, uint8_t *uuid)
     if (ret != 16) {
         return -1;
     }
-#ifdef TARGET_I386
-    smbios_add_field(1, offsetof(struct smbios_type_1, uuid), uuid, 16);
-#endif
     return 0;
 }
@@ -1125,20 +1255,18 @@ void do_acpitable_option(const QemuOpts *opts)
 
     acpi_table_add(opts, &err);
     if (err) {
-        fprintf(stderr, "Wrong acpi table provided: %s\n",
-                error_get_pretty(err));
+        error_report("Wrong acpi table provided: %s",
+                     error_get_pretty(err));
         error_free(err);
         exit(1);
     }
 #endif
 }
 
-void do_smbios_option(const char *optarg)
+void do_smbios_option(QemuOpts *opts)
 {
 #ifdef TARGET_I386
-    if (smbios_entry_add(optarg) < 0) {
-        exit(1);
-    }
+    smbios_entry_add(opts);
 #endif
 }
@@ -1195,15 +1323,14 @@ static void mig_sleep_cpu(void *opq)
    much time in the VM. The migration thread will try to catch up.
    Workload will experience a performance drop.
 */
-static void mig_throttle_cpu_down(CPUState *cpu, void *data)
-{
-    async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
-}
-
 static void mig_throttle_guest_down(void)
 {
+    CPUState *cpu;
+
     qemu_mutex_lock_iothread();
-    qemu_for_each_cpu(mig_throttle_cpu_down, NULL);
+    CPU_FOREACH(cpu) {
+        async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
+    }
     qemu_mutex_unlock_iothread();
 }
@@ -1217,11 +1344,11 @@ static void check_guest_throttling(void)
     }
 
     if (!t0)  {
-        t0 = qemu_get_clock_ns(rt_clock);
+        t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
         return;
     }
 
-    t1 = qemu_get_clock_ns(rt_clock);
+    t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 
     /* If it has been more than 40 ms since the last time the guest
      * was throttled then do it again.
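A note on the resize semantics above: the XBZRLE page cache always holds a power-of-two number of pages, so xbzrle_cache_resize() reports the effective size as pow2floor(new_size), which the new "qemu/host-utils.h" include provides. A minimal, self-contained sketch of that rounding; this is an illustrative reimplementation, not QEMU's helper:

    #include <stdint.h>
    #include <stdio.h>

    /* Round down to the nearest power of two by clearing the lowest
     * set bit until only one bit remains.  Same semantics as the
     * pow2floor() call relied on by xbzrle_cache_resize(). */
    static int64_t pow2floor_sketch(int64_t value)
    {
        while (value & (value - 1)) {
            value &= value - 1;
        }
        return value;
    }

    int main(void)
    {
        /* Asking for a 5 MiB cache yields an effective 4 MiB one. */
        printf("%lld\n", (long long)pow2floor_sketch(5 * 1024 * 1024));
        return 0;
    }

This is also why the early-return path compares pow2floor(new_size) against migrate_xbzrle_cache_size(): if the rounded request already matches the current cache size, no reallocation is needed.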
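The locking choreography in xbzrle_cache_resize() is worth spelling out: the replacement cache is built before taking XBZRLE.lock, swapped in under the lock only if the old cache still exists, and whichever cache loses is freed after the lock is dropped. A generic pthread sketch of that publish-then-free pattern, with an illustrative Cache type and function names that are not QEMU's:

    #include <pthread.h>
    #include <stdlib.h>

    typedef struct Cache {
        size_t num_pages;
    } Cache;

    static Cache *current_cache;
    static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

    static Cache *cache_build(size_t num_pages)
    {
        Cache *c = malloc(sizeof(*c));
        if (c) {
            c->num_pages = num_pages;
        }
        return c;
    }

    static int cache_replace(size_t num_pages)
    {
        /* Build the replacement outside the lock so readers are only
         * blocked for the pointer swap, not for the allocation. */
        Cache *new_cache = cache_build(num_pages);
        Cache *to_free;

        if (!new_cache) {
            return -1;
        }

        pthread_mutex_lock(&cache_lock);
        /* The old cache may have been torn down meanwhile (as
         * migration_end() can do); then discard the replacement
         * instead of installing it. */
        if (current_cache) {
            to_free = current_cache;
            current_cache = new_cache;
        } else {
            to_free = new_cache;
        }
        pthread_mutex_unlock(&cache_lock);

        free(to_free);
        return 0;
    }

    int main(void)
    {
        current_cache = cache_build(1024);
        cache_replace(2048); /* swap in a bigger cache */
        free(current_cache);
        return 0;
    }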
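The fast path of migration_bitmap_sync_range() merges the recorded dirty bits into the migration bitmap a whole word at a time and counts the newly set bits with ctpopl(). A standalone sketch of that merge-and-count step, with GCC/Clang's __builtin_popcountl() standing in for QEMU's ctpopl() and illustrative array names:

    #include <stdio.h>

    /* Merge 'src' into 'dst' word by word, returning how many bits
     * went from clear to set, i.e. how many pages became newly dirty.
     * 'src' words are cleared as they are consumed, as in the patch. */
    static unsigned long merge_dirty(unsigned long *dst, unsigned long *src,
                                     int nr_words)
    {
        unsigned long newly_dirty = 0;
        int k;

        for (k = 0; k < nr_words; k++) {
            if (src[k]) {
                /* bits set in src but not yet in dst */
                unsigned long new_bits = ~dst[k] & src[k];

                dst[k] |= src[k];
                newly_dirty += __builtin_popcountl(new_bits);
                src[k] = 0;
            }
        }
        return newly_dirty;
    }

    int main(void)
    {
        unsigned long dst[2] = { 0x5, 0x0 }; /* pages 0 and 2 already dirty */
        unsigned long src[2] = { 0x7, 0x1 }; /* sync reports pages 0, 1, 2
                                                plus one in the next word */

        printf("newly dirty: %lu\n", merge_dirty(dst, src, 2)); /* prints 2 */
        return 0;
    }

Counting only the bits that were not already set is what keeps migration_dirty_pages accurate when the same page is dirtied repeatedly between syncs; the slow path, one page at a time, is kept for ranges that do not start on a word boundary.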
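Finally, why the patch forces send_async = false for pages served from the XBZRLE cache: an asynchronous put only queues a pointer, and the cached page can be rewritten by a later cache_insert() before the queue drains, so the bytes reaching the wire would no longer match what the cache believes it sent. A toy illustration of the hazard; queue_write_async() and flush_queue() are hypothetical stand-ins for the buffered-file machinery, not QEMU APIs:

    #include <stdio.h>
    #include <string.h>

    enum { PAGE_LEN = 8 };

    static const unsigned char *pending; /* one-slot "async" output queue */

    static void queue_write_async(const unsigned char *buf)
    {
        pending = buf; /* only the pointer is queued, nothing is copied */
    }

    static void flush_queue(FILE *out)
    {
        if (pending) {
            fwrite(pending, 1, PAGE_LEN, out);
            pending = NULL;
        }
    }

    int main(void)
    {
        unsigned char page[PAGE_LEN] = "AAAAAAA";

        queue_write_async(page);           /* queued, not yet sent */
        memcpy(page, "BBBBBBB", PAGE_LEN); /* page changes before the flush... */
        flush_queue(stdout);               /* ...so "BBBBBBB" hits the wire */
        return 0;
    }

qemu_put_buffer(), by contrast, copies the data immediately, which is why the cached-page path accepts that extra copy.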