Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging

- vhost-scsi: add bootindex property - RCU: fix MemoryRegion lifetime issues in PCI; document the rules; convert of AddressSpaceDispatch and RAMList - KVM: add kvm_exit reasons for aarch64 # gpg: Signature made Mon Feb 16 16:32:32 2015 GMT using RSA key ID 78C7AE83 # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" # gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" # gpg: WARNING: This key is not certified with a trusted signature! # gpg: There is no indication that the signature belongs to the owner. # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * remotes/bonzini/tags/for-upstream: (21 commits) Convert ram_list to RCU exec: convert ram_list to QLIST cosmetic changes preparing for the following patches exec: protect mru_block with RCU rcu: add g_free_rcu rcu: introduce RCU-enabled QLIST exec: RCUify AddressSpaceDispatch exec: make iotlb RCU-friendly exec: introduce cpu_reload_memory_map docs: clarify memory region lifecycle pci: split shpc_cleanup and shpc_free pcie: remove mmconfig memory leak and wrap mmconfig update with transaction memory: keep the owner of the AddressSpace alive until do_address_space_destroy rcu: run RCU callbacks under the BQL rcu: do not let RCU callbacks pile up indefinitely vhost-scsi: set the bootable value of channel/target/lun vhost-scsi: add a property for booting vhost-scsi: expose the TYPE_FW_PATH_PROVIDER interface vhost-scsi: add bootindex property qdev: support to get a device firmware path directly ... Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
author: Peter Maydell <peter.maydell@linaro.org> 2015-02-24 13:58:18 +0000
committer: Peter Maydell <peter.maydell@linaro.org> 2015-02-24 13:58:18 +0000
commit: 73104fd399c6778112f64fe0d439319f24508d9a (patch)
tree: c097e6a8acd33b8ad8561a2bccacc4247db76085
parent: 3dc10613c313a042a111e46a977733411495ea8c (diff)
parent: 0dc3f44aca18b1be8b425f3f4feb4b3e8d68de2e (diff)
download: qemu-73104fd399c6778112f64fe0d439319f24508d9a.tar.gz
qemu-73104fd399c6778112f64fe0d439319f24508d9a.tar.bz2
qemu-73104fd399c6778112f64fe0d439319f24508d9a.zip
36 files changed, 997 insertions, 191 deletions
diff --git a/arch_init.c b/arch_init.c
index 89c8fa46bb..5fc6fc382c 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -52,6 +52,7 @@
 #include "exec/ram_addr.h"
 #include "hw/acpi/acpi.h"
 #include "qemu/host-utils.h"
+#include "qemu/rcu_queue.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -487,7 +488,6 @@ static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
 }
 
 
-/* Needs iothread lock! */
 /* Fix me: there are too many global variables used in migration process. */
 static int64_t start_time;
 static int64_t bytes_xfer_prev;
@@ -500,6 +500,7 @@ static void migration_bitmap_sync_init(void)
     num_dirty_pages_period = 0;
 }
 
+/* Called with iothread lock held, to protect ram_list.dirty_memory[] */
 static void migration_bitmap_sync(void)
 {
     RAMBlock *block;
@@ -523,9 +524,12 @@ static void migration_bitmap_sync(void)
     trace_migration_bitmap_sync_start();
     address_space_sync_dirty_bitmap(&address_space_memory);
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
     }
+    rcu_read_unlock();
+
     trace_migration_bitmap_sync_end(migration_dirty_pages
                                     - num_dirty_pages_init);
     num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
@@ -648,6 +652,8 @@ static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
 /*
  * ram_find_and_save_block: Finds a page to send and sends it to f
  *
+ * Called within an RCU critical section.
+ *
  * Returns:  The number of bytes written.
  *           0 means no dirty pages
  */
@@ -661,7 +667,7 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
     MemoryRegion *mr;
 
     if (!block)
-        block = QTAILQ_FIRST(&ram_list.blocks);
+        block = QLIST_FIRST_RCU(&ram_list.blocks);
 
     while (true) {
         mr = block->mr;
@@ -672,9 +678,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
         }
         if (offset >= block->used_length) {
             offset = 0;
-            block = QTAILQ_NEXT(block, next);
+            block = QLIST_NEXT_RCU(block, next);
             if (!block) {
-                block = QTAILQ_FIRST(&ram_list.blocks);
+                block = QLIST_FIRST_RCU(&ram_list.blocks);
                 complete_round = true;
                 ram_bulk_stage = false;
             }
@@ -688,9 +694,9 @@ static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
             }
         }
     }
+
     last_seen_block = block;
     last_offset = offset;
-
     return bytes_sent;
 }
 
@@ -728,9 +734,10 @@ uint64_t ram_bytes_total(void)
     RAMBlock *block;
     uint64_t total = 0;
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next)
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
         total += block->used_length;
-
+    rcu_read_unlock();
     return total;
 }
 
@@ -776,6 +783,13 @@ static void reset_ram_globals(void)
 
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
 
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * a long-running RCU critical section.  When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
     RAMBlock *block;
@@ -816,8 +830,10 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
         acct_clear();
     }
 
+    /* iothread lock needed for ram_list.dirty_memory[] */
     qemu_mutex_lock_iothread();
     qemu_mutex_lock_ramlist();
+    rcu_read_lock();
     bytes_transferred = 0;
     reset_ram_globals();
 
@@ -830,7 +846,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
      * gaps due to alignment or unplugs.
      */
     migration_dirty_pages = 0;
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         uint64_t block_pages;
 
         block_pages = block->used_length >> TARGET_PAGE_BITS;
@@ -839,17 +855,18 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
 
     memory_global_dirty_log_start();
     migration_bitmap_sync();
+    qemu_mutex_unlock_ramlist();
     qemu_mutex_unlock_iothread();
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         qemu_put_byte(f, strlen(block->idstr));
         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
         qemu_put_be64(f, block->used_length);
     }
 
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
 
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
@@ -866,12 +883,14 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     int64_t t0;
     int total_sent = 0;
 
-    qemu_mutex_lock_ramlist();
-
+    rcu_read_lock();
     if (ram_list.version != last_version) {
         reset_ram_globals();
     }
 
+    /* Read version before ram_list.blocks */
+    smp_rmb();
+
     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
 
     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
@@ -902,8 +921,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
         }
         i++;
     }
-
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
 
     /*
      * Must occur before EOS (or any QEMUFile operation)
@@ -928,9 +946,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
     return total_sent;
 }
 
+/* Called with iothread lock */
 static int ram_save_complete(QEMUFile *f, void *opaque)
 {
-    qemu_mutex_lock_ramlist();
+    rcu_read_lock();
+
     migration_bitmap_sync();
 
     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
@@ -952,7 +972,7 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
     migration_end();
 
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
@@ -966,7 +986,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
 
     if (remaining_size < max_size) {
         qemu_mutex_lock_iothread();
+        rcu_read_lock();
         migration_bitmap_sync();
+        rcu_read_unlock();
         qemu_mutex_unlock_iothread();
         remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
     }
@@ -1008,6 +1030,9 @@ static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
     return 0;
 }
 
+/* Must be called from within an RCU critical section.
+ * Returns a pointer from within the RCU-protected ram_list.
+ */
 static inline void *host_from_stream_offset(QEMUFile *f,
                                             ram_addr_t offset,
                                             int flags)
@@ -1029,7 +1054,7 @@ static inline void *host_from_stream_offset(QEMUFile *f,
     qemu_get_buffer(f, (uint8_t *)id, len);
     id[len] = 0;
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (!strncmp(id, block->idstr, sizeof(id)) &&
             block->max_length > offset) {
             return memory_region_get_ram_ptr(block->mr) + offset;
@@ -1062,6 +1087,12 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
         ret = -EINVAL;
     }
 
+    /* This RCU critical section can be very long running.
+     * When RCU reclaims in the code start to become numerous,
+     * it will be necessary to reduce the granularity of this
+     * critical section.
+     */
+    rcu_read_lock();
     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
         ram_addr_t addr, total_ram_bytes;
         void *host;
@@ -1086,7 +1117,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 id[len] = 0;
                 length = qemu_get_be64(f);
 
-                QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+                QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
                     if (!strncmp(id, block->idstr, sizeof(id))) {
                         if (length != block->used_length) {
                             Error *local_err = NULL;
@@ -1117,7 +1148,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 ret = -EINVAL;
                 break;
             }
-
             ch = qemu_get_byte(f);
             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
             break;
@@ -1128,7 +1158,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 ret = -EINVAL;
                 break;
             }
-
             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
             break;
         case RAM_SAVE_FLAG_XBZRLE:
@@ -1138,7 +1167,6 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
                 ret = -EINVAL;
                 break;
             }
-
             if (load_xbzrle(f, addr, host) < 0) {
                 error_report("Failed to decompress XBZRLE page at "
                              RAM_ADDR_FMT, addr);
@@ -1163,6 +1191,7 @@ static int ram_load(QEMUFile *f, void *opaque, int version_id)
         }
     }
 
+    rcu_read_unlock();
     DPRINTF("Completed load of VM with exit code %d seq iteration "
             "%" PRIu64 "\n", ret, seq_iter);
     return ret;
diff --git a/bootdevice.c b/bootdevice.c
index 5914417027..c3a010c094 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -210,7 +210,9 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes)
     char *list = NULL;
 
     QTAILQ_FOREACH(i, &fw_boot_order, link) {
-        char *devpath = NULL, *bootpath;
+        char *devpath = NULL,  *suffix = NULL;
+        char *bootpath;
+        char *d;
         size_t len;
 
         if (i->dev) {
@@ -218,21 +220,22 @@ char *get_boot_devices_list(size_t *size, bool ignore_suffixes)
             assert(devpath);
         }
 
-        if (i->suffix && !ignore_suffixes && devpath) {
-            size_t bootpathlen = strlen(devpath) + strlen(i->suffix) + 1;
-
-            bootpath = g_malloc(bootpathlen);
-            snprintf(bootpath, bootpathlen, "%s%s", devpath, i->suffix);
-            g_free(devpath);
-        } else if (devpath) {
-            bootpath = devpath;
-        } else if (!ignore_suffixes) {
-            assert(i->suffix);
-            bootpath = g_strdup(i->suffix);
-        } else {
-            bootpath = g_strdup("");
+        if (!ignore_suffixes) {
+            d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus, i->dev);
+            if (d) {
+                assert(!i->suffix);
+                suffix = d;
+            } else {
+                suffix = g_strdup(i->suffix);
+            }
         }
 
+        bootpath = g_strdup_printf("%s%s",
+                                   devpath ? devpath : "",
+                                   suffix ? suffix : "");
+        g_free(devpath);
+        g_free(suffix);
+
         if (total) {
             list[total-1] = '\n';
         }
diff --git a/cpu-exec.c b/cpu-exec.c
index 67381176da..2ffeb6e40d 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -24,6 +24,9 @@
 #include "qemu/atomic.h"
 #include "sysemu/qtest.h"
 #include "qemu/timer.h"
+#include "exec/address-spaces.h"
+#include "exec/memory-internal.h"
+#include "qemu/rcu.h"
 
 /* -icount align implementation. */
 
@@ -141,6 +144,33 @@ void cpu_resume_from_signal(CPUState *cpu, void *puc)
     cpu->exception_index = -1;
     siglongjmp(cpu->jmp_env, 1);
 }
+
+void cpu_reload_memory_map(CPUState *cpu)
+{
+    AddressSpaceDispatch *d;
+
+    if (qemu_in_vcpu_thread()) {
+        /* Do not let the guest prolong the critical section as much as it
+         * desires.
+         *
+         * Currently, this is prevented by the I/O thread's periodic kicking
+         * of the VCPU thread (iothread_requesting_mutex, qemu_cpu_kick_thread)
+         * but this will go away once TCG's execution moves out of the global
+         * mutex.
+         *
+         * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
+         * only protects cpu->as->dispatch.  Since we reload it below, we can
+         * split the critical section.
+         */
+        rcu_read_unlock();
+        rcu_read_lock();
+    }
+
+    /* The CPU and TLB are protected by the iothread lock.  */
+    d = atomic_rcu_read(&cpu->as->dispatch);
+    cpu->memory_dispatch = d;
+    tlb_flush(cpu, 1);
+}
 #endif
 
 /* Execute a TB, and fix up the CPU state afterwards if necessary */
@@ -352,6 +382,8 @@ int cpu_exec(CPUArchState *env)
      * an instruction scheduling constraint on modern architectures.  */
     smp_mb();
 
+    rcu_read_lock();
+
     if (unlikely(exit_request)) {
         cpu->exit_request = 1;
     }
@@ -548,6 +580,7 @@ int cpu_exec(CPUArchState *env)
     } /* for(;;) */
 
     cc->cpu_exec_exit(cpu);
+    rcu_read_unlock();
 
     /* fail safe : never use current_cpu outside cpu_exec() */
     current_cpu = NULL;
diff --git a/cpus.c b/cpus.c
index 97939e4d9e..1cd9867893 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1108,7 +1108,7 @@ bool qemu_cpu_is_self(CPUState *cpu)
     return qemu_thread_is_self(cpu->thread);
 }
 
-static bool qemu_in_vcpu_thread(void)
+bool qemu_in_vcpu_thread(void)
 {
     return current_cpu && qemu_cpu_is_self(current_cpu);
 }
diff --git a/cputlb.c b/cputlb.c
index 3b271d44d9..38f2151166 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -243,8 +243,12 @@ static void tlb_add_large_page(CPUArchState *env, target_ulong vaddr,
 }
 
 /* Add a new TLB entry. At most one entry for a given virtual address
-   is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
-   supplied size is only used by tlb_flush_page.  */
+ * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
+ * supplied size is only used by tlb_flush_page.
+ *
+ * Called from TCG-generated code, which is under an RCU read-side
+ * critical section.
+ */
 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
                   hwaddr paddr, int prot,
                   int mmu_idx, target_ulong size)
@@ -265,8 +269,7 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
     }
 
     sz = size;
-    section = address_space_translate_for_iotlb(cpu->as, paddr,
-                                                &xlat, &sz);
+    section = address_space_translate_for_iotlb(cpu, paddr, &xlat, &sz);
     assert(sz >= TARGET_PAGE_SIZE);
 
 #if defined(DEBUG_TLB)
@@ -347,7 +350,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
         cpu_ldub_code(env1, addr);
     }
     pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
-    mr = iotlb_to_region(cpu->as, pd);
+    mr = iotlb_to_region(cpu, pd);
     if (memory_region_is_unassigned(mr)) {
         CPUClass *cc = CPU_GET_CLASS(cpu);
 
diff --git a/docs/memory.txt b/docs/memory.txt
index b12f1f049a..2ceb348942 100644
--- a/docs/memory.txt
+++ b/docs/memory.txt
@@ -73,17 +73,66 @@ stability.
 Region lifecycle
 ----------------
 
-A region is created by one of the constructor functions (memory_region_init*())
-and attached to an object.  It is then destroyed by object_unparent() or simply
-when the parent object dies.
+A region is created by one of the memory_region_init*() functions and
+attached to an object, which acts as its owner or parent.  QEMU ensures
+that the owner object remains alive as long as the region is visible to
+the guest, or as long as the region is in use by a virtual CPU or another
+device.  For example, the owner object will not die between an
+address_space_map operation and the corresponding address_space_unmap.
 
-In between, a region can be added to an address space
-by using memory_region_add_subregion() and removed using
-memory_region_del_subregion().  Destroying the region implicitly
-removes the region from the address space.
+After creation, a region can be added to an address space or a
+container with memory_region_add_subregion(), and removed using
+memory_region_del_subregion().
+
+Various region attributes (read-only, dirty logging, coalesced mmio,
+ioeventfd) can be changed during the region lifecycle.  They take effect
+as soon as the region is made visible.  This can be immediately, later,
+or never.
+
+Destruction of a memory region happens automatically when the owner
+object dies.
+
+If however the memory region is part of a dynamically allocated data
+structure, you should call object_unparent() to destroy the memory region
+before the data structure is freed.  For an example see VFIOMSIXInfo
+and VFIOQuirk in hw/vfio/pci.c.
+
+You must not destroy a memory region as long as it may be in use by a
+device or CPU.  To ensure this, as a general rule do not create or
+destroy memory regions dynamically during a device's lifetime, and only
+call object_unparent() in the memory region owner's instance_finalize
+callback.  The dynamically allocated data structure that contains the
+memory region then should obviously be freed in the instance_finalize
+callback as well.
+
+If you break this rule, the following situation can happen:
+
+- the memory region's owner had a reference taken via memory_region_ref
+  (for example by address_space_map)
+
+- the region is unparented, and has no owner anymore
+
+- when address_space_unmap is called, the reference to the memory region's
+  owner is leaked.
+
+
+There is an exception to the above rule: it is okay to call
+object_unparent at any time for an alias or a container region.  It is
+therefore also okay to create or destroy alias and container regions
+dynamically during a device's lifetime.
+
+This exceptional usage is valid because aliases and containers only help
+QEMU build the guest's memory map; they are never accessed directly.
+memory_region_ref and memory_region_unref are never called on aliases
+or containers, and the above situation then cannot happen.  Exploiting
+this exception is rarely necessary, and therefore it is discouraged,
+but nevertheless it is used in a few places.
+
+For regions that "have no owner" (NULL is passed at creation time), the
+machine object is actually used as the owner.  Since instance_finalize is
+never called for the machine object, you must never call object_unparent
+on regions that have no owner, unless they are aliases or containers.
 
-Region attributes may be changed at any point; they take effect once
-the region becomes exposed to the guest.
 
 Overlapping regions and priority
 --------------------------------
@@ -215,13 +264,6 @@ BAR containing MMIO registers is mapped after it.
 Note that if the guest maps a BAR outside the PCI hole, it would not be
 visible as the pci-hole alias clips it to a 0.5GB range.
 
-Attributes
-----------
-
-Various region attributes (read-only, dirty logging, coalesced mmio, ioeventfd)
-can be changed during the region lifecycle.  They take effect once the region
-is made visible (which can be immediately, later, or never).
-
 MMIO Operations
 ---------------
 
diff --git a/docs/rcu.txt b/docs/rcu.txt
index 61752b93ab..21ecb8106c 100644
--- a/docs/rcu.txt
+++ b/docs/rcu.txt
@@ -120,12 +120,15 @@ The core RCU API is small:
      void call_rcu(T *p,
                    void (*func)(T *p),
                    field-name);
+     void g_free_rcu(T *p,
+                     field-name);
 
-        call_rcu1 is typically used through this macro, in the common case
-        where the "struct rcu_head" is the first field in the struct.  In
-        the above case, one could have written simply:
+        call_rcu1 is typically used through these macros, in the common case
+        where the "struct rcu_head" is the first field in the struct.  If
+        the callback function is g_free, in particular, g_free_rcu can be
+        used.  In the above case, one could have written simply:
 
-            call_rcu(foo_reclaim, g_free, rcu);
+            g_free_rcu(foo_reclaim, rcu);
 
      typeof(*p) atomic_rcu_read(p);
 
diff --git a/exec.c b/exec.c
index 6dff7bc43a..c85321a38b 100644
--- a/exec.c
+++ b/exec.c
@@ -44,7 +44,7 @@
 #include "trace.h"
 #endif
 #include "exec/cpu-all.h"
-
+#include "qemu/rcu_queue.h"
 #include "exec/cputlb.h"
 #include "translate-all.h"
 
@@ -58,7 +58,10 @@
 #if !defined(CONFIG_USER_ONLY)
 static bool in_migration;
 
-RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
+/* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
+ * are protected by the ramlist lock.
+ */
+RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
 
 static MemoryRegion *system_memory;
 static MemoryRegion *system_io;
@@ -115,6 +118,8 @@ struct PhysPageEntry {
 typedef PhysPageEntry Node[P_L2_SIZE];
 
 typedef struct PhysPageMap {
+    struct rcu_head rcu;
+
     unsigned sections_nb;
     unsigned sections_nb_alloc;
     unsigned nodes_nb;
@@ -124,6 +129,8 @@ typedef struct PhysPageMap {
 } PhysPageMap;
 
 struct AddressSpaceDispatch {
+    struct rcu_head rcu;
+
     /* This is a multi-level map on the physical address space.
      * The bottom level has pointers to MemoryRegionSections.
      */
@@ -315,6 +322,7 @@ bool memory_region_is_unassigned(MemoryRegion *mr)
         && mr != &io_mem_watch;
 }
 
+/* Called from RCU critical section */
 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
                                                         hwaddr addr,
                                                         bool resolve_subpage)
@@ -330,6 +338,7 @@ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
     return section;
 }
 
+/* Called from RCU critical section */
 static MemoryRegionSection *
 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
                                  hwaddr *plen, bool resolve_subpage)
@@ -370,8 +379,10 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
     MemoryRegion *mr;
     hwaddr len = *plen;
 
+    rcu_read_lock();
     for (;;) {
-        section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
+        AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
+        section = address_space_translate_internal(d, addr, &addr, plen, true);
         mr = section->mr;
 
         if (!mr->iommu_ops) {
@@ -397,15 +408,18 @@ MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
 
     *plen = len;
     *xlat = addr;
+    rcu_read_unlock();
     return mr;
 }
 
+/* Called from RCU critical section */
 MemoryRegionSection *
-address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
-                                  hwaddr *plen)
+address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
+                                  hwaddr *xlat, hwaddr *plen)
 {
     MemoryRegionSection *section;
-    section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
+    section = address_space_translate_internal(cpu->memory_dispatch,
+                                               addr, xlat, plen, false);
 
     assert(!section->mr->iommu_ops);
     return section;
@@ -795,16 +809,16 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...)
 }
 
 #if !defined(CONFIG_USER_ONLY)
+/* Called from RCU critical section */
 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    /* The list is protected by the iothread lock here.  */
-    block = ram_list.mru_block;
+    block = atomic_rcu_read(&ram_list.mru_block);
     if (block && addr - block->offset < block->max_length) {
         goto found;
     }
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (addr - block->offset < block->max_length) {
             goto found;
         }
@@ -814,6 +828,22 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
     abort();
 
 found:
+    /* It is safe to write mru_block outside the iothread lock.  This
+     * is what happens:
+     *
+     *     mru_block = xxx
+     *     rcu_read_unlock()
+     *                                        xxx removed from list
+     *                  rcu_read_lock()
+     *                  read mru_block
+     *                                        mru_block = NULL;
+     *                                        call_rcu(reclaim_ramblock, xxx);
+     *                  rcu_read_unlock()
+     *
+     * atomic_rcu_set is not needed here.  The block was already published
+     * when it was placed into the list.  Here we're just making an extra
+     * copy of the pointer.
+     */
     ram_list.mru_block = block;
     return block;
 }
@@ -827,10 +857,12 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
     end = TARGET_PAGE_ALIGN(start + length);
     start &= TARGET_PAGE_MASK;
 
+    rcu_read_lock();
     block = qemu_get_ram_block(start);
     assert(block == qemu_get_ram_block(end - 1));
     start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
     cpu_tlb_reset_dirty_all(start1, length);
+    rcu_read_unlock();
 }
 
 /* Note: start and end must be within the same ram block.  */
@@ -851,6 +883,7 @@ static void cpu_physical_memory_set_dirty_tracking(bool enable)
     in_migration = enable;
 }
 
+/* Called from RCU critical section */
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section,
                                        target_ulong vaddr,
@@ -1162,6 +1195,7 @@ error:
 }
 #endif
 
+/* Called with the ramlist lock held.  */
 static ram_addr_t find_ram_offset(ram_addr_t size)
 {
     RAMBlock *block, *next_block;
@@ -1169,15 +1203,16 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
 
     assert(size != 0); /* it would hand out same offset multiple times */
 
-    if (QTAILQ_EMPTY(&ram_list.blocks))
+    if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
         return 0;
+    }
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         ram_addr_t end, next = RAM_ADDR_MAX;
 
         end = block->offset + block->max_length;
 
-        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
+        QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
             if (next_block->offset >= end) {
                 next = MIN(next, next_block->offset);
             }
@@ -1202,9 +1237,11 @@ ram_addr_t last_ram_offset(void)
     RAMBlock *block;
     ram_addr_t last = 0;
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next)
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         last = MAX(last, block->offset + block->max_length);
-
+    }
+    rcu_read_unlock();
     return last;
 }
 
@@ -1224,11 +1261,14 @@ static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
     }
 }
 
+/* Called within an RCU critical section, or while the ramlist lock
+ * is held.
+ */
 static RAMBlock *find_ram_block(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (block->offset == addr) {
             return block;
         }
@@ -1237,11 +1277,13 @@ static RAMBlock *find_ram_block(ram_addr_t addr)
     return NULL;
 }
 
+/* Called with iothread lock held.  */
 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
 {
-    RAMBlock *new_block = find_ram_block(addr);
-    RAMBlock *block;
+    RAMBlock *new_block, *block;
 
+    rcu_read_lock();
+    new_block = find_ram_block(addr);
     assert(new_block);
     assert(!new_block->idstr[0]);
 
@@ -1254,25 +1296,32 @@ void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
     }
     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
 
-    /* This assumes the iothread lock is taken here too.  */
-    qemu_mutex_lock_ramlist();
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                     new_block->idstr);
             abort();
         }
     }
-    qemu_mutex_unlock_ramlist();
+    rcu_read_unlock();
 }
 
+/* Called with iothread lock held.  */
 void qemu_ram_unset_idstr(ram_addr_t addr)
 {
-    RAMBlock *block = find_ram_block(addr);
+    RAMBlock *block;
+
+    /* FIXME: arch_init.c assumes that this is not called throughout
+     * migration.  Ignore the problem since hot-unplug during migration
+     * does not work anyway.
+     */
 
+    rcu_read_lock();
+    block = find_ram_block(addr);
     if (block) {
         memset(block->idstr, 0, sizeof(block->idstr));
     }
+    rcu_read_unlock();
 }
 
 static int memory_try_enable_merging(void *addr, size_t len)
@@ -1331,11 +1380,11 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
 {
     RAMBlock *block;
+    RAMBlock *last_block = NULL;
     ram_addr_t old_ram_size, new_ram_size;
 
     old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
 
-    /* This assumes the iothread lock is taken here too.  */
     qemu_mutex_lock_ramlist();
     new_block->offset = find_ram_offset(new_block->max_length);
 
@@ -1357,19 +1406,27 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
         }
     }
 
-    /* Keep the list sorted from biggest to smallest block.  */
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    /* Keep the list sorted from biggest to smallest block.  Unlike QTAILQ,
+     * QLIST (which has an RCU-friendly variant) does not have insertion at
+     * tail, so save the last element in last_block.
+     */
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+        last_block = block;
         if (block->max_length < new_block->max_length) {
             break;
         }
     }
     if (block) {
-        QTAILQ_INSERT_BEFORE(block, new_block, next);
-    } else {
-        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
+        QLIST_INSERT_BEFORE_RCU(block, new_block, next);
+    } else if (last_block) {
+        QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
+    } else { /* list is empty */
+        QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
     }
     ram_list.mru_block = NULL;
 
+    /* Write list before version */
+    smp_wmb();
     ram_list.version++;
     qemu_mutex_unlock_ramlist();
 
@@ -1377,6 +1434,8 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
 
     if (new_ram_size > old_ram_size) {
         int i;
+
+        /* ram_list.dirty_memory[] is protected by the iothread lock.  */
         for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
             ram_list.dirty_memory[i] =
                 bitmap_zero_extend(ram_list.dirty_memory[i],
@@ -1507,49 +1566,55 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    /* This assumes the iothread lock is taken here too.  */
     qemu_mutex_lock_ramlist();
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
-            QTAILQ_REMOVE(&ram_list.blocks, block, next);
+            QLIST_REMOVE_RCU(block, next);
             ram_list.mru_block = NULL;
+            /* Write list before version */
+            smp_wmb();
             ram_list.version++;
-            g_free(block);
+            g_free_rcu(block, rcu);
             break;
         }
     }
     qemu_mutex_unlock_ramlist();
 }
 
+static void reclaim_ramblock(RAMBlock *block)
+{
+    if (block->flags & RAM_PREALLOC) {
+        ;
+    } else if (xen_enabled()) {
+        xen_invalidate_map_cache_entry(block->host);
+#ifndef _WIN32
+    } else if (block->fd >= 0) {
+        munmap(block->host, block->max_length);
+        close(block->fd);
+#endif
+    } else {
+        qemu_anon_ram_free(block->host, block->max_length);
+    }
+    g_free(block);
+}
+
 void qemu_ram_free(ram_addr_t addr)
 {
     RAMBlock *block;
 
-    /* This assumes the iothread lock is taken here too.  */
     qemu_mutex_lock_ramlist();
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
-            QTAILQ_REMOVE(&ram_list.blocks, block, next);
+            QLIST_REMOVE_RCU(block, next);
             ram_list.mru_block = NULL;
+            /* Write list before version */
+            smp_wmb();
             ram_list.version++;
-            if (block->flags & RAM_PREALLOC) {
-                ;
-            } else if (xen_enabled()) {
-                xen_invalidate_map_cache_entry(block->host);
-#ifndef _WIN32
-            } else if (block->fd >= 0) {
-                munmap(block->host, block->max_length);
-                close(block->fd);
-#endif
-            } else {
-                qemu_anon_ram_free(block->host, block->max_length);
-            }
-            g_free(block);
+            call_rcu(block, reclaim_ramblock, rcu);
             break;
         }
     }
     qemu_mutex_unlock_ramlist();
-
 }
 
 #ifndef _WIN32
@@ -1560,7 +1625,7 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
     int flags;
     void *area, *vaddr;
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         offset = addr - block->offset;
         if (offset < block->max_length) {
             vaddr = ramblock_ptr(block, offset);
@@ -1597,7 +1662,6 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
                 memory_try_enable_merging(vaddr, length);
                 qemu_ram_setup_dump(vaddr, length);
             }
-            return;
         }
     }
 }
@@ -1605,49 +1669,78 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
 
 int qemu_get_ram_fd(ram_addr_t addr)
 {
-    RAMBlock *block = qemu_get_ram_block(addr);
+    RAMBlock *block;
+    int fd;
 
-    return block->fd;
+    rcu_read_lock();
+    block = qemu_get_ram_block(addr);
+    fd = block->fd;
+    rcu_read_unlock();
+    return fd;
 }
 
 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
 {
-    RAMBlock *block = qemu_get_ram_block(addr);
+    RAMBlock *block;
+    void *ptr;
 
-    return ramblock_ptr(block, 0);
+    rcu_read_lock();
+    block = qemu_get_ram_block(addr);
+    ptr = ramblock_ptr(block, 0);
+    rcu_read_unlock();
+    return ptr;
 }
 
 /* Return a host pointer to ram allocated with qemu_ram_alloc.
-   With the exception of the softmmu code in this file, this should
-   only be used for local memory (e.g. video ram) that the device owns,
-   and knows it isn't going to access beyond the end of the block.
-
-   It should not be used for general purpose DMA.
-   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
+ * This should not be used for general purpose DMA.  Use address_space_map
+ * or address_space_rw instead. For local memory (e.g. video ram) that the
+ * device owns, use memory_region_get_ram_ptr.
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the incoming
+ * ram_addr_t.
  */
 void *qemu_get_ram_ptr(ram_addr_t addr)
 {
-    RAMBlock *block = qemu_get_ram_block(addr);
+    RAMBlock *block;
+    void *ptr;
 
-    if (xen_enabled()) {
+    rcu_read_lock();
+    block = qemu_get_ram_block(addr);
+
+    if (xen_enabled() && block->host == NULL) {
         /* We need to check if the requested address is in the RAM
          * because we don't want to map the entire memory in QEMU.
          * In that case just map until the end of the page.
          */
         if (block->offset == 0) {
-            return xen_map_cache(addr, 0, 0);
-        } else if (block->host == NULL) {
-            block->host =
-                xen_map_cache(block->offset, block->max_length, 1);
+            ptr = xen_map_cache(addr, 0, 0);
+            goto unlock;
         }
+
+        block->host = xen_map_cache(block->offset, block->max_length, 1);
     }
-    return ramblock_ptr(block, addr - block->offset);
+    ptr = ramblock_ptr(block, addr - block->offset);
+
+unlock:
+    rcu_read_unlock();
+    return ptr;
 }
 
 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
- * but takes a size argument */
+ * but takes a size argument.
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the incoming
+ * ram_addr_t.
+ */
 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
 {
+    void *ptr;
     if (*size == 0) {
         return NULL;
     }
@@ -1655,12 +1748,14 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
         return xen_map_cache(addr, *size, 1);
     } else {
         RAMBlock *block;
-
-        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+        rcu_read_lock();
+        QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
             if (addr - block->offset < block->max_length) {
                 if (addr - block->offset + *size > block->max_length)
                     *size = block->max_length - addr + block->offset;
-                return ramblock_ptr(block, addr - block->offset);
+                ptr = ramblock_ptr(block, addr - block->offset);
+                rcu_read_unlock();
+                return ptr;
             }
         }
 
@@ -1670,23 +1765,35 @@ static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
 }
 
 /* Some of the softmmu routines need to translate from a host pointer
-   (typically a TLB entry) back to a ram offset.  */
+ * (typically a TLB entry) back to a ram offset.
+ *
+ * By the time this function returns, the returned pointer is not protected
+ * by RCU anymore.  If the caller is not within an RCU critical section and
+ * does not hold the iothread lock, it must have other means of protecting the
+ * pointer, such as a reference to the region that includes the ram_addr_t
+ * stored in *ram_addr.
+ */
 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
 {
     RAMBlock *block;
     uint8_t *host = ptr;
+    MemoryRegion *mr;
 
     if (xen_enabled()) {
+        rcu_read_lock();
         *ram_addr = xen_ram_addr_from_mapcache(ptr);
-        return qemu_get_ram_block(*ram_addr)->mr;
+        mr = qemu_get_ram_block(*ram_addr)->mr;
+        rcu_read_unlock();
+        return mr;
     }
 
-    block = ram_list.mru_block;
+    rcu_read_lock();
+    block = atomic_rcu_read(&ram_list.mru_block);
     if (block && block->host && host - block->host < block->max_length) {
         goto found;
     }
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         /* This case append when the block is not mapped. */
         if (block->host == NULL) {
             continue;
@@ -1696,11 +1803,14 @@ MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
         }
     }
 
+    rcu_read_unlock();
     return NULL;
 
 found:
     *ram_addr = block->offset + (host - block->host);
-    return block->mr;
+    mr = block->mr;
+    rcu_read_unlock();
+    return mr;
 }
 
 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
@@ -1961,9 +2071,12 @@ static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
     return phys_section_add(map, &section);
 }
 
-MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
+MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
 {
-    return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
+    AddressSpaceDispatch *d = atomic_rcu_read(&cpu->memory_dispatch);
+    MemoryRegionSection *sections = d->map.sections;
+
+    return sections[index & ~TARGET_PAGE_MASK].mr;
 }
 
 static void io_mem_init(void)
@@ -1997,6 +2110,12 @@ static void mem_begin(MemoryListener *listener)
     as->next_dispatch = d;
 }
 
+static void address_space_dispatch_free(AddressSpaceDispatch *d)
+{
+    phys_sections_free(&d->map);
+    g_free(d);
+}
+
 static void mem_commit(MemoryListener *listener)
 {
     AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
@@ -2005,11 +2124,9 @@ static void mem_commit(MemoryListener *listener)
 
     phys_page_compact_all(next, next->map.nodes_nb);
 
-    as->dispatch = next;
-
+    atomic_rcu_set(&as->dispatch, next);
     if (cur) {
-        phys_sections_free(&cur->map);
-        g_free(cur);
+        call_rcu(cur, address_space_dispatch_free, rcu);
     }
 }
 
@@ -2026,7 +2143,7 @@ static void tcg_commit(MemoryListener *listener)
         if (cpu->tcg_as_listener != listener) {
             continue;
         }
-        tlb_flush(cpu, 1);
+        cpu_reload_memory_map(cpu);
     }
 }
 
@@ -2068,8 +2185,10 @@ void address_space_destroy_dispatch(AddressSpace *as)
 {
     AddressSpaceDispatch *d = as->dispatch;
 
-    g_free(d);
-    as->dispatch = NULL;
+    atomic_rcu_set(&as->dispatch, NULL);
+    if (d) {
+        call_rcu(d, address_space_dispatch_free, rcu);
+    }
 }
 
 static void memory_map_init(void)
@@ -2948,8 +3067,10 @@ void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
 {
     RAMBlock *block;
 
-    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
+    rcu_read_lock();
+    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
         func(block->host, block->offset, block->used_length, opaque);
     }
+    rcu_read_unlock();
 }
 #endif
diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c
index e75aa8772e..a0ab9a86a9 100644
--- a/hw/9pfs/virtio-9p-synth.c
+++ b/hw/9pfs/virtio-9p-synth.c
@@ -18,7 +18,7 @@
 #include "fsdev/qemu-fsdev.h"
 #include "virtio-9p-synth.h"
 #include "qemu/rcu.h"
-
+#include "qemu/rcu_queue.h"
 #include <sys/stat.h>
 
 /* Root node for synth file system */
diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 2eacac0787..44c6b93727 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -818,6 +818,13 @@ static char *qdev_get_fw_dev_path_from_handler(BusState *bus, DeviceState *dev)
     return d;
 }
 
+char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev)
+{
+    Object *obj = OBJECT(dev);
+
+    return fw_path_provider_try_get_dev_path(obj, bus, dev);
+}
+
 static int qdev_get_fw_dev_path_helper(DeviceState *dev, char *p, int size)
 {
     int l = 0;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 0a4282adf3..7da70ff349 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -745,6 +745,9 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr)
 
 /* Map dev to context-entry then do a paging-structures walk to do a iommu
  * translation.
+ *
+ * Called from RCU critical section.
+ *
  * @bus_num: The bus number
  * @devfn: The devfn, which is the  combined of device and function number
  * @is_write: The access is a write operation
diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c
index 252ea5eb53..36f73e1f8b 100644
--- a/hw/pci-bridge/pci_bridge_dev.c
+++ b/hw/pci-bridge/pci_bridge_dev.c
@@ -97,6 +97,11 @@ static void pci_bridge_dev_exitfn(PCIDevice *dev)
     pci_bridge_exitfn(dev);
 }
 
+static void pci_bridge_dev_instance_finalize(Object *obj)
+{
+    shpc_free(PCI_DEVICE(obj));
+}
+
 static void pci_bridge_dev_write_config(PCIDevice *d,
                                         uint32_t address, uint32_t val, int len)
 {
@@ -154,10 +159,11 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
 }
 
 static const TypeInfo pci_bridge_dev_info = {
-    .name          = TYPE_PCI_BRIDGE_DEV,
-    .parent        = TYPE_PCI_BRIDGE,
-    .instance_size = sizeof(PCIBridgeDev),
-    .class_init = pci_bridge_dev_class_init,
+    .name              = TYPE_PCI_BRIDGE_DEV,
+    .parent            = TYPE_PCI_BRIDGE,
+    .instance_size     = sizeof(PCIBridgeDev),
+    .class_init        = pci_bridge_dev_class_init,
+    .instance_finalize = pci_bridge_dev_instance_finalize,
     .interfaces = (InterfaceInfo[]) {
         { TYPE_HOTPLUG_HANDLER },
         { }
diff --git a/hw/pci-host/apb.c b/hw/pci-host/apb.c
index f573875baf..832b6c7248 100644
--- a/hw/pci-host/apb.c
+++ b/hw/pci-host/apb.c
@@ -205,6 +205,7 @@ static AddressSpace *pbm_pci_dma_iommu(PCIBus *bus, void *opaque, int devfn)
     return &is->iommu_as;
 }
 
+/* Called from RCU critical section */
 static IOMMUTLBEntry pbm_translate_iommu(MemoryRegion *iommu, hwaddr addr,
                                          bool is_write)
 {
diff --git a/hw/pci/pcie_host.c b/hw/pci/pcie_host.c
index dfb4a2b505..d8afba863e 100644
--- a/hw/pci/pcie_host.c
+++ b/hw/pci/pcie_host.c
@@ -88,6 +88,8 @@ static void pcie_host_init(Object *obj)
     PCIExpressHost *e = PCIE_HOST_BRIDGE(obj);
 
     e->base_addr = PCIE_BASE_ADDR_UNMAPPED;
+    memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e, "pcie-mmcfg-mmio",
+                          PCIE_MMCFG_SIZE_MAX);
 }
 
 void pcie_host_mmcfg_unmap(PCIExpressHost *e)
@@ -104,8 +106,7 @@ void pcie_host_mmcfg_init(PCIExpressHost *e, uint32_t size)
     assert(size >= PCIE_MMCFG_SIZE_MIN);
     assert(size <= PCIE_MMCFG_SIZE_MAX);
     e->size = size;
-    memory_region_init_io(&e->mmio, OBJECT(e), &pcie_mmcfg_ops, e,
-                          "pcie-mmcfg", e->size);
+    memory_region_set_size(&e->mmio, e->size);
 }
 
 void pcie_host_mmcfg_map(PCIExpressHost *e, hwaddr addr,
@@ -121,10 +122,12 @@ void pcie_host_mmcfg_update(PCIExpressHost *e,
                             hwaddr addr,
                             uint32_t size)
 {
+    memory_region_transaction_begin();
     pcie_host_mmcfg_unmap(e);
     if (enable) {
         pcie_host_mmcfg_map(e, addr, size);
     }
+    memory_region_transaction_commit();
 }
 
 static const TypeInfo pcie_host_type_info = {
diff --git a/hw/pci/shpc.c b/hw/pci/shpc.c
index 27c496e8c3..5fd7f4bbb7 100644
--- a/hw/pci/shpc.c
+++ b/hw/pci/shpc.c
@@ -663,13 +663,22 @@ void shpc_cleanup(PCIDevice *d, MemoryRegion *bar)
     SHPCDevice *shpc = d->shpc;
     d->cap_present &= ~QEMU_PCI_CAP_SHPC;
     memory_region_del_subregion(bar, &shpc->mmio);
-    object_unparent(OBJECT(&shpc->mmio));
     /* TODO: cleanup config space changes? */
+}
+
+void shpc_free(PCIDevice *d)
+{
+    SHPCDevice *shpc = d->shpc;
+    if (!shpc) {
+        return;
+    }
+    object_unparent(OBJECT(&shpc->mmio));
     g_free(shpc->config);
     g_free(shpc->cmask);
     g_free(shpc->wmask);
     g_free(shpc->w1cmask);
     g_free(shpc);
+    d->shpc = NULL;
 }
 
 void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int l)
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index da474740c0..ba003da39e 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -59,6 +59,7 @@ static sPAPRTCETable *spapr_tce_find_by_liobn(uint32_t liobn)
     return NULL;
 }
 
+/* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
                                                bool is_write)
 {
diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index dcb2bc5a6e..e30ff84c0c 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -24,6 +24,7 @@
 #include "hw/virtio/virtio-scsi.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
+#include "hw/fw-path-provider.h"
 
 /* Features supported by host kernel. */
 static const int kernel_feature_bits[] = {
@@ -250,6 +251,12 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp)
         return;
     }
 
+    /* At present, channel and lun both are 0 for bootable vhost-scsi disk */
+    s->channel = 0;
+    s->lun = 0;
+    /* Note: we can also get the minimum tpgt from kernel */
+    s->target = vs->conf.boot_tpgt;
+
     error_setg(&s->migration_blocker,
             "vhost-scsi does not support migration");
     migrate_add_blocker(s->migration_blocker);
@@ -271,6 +278,19 @@ static void vhost_scsi_unrealize(DeviceState *dev, Error **errp)
     virtio_scsi_common_unrealize(dev, errp);
 }
 
+/*
+ * Implementation of an interface to adjust firmware path
+ * for the bootindex property handling.
+ */
+static char *vhost_scsi_get_fw_dev_path(FWPathProvider *p, BusState *bus,
+                                        DeviceState *dev)
+{
+    VHostSCSI *s = VHOST_SCSI(dev);
+    /* format: channel@channel/vhost-scsi@target,lun */
+    return g_strdup_printf("channel@%x/%s@%x,%x", s->channel,
+                           qdev_fw_name(dev), s->target, s->lun);
+}
+
 static Property vhost_scsi_properties[] = {
     DEFINE_VHOST_SCSI_PROPERTIES(VHostSCSI, parent_obj.conf),
     DEFINE_PROP_END_OF_LIST(),
@@ -280,6 +300,7 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
+    FWPathProviderClass *fwc = FW_PATH_PROVIDER_CLASS(klass);
 
     dc->props = vhost_scsi_properties;
     set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
@@ -288,6 +309,15 @@ static void vhost_scsi_class_init(ObjectClass *klass, void *data)
     vdc->get_features = vhost_scsi_get_features;
     vdc->set_config = vhost_scsi_set_config;
     vdc->set_status = vhost_scsi_set_status;
+    fwc->get_dev_path = vhost_scsi_get_fw_dev_path;
+}
+
+static void vhost_scsi_instance_init(Object *obj)
+{
+    VHostSCSI *dev = VHOST_SCSI(obj);
+
+    device_add_bootindex_property(obj, &dev->bootindex, "bootindex", NULL,
+                                  DEVICE(dev), NULL);
 }
 
 static const TypeInfo vhost_scsi_info = {
@@ -295,6 +325,11 @@ static const TypeInfo vhost_scsi_info = {
     .parent = TYPE_VIRTIO_SCSI_COMMON,
     .instance_size = sizeof(VHostSCSI),
     .class_init = vhost_scsi_class_init,
+    .instance_init = vhost_scsi_instance_init,
+    .interfaces = (InterfaceInfo[]) {
+        { TYPE_FW_PATH_PROVIDER },
+        { }
+    },
 };
 
 static void virtio_register_types(void)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index dde1d73b56..604cb5b749 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1238,6 +1238,8 @@ static void vhost_scsi_pci_instance_init(Object *obj)
 
     virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
                                 TYPE_VHOST_SCSI);
+    object_property_add_alias(obj, "bootindex", OBJECT(&dev->vdev),
+                              "bootindex", &error_abort);
 }
 
 static const TypeInfo vhost_scsi_pci_info = {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 2c4828694b..ac06c6721c 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -24,6 +24,7 @@
 #include "exec/memory.h"
 #include "qemu/thread.h"
 #include "qom/cpu.h"
+#include "qemu/rcu.h"
 
 /* some important defines:
  *
@@ -268,6 +269,7 @@ CPUArchState *cpu_copy(CPUArchState *env);
 typedef struct RAMBlock RAMBlock;
 
 struct RAMBlock {
+    struct rcu_head rcu;
     struct MemoryRegion *mr;
     uint8_t *host;
     ram_addr_t offset;
@@ -275,11 +277,10 @@ struct RAMBlock {
     ram_addr_t max_length;
     void (*resized)(const char*, uint64_t length, void *host);
     uint32_t flags;
+    /* Protected by iothread lock.  */
     char idstr[256];
-    /* Reads can take either the iothread or the ramlist lock.
-     * Writes must take both locks.
-     */
-    QTAILQ_ENTRY(RAMBlock) next;
+    /* RCU-enabled, writes protected by the ramlist lock */
+    QLIST_ENTRY(RAMBlock) next;
     int fd;
 };
 
@@ -295,8 +296,8 @@ typedef struct RAMList {
     /* Protected by the iothread lock.  */
     unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
     RAMBlock *mru_block;
-    /* Protected by the ramlist lock.  */
-    QTAILQ_HEAD(, RAMBlock) blocks;
+    /* RCU-enabled, writes protected by the ramlist lock. */
+    QLIST_HEAD(, RAMBlock) blocks;
     uint32_t version;
 } RAMList;
 extern RAMList ram_list;
diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h
index b8ecd6f68d..e0da9d7ad3 100644
--- a/include/exec/cputlb.h
+++ b/include/exec/cputlb.h
@@ -34,7 +34,7 @@ extern int tlb_flush_count;
 void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);
 
 MemoryRegionSection *
-address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
+address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr, hwaddr *xlat,
                                   hwaddr *plen);
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section,
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 6a154485ba..8eb0db3910 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -96,6 +96,8 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
                               int is_cpu_write_access);
 #if !defined(CONFIG_USER_ONLY)
+bool qemu_in_vcpu_thread(void);
+void cpu_reload_memory_map(CPUState *cpu);
 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
 /* cputlb.c */
 void tlb_flush_page(CPUState *cpu, target_ulong addr);
@@ -337,7 +339,8 @@ extern uintptr_t tci_tb_ptr;
 
 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align));
 
-struct MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index);
+struct MemoryRegion *iotlb_to_region(CPUState *cpu,
+                                     hwaddr index);
 bool io_mem_read(struct MemoryRegion *mr, hwaddr addr,
                  uint64_t *pvalue, unsigned size);
 bool io_mem_write(struct MemoryRegion *mr, hwaddr addr,
diff --git a/include/hw/pci/shpc.h b/include/hw/pci/shpc.h
index 025bc5b268..9bbea39996 100644
--- a/include/hw/pci/shpc.h
+++ b/include/hw/pci/shpc.h
@@ -41,6 +41,7 @@ void shpc_reset(PCIDevice *d);
 int shpc_bar_size(PCIDevice *dev);
 int shpc_init(PCIDevice *dev, PCIBus *sec_bus, MemoryRegion *bar, unsigned off);
 void shpc_cleanup(PCIDevice *dev, MemoryRegion *bar);
+void shpc_free(PCIDevice *dev);
 void shpc_cap_write_config(PCIDevice *d, uint32_t addr, uint32_t val, int len);
 
 
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 15a226f24a..4e673f9d29 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -342,6 +342,7 @@ void qbus_reset_all_fn(void *opaque);
 BusState *sysbus_get_default(void);
 
 char *qdev_get_fw_dev_path(DeviceState *dev);
+char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev);
 
 /**
  * @qdev_machine_init
diff --git a/include/hw/virtio/vhost-scsi.h b/include/hw/virtio/vhost-scsi.h
index 85cc031281..dea0075626 100644
--- a/include/hw/virtio/vhost-scsi.h
+++ b/include/hw/virtio/vhost-scsi.h
@@ -60,11 +60,16 @@ typedef struct VHostSCSI {
     Error *migration_blocker;
 
     struct vhost_dev dev;
+    int32_t bootindex;
+    int channel;
+    int target;
+    int lun;
 } VHostSCSI;
 
 #define DEFINE_VHOST_SCSI_PROPERTIES(_state, _conf_field) \
     DEFINE_PROP_STRING("vhostfd", _state, _conf_field.vhostfd), \
     DEFINE_PROP_STRING("wwpn", _state, _conf_field.wwpn), \
+    DEFINE_PROP_UINT32("boot_tpgt", _state, _conf_field.boot_tpgt, 0), \
     DEFINE_PROP_UINT32("num_queues", _state, _conf_field.num_queues, 1), \
     DEFINE_PROP_UINT32("max_sectors", _state, _conf_field.max_sectors, 0xFFFF), \
     DEFINE_PROP_UINT32("cmd_per_lun", _state, _conf_field.cmd_per_lun, 128)
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index bf17cc9ea5..c122e7ae5c 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -153,6 +153,7 @@ struct VirtIOSCSIConf {
     uint32_t cmd_per_lun;
     char *vhostfd;
     char *wwpn;
+    uint32_t boot_tpgt;
     IOThread *iothread;
 };
 
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index c602797652..80941506ce 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -139,17 +139,6 @@ struct {                                                                \
         (elm)->field.le_prev = &(head)->lh_first;                       \
 } while (/*CONSTCOND*/0)
 
-#define QLIST_INSERT_HEAD_RCU(head, elm, field) do {                    \
-        (elm)->field.le_prev = &(head)->lh_first;                       \
-        (elm)->field.le_next = (head)->lh_first;                        \
-        smp_wmb(); /* fill elm before linking it */                     \
-        if ((head)->lh_first != NULL)  {                                \
-            (head)->lh_first->field.le_prev = &(elm)->field.le_next;    \
-        }                                                               \
-        (head)->lh_first = (elm);                                       \
-        smp_wmb();                                                      \
-} while (/* CONSTCOND*/0)
-
 #define QLIST_REMOVE(elm, field) do {                                   \
         if ((elm)->field.le_next != NULL)                               \
                 (elm)->field.le_next->field.le_prev =                   \
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index 068a279a79..506ab58eaf 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -140,6 +140,14 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
       }),                                                                \
       (RCUCBFunc *)(func))
 
+#define g_free_rcu(obj, field) /* queue g_free(obj) via call_rcu1() */ \
+    call_rcu1(({                                                         \
+        char __attribute__((unused))                                     \
+            offset_must_be_zero[-offsetof(typeof(*(obj)), field)];       \
+        &(obj)->field;                                                   \
+      }),                                                                \
+      (RCUCBFunc *)g_free)
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/qemu/rcu_queue.h b/include/qemu/rcu_queue.h
new file mode 100644
index 0000000000..3aca7a57e3
--- /dev/null
+++ b/include/qemu/rcu_queue.h
@@ -0,0 +1,134 @@
+#ifndef QEMU_RCU_QUEUE_H
+#define QEMU_RCU_QUEUE_H
+
+/*
+ * rcu_queue.h
+ *
+ * RCU-friendly versions of the queue.h primitives.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * Copyright (c) 2013 Mike D. Day, IBM Corporation.
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include "qemu/queue.h"
+#include "qemu/atomic.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * List access methods.
+ */
+#define QLIST_EMPTY_RCU(head) (atomic_rcu_read(&(head)->lh_first) == NULL)
+#define QLIST_FIRST_RCU(head) (atomic_rcu_read(&(head)->lh_first))
+#define QLIST_NEXT_RCU(elm, field) (atomic_rcu_read(&(elm)->field.le_next))
+
+/*
+ * List functions.
+ */
+
+
+/*
+ *  The difference between atomic_read/set and atomic_rcu_read/set
+ *  is that the latter add a read/write memory barrier to the volatile
+ *  access: the atomic_rcu_* macros include the memory barrier, the
+ *  plain atomic macros do not. Therefore, it should be correct to
+ *  issue a series of reads or writes to the same element using only
+ *  the atomic_* macros until the last read or write, which should be
+ *  atomic_rcu_* to introduce a read or write memory barrier as
+ *  appropriate.
+ */
+
+/* Upon publication of the listelm->next value, list readers
+ * will see the new node when following next pointers from
+ * antecedent nodes, but may not see the new node when following
+ * prev pointers from subsequent nodes until after the RCU grace
+ * period expires.
+ * See Linux include/linux/rculist.h, __list_add_rcu(new, prev, next).
+ */
+#define QLIST_INSERT_AFTER_RCU(listelm, elm, field) do {    \
+    (elm)->field.le_next = (listelm)->field.le_next;        \
+    (elm)->field.le_prev = &(listelm)->field.le_next;       \
+    atomic_rcu_set(&(listelm)->field.le_next, (elm));       \
+    if ((elm)->field.le_next != NULL) {                     \
+       (elm)->field.le_next->field.le_prev =                \
+        &(elm)->field.le_next;                              \
+    }                                                       \
+} while (/*CONSTCOND*/0)
+
+/* Upon publication of the listelm->prev->next value, list
+ * readers will see the new element when following prev pointers
+ * from subsequent elements, but may not see the new element
+ * when following next pointers from antecedent elements
+ * until after the RCU grace period expires.
+ */
+#define QLIST_INSERT_BEFORE_RCU(listelm, elm, field) do {   \
+    (elm)->field.le_prev = (listelm)->field.le_prev;        \
+    (elm)->field.le_next = (listelm);                       \
+    atomic_rcu_set((listelm)->field.le_prev, (elm));        \
+    (listelm)->field.le_prev = &(elm)->field.le_next;       \
+} while (/*CONSTCOND*/0)
+
+/* Upon publication of the head->first value, list readers
+ * will see the new element when following the head, but may
+ * not see the new element when following prev pointers from
+ * subsequent elements until after the RCU grace period has
+ * expired.
+ */
+#define QLIST_INSERT_HEAD_RCU(head, elm, field) do {    \
+    (elm)->field.le_prev = &(head)->lh_first;           \
+    (elm)->field.le_next = (head)->lh_first;            \
+    atomic_rcu_set((&(head)->lh_first), (elm));         \
+    if ((elm)->field.le_next != NULL) {                 \
+       (elm)->field.le_next->field.le_prev =            \
+        &(elm)->field.le_next;                          \
+    }                                                   \
+} while (/*CONSTCOND*/0)
+
+
+/* Unlink elm; its own le_next is left intact for readers still on it.
+ * The store to the antecedent's next pointer uses atomic_set() because
+ * concurrent readers may load it; they see elm until that store lands.
+ */
+#define QLIST_REMOVE_RCU(elm, field) do {           \
+    if ((elm)->field.le_next != NULL) {             \
+       (elm)->field.le_next->field.le_prev =        \
+        (elm)->field.le_prev;                       \
+    }                                               \
+    atomic_set((elm)->field.le_prev, (elm)->field.le_next); \
+} while (/*CONSTCOND*/0)
+
+/* List traversal must occur within an RCU critical section.  */
+#define QLIST_FOREACH_RCU(var, head, field)                 \
+        for ((var) = atomic_rcu_read(&(head)->lh_first);    \
+                (var);                                      \
+                (var) = atomic_rcu_read(&(var)->field.le_next))
+
+/* List traversal must occur within an RCU critical section.  */
+#define QLIST_FOREACH_SAFE_RCU(var, head, field, next_var)           \
+    for ((var) = (atomic_rcu_read(&(head)->lh_first));               \
+      (var) &&                                                       \
+          ((next_var) = atomic_rcu_read(&(var)->field.le_next), 1);  \
+           (var) = (next_var))
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QEMU_RCU_QUEUE_H */
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 2098f1cb50..48fd6fb1d2 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -256,6 +256,7 @@ struct CPUState {
     sigjmp_buf jmp_env;
 
     AddressSpace *as;
+    struct AddressSpaceDispatch *memory_dispatch;
     MemoryListener *tcg_as_listener;
 
     void *env_ptr; /* CPUArchState */
diff --git a/memory.c b/memory.c
index 130152cf1d..20f6d9eeac 100644
--- a/memory.c
+++ b/memory.c
@@ -1943,6 +1943,7 @@ void memory_listener_unregister(MemoryListener *listener)
 
 void address_space_init(AddressSpace *as, MemoryRegion *root, const char *name)
 {
+    memory_region_ref(root);
     memory_region_transaction_begin();
     as->root = root;
     as->current_map = g_new(FlatView, 1);
@@ -1969,10 +1970,13 @@ static void do_address_space_destroy(AddressSpace *as)
     flatview_unref(as->current_map);
     g_free(as->name);
     g_free(as->ioeventfds);
+    memory_region_unref(as->root);
 }
 
 void address_space_destroy(AddressSpace *as)
 {
+    MemoryRegion *root = as->root;
+
     /* Flush out anything from MemoryListeners listening in on this */
     memory_region_transaction_begin();
     as->root = NULL;
@@ -1984,6 +1988,7 @@ void address_space_destroy(AddressSpace *as)
      * entries that the guest should never use.  Wait for the old
      * values to expire before freeing the data.
      */
+    as->root = root;
     call_rcu(as, do_address_space_destroy, rcu);
 }
 
diff --git a/scripts/dump-guest-memory.py b/scripts/dump-guest-memory.py
index 1ed8b67883..dc8e44acf8 100644
--- a/scripts/dump-guest-memory.py
+++ b/scripts/dump-guest-memory.py
@@ -108,16 +108,16 @@ shape and this command should mostly work."""
         assert (val["hi"] == 0)
         return val["lo"]
 
-    def qtailq_foreach(self, head, field_str):
-        var_p = head["tqh_first"]
+    def qlist_foreach(self, head, field_str):
+        var_p = head["lh_first"]
         while (var_p != 0):
             var = var_p.dereference()
             yield var
-            var_p = var[field_str]["tqe_next"]
+            var_p = var[field_str]["le_next"]
 
     def qemu_get_ram_block(self, ram_addr):
         ram_blocks = gdb.parse_and_eval("ram_list.blocks")
-        for block in self.qtailq_foreach(ram_blocks, "next"):
+        for block in self.qlist_foreach(ram_blocks, "next"):
             if (ram_addr - block["offset"] < block["length"]):
                 return block
         raise gdb.GdbError("Bad ram offset %x" % ram_addr)
diff --git a/scripts/kvm/kvm_stat b/scripts/kvm/kvm_stat
index c0c4ff0de3..c65cabda5a 100755
--- a/scripts/kvm/kvm_stat
+++ b/scripts/kvm/kvm_stat
@@ -145,6 +145,45 @@ svm_exit_reasons = {
     0x400: 'NPF',
 }
 
+# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
+aarch64_exit_reasons = {
+    0x00: 'UNKNOWN',
+    0x01: 'WFI',
+    0x03: 'CP15_32',
+    0x04: 'CP15_64',
+    0x05: 'CP14_MR',
+    0x06: 'CP14_LS',
+    0x07: 'FP_ASIMD',
+    0x08: 'CP10_ID',
+    0x0C: 'CP14_64',
+    0x0E: 'ILL_ISS',
+    0x11: 'SVC32',
+    0x12: 'HVC32',
+    0x13: 'SMC32',
+    0x15: 'SVC64',
+    0x16: 'HVC64',
+    0x17: 'SMC64',
+    0x18: 'SYS64',
+    0x20: 'IABT',
+    0x21: 'IABT_HYP',
+    0x22: 'PC_ALIGN',
+    0x24: 'DABT',
+    0x25: 'DABT_HYP',
+    0x26: 'SP_ALIGN',
+    0x28: 'FP_EXC32',
+    0x2C: 'FP_EXC64',
+    0x2F: 'SERROR',
+    0x30: 'BREAKPT',
+    0x31: 'BREAKPT_HYP',
+    0x32: 'SOFTSTP',
+    0x33: 'SOFTSTP_HYP',
+    0x34: 'WATCHPT',
+    0x35: 'WATCHPT_HYP',
+    0x38: 'BKPT32',
+    0x3A: 'VECTOR32',
+    0x3C: 'BRK64',
+}
+
 # From include/uapi/linux/kvm.h, KVM_EXIT_xxx
 userspace_exit_reasons = {
      0: 'UNKNOWN',
@@ -212,7 +251,8 @@ def ppc_init():
 
 def aarch64_init():
     globals().update({
-        'sc_perf_evt_open' : 241
+        'sc_perf_evt_open' : 241,
+        'exit_reasons' : aarch64_exit_reasons,
     })
 
 def detect_platform():
diff --git a/softmmu_template.h b/softmmu_template.h
index 6b4e615dbf..0e3dd35fe1 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -149,7 +149,7 @@ static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
 {
     uint64_t val;
     CPUState *cpu = ENV_GET_CPU(env);
-    MemoryRegion *mr = iotlb_to_region(cpu->as, physaddr);
+    MemoryRegion *mr = iotlb_to_region(cpu, physaddr);
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     cpu->mem_io_pc = retaddr;
@@ -369,7 +369,7 @@ static inline void glue(io_write, SUFFIX)(CPUArchState *env,
                                           uintptr_t retaddr)
 {
     CPUState *cpu = ENV_GET_CPU(env);
-    MemoryRegion *mr = iotlb_to_region(cpu->as, physaddr);
+    MemoryRegion *mr = iotlb_to_region(cpu, physaddr);
 
     physaddr = (physaddr & TARGET_PAGE_MASK) + addr;
     if (mr != &io_mem_rom && mr != &io_mem_notdirty && !cpu_can_do_io(cpu)) {
diff --git a/tests/Makefile b/tests/Makefile
index 53a4c30641..307035c26c 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -62,6 +62,8 @@ check-unit-y += tests/test-int128$(EXESUF)
 gcov-files-test-int128-y =
 check-unit-y += tests/rcutorture$(EXESUF)
 gcov-files-rcutorture-y = util/rcu.c
+check-unit-y += tests/test-rcu-list$(EXESUF)
+gcov-files-test-rcu-list-y = util/rcu.c
 check-unit-y += tests/test-bitops$(EXESUF)
 check-unit-$(CONFIG_HAS_GLIB_SUBPROCESS_TESTS) += tests/test-qdev-global-props$(EXESUF)
 check-unit-y += tests/check-qom-interface$(EXESUF)
@@ -228,7 +230,7 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
 	tests/test-qmp-commands.o tests/test-visitor-serialization.o \
 	tests/test-x86-cpuid.o tests/test-mul64.o tests/test-int128.o \
 	tests/test-opts-visitor.o tests/test-qmp-event.o \
-	tests/rcutorture.o
+	tests/rcutorture.o tests/test-rcu-list.o
 
 test-qapi-obj-y = tests/test-qapi-visit.o tests/test-qapi-types.o \
 		  tests/test-qapi-event.o
@@ -257,7 +259,8 @@ tests/test-x86-cpuid$(EXESUF): tests/test-x86-cpuid.o
 tests/test-xbzrle$(EXESUF): tests/test-xbzrle.o migration/xbzrle.o page_cache.o libqemuutil.a
 tests/test-cutils$(EXESUF): tests/test-cutils.o util/cutils.o
 tests/test-int128$(EXESUF): tests/test-int128.o
-tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a
+tests/rcutorture$(EXESUF): tests/rcutorture.o libqemuutil.a libqemustub.a
+tests/test-rcu-list$(EXESUF): tests/test-rcu-list.o libqemuutil.a libqemustub.a
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
 	hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c
new file mode 100644
index 0000000000..46b5e263e5
--- /dev/null
+++ b/tests/test-rcu-list.c
@@ -0,0 +1,306 @@
+/*
+ * test-rcu-list.c
+ *
+ * usage: test-rcu-list <duration> <readers>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (c) 2013 Mike D. Day, IBM Corporation.
+ */
+
+#include <glib.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "qemu/atomic.h"
+#include "qemu/rcu.h"
+#include "qemu/compiler.h"
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/rcu_queue.h"
+
+/*
+ * Test variables.
+ */
+
+long long n_reads = 0LL;          /* elements visited by reader threads */
+long long n_updates = 0LL;        /* updater work, counted 2 per loop pass */
+long long n_reclaims = 0LL;       /* elements freed by reclaim_list_el() */
+long long n_nodes_removed = 0LL;  /* elements unlinked from the list */
+long long n_nodes = 0LL;          /* elements allocated so far */
+int g_test_in_charge = 0;         /* set by main() when GLib runs the tests */
+
+int nthreadsrunning;              /* threads that have checked in at start */
+
+char argsbuf[64];
+
+#define GOFLAG_INIT 0
+#define GOFLAG_RUN  1
+#define GOFLAG_STOP 2
+
+static volatile int goflag = GOFLAG_INIT;
+
+#define RCU_READ_RUN 1000
+#define RCU_UPDATE_RUN 10
+#define NR_THREADS 100
+#define RCU_Q_LEN 100
+
+static QemuThread threads[NR_THREADS];
+static struct rcu_reader_data *data[NR_THREADS];
+static int n_threads;
+
+static int select_random_el(int max)
+{
+    return (rand() % max);  /* 0 .. max - 1 for a positive max */
+}
+
+/* Spawn a joinable thread running func(&data[n]); exits when out of slots. */
+static void create_thread(void *(*func)(void *))
+{
+    if (n_threads >= NR_THREADS) {
+        fprintf(stderr, "Thread limit of %d exceeded!\n", NR_THREADS);
+        exit(-1);
+    }
+    qemu_thread_create(&threads[n_threads], "test", func, &data[n_threads],
+                       QEMU_THREAD_JOINABLE);
+    n_threads++;
+}
+
+static void wait_all_threads(void)
+{
+    int idx = 0;    /* joins threads[] in creation order, then empties it */
+
+    while (idx < n_threads) {
+        qemu_thread_join(&threads[idx++]);
+    }
+    n_threads = 0;
+}
+
+/* List node: entry links it into Q_list_head, rcu defers its freeing. */
+struct list_element {
+    QLIST_ENTRY(list_element) entry;
+    struct rcu_head rcu;
+    long long val;
+};
+
+static void reclaim_list_el(struct rcu_head *prcu)
+{
+    struct list_element *el = container_of(prcu, struct list_element, rcu);
+    g_free(el);                     /* callback handed to call_rcu1() */
+    atomic_add(&n_reclaims, 1);     /* compared with n_nodes_removed later */
+}
+
+static QLIST_HEAD(q_list_head, list_element) Q_list_head;
+
+static void *rcu_q_reader(void *arg)
+{
+    long long j, n_reads_local = 0;
+    struct list_element *el;
+
+    *(struct rcu_reader_data **)arg = &rcu_reader;
+    atomic_inc(&nthreadsrunning);   /* check in with rcu_qtest_run() */
+    while (goflag == GOFLAG_INIT) {
+        g_usleep(1000);
+    }
+
+    while (goflag == GOFLAG_RUN) {  /* one RCU-protected walk per pass */
+        rcu_read_lock();
+        QLIST_FOREACH_RCU(el, &Q_list_head, entry) {
+            j = atomic_read(&el->val);
+            (void)j;                /* the value itself is not used */
+            n_reads_local++;
+            if (goflag == GOFLAG_STOP) {
+                break;
+            }
+        }
+        rcu_read_unlock();
+
+        g_usleep(100);
+    }
+    atomic_add(&n_reads, n_reads_local);    /* add the local tally once */
+    return NULL;
+}
+
+/* Updater thread: each pass unlinks one node and inserts one new node. */
+static void *rcu_q_updater(void *arg)
+{
+    int j, target_el;
+    long long n_updates_local = 0;
+    long long n_removed_local = 0;
+    struct list_element *el, *prev_el;
+
+    *(struct rcu_reader_data **)arg = &rcu_reader;
+    atomic_inc(&nthreadsrunning);   /* check in with rcu_qtest_run() */
+    while (goflag == GOFLAG_INIT) {
+        g_usleep(1000);
+    }
+
+    while (goflag == GOFLAG_RUN) {
+        target_el = select_random_el(RCU_Q_LEN);
+        j = 0;
+        /* FOREACH_RCU could work here but let's use both macros */
+        QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) {
+            j++;                    /* positions are counted from 1 */
+            if (target_el == j) {
+                QLIST_REMOVE_RCU(prev_el, entry);
+                /* may be more than one updater in the future */
+                call_rcu1(&prev_el->rcu, reclaim_list_el);
+                n_removed_local++;
+                break;
+            }
+        }
+        if (goflag == GOFLAG_STOP) {
+            break;
+        }
+        target_el = select_random_el(RCU_Q_LEN);
+        j = 0;
+        QLIST_FOREACH_RCU(el, &Q_list_head, entry) {
+            j++;
+            if (target_el == j) {
+                prev_el = g_new(struct list_element, 1);
+                atomic_add(&n_nodes, 1);
+                prev_el->val = atomic_read(&n_nodes);
+                QLIST_INSERT_BEFORE_RCU(el, prev_el, entry);
+                break;
+            }
+        }
+
+        n_updates_local += 2;       /* added even if no position matched */
+        synchronize_rcu();
+    }
+    synchronize_rcu();
+    atomic_add(&n_updates, n_updates_local);
+    atomic_add(&n_nodes_removed, n_removed_local);
+    return NULL;
+}
+
+static void rcu_qtest_init(void)
+{
+    struct list_element *new_el;
+    int i;
+    nthreadsrunning = 0;            /* no thread has checked in yet */
+    srand(time(0));
+    for (i = 0; i < RCU_Q_LEN; i++) {   /* build the initial list */
+        new_el = g_new(struct list_element, 1);
+        new_el->val = i;
+        QLIST_INSERT_HEAD_RCU(&Q_list_head, new_el, entry);
+    }
+    atomic_add(&n_nodes, RCU_Q_LEN);    /* count the nodes just created */
+}
+
+static void rcu_qtest_run(int duration, int nreaders)
+{
+    int nthreads = nreaders + 1;    /* the readers plus the one updater */
+    while (atomic_read(&nthreadsrunning) < nthreads) {
+        g_usleep(1000);             /* poll until all have checked in */
+    }
+
+    goflag = GOFLAG_RUN;
+    sleep(duration);                /* threads work for duration seconds */
+    goflag = GOFLAG_STOP;
+    wait_all_threads();
+}
+
+/* Run one test: build list, start threads, then drain and check reclaims. */
+static void rcu_qtest(const char *test, int duration, int nreaders)
+{
+    int i;
+    long long n_removed_local = 0;
+
+    struct list_element *el, *prev_el;
+
+    rcu_qtest_init();
+    for (i = 0; i < nreaders; i++) {
+        create_thread(rcu_q_reader);
+    }
+    create_thread(rcu_q_updater);
+    rcu_qtest_run(duration, nreaders);
+
+    QLIST_FOREACH_SAFE_RCU(prev_el, &Q_list_head, entry, el) {
+        QLIST_REMOVE_RCU(prev_el, entry);
+        call_rcu1(&prev_el->rcu, reclaim_list_el);
+        n_removed_local++;
+    }
+    atomic_add(&n_nodes_removed, n_removed_local);
+    synchronize_rcu();
+    while (n_nodes_removed > n_reclaims) {  /* callbacks still pending */
+        g_usleep(100);
+        synchronize_rcu();
+    }
+    if (g_test_in_charge) {
+        g_assert_cmpint(n_nodes_removed, ==, n_reclaims);
+    } else {
+        printf("%s: %d readers; 1 updater; nodes read: "  \
+               "%lld, nodes removed: %lld; nodes reclaimed: %lld\n",
+               test, nthreadsrunning - 1, n_reads, n_nodes_removed, n_reclaims);
+        exit(0);
+    }
+}
+
+static void usage(int argc, char *argv[])
+{
+    fprintf(stderr, "Usage: %s duration nreaders\n", argv[0]);
+    exit(-1);   /* does not return; argc is unused */
+}
+
+static int gtest_seconds;   /* set in main(): 4 in quick mode, else 20 */
+
+static void gtest_rcuq_one(void)
+{
+    rcu_qtest("rcuqtest", gtest_seconds / 4, 1);    /* 1 reader */
+}
+
+static void gtest_rcuq_few(void)
+{
+    rcu_qtest("rcuqtest", gtest_seconds / 4, 5);    /* 5 readers */
+}
+
+static void gtest_rcuq_many(void)
+{
+    rcu_qtest("rcuqtest", gtest_seconds / 2, 20);   /* 20 readers */
+}
+
+/* A leading '-' in argv[1] runs the GLib tests; else: duration readers. */
+int main(int argc, char *argv[])
+{
+    int duration = 0, readers = 0;
+
+    if (argc >= 2) {
+        if (argv[1][0] == '-') {
+            g_test_init(&argc, &argv, NULL);
+            if (g_test_quick()) {
+                gtest_seconds = 4;
+            } else {
+                gtest_seconds = 20;
+            }
+            g_test_add_func("/rcu/qlist/single-threaded", gtest_rcuq_one);
+            g_test_add_func("/rcu/qlist/short-few", gtest_rcuq_few);
+            g_test_add_func("/rcu/qlist/long-many", gtest_rcuq_many);
+            g_test_in_charge = 1;
+            return g_test_run();
+        }
+        duration = strtoul(argv[1], NULL, 0);
+    }
+    if (argc >= 3) {
+        readers = strtoul(argv[2], NULL, 0);
+    }
+    if (duration > 0 && readers > 0) {  /* wrapped values must not run */
+        rcu_qtest(argv[0], duration, readers);
+        return 0;
+    }
+
+    usage(argc, argv);
+    return -1;
+}
diff --git a/util/rcu.c b/util/rcu.c
index c9c3e6e4ab..bd73b8eb47 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -35,6 +35,7 @@
 #include "qemu/rcu.h"
 #include "qemu/atomic.h"
 #include "qemu/thread.h"
+#include "qemu/main-loop.h"
 
 /*
  * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
@@ -223,32 +224,38 @@ static void *call_rcu_thread(void *opaque)
          * Fetch rcu_call_count now, we only must process elements that were
          * added before synchronize_rcu() starts.
          */
-        while (n < RCU_CALL_MIN_SIZE && ++tries <= 5) {
-            g_usleep(100000);
-            qemu_event_reset(&rcu_call_ready_event);
-            n = atomic_read(&rcu_call_count);
-            if (n < RCU_CALL_MIN_SIZE) {
-                qemu_event_wait(&rcu_call_ready_event);
+        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
+            g_usleep(10000);
+            if (n == 0) {
+                qemu_event_reset(&rcu_call_ready_event);
                 n = atomic_read(&rcu_call_count);
+                if (n == 0) {
+                    qemu_event_wait(&rcu_call_ready_event);
+                }
             }
+            n = atomic_read(&rcu_call_count);
         }
 
         atomic_sub(&rcu_call_count, n);
         synchronize_rcu();
+        qemu_mutex_lock_iothread();
         while (n > 0) {
             node = try_dequeue();
             while (!node) {
+                qemu_mutex_unlock_iothread();
                 qemu_event_reset(&rcu_call_ready_event);
                 node = try_dequeue();
                 if (!node) {
                     qemu_event_wait(&rcu_call_ready_event);
                     node = try_dequeue();
                 }
+                qemu_mutex_lock_iothread();
             }
 
             n--;
             node->func(node);
         }
+        qemu_mutex_unlock_iothread();
     }
     abort();
 }
author	Peter Maydell <peter.maydell@linaro.org>	2015-02-24 13:58:18 +0000
committer	Peter Maydell <peter.maydell@linaro.org>	2015-02-24 13:58:18 +0000
commit	73104fd399c6778112f64fe0d439319f24508d9a (patch)
tree	c097e6a8acd33b8ad8561a2bccacc4247db76085
parent	3dc10613c313a042a111e46a977733411495ea8c (diff)
parent	0dc3f44aca18b1be8b425f3f4feb4b3e8d68de2e (diff)
download	qemu-73104fd399c6778112f64fe0d439319f24508d9a.tar.gz qemu-73104fd399c6778112f64fe0d439319f24508d9a.tar.bz2 qemu-73104fd399c6778112f64fe0d439319f24508d9a.zip