diff options
Diffstat (limited to 'arch/x86/xen/enlighten.c')
-rw-r--r-- | arch/x86/xen/enlighten.c | 118 |
1 files changed, 107 insertions, 11 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f1814fc2cb7..a6f8acbdfc9 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -31,6 +31,7 @@ #include <linux/pci.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/syscore_ops.h> #include <xen/xen.h> #include <xen/interface/xen.h> @@ -1471,38 +1472,130 @@ asmlinkage void __init xen_start_kernel(void) #endif } -void __ref xen_hvm_init_shared_info(void) +#ifdef CONFIG_XEN_PVHVM +/* + * The pfn containing the shared_info is located somewhere in RAM. This + * will cause trouble if the current kernel is doing a kexec boot into a + * new kernel. The new kernel (and its startup code) can not know where + * the pfn is, so it can not reserve the page. The hypervisor will + * continue to update the pfn, and as a result memory corruption occours + * in the new kernel. + * + * One way to work around this issue is to allocate a page in the + * xen-platform pci device's BAR memory range. But pci init is done very + * late and the shared_info page is already in use very early to read + * the pvclock. So moving the pfn from RAM to MMIO is racy because some + * code paths on other vcpus could access the pfn during the small + * window when the old pfn is moved to the new pfn. There is even a + * small window were the old pfn is not backed by a mfn, and during that + * time all reads return -1. + * + * Because it is not known upfront where the MMIO region is located it + * can not be used right from the start in xen_hvm_init_shared_info. + * + * To minimise trouble the move of the pfn is done shortly before kexec. + * This does not eliminate the race because all vcpus are still online + * when the syscore_ops will be called. But hopefully there is no work + * pending at this point in time. Also the syscore_op is run last which + * reduces the risk further. + */ + +static struct shared_info *xen_hvm_shared_info; + +static void xen_hvm_connect_shared_info(unsigned long pfn) { - int cpu; struct xen_add_to_physmap xatp; - static struct shared_info *shared_info_page = 0; - if (!shared_info_page) - shared_info_page = (struct shared_info *) - extend_brk(PAGE_SIZE, PAGE_SIZE); xatp.domid = DOMID_SELF; xatp.idx = 0; xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; + xatp.gpfn = pfn; if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) BUG(); - HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; +} +static void xen_hvm_set_shared_info(struct shared_info *sip) +{ + int cpu; + + HYPERVISOR_shared_info = sip; /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info * page, we use it in the event channel upcall and in some pvclock * related functions. We don't need the vcpu_info placement * optimizations because we don't use any pv_mmu or pv_irq op on * HVM. - * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is - * online but xen_hvm_init_shared_info is run at resume time too and + * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is + * online but xen_hvm_set_shared_info is run at resume time too and * in that case multiple vcpus might be online. */ for_each_online_cpu(cpu) { per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; } } -#ifdef CONFIG_XEN_PVHVM +/* Reconnect the shared_info pfn to a mfn */ +void xen_hvm_resume_shared_info(void) +{ + xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); +} + +#ifdef CONFIG_KEXEC +static struct shared_info *xen_hvm_shared_info_kexec; +static unsigned long xen_hvm_shared_info_pfn_kexec; + +/* Remember a pfn in MMIO space for kexec reboot */ +void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) +{ + xen_hvm_shared_info_kexec = sip; + xen_hvm_shared_info_pfn_kexec = pfn; +} + +static void xen_hvm_syscore_shutdown(void) +{ + struct xen_memory_reservation reservation = { + .domid = DOMID_SELF, + .nr_extents = 1, + }; + unsigned long prev_pfn; + int rc; + + if (!xen_hvm_shared_info_kexec) + return; + + prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; + set_xen_guest_handle(reservation.extent_start, &prev_pfn); + + /* Move pfn to MMIO, disconnects previous pfn from mfn */ + xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); + + /* Update pointers, following hypercall is also a memory barrier */ + xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); + + /* Allocate new mfn for previous pfn */ + do { + rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + if (rc == 0) + msleep(123); + } while (rc == 0); + + /* Make sure the previous pfn is really connected to a (new) mfn */ + BUG_ON(rc != 1); +} + +static struct syscore_ops xen_hvm_syscore_ops = { + .shutdown = xen_hvm_syscore_shutdown, +}; +#endif + +/* Use a pfn in RAM, may move to MMIO before kexec. */ +static void __init xen_hvm_init_shared_info(void) +{ + /* Remember pointer for resume */ + xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); + xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); + xen_hvm_set_shared_info(xen_hvm_shared_info); +} + static void __init init_hvm_pv_info(void) { int major, minor; @@ -1553,6 +1646,9 @@ static void __init xen_hvm_guest_init(void) init_hvm_pv_info(); xen_hvm_init_shared_info(); +#ifdef CONFIG_KEXEC + register_syscore_ops(&xen_hvm_syscore_ops); +#endif if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; |