diff options
Diffstat (limited to 'arch')
39 files changed, 372 insertions, 295 deletions
diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c index 62dc379d301a..813c9b63c0e1 100644 --- a/arch/alpha/mm/extable.c +++ b/arch/alpha/mm/extable.c @@ -48,6 +48,27 @@ void sort_extable(struct exception_table_entry *start, cmp_ex, swap_ex); } +#ifdef CONFIG_MODULES +/* + * Any entry referring to the module init will be at the beginning or + * the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), + m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ + const struct exception_table_entry * search_extable(const struct exception_table_entry *first, const struct exception_table_entry *last, diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 1167fe9cf6c4..98f94d041d9c 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -32,8 +32,6 @@ void module_free(struct module *mod, void *module_region) mod->arch.syminfo = NULL; vfree(module_region); - /* FIXME: if module_region == mod->init_region, trim exception - * table entries. */ } static inline int check_rela(Elf32_Rela *rela, struct module *module, diff --git a/arch/cris/kernel/module.c b/arch/cris/kernel/module.c index a187833febc8..abc13e368b90 100644 --- a/arch/cris/kernel/module.c +++ b/arch/cris/kernel/module.c @@ -48,8 +48,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { FREE_MODULE(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/frv/kernel/module.c b/arch/frv/kernel/module.c index 850d168f69fc..711763c8a6f3 100644 --- a/arch/frv/kernel/module.c +++ b/arch/frv/kernel/module.c @@ -35,8 +35,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index cfc9127d2ced..0865e291c20d 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -23,8 +23,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c index 71c50dd8f870..e95d5ad9285d 100644 --- a/arch/ia64/mm/extable.c +++ b/arch/ia64/mm/extable.c @@ -53,6 +53,32 @@ void sort_extable (struct exception_table_entry *start, cmp_ex, swap_ex); } +static inline unsigned long ex_to_addr(const struct exception_table_entry *x) +{ + return (unsigned long)&x->insn + x->insn; +} + +#ifdef CONFIG_MODULES +/* + * Any entry referring to the module init will be at the beginning or + * the end. + */ +void trim_init_extable(struct module *m) +{ + /*trim the beginning*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[0]), m)) { + m->extable++; + m->num_exentries--; + } + /*trim the end*/ + while (m->num_exentries && + within_module_init(ex_to_addr(&m->extable[m->num_exentries-1]), + m)) + m->num_exentries--; +} +#endif /* CONFIG_MODULES */ + const struct exception_table_entry * search_extable (const struct exception_table_entry *first, const struct exception_table_entry *last, diff --git a/arch/m32r/kernel/module.c b/arch/m32r/kernel/module.c index 8d4205794380..cb5f37d78d49 100644 --- a/arch/m32r/kernel/module.c +++ b/arch/m32r/kernel/module.c @@ -44,8 +44,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/m68k/kernel/module.c b/arch/m68k/kernel/module.c index 774862bc6977..cd6bcb1c957e 100644 --- a/arch/m68k/kernel/module.c +++ b/arch/m68k/kernel/module.c @@ -31,8 +31,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/m68knommu/kernel/module.c b/arch/m68knommu/kernel/module.c index 3b1a2ff61ddc..d11ffae7956a 100644 --- a/arch/m68knommu/kernel/module.c +++ b/arch/m68knommu/kernel/module.c @@ -23,8 +23,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/mips/kernel/module.c b/arch/mips/kernel/module.c index 1f60e27523d9..3e9100dcc12d 100644 --- a/arch/mips/kernel/module.c +++ b/arch/mips/kernel/module.c @@ -68,8 +68,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6b287f2e8e84..4fa0e3648d8e 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -48,8 +48,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - * table entries. */ } /* diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ecd1c5024447..ef5caf2e6ed0 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -267,8 +267,6 @@ void module_free(struct module *mod, void *module_region) mod->arch.section = NULL; vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* Additional bytes needed in front of individual sections */ diff --git a/arch/powerpc/include/asm/mpc52xx_psc.h b/arch/powerpc/include/asm/mpc52xx_psc.h index a218da6bec7c..fb8412057450 100644 --- a/arch/powerpc/include/asm/mpc52xx_psc.h +++ b/arch/powerpc/include/asm/mpc52xx_psc.h @@ -28,6 +28,10 @@ #define MPC52xx_PSC_MAXNUM 6 /* Programmable Serial Controller (PSC) status register bits */ +#define MPC52xx_PSC_SR_UNEX_RX 0x0001 +#define MPC52xx_PSC_SR_DATA_VAL 0x0002 +#define MPC52xx_PSC_SR_DATA_OVR 0x0004 +#define MPC52xx_PSC_SR_CMDSEND 0x0008 #define MPC52xx_PSC_SR_CDE 0x0080 #define MPC52xx_PSC_SR_RXRDY 0x0100 #define MPC52xx_PSC_SR_RXFULL 0x0200 @@ -61,6 +65,12 @@ #define MPC52xx_PSC_RXTX_FIFO_EMPTY 0x0001 /* PSC interrupt status/mask bits */ +#define MPC52xx_PSC_IMR_UNEX_RX_SLOT 0x0001 +#define MPC52xx_PSC_IMR_DATA_VALID 0x0002 +#define MPC52xx_PSC_IMR_DATA_OVR 0x0004 +#define MPC52xx_PSC_IMR_CMD_SEND 0x0008 +#define MPC52xx_PSC_IMR_ERROR 0x0040 +#define MPC52xx_PSC_IMR_DEOF 0x0080 #define MPC52xx_PSC_IMR_TXRDY 0x0100 #define MPC52xx_PSC_IMR_RXRDY 0x0200 #define MPC52xx_PSC_IMR_DB 0x0400 @@ -117,6 +127,7 @@ #define MPC52xx_PSC_SICR_SIM_FIR (0x6 << 24) #define MPC52xx_PSC_SICR_SIM_CODEC_24 (0x7 << 24) #define MPC52xx_PSC_SICR_SIM_CODEC_32 (0xf << 24) +#define MPC52xx_PSC_SICR_AWR (1 << 30) #define MPC52xx_PSC_SICR_GENCLK (1 << 23) #define MPC52xx_PSC_SICR_I2S (1 << 22) #define MPC52xx_PSC_SICR_CLKPOL (1 << 21) diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 43e7e3a7f130..477c663e0140 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -43,8 +43,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index eed4a00cb676..ab2e3ed28abc 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -56,8 +56,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } static void diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index c19b0f7d2cc1..c2efdcde266f 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -46,8 +46,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* We don't need anything special. */ diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h index 47d5619d43fa..8303ac481034 100644 --- a/arch/sparc/include/asm/uaccess_32.h +++ b/arch/sparc/include/asm/uaccess_32.h @@ -17,6 +17,9 @@ #ifndef __ASSEMBLY__ +#define ARCH_HAS_SORT_EXTABLE +#define ARCH_HAS_SEARCH_EXTABLE + /* Sparc is not segmented, however we need to be able to fool access_ok() * when doing system calls from kernel mode legitimately. * diff --git a/arch/sparc/kernel/module.c b/arch/sparc/kernel/module.c index 90273765e81f..0ee642f63234 100644 --- a/arch/sparc/kernel/module.c +++ b/arch/sparc/kernel/module.c @@ -75,8 +75,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } /* Make generic code ignore STT_REGISTER dummy undefined symbols. */ diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c index 16cc28935e39..a61c349448e1 100644 --- a/arch/sparc/mm/extable.c +++ b/arch/sparc/mm/extable.c @@ -28,6 +28,10 @@ search_extable(const struct exception_table_entry *start, * word 3: last insn address + 4 bytes * word 4: fixup code address * + * Deleted entries are encoded as: + * word 1: unused + * word 2: -1 + * * See asm/uaccess.h for more details. */ @@ -39,6 +43,10 @@ search_extable(const struct exception_table_entry *start, continue; } + /* A deleted entry; see trim_init_extable */ + if (walk->fixup == -1) + continue; + if (walk->insn == value) return walk; } @@ -57,6 +65,27 @@ search_extable(const struct exception_table_entry *start, return NULL; } +#ifdef CONFIG_MODULES +/* We could memmove them around; easier to mark the trimmed ones. */ +void trim_init_extable(struct module *m) +{ + unsigned int i; + bool range; + + for (i = 0; i < m->num_exentries; i += range ? 2 : 1) { + range = m->extable[i].fixup == 0; + + if (within_module_init(m->extable[i].insn, m)) { + m->extable[i].fixup = -1; + if (range) + m->extable[i+1].fixup = -1; + } + if (range) + i++; + } +} +#endif /* CONFIG_MODULES */ + /* Special extable search, which handles ranges. Returns fixup */ unsigned long search_extables_range(unsigned long addr, unsigned long *g2) { diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h index 58da2480a7f4..9ce3f165111a 100644 --- a/arch/um/include/asm/pgtable.h +++ b/arch/um/include/asm/pgtable.h @@ -53,16 +53,21 @@ extern unsigned long end_iomem; #else # define VMALLOC_END (FIXADDR_START-2*PAGE_SIZE) #endif +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY) #define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) #define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) - +#define __PAGE_KERNEL_EXEC \ + (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) #define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) #define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | _PAGE_ACCESSED) #define PAGE_KERNEL __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED) +#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) /* * The i386 can't do page protection for execute, and considers that the same diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index 598b5c1903af..1b549bca4645 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -8,7 +8,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ subarch-obj-y = lib/semaphore_32.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o -subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o +subarch-obj-$(CONFIG_MODULES) += kernel/module.o USER_OBJS := bugs.o ptrace_user.o fault.o diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index c8b4cce9cfe1..2201e9c20e4a 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -8,10 +8,8 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ setjmp.o signal.o stub.o stub_segv.o syscalls.o syscall_table.o \ sysrq.o ksyms.o tls.o -obj-$(CONFIG_MODULES) += um_module.o - subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o -subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o +subarch-obj-$(CONFIG_MODULES) += kernel/module.o ldt-y = ../sys-i386/ldt.o diff --git a/arch/um/sys-x86_64/um_module.c b/arch/um/sys-x86_64/um_module.c deleted file mode 100644 index 3dead392a415..000000000000 --- a/arch/um/sys-x86_64/um_module.c +++ /dev/null @@ -1,21 +0,0 @@ -#include <linux/vmalloc.h> -#include <linux/moduleloader.h> - -/* Copied from i386 arch/i386/kernel/module.c */ -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return vmalloc_exec(size); -} - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* - * FIXME: If module_region == mod->init_region, trim exception - * table entries. - */ -} - diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 1caf57628b9c..313389cd50d2 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h @@ -17,8 +17,13 @@ /* Pages for switcher itself, then two pages per cpu */ #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) -/* We map at -4M for ease of mapping into the guest (one PTE page). */ +/* We map at -4M (-2M when PAE is activated) for ease of mapping + * into the guest (one PTE page). */ +#ifdef CONFIG_X86_PAE +#define SWITCHER_ADDR 0xFFE00000 +#else #define SWITCHER_ADDR 0xFFC00000 +#endif /* Found in switcher.S */ extern unsigned long default_idt_entries[]; diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index faae1996487b..d31c4a684078 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h @@ -12,11 +12,13 @@ #define LHCALL_TS 8 #define LHCALL_SET_CLOCKEVENT 9 #define LHCALL_HALT 10 +#define LHCALL_SET_PMD 13 #define LHCALL_SET_PTE 14 -#define LHCALL_SET_PMD 15 +#define LHCALL_SET_PGD 15 #define LHCALL_LOAD_TLS 16 #define LHCALL_NOTIFY 17 #define LHCALL_LOAD_GDT_ENTRY 18 +#define LHCALL_SEND_INTERRUPTS 19 #define LGUEST_TRAP_ENTRY 0x1F @@ -32,10 +34,10 @@ * operations? There are two ways: the direct way is to make a "hypercall", * to make requests of the Host Itself. * - * We use the KVM hypercall mechanism. Eighteen hypercalls are + * We use the KVM hypercall mechanism. Seventeen hypercalls are * available: the hypercall number is put in the %eax register, and the - * arguments (when required) are placed in %ebx, %ecx and %edx. If a return - * value makes sense, it's returned in %eax. + * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. + * If a return value makes sense, it's returned in %eax. * * Grossly invalid calls result in Sudden Death at the hands of the vengeful * Host, rather than returning failure. This reflects Winston Churchill's @@ -47,8 +49,9 @@ #define LHCALL_RING_SIZE 64 struct hcall_args { - /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ - unsigned long arg0, arg1, arg2, arg3; + /* These map directly onto eax, ebx, ecx, edx and esi + * in struct lguest_regs */ + unsigned long arg0, arg1, arg2, arg3, arg4; }; #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 2733fad45f98..5e67c1532314 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -46,6 +46,10 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) #endif +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END +#define MODULES_LEN (MODULES_VADDR - MODULES_END) + #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index a5ecc9c33e92..7f3eba08e7de 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -172,6 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, flush_tlb_all(); } -extern void zap_low_mappings(void); +extern void zap_low_mappings(bool early); #endif /* _ASM_X86_TLBFLUSH_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4f78bd682125..f3477bb84566 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -73,7 +73,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-$(CONFIG_KPROBES) += kprobes.o -obj-$(CONFIG_MODULES) += module_$(BITS).o +obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a830cbd7015..dfdbf6403895 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -126,6 +126,7 @@ void foo(void) #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); + OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); BLANK(); diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module.c index c23880b90b5c..89f386f044e4 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module.c @@ -1,6 +1,5 @@ -/* Kernel module help for x86-64 +/* Kernel module help for x86. Copyright (C) 2001 Rusty Russell. - Copyright (C) 2002,2003 Andi Kleen, SuSE Labs. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,23 +21,18 @@ #include <linux/fs.h> #include <linux/string.h> #include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/slab.h> #include <linux/bug.h> +#include <linux/mm.h> #include <asm/system.h> #include <asm/page.h> #include <asm/pgtable.h> +#if 0 +#define DEBUGP printk +#else #define DEBUGP(fmt...) - -#ifndef CONFIG_UML -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} +#endif void *module_alloc(unsigned long size) { @@ -54,9 +48,15 @@ void *module_alloc(unsigned long size) if (!area) return NULL; - return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); + return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, + PAGE_KERNEL_EXEC); +} + +/* Free memory returned from module_alloc */ +void module_free(struct module *mod, void *module_region) +{ + vfree(module_region); } -#endif /* We don't need anything special. */ int module_frob_arch_sections(Elf_Ehdr *hdr, @@ -67,6 +67,58 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, return 0; } +#ifdef CONFIG_X86_32 +int apply_relocate(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + unsigned int i; + Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; + Elf32_Sym *sym; + uint32_t *location; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + /* This is the symbol it is referring to. Note that all + undefined symbols have been resolved. */ + sym = (Elf32_Sym *)sechdrs[symindex].sh_addr + + ELF32_R_SYM(rel[i].r_info); + + switch (ELF32_R_TYPE(rel[i].r_info)) { + case R_386_32: + /* We add the value into the location given */ + *location += sym->st_value; + break; + case R_386_PC32: + /* Add the value, subtract its postition */ + *location += sym->st_value - (uint32_t)location; + break; + default: + printk(KERN_ERR "module %s: Unknown relocation: %u\n", + me->name, ELF32_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } + } + return 0; +} + +int apply_relocate_add(Elf32_Shdr *sechdrs, + const char *strtab, + unsigned int symindex, + unsigned int relsec, + struct module *me) +{ + printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", + me->name); + return -ENOEXEC; +} +#else /*X86_64*/ int apply_relocate_add(Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -147,6 +199,8 @@ int apply_relocate(Elf_Shdr *sechdrs, return -ENOSYS; } +#endif + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c deleted file mode 100644 index 0edd819050e7..000000000000 --- a/arch/x86/kernel/module_32.c +++ /dev/null @@ -1,152 +0,0 @@ -/* Kernel module help for i386. - Copyright (C) 2001 Rusty Russell. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ -#include <linux/moduleloader.h> -#include <linux/elf.h> -#include <linux/vmalloc.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/bug.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(fmt...) -#endif - -void *module_alloc(unsigned long size) -{ - if (size == 0) - return NULL; - return vmalloc_exec(size); -} - - -/* Free memory returned from module_alloc */ -void module_free(struct module *mod, void *module_region) -{ - vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ -} - -/* We don't need anything special. */ -int module_frob_arch_sections(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - char *secstrings, - struct module *mod) -{ - return 0; -} - -int apply_relocate(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - unsigned int i; - Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; - Elf32_Sym *sym; - uint32_t *location; - - DEBUGP("Applying relocate section %u to %u\n", relsec, - sechdrs[relsec].sh_info); - for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { - /* This is where to make the change */ - location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr - + rel[i].r_offset; - /* This is the symbol it is referring to. Note that all - undefined symbols have been resolved. */ - sym = (Elf32_Sym *)sechdrs[symindex].sh_addr - + ELF32_R_SYM(rel[i].r_info); - - switch (ELF32_R_TYPE(rel[i].r_info)) { - case R_386_32: - /* We add the value into the location given */ - *location += sym->st_value; - break; - case R_386_PC32: - /* Add the value, subtract its postition */ - *location += sym->st_value - (uint32_t)location; - break; - default: - printk(KERN_ERR "module %s: Unknown relocation: %u\n", - me->name, ELF32_R_TYPE(rel[i].r_info)); - return -ENOEXEC; - } - } - return 0; -} - -int apply_relocate_add(Elf32_Shdr *sechdrs, - const char *strtab, - unsigned int symindex, - unsigned int relsec, - struct module *me) -{ - printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", - me->name); - return -ENOEXEC; -} - -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) -{ - const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, - *para = NULL; - char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".text", secstrings + s->sh_name)) - text = s; - if (!strcmp(".altinstructions", secstrings + s->sh_name)) - alt = s; - if (!strcmp(".smp_locks", secstrings + s->sh_name)) - locks = s; - if (!strcmp(".parainstructions", secstrings + s->sh_name)) - para = s; - } - - if (alt) { - /* patch .altinstructions */ - void *aseg = (void *)alt->sh_addr; - apply_alternatives(aseg, aseg + alt->sh_size); - } - if (locks && text) { - void *lseg = (void *)locks->sh_addr; - void *tseg = (void *)text->sh_addr; - alternatives_smp_module_add(me, me->name, - lseg, lseg + locks->sh_size, - tseg, tseg + text->sh_size); - } - - if (para) { - void *pseg = (void *)para->sh_addr; - apply_paravirt(pseg, pseg + para->sh_size); - } - - return module_bug_finalize(hdr, sechdrs, me); -} - -void module_arch_cleanup(struct module *mod) -{ - alternatives_smp_module_del(mod); - module_bug_cleanup(mod); -} diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d1c636bf31a7..be5ae80f897f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -301,15 +301,13 @@ static void __init reserve_brk(void) #ifdef CONFIG_BLK_DEV_INITRD -#ifdef CONFIG_X86_32 - #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) static void __init relocate_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; @@ -365,14 +363,13 @@ static void __init relocate_initrd(void) ramdisk_image, ramdisk_image + ramdisk_size - 1, ramdisk_here, ramdisk_here + ramdisk_size - 1); } -#endif static void __init reserve_initrd(void) { u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; u64 ramdisk_end = ramdisk_image + ramdisk_size; - u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; + u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) @@ -402,14 +399,8 @@ static void __init reserve_initrd(void) return; } -#ifdef CONFIG_X86_32 relocate_initrd(); -#else - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08llx > 0x%08llx)\ndisabling initrd\n", - ramdisk_end, end_of_lowmem); - initrd_start = 0; -#endif + free_early(ramdisk_image, ramdisk_end); } #else diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 7c80007ea5f7..2fecda69ee64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -873,7 +873,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) err = do_boot_cpu(apicid, cpu); - zap_low_mappings(); + zap_low_mappings(false); low_mappings = 0; #else err = do_boot_cpu(apicid, cpu); diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 4c85b2e2bb65..367e87882041 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -108,6 +108,8 @@ SECTIONS /* Data */ . = ALIGN(PAGE_SIZE); .data : AT(ADDR(.data) - LOAD_OFFSET) { + /* Start of data section */ + _sdata = .; DATA_DATA CONSTRUCTORS diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 8dab8f7844d3..38718041efc3 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig @@ -2,7 +2,6 @@ config LGUEST_GUEST bool "Lguest guest support" select PARAVIRT depends on X86_32 - depends on !X86_PAE select VIRTIO select VIRTIO_RING select VIRTIO_CONSOLE diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 4e0c26559395..7bc65f0f62c4 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -87,7 +87,7 @@ struct lguest_data lguest_data = { /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a * ring buffer of stored hypercalls which the Host will run though next time we - * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall + * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall * arguments, and a "hcall_status" word which is 0 if the call is ready to go, * and 255 once the Host has finished with it. * @@ -96,7 +96,8 @@ struct lguest_data lguest_data = { * effect of causing the Host to run all the stored calls in the ring buffer * which empties it for next time! */ static void async_hcall(unsigned long call, unsigned long arg1, - unsigned long arg2, unsigned long arg3) + unsigned long arg2, unsigned long arg3, + unsigned long arg4) { /* Note: This code assumes we're uniprocessor. */ static unsigned int next_call; @@ -108,12 +109,13 @@ static void async_hcall(unsigned long call, unsigned long arg1, local_irq_save(flags); if (lguest_data.hcall_status[next_call] != 0xFF) { /* Table full, so do normal hcall which will flush table. */ - kvm_hypercall3(call, arg1, arg2, arg3); + kvm_hypercall4(call, arg1, arg2, arg3, arg4); } else { lguest_data.hcalls[next_call].arg0 = call; lguest_data.hcalls[next_call].arg1 = arg1; lguest_data.hcalls[next_call].arg2 = arg2; lguest_data.hcalls[next_call].arg3 = arg3; + lguest_data.hcalls[next_call].arg4 = arg4; /* Arguments must all be written before we mark it to go */ wmb(); lguest_data.hcall_status[next_call] = 0; @@ -141,7 +143,7 @@ static void lazy_hcall1(unsigned long call, if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) kvm_hypercall1(call, arg1); else - async_hcall(call, arg1, 0, 0); + async_hcall(call, arg1, 0, 0, 0); } static void lazy_hcall2(unsigned long call, @@ -151,7 +153,7 @@ static void lazy_hcall2(unsigned long call, if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) kvm_hypercall2(call, arg1, arg2); else - async_hcall(call, arg1, arg2, 0); + async_hcall(call, arg1, arg2, 0, 0); } static void lazy_hcall3(unsigned long call, @@ -162,9 +164,23 @@ static void lazy_hcall3(unsigned long call, if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) kvm_hypercall3(call, arg1, arg2, arg3); else - async_hcall(call, arg1, arg2, arg3); + async_hcall(call, arg1, arg2, arg3, 0); } +#ifdef CONFIG_X86_PAE +static void lazy_hcall4(unsigned long call, + unsigned long arg1, + unsigned long arg2, + unsigned long arg3, + unsigned long arg4) +{ + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) + kvm_hypercall4(call, arg1, arg2, arg3, arg4); + else + async_hcall(call, arg1, arg2, arg3, arg4); +} +#endif + /* When lazy mode is turned off reset the per-cpu lazy mode variable and then * issue the do-nothing hypercall to flush any stored calls. */ static void lguest_leave_lazy_mmu_mode(void) @@ -179,7 +195,7 @@ static void lguest_end_context_switch(struct task_struct *next) paravirt_end_context_switch(next); } -/*G:033 +/*G:032 * After that diversion we return to our first native-instruction * replacements: four functions for interrupt control. * @@ -199,30 +215,28 @@ static unsigned long save_fl(void) { return lguest_data.irq_enabled; } -PV_CALLEE_SAVE_REGS_THUNK(save_fl); - -/* restore_flags() just sets the flags back to the value given. */ -static void restore_fl(unsigned long flags) -{ - lguest_data.irq_enabled = flags; -} -PV_CALLEE_SAVE_REGS_THUNK(restore_fl); /* Interrupts go off... */ static void irq_disable(void) { lguest_data.irq_enabled = 0; } + +/* Let's pause a moment. Remember how I said these are called so often? + * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to + * break some rules. In particular, these functions are assumed to save their + * own registers if they need to: normal C functions assume they can trash the + * eax register. To use normal C functions, we use + * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the + * C function, then restores it. */ +PV_CALLEE_SAVE_REGS_THUNK(save_fl); PV_CALLEE_SAVE_REGS_THUNK(irq_disable); +/*:*/ -/* Interrupts go on... */ -static void irq_enable(void) -{ - lguest_data.irq_enabled = X86_EFLAGS_IF; -} -PV_CALLEE_SAVE_REGS_THUNK(irq_enable); +/* These are in i386_head.S */ +extern void lg_irq_enable(void); +extern void lg_restore_fl(unsigned long flags); -/*:*/ /*M:003 Note that we don't check for outstanding interrupts when we re-enable * them (or when we unmask an interrupt). This seems to work for the moment, * since interrupts are rare and we'll just get the interrupt on the next timer @@ -368,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, case 1: /* Basic feature request. */ /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ *cx &= 0x00002201; - /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ - *dx &= 0x07808111; + /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ + *dx &= 0x07808151; /* The Host can do a nice optimization if it knows that the * kernel mappings (addresses above 0xC0000000 or whatever * PAGE_OFFSET is set to) haven't changed. But Linux calls @@ -388,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, if (*ax > 0x80000008) *ax = 0x80000008; break; + case 0x80000001: + /* Here we should fix nx cap depending on host. */ + /* For this version of PAE, we just clear NX bit. */ + *dx &= ~(1 << 20); + break; } } @@ -521,25 +540,52 @@ static void lguest_write_cr4(unsigned long val) static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { +#ifdef CONFIG_X86_PAE + lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, + ptep->pte_low, ptep->pte_high); +#else lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); +#endif } static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pteval) { - *ptep = pteval; + native_set_pte(ptep, pteval); lguest_pte_update(mm, addr, ptep); } -/* The Guest calls this to set a top-level entry. Again, we set the entry then - * tell the Host which top-level page we changed, and the index of the entry we - * changed. */ +/* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd + * to set a middle-level entry when PAE is activated. + * Again, we set the entry then tell the Host which page we changed, + * and the index of the entry we changed. */ +#ifdef CONFIG_X86_PAE +static void lguest_set_pud(pud_t *pudp, pud_t pudval) +{ + native_set_pud(pudp, pudval); + + /* 32 bytes aligned pdpt address and the index. */ + lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0, + (__pa(pudp) & 0x1F) / sizeof(pud_t)); +} + static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) { - *pmdp = pmdval; + native_set_pmd(pmdp, pmdval); lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK, - (__pa(pmdp) & (PAGE_SIZE - 1)) / 4); + (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); } +#else + +/* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not + * activated. */ +static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) +{ + native_set_pmd(pmdp, pmdval); + lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK, + (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); +} +#endif /* There are a couple of legacy places where the kernel sets a PTE, but we * don't know the top level any more. This is useless for us, since we don't @@ -552,11 +598,31 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * which brings boot back to 0.25 seconds. */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + native_set_pte(ptep, pteval); + if (cr3_changed) + lazy_hcall1(LHCALL_FLUSH_TLB, 1); +} + +#ifdef CONFIG_X86_PAE +static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + native_set_pte_atomic(ptep, pte); if (cr3_changed) lazy_hcall1(LHCALL_FLUSH_TLB, 1); } +void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + native_pte_clear(mm, addr, ptep); + lguest_pte_update(mm, addr, ptep); +} + +void lguest_pmd_clear(pmd_t *pmdp) +{ + lguest_set_pmd(pmdp, __pmd(0)); +} +#endif + /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on * native page table operations. On native hardware you can set a new page * table entry whenever you want, but if you want to remove one you have to do @@ -628,13 +694,12 @@ static void __init lguest_init_IRQ(void) { unsigned int i; - for (i = 0; i < LGUEST_IRQS; i++) { - int vector = FIRST_EXTERNAL_VECTOR + i; + for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* Some systems map "vectors" to interrupts weirdly. Lguest has * a straightforward 1 to 1 mapping, so force that here. */ - __get_cpu_var(vector_irq)[vector] = i; - if (vector != SYSCALL_VECTOR) - set_intr_gate(vector, interrupt[i]); + __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; + if (i != SYSCALL_VECTOR) + set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); } /* This call is required to set up for 4k stacks, where we have * separate stacks for hard and soft interrupts. */ @@ -973,10 +1038,10 @@ static void lguest_restart(char *reason) * * Our current solution is to allow the paravirt back end to optionally patch * over the indirect calls to replace them with something more efficient. We - * patch the four most commonly called functions: disable interrupts, enable - * interrupts, restore interrupts and save interrupts. We usually have 6 or 10 - * bytes to patch into: the Guest versions of these operations are small enough - * that we can fit comfortably. + * patch two of the simplest of the most commonly called functions: disable + * interrupts and save interrupts. We usually have 6 or 10 bytes to patch + * into: the Guest versions of these operations are small enough that we can + * fit comfortably. * * First we need assembly templates of each of the patchable Guest operations, * and these are in i386_head.S. */ @@ -987,8 +1052,6 @@ static const struct lguest_insns const char *start, *end; } lguest_insns[] = { [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, - [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, - [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, }; @@ -1026,6 +1089,7 @@ __init void lguest_init(void) pv_info.name = "lguest"; pv_info.paravirt_enabled = 1; pv_info.kernel_rpl = 1; + pv_info.shared_kernel_pmd = 1; /* We set up all the lguest overrides for sensitive operations. These * are detailed with the operations themselves. */ @@ -1033,9 +1097,9 @@ __init void lguest_init(void) /* interrupt-related operations */ pv_irq_ops.init_IRQ = lguest_init_IRQ; pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); - pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); + pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); - pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); + pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); pv_irq_ops.safe_halt = lguest_safe_halt; /* init-time operations */ @@ -1071,6 +1135,12 @@ __init void lguest_init(void) pv_mmu_ops.set_pte = lguest_set_pte; pv_mmu_ops.set_pte_at = lguest_set_pte_at; pv_mmu_ops.set_pmd = lguest_set_pmd; +#ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic; + pv_mmu_ops.pte_clear = lguest_pte_clear; + pv_mmu_ops.pmd_clear = lguest_pmd_clear; + pv_mmu_ops.set_pud = lguest_set_pud; +#endif pv_mmu_ops.read_cr2 = lguest_read_cr2; pv_mmu_ops.read_cr3 = lguest_read_cr3; pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index f79541989471..a9c8cfe61cd4 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S @@ -46,10 +46,64 @@ ENTRY(lguest_entry) .globl lgstart_##name; .globl lgend_##name LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) -LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) -LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) -/*:*/ + +/*G:033 But using those wrappers is inefficient (we'll see why that doesn't + * matter for save_fl and irq_disable later). If we write our routines + * carefully in assembler, we can avoid clobbering any registers and avoid + * jumping through the wrapper functions. + * + * I skipped over our first piece of assembler, but this one is worth studying + * in a bit more detail so I'll describe in easy stages. First, the routine + * to enable interrupts: */ +ENTRY(lg_irq_enable) + /* The reverse of irq_disable, this sets lguest_data.irq_enabled to + * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ + movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled + /* But now we need to check if the Host wants to know: there might have + * been interrupts waiting to be delivered, in which case it will have + * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we + * jump to send_interrupts, otherwise we're done. */ + testl $0, lguest_data+LGUEST_DATA_irq_pending + jnz send_interrupts + /* One cool thing about x86 is that you can do many things without using + * a register. In this case, the normal path hasn't needed to save or + * restore any registers at all! */ + ret +send_interrupts: + /* OK, now we need a register: eax is used for the hypercall number, + * which is LHCALL_SEND_INTERRUPTS. + * + * We used not to bother with this pending detection at all, which was + * much simpler. Sooner or later the Host would realize it had to + * send us an interrupt. But that turns out to make performance 7 + * times worse on a simple tcp benchmark. So now we do this the hard + * way. */ + pushl %eax + movl $LHCALL_SEND_INTERRUPTS, %eax + /* This is a vmcall instruction (same thing that KVM uses). Older + * assembler versions might not know the "vmcall" instruction, so we + * create one manually here. */ + .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ + popl %eax + ret + +/* Finally, the "popf" or "restore flags" routine. The %eax register holds the + * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're + * enabling interrupts again, if it's 0 we're leaving them off. */ +ENTRY(lg_restore_fl) + /* This is just "lguest_data.irq_enabled = flags;" */ + movl %eax, lguest_data+LGUEST_DATA_irq_enabled + /* Now, if the %eax value has enabled interrupts and + * lguest_data.irq_pending is set, we want to tell the Host so it can + * deliver any outstanding interrupts. Fortunately, both values will + * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" + * instruction will AND them together for us. If both are set, we + * jump to send_interrupts. */ + testl lguest_data+LGUEST_DATA_irq_pending, %eax + jnz send_interrupts + /* Again, the normal path has used no extra registers. Clever, huh? */ + ret /* These demark the EIP range where host should never deliver interrupts. */ .global lguest_noirq_start diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 949708d7a481..9ff3c0816d15 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -564,7 +564,7 @@ static inline void save_pg_dir(void) } #endif /* !CONFIG_ACPI_SLEEP */ -void zap_low_mappings(void) +void zap_low_mappings(bool early) { int i; @@ -581,7 +581,11 @@ void zap_low_mappings(void) set_pgd(swapper_pg_dir+i, __pgd(0)); #endif } - flush_tlb_all(); + + if (early) + __flush_tlb(); + else + flush_tlb_all(); } pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); @@ -956,7 +960,7 @@ void __init mem_init(void) test_wp_bit(); save_pg_dir(); - zap_low_mappings(); + zap_low_mappings(true); } #ifdef CONFIG_MEMORY_HOTPLUG diff --git a/arch/xtensa/kernel/module.c b/arch/xtensa/kernel/module.c index 3981a466c779..c1accea8cb56 100644 --- a/arch/xtensa/kernel/module.c +++ b/arch/xtensa/kernel/module.c @@ -34,8 +34,6 @@ void *module_alloc(unsigned long size) void module_free(struct module *mod, void *module_region) { vfree(module_region); - /* FIXME: If module_region == mod->init_region, trim exception - table entries. */ } int module_frob_arch_sections(Elf32_Ehdr *hdr, |