From 021ef050fc092d5638e69868d126c18006ea7296 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Sat, 19 Jan 2013 10:29:37 -0800 Subject: x86-32: Start out cr0 clean, disable paging before modifying cr3/4 Patch 5a5a51db78e x86-32: Start out eflags and cr4 clean ... made x86-32 match x86-64 in that we initialize %eflags and %cr4 from scratch. This broke OLPC XO-1.5, because the XO enters the kernel with paging enabled, which the kernel doesn't expect. Since we no longer support 386 (the source of most of the variability in %cr0 configuration), we can simply match further x86-64 and initialize %cr0 to a fixed value -- the one variable part remaining in %cr0 is for FPU control, but all that is handled later on in initialization; in particular, configuring %cr0 as if the FPU is present until proven otherwise is correct and necessary for the probe to work. To deal with the XO case sanely, explicitly disable paging in %cr0 before we muck with %cr3, %cr4 or EFER -- those operations are inherently unsafe with paging enabled. NOTE: There is still a lot of 386-related junk in head_32.S which we can and should get rid of, however, this is intended as a minimal fix whereas the cleanup can be deferred to the next merge window. Reported-by: Andres Salomon Tested-by: Daniel Drake Link: http://lkml.kernel.org/r/50FA0661.2060400@linux.intel.com Signed-off-by: H. Peter Anvin --- arch/x86/kernel/head_32.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 8e7f6556028..c8932c79e78 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -300,6 +300,12 @@ ENTRY(startup_32_smp) leal -__PAGE_OFFSET(%ecx),%esp default_entry: +#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ + X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ + X86_CR0_PG) + movl $(CR0_STATE & ~X86_CR0_PG),%eax + movl %eax,%cr0 + /* * New page tables may be in 4Mbyte page mode and may * be using the global pages. @@ -364,8 +370,7 @@ default_entry: */ movl $pa(initial_page_table), %eax movl %eax,%cr3 /* set the page table pointer.. */ - movl %cr0,%eax - orl $X86_CR0_PG,%eax + movl $CR0_STATE,%eax movl %eax,%cr0 /* ..and set paging (PG) bit */ ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */ 1: -- cgit v1.2.3 From 444723dccc3c855fe88ea138cdec46f30e707b74 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 24 Jan 2013 09:27:31 +0000 Subject: x86-64: Fix unwind annotations in recent NMI changes While in one case a plain annotation is necessary, in the other case the stack adjustment can simply be folded into the immediately preceding RESTORE_ALL, thus getting the correct annotation for free. Signed-off-by: Jan Beulich Cc: Steven Rostedt Cc: Linus Torvalds Cc: Alexander van Heukelum Link: http://lkml.kernel.org/r/51010C9302000078000B9045@nat28.tlf.novell.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 07a7a04529b..cb3c591339a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1781,6 +1781,7 @@ first_nmi: * Leave room for the "copied" frame */ subq $(5*8), %rsp + CFI_ADJUST_CFA_OFFSET 5*8 /* Copy the stack frame to the Saved frame */ .rept 5 @@ -1863,10 +1864,8 @@ end_repeat_nmi: nmi_swapgs: SWAPGS_UNSAFE_STACK nmi_restore: - RESTORE_ALL 8 - - /* Pop the extra iret frame */ - addq $(5*8), %rsp + /* Pop the extra iret frame at once */ + RESTORE_ALL 6*8 /* Clear the NMI executing stack variable */ movq $0, 5*8(%rsp) -- cgit v1.2.3 From 57c4f43043f89e18e0a386e096e57457f11f668b Mon Sep 17 00:00:00 2001 From: Alex Shi Date: Tue, 18 Dec 2012 12:22:14 -0800 Subject: arch/x86/platform/uv: Fix incorrect tlb flush all issue The flush tlb optimization code has logical issue on UV platform. It doesn't flush the full range at all, since it simply ignores its 'end' parameter (and hence also the "all" indicator) in uv_flush_tlb_others() function. Cliff's notes: | I tested the patch on a UV. It has the effect of either | clearing 1 or all TLBs in a cpu. I added some debugging to | test for the cases when clearing all TLBs is overkill, and in | practice it happens very seldom. Reported-by: Jan Beulich Signed-off-by: Alex Shi Signed-off-by: Cliff Wickman Tested-by: Cliff Wickman Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv.h | 2 +- arch/x86/platform/uv/tlb_uv.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index b47c2a82ff1..062921ef34e 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -16,7 +16,7 @@ extern void uv_system_init(void); extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, - unsigned end, + unsigned long end, unsigned int cpu); #else /* X86_UV */ diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index b8b3a37c80c..dbbdca5f508 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1034,7 +1034,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, * globally purge translation cache of a virtual address or all TLB's * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range - * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) + * @start: start virtual address to be removed from TLB + * @end: end virtual address to be remove from TLB * @cpu: the current cpu * * This is the entry point for initiating any UV global TLB shootdown. @@ -1056,7 +1057,7 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, */ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, unsigned long start, - unsigned end, unsigned int cpu) + unsigned long end, unsigned int cpu) { int locals = 0; int remotes = 0; @@ -1113,7 +1114,10 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, record_send_statistics(stat, locals, hubs, remotes, bau_desc); - bau_desc->payload.address = start; + if (!end || (end - start) <= PAGE_SIZE) + bau_desc->payload.address = start; + else + bau_desc->payload.address = TLB_FLUSH_ALL; bau_desc->payload.sending_cpu = cpu; /* * uv_flush_send_and_wait returns 0 if all cpu's were messaged, -- cgit v1.2.3 From ed8e47fefc668c1a90d90382517a77ff332a916c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 18 Dec 2012 12:22:17 -0800 Subject: x86/olpc: Fix olpc-xo1-sci.c build errors Fix build errors when CONFIG_INPUT=m. This is not pretty, but all of the OLPC kconfig options are bool instead of tristate. arch/x86/built-in.o: In function `send_lid_state': olpc-xo1-sci.c:(.text+0x1d323): undefined reference to `input_event' olpc-xo1-sci.c:(.text+0x1d338): undefined reference to `input_event' ... In the long run, fixing this driver kconfig to be tristate instead of bool would be a very good change. Signed-off-by: Randy Dunlap Cc: Andres Salomon Cc: Chris Ball Cc: Jon Nettleton Cc: Daniel Drake Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 79795af5981..225543bf45a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2138,6 +2138,7 @@ config OLPC_XO1_RTC config OLPC_XO1_SCI bool "OLPC XO-1 SCI extras" depends on OLPC && OLPC_XO1_PM + depends on INPUT=y select POWER_SUPPLY select GPIO_CS5535 select MFD_CORE -- cgit v1.2.3 From 73b664ceb5f815c38def1c68912b83f83455e9eb Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Fri, 16 Nov 2012 11:17:14 +0100 Subject: x86/dma-debug: Bump PREALLOC_DMA_DEBUG_ENTRIES I ran out of free entries when I had CONFIG_DMA_API_DEBUG enabled. Some other archs seem to default to 65536, so increase this limit for x86 too. Signed-off-by: Maarten Lankhorst Cc: Bjorn Helgaas Link: http://lkml.kernel.org/r/50A612AA.7040206@canonical.com Signed-off-by: Ingo Molnar ---- --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0f5dec5c80e..872079a67e4 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -56,7 +56,7 @@ struct device x86_dma_fallback_dev = { EXPORT_SYMBOL(x86_dma_fallback_dev); /* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES 32768 +#define PREALLOC_DMA_DEBUG_ENTRIES 65536 int dma_set_mask(struct device *dev, u64 mask) { -- cgit v1.2.3 From c903f0456bc69176912dee6dd25c6a66ee1aed00 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Nov 2012 13:06:22 +0000 Subject: x86/msr: Add capabilities check At the moment the MSR driver only relies upon file system checks. This means that anything as root with any capability set can write to MSRs. Historically that wasn't very interesting but on modern processors the MSRs are such that writing to them provides several ways to execute arbitary code in kernel space. Sample code and documentation on doing this is circulating and MSR attacks are used on Windows 64bit rootkits already. In the Linux case you still need to be able to open the device file so the impact is fairly limited and reduces the security of some capability and security model based systems down towards that of a generic "root owns the box" setup. Therefore they should require CAP_SYS_RAWIO to prevent an elevation of capabilities. The impact of this is fairly minimal on most setups because they don't have heavy use of capabilities. Those using SELinux, SMACK or AppArmor rules might want to consider if their rulesets on the MSR driver could be tighter. Signed-off-by: Alan Cox Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra Cc: Horses Signed-off-by: Ingo Molnar --- arch/x86/kernel/msr.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index a7c5661f849..4929502c137 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -174,6 +174,9 @@ static int msr_open(struct inode *inode, struct file *file) unsigned int cpu; struct cpuinfo_x86 *c; + if (!capable(CAP_SYS_RAWIO)) + return -EPERM; + cpu = iminor(file->f_path.dentry->d_inode); if (cpu >= nr_cpu_ids || !cpu_online(cpu)) return -ENXIO; /* No such CPU */ -- cgit v1.2.3 From 83e68189745ad931c2afd45d8ee3303929233e7f Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Wed, 14 Nov 2012 09:42:35 +0000 Subject: efi: Make 'efi_enabled' a function to query EFI facilities Originally 'efi_enabled' indicated whether a kernel was booted from EFI firmware. Over time its semantics have changed, and it now indicates whether or not we are booted on an EFI machine with bit-native firmware, e.g. 64-bit kernel with 64-bit firmware. The immediate motivation for this patch is the bug report at, https://bugs.launchpad.net/ubuntu-cdimage/+bug/1040557 which details how running a platform driver on an EFI machine that is designed to run under BIOS can cause the machine to become bricked. Also, the following report, https://bugzilla.kernel.org/show_bug.cgi?id=47121 details how running said driver can also cause Machine Check Exceptions. Drivers need a new means of detecting whether they're running on an EFI machine, as sadly the expression, if (!efi_enabled) hasn't been a sufficient condition for quite some time. Users actually want to query 'efi_enabled' for different reasons - what they really want access to is the list of available EFI facilities. For instance, the x86 reboot code needs to know whether it can invoke the ResetSystem() function provided by the EFI runtime services, while the ACPI OSL code wants to know whether the EFI config tables were mapped successfully. There are also checks in some of the platform driver code to simply see if they're running on an EFI machine (which would make it a bad idea to do BIOS-y things). This patch is a prereq for the samsung-laptop fix patch. Cc: David Airlie Cc: Corentin Chary Cc: Matthew Garrett Cc: Dave Jiang Cc: Olof Johansson Cc: Peter Jones Cc: Colin Ian King Cc: Steve Langasek Cc: Tony Luck Cc: Konrad Rzeszutek Wilk Cc: Rafael J. Wysocki Cc: Signed-off-by: Matt Fleming Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/efi.h | 1 + arch/x86/kernel/reboot.c | 2 +- arch/x86/kernel/setup.c | 28 +++++++++++----------- arch/x86/platform/efi/efi.c | 57 +++++++++++++++++++++++++++------------------ 4 files changed, 50 insertions(+), 38 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 6e8fdf5ad11..28677c55113 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -94,6 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, #endif /* CONFIG_X86_32 */ extern int add_efi_memmap; +extern unsigned long x86_efi_facility; extern void efi_set_executable(efi_memory_desc_t *md, bool executable); extern int efi_memblock_x86_reserve_range(void); extern void efi_call_phys_prelog(void); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4e8ba39eaf0..76fa1e9a2b3 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void) break; case BOOT_EFI: - if (efi_enabled) + if (efi_enabled(EFI_RUNTIME_SERVICES)) efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 00f6c1472b8..8b24289cc10 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -807,15 +807,15 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_EFI if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, "EL32", 4)) { - efi_enabled = 1; - efi_64bit = false; + set_bit(EFI_BOOT, &x86_efi_facility); } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, "EL64", 4)) { - efi_enabled = 1; - efi_64bit = true; + set_bit(EFI_BOOT, &x86_efi_facility); + set_bit(EFI_64BIT, &x86_efi_facility); } - if (efi_enabled && efi_memblock_x86_reserve_range()) - efi_enabled = 0; + + if (efi_enabled(EFI_BOOT)) + efi_memblock_x86_reserve_range(); #endif x86_init.oem.arch_setup(); @@ -888,7 +888,7 @@ void __init setup_arch(char **cmdline_p) finish_e820_parsing(); - if (efi_enabled) + if (efi_enabled(EFI_BOOT)) efi_init(); dmi_scan_machine(); @@ -971,7 +971,7 @@ void __init setup_arch(char **cmdline_p) * The EFI specification says that boot service code won't be called * after ExitBootServices(). This is, in fact, a lie. */ - if (efi_enabled) + if (efi_enabled(EFI_MEMMAP)) efi_reserve_boot_services(); /* preallocate 4k for mptable mpc */ @@ -1114,7 +1114,7 @@ void __init setup_arch(char **cmdline_p) #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) - if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) + if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY)) conswitchp = &vga_con; #elif defined(CONFIG_DUMMY_CONSOLE) conswitchp = &dummy_con; @@ -1131,14 +1131,14 @@ void __init setup_arch(char **cmdline_p) register_refined_jiffies(CLOCK_TICK_RATE); #ifdef CONFIG_EFI - /* Once setup is done above, disable efi_enabled on mismatched - * firmware/kernel archtectures since there is no support for - * runtime services. + /* Once setup is done above, unmap the EFI memory map on + * mismatched firmware/kernel archtectures since there is no + * support for runtime services. */ - if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) { + if (efi_enabled(EFI_BOOT) && + IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) { pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); efi_unmap_memmap(); - efi_enabled = 0; } #endif } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index ad4439145f8..5426e482db6 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -51,9 +51,6 @@ #define EFI_DEBUG 1 -int efi_enabled; -EXPORT_SYMBOL(efi_enabled); - struct efi __read_mostly efi = { .mps = EFI_INVALID_TABLE_ADDR, .acpi = EFI_INVALID_TABLE_ADDR, @@ -69,19 +66,28 @@ EXPORT_SYMBOL(efi); struct efi_memory_map memmap; -bool efi_64bit; - static struct efi efi_phys __initdata; static efi_system_table_t efi_systab __initdata; static inline bool efi_is_native(void) { - return IS_ENABLED(CONFIG_X86_64) == efi_64bit; + return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); +} + +unsigned long x86_efi_facility; + +/* + * Returns 1 if 'facility' is enabled, 0 otherwise. + */ +int efi_enabled(int facility) +{ + return test_bit(facility, &x86_efi_facility) != 0; } +EXPORT_SYMBOL(efi_enabled); static int __init setup_noefi(char *arg) { - efi_enabled = 0; + clear_bit(EFI_BOOT, &x86_efi_facility); return 0; } early_param("noefi", setup_noefi); @@ -426,6 +432,7 @@ void __init efi_reserve_boot_services(void) void __init efi_unmap_memmap(void) { + clear_bit(EFI_MEMMAP, &x86_efi_facility); if (memmap.map) { early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); memmap.map = NULL; @@ -460,7 +467,7 @@ void __init efi_free_boot_services(void) static int __init efi_systab_init(void *phys) { - if (efi_64bit) { + if (efi_enabled(EFI_64BIT)) { efi_system_table_64_t *systab64; u64 tmp = 0; @@ -552,7 +559,7 @@ static int __init efi_config_init(u64 tables, int nr_tables) void *config_tables, *tablep; int i, sz; - if (efi_64bit) + if (efi_enabled(EFI_64BIT)) sz = sizeof(efi_config_table_64_t); else sz = sizeof(efi_config_table_32_t); @@ -572,7 +579,7 @@ static int __init efi_config_init(u64 tables, int nr_tables) efi_guid_t guid; unsigned long table; - if (efi_64bit) { + if (efi_enabled(EFI_64BIT)) { u64 table64; guid = ((efi_config_table_64_t *)tablep)->guid; table64 = ((efi_config_table_64_t *)tablep)->table; @@ -684,7 +691,6 @@ void __init efi_init(void) if (boot_params.efi_info.efi_systab_hi || boot_params.efi_info.efi_memmap_hi) { pr_info("Table located above 4GB, disabling EFI.\n"); - efi_enabled = 0; return; } efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab; @@ -694,10 +700,10 @@ void __init efi_init(void) ((__u64)boot_params.efi_info.efi_systab_hi<<32)); #endif - if (efi_systab_init(efi_phys.systab)) { - efi_enabled = 0; + if (efi_systab_init(efi_phys.systab)) return; - } + + set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); /* * Show what we know for posterity @@ -715,10 +721,10 @@ void __init efi_init(void) efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); - if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) { - efi_enabled = 0; + if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) return; - } + + set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); /* * Note: We currently don't support runtime services on an EFI @@ -727,15 +733,17 @@ void __init efi_init(void) if (!efi_is_native()) pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); - else if (efi_runtime_init()) { - efi_enabled = 0; - return; + else { + if (efi_runtime_init()) + return; + set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); } - if (efi_memmap_init()) { - efi_enabled = 0; + if (efi_memmap_init()) return; - } + + set_bit(EFI_MEMMAP, &x86_efi_facility); + #ifdef CONFIG_X86_32 if (efi_is_native()) { x86_platform.get_wallclock = efi_get_time; @@ -969,6 +977,9 @@ u32 efi_mem_type(unsigned long phys_addr) efi_memory_desc_t *md; void *p; + if (!efi_enabled(EFI_MEMMAP)) + return 0; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; if ((md->phys_addr <= phys_addr) && -- cgit v1.2.3