summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig72
-rw-r--r--arch/x86/Kconfig.cpu73
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/Makefile_32.cpu1
-rw-r--r--arch/x86/boot/.gitignore1
-rw-r--r--arch/x86/boot/Makefile4
-rw-r--r--arch/x86/boot/compressed/eboot.c123
-rw-r--r--arch/x86/boot/compressed/head_32.S8
-rw-r--r--arch/x86/boot/compressed/head_64.S8
-rw-r--r--arch/x86/boot/compressed/misc.c2
-rw-r--r--arch/x86/boot/compressed/misc.h1
-rw-r--r--arch/x86/boot/header.S39
-rw-r--r--arch/x86/boot/setup.ld2
-rw-r--r--arch/x86/boot/tools/build.c81
-rw-r--r--arch/x86/crypto/Makefile5
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S1102
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c558
-rw-r--r--arch/x86/crypto/camellia_glue.c92
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S348
-rw-r--r--arch/x86/crypto/cast5_avx_glue.c79
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S206
-rw-r--r--arch/x86/crypto/cast6_avx_glue.c77
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c (renamed from arch/x86/crypto/crc32c-intel.c)81
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S460
-rw-r--r--arch/x86/crypto/glue_helper-asm-avx.S91
-rw-r--r--arch/x86/crypto/glue_helper.c12
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S166
-rw-r--r--arch/x86/crypto/serpent_avx_glue.c49
-rw-r--r--arch/x86/crypto/serpent_sse2_glue.c12
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S208
-rw-r--r--arch/x86/crypto/twofish_avx_glue.c73
-rw-r--r--arch/x86/crypto/twofish_glue_3way.c20
-rw-r--r--arch/x86/ia32/ia32_aout.c5
-rw-r--r--arch/x86/ia32/ia32_signal.c55
-rw-r--r--arch/x86/ia32/ia32entry.S12
-rw-r--r--arch/x86/ia32/sys_ia32.c11
-rw-r--r--arch/x86/include/asm/Kbuild26
-rw-r--r--arch/x86/include/asm/atomic.h16
-rw-r--r--arch/x86/include/asm/boot.h9
-rw-r--r--arch/x86/include/asm/bootparam_utils.h38
-rw-r--r--arch/x86/include/asm/clocksource.h1
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h55
-rw-r--r--arch/x86/include/asm/context_tracking.h (renamed from arch/x86/include/asm/rcu.h)15
-rw-r--r--arch/x86/include/asm/cpu.h4
-rw-r--r--arch/x86/include/asm/cpufeature.h10
-rw-r--r--arch/x86/include/asm/crypto/camellia.h82
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h28
-rw-r--r--arch/x86/include/asm/crypto/serpent-avx.h27
-rw-r--r--arch/x86/include/asm/crypto/twofish.h4
-rw-r--r--arch/x86/include/asm/debugreg.h79
-rw-r--r--arch/x86/include/asm/device.h3
-rw-r--r--arch/x86/include/asm/dma-mapping.h1
-rw-r--r--arch/x86/include/asm/e820.h74
-rw-r--r--arch/x86/include/asm/efi.h1
-rw-r--r--arch/x86/include/asm/elf.h6
-rw-r--r--arch/x86/include/asm/fixmap.h5
-rw-r--r--arch/x86/include/asm/ftrace.h1
-rw-r--r--arch/x86/include/asm/futex.h12
-rw-r--r--arch/x86/include/asm/hpet.h5
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h5
-rw-r--r--arch/x86/include/asm/hw_irq.h13
-rw-r--r--arch/x86/include/asm/hypervisor.h13
-rw-r--r--arch/x86/include/asm/ia32.h10
-rw-r--r--arch/x86/include/asm/io_apic.h28
-rw-r--r--arch/x86/include/asm/irq_remapping.h40
-rw-r--r--arch/x86/include/asm/ist.h17
-rw-r--r--arch/x86/include/asm/kexec.h3
-rw-r--r--arch/x86/include/asm/kvm_guest.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h24
-rw-r--r--arch/x86/include/asm/kvm_para.h107
-rw-r--r--arch/x86/include/asm/linkage.h18
-rw-r--r--arch/x86/include/asm/local.h18
-rw-r--r--arch/x86/include/asm/mce.h72
-rw-r--r--arch/x86/include/asm/module.h2
-rw-r--r--arch/x86/include/asm/msr.h11
-rw-r--r--arch/x86/include/asm/mtrr.h93
-rw-r--r--arch/x86/include/asm/numachip/numachip.h19
-rw-r--r--arch/x86/include/asm/paravirt.h2
-rw-r--r--arch/x86/include/asm/parport.h4
-rw-r--r--arch/x86/include/asm/pci.h15
-rw-r--r--arch/x86/include/asm/pci_x86.h7
-rw-r--r--arch/x86/include/asm/percpu.h3
-rw-r--r--arch/x86/include/asm/perf_event.h13
-rw-r--r--arch/x86/include/asm/pgtable.h34
-rw-r--r--arch/x86/include/asm/pgtable_32.h7
-rw-r--r--arch/x86/include/asm/pgtable_64.h3
-rw-r--r--arch/x86/include/asm/pgtable_types.h20
-rw-r--r--arch/x86/include/asm/posix_types.h10
-rw-r--r--arch/x86/include/asm/processor-flags.h97
-rw-r--r--arch/x86/include/asm/processor.h37
-rw-r--r--arch/x86/include/asm/ptrace.h91
-rw-r--r--arch/x86/include/asm/pvclock.h47
-rw-r--r--arch/x86/include/asm/required-features.h8
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sigcontext.h216
-rw-r--r--arch/x86/include/asm/signal.h142
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/svm.h132
-rw-r--r--arch/x86/include/asm/sys_ia32.h4
-rw-r--r--arch/x86/include/asm/syscalls.h12
-rw-r--r--arch/x86/include/asm/tlbflush.h3
-rw-r--r--arch/x86/include/asm/trace_clock.h20
-rw-r--r--arch/x86/include/asm/uaccess.h42
-rw-r--r--arch/x86/include/asm/unistd.h18
-rw-r--r--arch/x86/include/asm/uv/uv.h2
-rw-r--r--arch/x86/include/asm/vm86.h128
-rw-r--r--arch/x86/include/asm/vmx.h92
-rw-r--r--arch/x86/include/asm/vsyscall.h34
-rw-r--r--arch/x86/include/asm/x86_init.h27
-rw-r--r--arch/x86/include/asm/xen/interface.h1
-rw-r--r--arch/x86/include/asm/xor.h491
-rw-r--r--arch/x86/include/asm/xor_32.h309
-rw-r--r--arch/x86/include/asm/xor_64.h305
-rw-r--r--arch/x86/include/uapi/asm/Kbuild58
-rw-r--r--arch/x86/include/uapi/asm/a.out.h (renamed from arch/x86/include/asm/a.out.h)0
-rw-r--r--arch/x86/include/uapi/asm/auxvec.h (renamed from arch/x86/include/asm/auxvec.h)0
-rw-r--r--arch/x86/include/uapi/asm/bitsperlong.h (renamed from arch/x86/include/asm/bitsperlong.h)0
-rw-r--r--arch/x86/include/uapi/asm/boot.h10
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h (renamed from arch/x86/include/asm/bootparam.h)62
-rw-r--r--arch/x86/include/uapi/asm/byteorder.h (renamed from arch/x86/include/asm/byteorder.h)0
-rw-r--r--arch/x86/include/uapi/asm/debugreg.h80
-rw-r--r--arch/x86/include/uapi/asm/e820.h75
-rw-r--r--arch/x86/include/uapi/asm/errno.h (renamed from arch/x86/include/asm/errno.h)0
-rw-r--r--arch/x86/include/uapi/asm/fcntl.h (renamed from arch/x86/include/asm/fcntl.h)0
-rw-r--r--arch/x86/include/uapi/asm/hw_breakpoint.h1
-rw-r--r--arch/x86/include/uapi/asm/hyperv.h (renamed from arch/x86/include/asm/hyperv.h)0
-rw-r--r--arch/x86/include/uapi/asm/ioctl.h (renamed from arch/x86/include/asm/ioctl.h)0
-rw-r--r--arch/x86/include/uapi/asm/ioctls.h (renamed from arch/x86/include/asm/ioctls.h)0
-rw-r--r--arch/x86/include/uapi/asm/ipcbuf.h (renamed from arch/x86/include/asm/ipcbuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/ist.h29
-rw-r--r--arch/x86/include/uapi/asm/kvm.h (renamed from arch/x86/include/asm/kvm.h)0
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h100
-rw-r--r--arch/x86/include/uapi/asm/ldt.h (renamed from arch/x86/include/asm/ldt.h)0
-rw-r--r--arch/x86/include/uapi/asm/mce.h34
-rw-r--r--arch/x86/include/uapi/asm/mman.h (renamed from arch/x86/include/asm/mman.h)3
-rw-r--r--arch/x86/include/uapi/asm/msgbuf.h (renamed from arch/x86/include/asm/msgbuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h (renamed from arch/x86/include/asm/msr-index.h)42
-rw-r--r--arch/x86/include/uapi/asm/msr.h15
-rw-r--r--arch/x86/include/uapi/asm/mtrr.h117
-rw-r--r--arch/x86/include/uapi/asm/param.h (renamed from arch/x86/include/asm/param.h)0
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h (renamed from arch/x86/include/asm/perf_regs.h)0
-rw-r--r--arch/x86/include/uapi/asm/poll.h (renamed from arch/x86/include/asm/poll.h)0
-rw-r--r--arch/x86/include/uapi/asm/posix_types.h9
-rw-r--r--arch/x86/include/uapi/asm/posix_types_32.h (renamed from arch/x86/include/asm/posix_types_32.h)0
-rw-r--r--arch/x86/include/uapi/asm/posix_types_64.h (renamed from arch/x86/include/asm/posix_types_64.h)0
-rw-r--r--arch/x86/include/uapi/asm/posix_types_x32.h (renamed from arch/x86/include/asm/posix_types_x32.h)0
-rw-r--r--arch/x86/include/uapi/asm/prctl.h (renamed from arch/x86/include/asm/prctl.h)0
-rw-r--r--arch/x86/include/uapi/asm/processor-flags.h99
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h (renamed from arch/x86/include/asm/ptrace-abi.h)0
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h78
-rw-r--r--arch/x86/include/uapi/asm/resource.h (renamed from arch/x86/include/asm/resource.h)0
-rw-r--r--arch/x86/include/uapi/asm/sembuf.h (renamed from arch/x86/include/asm/sembuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/setup.h1
-rw-r--r--arch/x86/include/uapi/asm/shmbuf.h (renamed from arch/x86/include/asm/shmbuf.h)0
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h221
-rw-r--r--arch/x86/include/uapi/asm/sigcontext32.h (renamed from arch/x86/include/asm/sigcontext32.h)0
-rw-r--r--arch/x86/include/uapi/asm/siginfo.h (renamed from arch/x86/include/asm/siginfo.h)0
-rw-r--r--arch/x86/include/uapi/asm/signal.h139
-rw-r--r--arch/x86/include/uapi/asm/socket.h (renamed from arch/x86/include/asm/socket.h)0
-rw-r--r--arch/x86/include/uapi/asm/sockios.h (renamed from arch/x86/include/asm/sockios.h)0
-rw-r--r--arch/x86/include/uapi/asm/stat.h (renamed from arch/x86/include/asm/stat.h)0
-rw-r--r--arch/x86/include/uapi/asm/statfs.h (renamed from arch/x86/include/asm/statfs.h)0
-rw-r--r--arch/x86/include/uapi/asm/svm.h132
-rw-r--r--arch/x86/include/uapi/asm/swab.h (renamed from arch/x86/include/asm/swab.h)29
-rw-r--r--arch/x86/include/uapi/asm/termbits.h (renamed from arch/x86/include/asm/termbits.h)0
-rw-r--r--arch/x86/include/uapi/asm/termios.h (renamed from arch/x86/include/asm/termios.h)0
-rw-r--r--arch/x86/include/uapi/asm/types.h (renamed from arch/x86/include/asm/types.h)0
-rw-r--r--arch/x86/include/uapi/asm/ucontext.h (renamed from arch/x86/include/asm/ucontext.h)0
-rw-r--r--arch/x86/include/uapi/asm/unistd.h17
-rw-r--r--arch/x86/include/uapi/asm/vm86.h129
-rw-r--r--arch/x86/include/uapi/asm/vmx.h109
-rw-r--r--arch/x86/include/uapi/asm/vsyscall.h17
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c12
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/apic/apic.c101
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c486
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c21
-rw-r--r--arch/x86/kernel/apm_32.c11
-rw-r--r--arch/x86/kernel/cpu/amd.c12
-rw-r--r--arch/x86/kernel/cpu/bugs.c41
-rw-r--r--arch/x86/kernel/cpu/common.c14
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c7
-rw-r--r--arch/x86/kernel/cpu/intel.c4
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c82
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c209
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c8
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c11
-rw-r--r--arch/x86/kernel/cpu/perf_event.c122
-rw-r--r--arch/x86/kernel/cpu/perf_event.h30
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd.c331
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c15
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c6
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c4
-rw-r--r--arch/x86/kernel/cpu/proc.c7
-rw-r--r--arch/x86/kernel/cpu/vmware.c13
-rw-r--r--arch/x86/kernel/crash.c32
-rw-r--r--arch/x86/kernel/entry_32.S20
-rw-r--r--arch/x86/kernel/entry_64.S69
-rw-r--r--arch/x86/kernel/head32.c3
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S115
-rw-r--r--arch/x86/kernel/head_64.S16
-rw-r--r--arch/x86/kernel/hpet.c6
-rw-r--r--arch/x86/kernel/i387.c6
-rw-r--r--arch/x86/kernel/irqinit.c40
-rw-r--r--arch/x86/kernel/kprobes/Makefile7
-rw-r--r--arch/x86/kernel/kprobes/common.h (renamed from arch/x86/kernel/kprobes-common.h)11
-rw-r--r--arch/x86/kernel/kprobes/core.c (renamed from arch/x86/kernel/kprobes.c)76
-rw-r--r--arch/x86/kernel/kprobes/ftrace.c93
-rw-r--r--arch/x86/kernel/kprobes/opt.c (renamed from arch/x86/kernel/kprobes-opt.c)2
-rw-r--r--arch/x86/kernel/kvm.c31
-rw-r--r--arch/x86/kernel/kvmclock.c88
-rw-r--r--arch/x86/kernel/msr.c3
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/process.c69
-rw-r--r--arch/x86/kernel/process_32.c12
-rw-r--r--arch/x86/kernel/process_64.c10
-rw-r--r--arch/x86/kernel/ptrace.c7
-rw-r--r--arch/x86/kernel/pvclock.c143
-rw-r--r--arch/x86/kernel/quirks.c4
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/rtc.c7
-rw-r--r--arch/x86/kernel/setup.c116
-rw-r--r--arch/x86/kernel/signal.c34
-rw-r--r--arch/x86/kernel/smpboot.c151
-rw-r--r--arch/x86/kernel/step.c9
-rw-r--r--arch/x86/kernel/sys_x86_64.c153
-rw-r--r--arch/x86/kernel/topology.c101
-rw-r--r--arch/x86/kernel/trace_clock.c21
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kernel/tsc.c9
-rw-r--r--arch/x86/kernel/uprobes.c58
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vsyscall_64.c110
-rw-r--r--arch/x86/kernel/x86_init.c24
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c5
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu.c65
-rw-r--r--arch/x86/kvm/paging_tmpl.h115
-rw-r--r--arch/x86/kvm/svm.c48
-rw-r--r--arch/x86/kvm/trace.h63
-rw-r--r--arch/x86/kvm/vmx.c203
-rw-r--r--arch/x86/kvm/x86.c572
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lguest/boot.c2
-rw-r--r--arch/x86/lib/Makefile1
-rw-r--r--arch/x86/lib/cmpxchg.c54
-rw-r--r--arch/x86/lib/copy_page_64.S120
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/usercopy_32.c57
-rw-r--r--arch/x86/mm/fault.c33
-rw-r--r--arch/x86/mm/hugetlbpage.c130
-rw-r--r--arch/x86/mm/init_32.c5
-rw-r--r--arch/x86/mm/init_64.c11
-rw-r--r--arch/x86/mm/pgtable.c10
-rw-r--r--arch/x86/mm/tlb.c8
-rw-r--r--arch/x86/net/bpf_jit_comp.c21
-rw-r--r--arch/x86/pci/Makefile1
-rw-r--r--arch/x86/pci/acpi.c57
-rw-r--r--arch/x86/pci/bus_numa.c4
-rw-r--r--arch/x86/pci/common.c61
-rw-r--r--arch/x86/pci/fixup.c30
-rw-r--r--arch/x86/pci/legacy.c4
-rw-r--r--arch/x86/pci/mmconfig-shared.c21
-rw-r--r--arch/x86/pci/mmconfig_32.c2
-rw-r--r--arch/x86/pci/mmconfig_64.c4
-rw-r--r--arch/x86/pci/mrst.c6
-rw-r--r--arch/x86/pci/numachip.c129
-rw-r--r--arch/x86/pci/numaq_32.c2
-rw-r--r--arch/x86/pci/pcbios.c4
-rw-r--r--arch/x86/platform/ce4100/ce4100.c3
-rw-r--r--arch/x86/platform/efi/efi-bgrt.c2
-rw-r--r--arch/x86/platform/efi/efi.c59
-rw-r--r--arch/x86/platform/efi/efi_64.c22
-rw-r--r--arch/x86/platform/iris/iris.c67
-rw-r--r--arch/x86/platform/mrst/mrst.c2
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-pm.c8
-rw-r--r--arch/x86/platform/olpc/olpc-xo1-sci.c18
-rw-r--r--arch/x86/platform/scx200/scx200_32.c6
-rw-r--r--arch/x86/platform/uv/tlb_uv.c10
-rw-r--r--arch/x86/power/cpu.c82
-rw-r--r--arch/x86/syscalls/syscall_32.tbl9
-rw-r--r--arch/x86/syscalls/syscall_64.tbl5
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk6
-rw-r--r--arch/x86/tools/insn_sanity.c10
-rw-r--r--arch/x86/tools/relocs.c6
-rw-r--r--arch/x86/um/Kconfig6
-rw-r--r--arch/x86/um/asm/ptrace.h1
-rw-r--r--arch/x86/um/shared/sysdep/syscalls.h2
-rw-r--r--arch/x86/um/signal.c9
-rw-r--r--arch/x86/um/sys_call_table_32.c4
-rw-r--r--arch/x86/um/sys_call_table_64.c1
-rw-r--r--arch/x86/um/syscalls_32.c15
-rw-r--r--arch/x86/vdso/vclock_gettime.c83
-rw-r--r--arch/x86/vdso/vgetcpu.c11
-rw-r--r--arch/x86/vdso/vma.c2
-rw-r--r--arch/x86/xen/Kconfig3
-rw-r--r--arch/x86/xen/enlighten.c33
-rw-r--r--arch/x86/xen/mmu.c17
-rw-r--r--arch/x86/xen/smp.c9
-rw-r--r--arch/x86/xen/xen-asm_32.S14
308 files changed, 9569 insertions, 5190 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 9084c7bdc65..5e05ee3a981 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -22,11 +22,12 @@ config X86
def_bool y
select HAVE_AOUT if X86_32
select HAVE_UNSTABLE_SCHED_CLOCK
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_WANTS_PROT_NUMA_PROT_NONE
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PCSPKR_PLATFORM
select HAVE_PERF_EVENTS
- select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
select HAVE_MEMBLOCK
@@ -38,10 +39,12 @@ config X86
select HAVE_DMA_CONTIGUOUS if !SWIOTLB
select HAVE_KRETPROBES
select HAVE_OPTPROBES
+ select HAVE_KPROBES_ON_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FENTRY if X86_64
select HAVE_C_RECORDMCOUNT
select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_FP_TEST
@@ -69,8 +72,8 @@ config X86
select HAVE_PERF_USER_STACK_DUMP
select HAVE_DEBUG_KMEMLEAK
select ANON_INODES
- select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
- select HAVE_CMPXCHG_LOCAL if !M386
+ select HAVE_ALIGNED_STRUCT_PAGE if SLUB
+ select HAVE_CMPXCHG_LOCAL
select HAVE_CMPXCHG_DOUBLE
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
@@ -104,14 +107,16 @@ config X86
select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
select GENERIC_TIME_VSYSCALL if X86_64
select KTIME_SCALAR if X86_32
+ select ALWAYS_USE_PERSISTENT_CLOCK
select GENERIC_STRNCPY_FROM_USER
select GENERIC_STRNLEN_USER
- select HAVE_RCU_USER_QS if X86_64
+ select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
- select GENERIC_KERNEL_THREAD
- select GENERIC_KERNEL_EXECVE
select MODULES_USE_ELF_REL if X86_32
select MODULES_USE_ELF_RELA if X86_64
+ select CLONE_BACKWARDS if X86_32
+ select GENERIC_SIGALTSTACK
+ select ARCH_USE_BUILTIN_BSWAP
config INSTRUCTION_DECODER
def_bool y
@@ -171,13 +176,8 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
- depends on !X86_XADD
-
config RWSEM_XCHGADD_ALGORITHM
def_bool y
- depends on X86_XADD
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -310,7 +310,7 @@ config X86_X2APIC
If you don't know what to do here, say N.
config X86_MPPARSE
- bool "Enable MPS table" if ACPI
+ bool "Enable MPS table" if ACPI || SFI
default y
depends on X86_LOCAL_APIC
---help---
@@ -374,6 +374,7 @@ config X86_NUMACHIP
depends on NUMA
depends on SMP
depends on X86_X2APIC
+ depends on PCI_MMCONFIG
---help---
Adds support for Numascale NumaChip large-SMP systems. Needed to
enable more than ~168 cores.
@@ -1100,7 +1101,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
- depends on !M386 && !M486
+ depends on !M486
select X86_PAE
---help---
Select this if you have a 32-bit processor and more than 4
@@ -1698,6 +1699,50 @@ config HOTPLUG_CPU
automatically on SMP systems. )
Say N if you want to disable CPU hotplug.
+config BOOTPARAM_HOTPLUG_CPU0
+ bool "Set default setting of cpu0_hotpluggable"
+ default n
+ depends on HOTPLUG_CPU && EXPERIMENTAL
+ ---help---
+ Set whether default state of cpu0_hotpluggable is on or off.
+
+ Say Y here to enable CPU0 hotplug by default. If this switch
+ is turned on, there is no need to give cpu0_hotplug kernel
+ parameter and the CPU0 hotplug feature is enabled by default.
+
+ Please note: there are two known CPU0 dependencies if you want
+ to enable the CPU0 hotplug feature either by this switch or by
+ cpu0_hotplug kernel parameter.
+
+ First, resume from hibernate or suspend always starts from CPU0.
+ So hibernate and suspend are prevented if CPU0 is offline.
+
+ Second dependency is PIC interrupts always go to CPU0. CPU0 can not
+ offline if any interrupt can not migrate out of CPU0. There may
+ be other CPU0 dependencies.
+
+ Please make sure the dependencies are under your control before
+ you enable this feature.
+
+ Say N if you don't want to enable CPU0 hotplug feature by default.
+ You still can enable the CPU0 hotplug feature at boot by kernel
+ parameter cpu0_hotplug.
+
+config DEBUG_HOTPLUG_CPU0
+ def_bool n
+ prompt "Debug CPU0 hotplug"
+ depends on HOTPLUG_CPU && EXPERIMENTAL
+ ---help---
+ Enabling this option offlines CPU0 (if CPU0 can be offlined) as
+ soon as possible and boots up userspace with CPU0 offlined. User
+ can online CPU0 back after boot time.
+
+ To debug CPU0 hotplug, you need to enable CPU0 offline/online
+ feature by either turning on CONFIG_BOOTPARAM_HOTPLUG_CPU0 during
+ compilation or giving cpu0_hotplug kernel parameter at boot.
+
+ If unsure, say N.
+
config COMPAT_VDSO
def_bool y
prompt "Compat VDSO support"
@@ -2096,6 +2141,7 @@ config OLPC_XO1_RTC
config OLPC_XO1_SCI
bool "OLPC XO-1 SCI extras"
depends on OLPC && OLPC_XO1_PM
+ depends on INPUT=y
select POWER_SUPPLY
select GPIO_CS5535
select MFD_CORE
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index f3b86d0df44..c026cca5602 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -4,23 +4,24 @@ choice
default M686 if X86_32
default GENERIC_CPU if X86_64
-config M386
- bool "386"
- depends on X86_32 && !UML
+config M486
+ bool "486"
+ depends on X86_32
---help---
- This is the processor type of your CPU. This information is used for
- optimizing purposes. In order to compile a kernel that can run on
- all x86 CPU types (albeit not optimally fast), you can specify
- "386" here.
+ This is the processor type of your CPU. This information is
+ used for optimizing purposes. In order to compile a kernel
+ that can run on all supported x86 CPU types (albeit not
+ optimally fast), you can specify "486" here.
+
+ Note that the 386 is no longer supported, this includes
+ AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI 486DLC/DLC2,
+ UMC 486SX-S and the NexGen Nx586.
The kernel will not necessarily run on earlier architectures than
the one you have chosen, e.g. a Pentium optimized kernel will run on
a PPro, but not necessarily on a i486.
Here are the settings recommended for greatest speed:
- - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
- 486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386
- class machine.
- "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
- "586" for generic Pentium CPUs lacking the TSC
@@ -43,16 +44,7 @@ config M386
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
- "VIA C7" for VIA C7.
- If you don't know what to do, choose "386".
-
-config M486
- bool "486"
- depends on X86_32
- ---help---
- Select this for a 486 series processor, either Intel or one of the
- compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
- DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
- U5S.
+ If you don't know what to do, choose "486".
config M586
bool "586/K5/5x86/6x86/6x86MX"
@@ -305,24 +297,16 @@ config X86_INTERNODE_CACHE_SHIFT
default "12" if X86_VSMP
default X86_L1_CACHE_SHIFT
-config X86_CMPXCHG
- def_bool y
- depends on X86_64 || (X86_32 && !M386)
-
config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
- default "4" if MELAN || M486 || M386 || MGEODEGX1
+ default "4" if MELAN || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
-config X86_XADD
- def_bool y
- depends on !M386
-
config X86_PPRO_FENCE
bool "PentiumPro memory ordering errata workaround"
- depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
+ depends on M686 || M586MMX || M586TSC || M586 || M486 || MGEODEGX1
---help---
Old PentiumPro multiprocessor systems had errata that could cause
memory operations to violate the x86 ordering standard in rare cases.
@@ -335,27 +319,11 @@ config X86_PPRO_FENCE
config X86_F00F_BUG
def_bool y
- depends on M586MMX || M586TSC || M586 || M486 || M386
+ depends on M586MMX || M586TSC || M586 || M486
config X86_INVD_BUG
def_bool y
- depends on M486 || M386
-
-config X86_WP_WORKS_OK
- def_bool y
- depends on !M386
-
-config X86_INVLPG
- def_bool y
- depends on X86_32 && !M386
-
-config X86_BSWAP
- def_bool y
- depends on X86_32 && !M386
-
-config X86_POPAD_OK
- def_bool y
- depends on X86_32 && !M386
+ depends on M486
config X86_ALIGNMENT_16
def_bool y
@@ -412,12 +380,11 @@ config X86_MINIMUM_CPU_FAMILY
default "64" if X86_64
default "6" if X86_32 && X86_P6_NOP
default "5" if X86_32 && X86_CMPXCHG64
- default "4" if X86_32 && (X86_XADD || X86_CMPXCHG || X86_BSWAP || X86_WP_WORKS_OK)
- default "3"
+ default "4"
config X86_DEBUGCTLMSR
def_bool y
- depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML
+ depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EXPERT
@@ -441,7 +408,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
- depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
+ depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors
@@ -495,7 +462,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
- depends on M386 || M486 || (EXPERT && !64BIT)
+ depends on M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index fa981ca1bfe..5c477260294 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -127,9 +127,10 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTI
# does binutils support specific instructions?
asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
+avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
-KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
-KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr)
+KBUILD_AFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
+KBUILD_CFLAGS += $(cfi) $(cfi-sigframe) $(cfi-sections) $(asinstr) $(avx_instr) $(avx2_instr)
LDFLAGS := -m elf_$(UTS_MACHINE)
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 86cee7b749e..6647ed49c66 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -10,7 +10,6 @@ tune = $(call cc-option,-mcpu=$(1),$(2))
endif
align := $(cc-option-align)
-cflags-$(CONFIG_M386) += -march=i386
cflags-$(CONFIG_M486) += -march=i486
cflags-$(CONFIG_M586) += -march=i586
cflags-$(CONFIG_M586TSC) += -march=i586
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 851fe936d24..e3cf9f682be 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -2,7 +2,6 @@ bootsect
bzImage
cpustr.h
mkcpustr
-offsets.h
voffset.h
zoffset.h
setup
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ccce0ed67dd..379814bc41e 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -71,7 +71,7 @@ GCOV_PROFILE := n
$(obj)/bzImage: asflags-y := $(SVGA_MODE)
quiet_cmd_image = BUILD $@
-cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin > $@
+cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/zoffset.h > $@
$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE
$(call if_changed,image)
@@ -92,7 +92,7 @@ targets += voffset.h
$(obj)/voffset.h: vmlinux FORCE
$(call if_changed,voffset)
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi_pe_entry\|efi_stub_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index e87b0cac14b..f8fa41190c3 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -8,6 +8,7 @@
* ----------------------------------------------------------------------- */
#include <linux/efi.h>
+#include <linux/pci.h>
#include <asm/efi.h>
#include <asm/setup.h>
#include <asm/desc.h>
@@ -245,6 +246,123 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
*size = len;
}
+static efi_status_t setup_efi_pci(struct boot_params *params)
+{
+ efi_pci_io_protocol *pci;
+ efi_status_t status;
+ void **pci_handle;
+ efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID;
+ unsigned long nr_pci, size = 0;
+ int i;
+ struct setup_data *data;
+
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ status = efi_call_phys5(sys_table->boottime->locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+ NULL, &size, pci_handle);
+
+ if (status == EFI_BUFFER_TOO_SMALL) {
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, size, &pci_handle);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_phys5(sys_table->boottime->locate_handle,
+ EFI_LOCATE_BY_PROTOCOL, &pci_proto,
+ NULL, &size, pci_handle);
+ }
+
+ if (status != EFI_SUCCESS)
+ goto free_handle;
+
+ nr_pci = size / sizeof(void *);
+ for (i = 0; i < nr_pci; i++) {
+ void *h = pci_handle[i];
+ uint64_t attributes;
+ struct pci_setup_rom *rom;
+
+ status = efi_call_phys3(sys_table->boottime->handle_protocol,
+ h, &pci_proto, &pci);
+
+ if (status != EFI_SUCCESS)
+ continue;
+
+ if (!pci)
+ continue;
+
+#ifdef CONFIG_X86_64
+ status = efi_call_phys4(pci->attributes, pci,
+ EfiPciIoAttributeOperationGet, 0,
+ &attributes);
+#else
+ status = efi_call_phys5(pci->attributes, pci,
+ EfiPciIoAttributeOperationGet, 0, 0,
+ &attributes);
+#endif
+ if (status != EFI_SUCCESS)
+ continue;
+
+ if (!pci->romimage || !pci->romsize)
+ continue;
+
+ size = pci->romsize + sizeof(*rom);
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, size, &rom);
+
+ if (status != EFI_SUCCESS)
+ continue;
+
+ rom->data.type = SETUP_PCI;
+ rom->data.len = size - sizeof(struct setup_data);
+ rom->data.next = 0;
+ rom->pcilen = pci->romsize;
+
+ status = efi_call_phys5(pci->pci.read, pci,
+ EfiPciIoWidthUint16, PCI_VENDOR_ID,
+ 1, &(rom->vendor));
+
+ if (status != EFI_SUCCESS)
+ goto free_struct;
+
+ status = efi_call_phys5(pci->pci.read, pci,
+ EfiPciIoWidthUint16, PCI_DEVICE_ID,
+ 1, &(rom->devid));
+
+ if (status != EFI_SUCCESS)
+ goto free_struct;
+
+ status = efi_call_phys5(pci->get_location, pci,
+ &(rom->segment), &(rom->bus),
+ &(rom->device), &(rom->function));
+
+ if (status != EFI_SUCCESS)
+ goto free_struct;
+
+ memcpy(rom->romdata, pci->romimage, pci->romsize);
+
+ if (data)
+ data->next = (unsigned long)rom;
+ else
+ params->hdr.setup_data = (unsigned long)rom;
+
+ data = (struct setup_data *)rom;
+
+ continue;
+ free_struct:
+ efi_call_phys1(sys_table->boottime->free_pool, rom);
+ }
+
+free_handle:
+ efi_call_phys1(sys_table->boottime->free_pool, pci_handle);
+ return status;
+}
+
/*
* See if we have Graphics Output Protocol
*/
@@ -316,10 +434,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
* Once we've found a GOP supporting ConOut,
* don't bother looking any further.
*/
+ first_gop = gop;
if (conout_found)
break;
-
- first_gop = gop;
}
}
@@ -1028,6 +1145,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
setup_graphics(boot_params);
+ setup_efi_pci(boot_params);
+
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index aa4aaf1b238..1e3184f6072 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -35,11 +35,11 @@ ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
jmp preferred_addr
- .balign 0x10
/*
* We don't need the return address, so set up the stack so
- * efi_main() can find its arugments.
+ * efi_main() can find its arguments.
*/
+ENTRY(efi_pe_entry)
add $0x4, %esp
call make_boot_params
@@ -50,8 +50,10 @@ ENTRY(startup_32)
pushl %eax
pushl %esi
pushl %ecx
+ sub $0x4, %esp
- .org 0x30,0x90
+ENTRY(efi_stub_entry)
+ add $0x4, %esp
call efi_main
cmpl $0, %eax
movl %eax, %esi
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 2c4b171eec3..f5d1aaa0dec 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -201,12 +201,12 @@ ENTRY(startup_64)
*/
#ifdef CONFIG_EFI_STUB
/*
- * The entry point for the PE/COFF executable is 0x210, so only
- * legacy boot loaders will execute this jmp.
+ * The entry point for the PE/COFF executable is efi_pe_entry, so
+ * only legacy boot loaders will execute this jmp.
*/
jmp preferred_addr
- .org 0x210
+ENTRY(efi_pe_entry)
mov %rcx, %rdi
mov %rdx, %rsi
pushq %rdi
@@ -218,7 +218,7 @@ ENTRY(startup_64)
popq %rsi
popq %rdi
- .org 0x230,0x90
+ENTRY(efi_stub_entry)
call efi_main
movq %rax,%rsi
cmpq $0,%rax
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 88f7ff6da40..7cb56c6ca35 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -325,6 +325,8 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
{
real_mode = rmode;
+ sanitize_boot_params(real_mode);
+
if (real_mode->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 0e6dc0ee0ee..674019d8e23 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -18,6 +18,7 @@
#include <asm/page.h>
#include <asm/boot.h>
#include <asm/bootparam.h>
+#include <asm/bootparam_utils.h>
#define BOOT_BOOT_H
#include "../ctype.h"
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 8c132a625b9..944ce595f76 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -21,6 +21,7 @@
#include <asm/e820.h>
#include <asm/page_types.h>
#include <asm/setup.h>
+#include <asm/bootparam.h>
#include "boot.h"
#include "voffset.h"
#include "zoffset.h"
@@ -255,6 +256,9 @@ section_table:
# header, from the old boot sector.
.section ".header", "a"
+ .globl sentinel
+sentinel: .byte 0xff, 0xff /* Used to detect broken loaders */
+
.globl hdr
hdr:
setup_sects: .byte 0 /* Filled in by build.c */
@@ -279,7 +283,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020b # header version number (>= 0x0105)
+ .word 0x020c # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -297,13 +301,7 @@ type_of_loader: .byte 0 # 0 means ancient bootloader, newer
# flags, unused bits must be zero (RFU) bit within loadflags
loadflags:
-LOADED_HIGH = 1 # If set, the kernel is loaded high
-CAN_USE_HEAP = 0x80 # If set, the loader also has set
- # heap_end_ptr to tell how much
- # space behind setup.S can be used for
- # heap purposes.
- # Only the loader knows what is free
- .byte LOADED_HIGH
+ .byte LOADED_HIGH # The kernel is to be loaded high
setup_move_size: .word 0x8000 # size to move, when setup is not
# loaded at 0x90000. We will move setup
@@ -369,7 +367,23 @@ relocatable_kernel: .byte 1
relocatable_kernel: .byte 0
#endif
min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment
-pad3: .word 0
+
+xloadflags:
+#ifdef CONFIG_X86_64
+# define XLF0 XLF_KERNEL_64 /* 64-bit kernel */
+#else
+# define XLF0 0
+#endif
+#ifdef CONFIG_EFI_STUB
+# ifdef CONFIG_X86_64
+# define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */
+# else
+# define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */
+# endif
+#else
+# define XLF23 0
+#endif
+ .word XLF0 | XLF23
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
#added with boot protocol
@@ -397,8 +411,13 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
#define INIT_SIZE VO_INIT_SIZE
#endif
init_size: .long INIT_SIZE # kernel initialization size
-handover_offset: .long 0x30 # offset to the handover
+handover_offset:
+#ifdef CONFIG_EFI_STUB
+ .long 0x30 # offset to the handover
# protocol entry point
+#else
+ .long 0
+#endif
# End of setup header #####################################################
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 03c0683636b..96a6c756353 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -13,7 +13,7 @@ SECTIONS
.bstext : { *(.bstext) }
.bsdata : { *(.bsdata) }
- . = 497;
+ . = 495;
.header : { *(.header) }
.entrytext : { *(.entrytext) }
.inittext : { *(.inittext) }
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index 4b8e165ee57..94c54465002 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -52,6 +52,10 @@ int is_big_kernel;
#define PECOFF_RELOC_RESERVE 0x20
+unsigned long efi_stub_entry;
+unsigned long efi_pe_entry;
+unsigned long startup_64;
+
/*----------------------------------------------------------------------*/
static const u32 crctab32[] = {
@@ -132,7 +136,7 @@ static void die(const char * str, ...)
static void usage(void)
{
- die("Usage: build setup system [> image]");
+ die("Usage: build setup system [zoffset.h] [> image]");
}
#ifdef CONFIG_EFI_STUB
@@ -206,30 +210,54 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz)
*/
put_unaligned_le32(file_sz - 512, &buf[pe_header + 0x1c]);
-#ifdef CONFIG_X86_32
/*
- * Address of entry point.
- *
- * The EFI stub entry point is +16 bytes from the start of
- * the .text section.
+ * Address of entry point for PE/COFF executable
*/
- put_unaligned_le32(text_start + 16, &buf[pe_header + 0x28]);
-#else
- /*
- * Address of entry point. startup_32 is at the beginning and
- * the 64-bit entry point (startup_64) is always 512 bytes
- * after. The EFI stub entry point is 16 bytes after that, as
- * the first instruction allows legacy loaders to jump over
- * the EFI stub initialisation
- */
- put_unaligned_le32(text_start + 528, &buf[pe_header + 0x28]);
-#endif /* CONFIG_X86_32 */
+ put_unaligned_le32(text_start + efi_pe_entry, &buf[pe_header + 0x28]);
update_pecoff_section_header(".text", text_start, text_sz);
}
#endif /* CONFIG_EFI_STUB */
+
+/*
+ * Parse zoffset.h and find the entry points. We could just #include zoffset.h
+ * but that would mean tools/build would have to be rebuilt every time. It's
+ * not as if parsing it is hard...
+ */
+#define PARSE_ZOFS(p, sym) do { \
+ if (!strncmp(p, "#define ZO_" #sym " ", 11+sizeof(#sym))) \
+ sym = strtoul(p + 11 + sizeof(#sym), NULL, 16); \
+} while (0)
+
+static void parse_zoffset(char *fname)
+{
+ FILE *file;
+ char *p;
+ int c;
+
+ file = fopen(fname, "r");
+ if (!file)
+ die("Unable to open `%s': %m", fname);
+ c = fread(buf, 1, sizeof(buf) - 1, file);
+ if (ferror(file))
+ die("read-error on `zoffset.h'");
+ buf[c] = 0;
+
+ p = (char *)buf;
+
+ while (p && *p) {
+ PARSE_ZOFS(p, efi_stub_entry);
+ PARSE_ZOFS(p, efi_pe_entry);
+ PARSE_ZOFS(p, startup_64);
+
+ p = strchr(p, '\n');
+ while (p && (*p == '\r' || *p == '\n'))
+ p++;
+ }
+}
+
int main(int argc, char ** argv)
{
unsigned int i, sz, setup_sectors;
@@ -241,7 +269,19 @@ int main(int argc, char ** argv)
void *kernel;
u32 crc = 0xffffffffUL;
- if (argc != 3)
+ /* Defaults for old kernel */
+#ifdef CONFIG_X86_32
+ efi_pe_entry = 0x10;
+ efi_stub_entry = 0x30;
+#else
+ efi_pe_entry = 0x210;
+ efi_stub_entry = 0x230;
+ startup_64 = 0x200;
+#endif
+
+ if (argc == 4)
+ parse_zoffset(argv[3]);
+ else if (argc != 3)
usage();
/* Copy the setup code */
@@ -299,6 +339,11 @@ int main(int argc, char ** argv)
#ifdef CONFIG_EFI_STUB
update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz));
+
+#ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */
+ efi_stub_entry -= 0x200;
+#endif
+ put_unaligned_le32(efi_stub_entry, &buf[0x264]);
#endif
crc = partial_crc32(buf, i, crc);
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 5bacb4a226a..e0ca7c9ac38 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
+obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += camellia-aesni-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
@@ -34,6 +35,8 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
+camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
+ camellia_aesni_avx_glue.o
cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
@@ -47,3 +50,5 @@ serpent-avx-x86_64-y := serpent-avx-x86_64-asm_64.o serpent_avx_glue.o
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o
+crc32c-intel-y := crc32c-intel_glue.o
+crc32c-intel-$(CONFIG_CRYPTO_CRC32C_X86_64) += crc32c-pcl-intel-asm_64.o
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
new file mode 100644
index 00000000000..2306d2e4816
--- /dev/null
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -0,0 +1,1102 @@
+/*
+ * x86_64/AVX/AES-NI assembler implementation of Camellia
+ *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+/*
+ * Version licensed under 2-clause BSD License is available at:
+ * http://koti.mbnet.fi/axh/crypto/camellia-BSD-1.2.0-aesni1.tar.xz
+ */
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct camellia_ctx: */
+#define key_table 0
+#define key_length CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+
+/**********************************************************************
+ 16-way camellia
+ **********************************************************************/
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/*
+ * IN:
+ * x0..x7: byte-sliced AB state
+ * mem_cd: register pointer storing CD state
+ * key: index for key material
+ * OUT:
+ * x0..x7: new byte-sliced CD state
+ */
+#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
+ t7, mem_cd, key) \
+ /* \
+ * S-function with AES subbytes \
+ */ \
+ vmovdqa .Linv_shift_row, t4; \
+ vbroadcastss .L0f0f0f0f, t7; \
+ vmovdqa .Lpre_tf_lo_s1, t0; \
+ vmovdqa .Lpre_tf_hi_s1, t1; \
+ \
+ /* AES inverse shift rows */ \
+ vpshufb t4, x0, x0; \
+ vpshufb t4, x7, x7; \
+ vpshufb t4, x1, x1; \
+ vpshufb t4, x4, x4; \
+ vpshufb t4, x2, x2; \
+ vpshufb t4, x5, x5; \
+ vpshufb t4, x3, x3; \
+ vpshufb t4, x6, x6; \
+ \
+ /* prefilter sboxes 1, 2 and 3 */ \
+ vmovdqa .Lpre_tf_lo_s4, t2; \
+ vmovdqa .Lpre_tf_hi_s4, t3; \
+ filter_8bit(x0, t0, t1, t7, t6); \
+ filter_8bit(x7, t0, t1, t7, t6); \
+ filter_8bit(x1, t0, t1, t7, t6); \
+ filter_8bit(x4, t0, t1, t7, t6); \
+ filter_8bit(x2, t0, t1, t7, t6); \
+ filter_8bit(x5, t0, t1, t7, t6); \
+ \
+ /* prefilter sbox 4 */ \
+ vpxor t4, t4, t4; \
+ filter_8bit(x3, t2, t3, t7, t6); \
+ filter_8bit(x6, t2, t3, t7, t6); \
+ \
+ /* AES subbytes + AES shift rows */ \
+ vmovdqa .Lpost_tf_lo_s1, t0; \
+ vmovdqa .Lpost_tf_hi_s1, t1; \
+ vaesenclast t4, x0, x0; \
+ vaesenclast t4, x7, x7; \
+ vaesenclast t4, x1, x1; \
+ vaesenclast t4, x4, x4; \
+ vaesenclast t4, x2, x2; \
+ vaesenclast t4, x5, x5; \
+ vaesenclast t4, x3, x3; \
+ vaesenclast t4, x6, x6; \
+ \
+ /* postfilter sboxes 1 and 4 */ \
+ vmovdqa .Lpost_tf_lo_s3, t2; \
+ vmovdqa .Lpost_tf_hi_s3, t3; \
+ filter_8bit(x0, t0, t1, t7, t6); \
+ filter_8bit(x7, t0, t1, t7, t6); \
+ filter_8bit(x3, t0, t1, t7, t6); \
+ filter_8bit(x6, t0, t1, t7, t6); \
+ \
+ /* postfilter sbox 3 */ \
+ vmovdqa .Lpost_tf_lo_s2, t4; \
+ vmovdqa .Lpost_tf_hi_s2, t5; \
+ filter_8bit(x2, t2, t3, t7, t6); \
+ filter_8bit(x5, t2, t3, t7, t6); \
+ \
+ vpxor t6, t6, t6; \
+ vmovq key, t0; \
+ \
+ /* postfilter sbox 2 */ \
+ filter_8bit(x1, t4, t5, t7, t2); \
+ filter_8bit(x4, t4, t5, t7, t2); \
+ \
+ vpsrldq $5, t0, t5; \
+ vpsrldq $1, t0, t1; \
+ vpsrldq $2, t0, t2; \
+ vpsrldq $3, t0, t3; \
+ vpsrldq $4, t0, t4; \
+ vpshufb t6, t0, t0; \
+ vpshufb t6, t1, t1; \
+ vpshufb t6, t2, t2; \
+ vpshufb t6, t3, t3; \
+ vpshufb t6, t4, t4; \
+ vpsrldq $2, t5, t7; \
+ vpshufb t6, t7, t7; \
+ \
+ /* \
+ * P-function \
+ */ \
+ vpxor x5, x0, x0; \
+ vpxor x6, x1, x1; \
+ vpxor x7, x2, x2; \
+ vpxor x4, x3, x3; \
+ \
+ vpxor x2, x4, x4; \
+ vpxor x3, x5, x5; \
+ vpxor x0, x6, x6; \
+ vpxor x1, x7, x7; \
+ \
+ vpxor x7, x0, x0; \
+ vpxor x4, x1, x1; \
+ vpxor x5, x2, x2; \
+ vpxor x6, x3, x3; \
+ \
+ vpxor x3, x4, x4; \
+ vpxor x0, x5, x5; \
+ vpxor x1, x6, x6; \
+ vpxor x2, x7, x7; /* note: high and low parts swapped */ \
+ \
+ /* \
+ * Add key material and result to CD (x becomes new CD) \
+ */ \
+ \
+ vpxor t3, x4, x4; \
+ vpxor 0 * 16(mem_cd), x4, x4; \
+ \
+ vpxor t2, x5, x5; \
+ vpxor 1 * 16(mem_cd), x5, x5; \
+ \
+ vpsrldq $1, t5, t3; \
+ vpshufb t6, t5, t5; \
+ vpshufb t6, t3, t6; \
+ \
+ vpxor t1, x6, x6; \
+ vpxor 2 * 16(mem_cd), x6, x6; \
+ \
+ vpxor t0, x7, x7; \
+ vpxor 3 * 16(mem_cd), x7, x7; \
+ \
+ vpxor t7, x0, x0; \
+ vpxor 4 * 16(mem_cd), x0, x0; \
+ \
+ vpxor t6, x1, x1; \
+ vpxor 5 * 16(mem_cd), x1, x1; \
+ \
+ vpxor t5, x2, x2; \
+ vpxor 6 * 16(mem_cd), x2, x2; \
+ \
+ vpxor t4, x3, x3; \
+ vpxor 7 * 16(mem_cd), x3, x3;
+
+/*
+ * Size optimization... with inlined roundsm16, binary would be over 5 times
+ * larger and would only be 0.5% faster (on sandy-bridge).
+ */
+.align 8
+roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+ roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
+ %rcx, (%r9));
+ ret;
+
+.align 8
+roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+ roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
+ %xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
+ %rax, (%r9));
+ ret;
+
+/*
+ * IN/OUT:
+ * x0..x7: byte-sliced AB state preloaded
+ * mem_ab: byte-sliced AB state in memory
+ * mem_cb: byte-sliced CD state in memory
+ */
+#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
+ leaq (key_table + (i) * 8)(CTX), %r9; \
+ call roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd; \
+ \
+ vmovdqu x4, 0 * 16(mem_cd); \
+ vmovdqu x5, 1 * 16(mem_cd); \
+ vmovdqu x6, 2 * 16(mem_cd); \
+ vmovdqu x7, 3 * 16(mem_cd); \
+ vmovdqu x0, 4 * 16(mem_cd); \
+ vmovdqu x1, 5 * 16(mem_cd); \
+ vmovdqu x2, 6 * 16(mem_cd); \
+ vmovdqu x3, 7 * 16(mem_cd); \
+ \
+ leaq (key_table + ((i) + (dir)) * 8)(CTX), %r9; \
+ call roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab; \
+ \
+ store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
+
+#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
+
+#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
+ /* Store new AB state */ \
+ vmovdqu x0, 0 * 16(mem_ab); \
+ vmovdqu x1, 1 * 16(mem_ab); \
+ vmovdqu x2, 2 * 16(mem_ab); \
+ vmovdqu x3, 3 * 16(mem_ab); \
+ vmovdqu x4, 4 * 16(mem_ab); \
+ vmovdqu x5, 5 * 16(mem_ab); \
+ vmovdqu x6, 6 * 16(mem_ab); \
+ vmovdqu x7, 7 * 16(mem_ab);
+
+#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
+
+#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
+
+/*
+ * IN:
+ * v0..3: byte-sliced 32-bit integers
+ * OUT:
+ * v0..3: (IN <<< 1)
+ */
+#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \
+ vpcmpgtb v0, zero, t0; \
+ vpaddb v0, v0, v0; \
+ vpabsb t0, t0; \
+ \
+ vpcmpgtb v1, zero, t1; \
+ vpaddb v1, v1, v1; \
+ vpabsb t1, t1; \
+ \
+ vpcmpgtb v2, zero, t2; \
+ vpaddb v2, v2, v2; \
+ vpabsb t2, t2; \
+ \
+ vpor t0, v1, v1; \
+ \
+ vpcmpgtb v3, zero, t0; \
+ vpaddb v3, v3, v3; \
+ vpabsb t0, t0; \
+ \
+ vpor t1, v2, v2; \
+ vpor t2, v3, v3; \
+ vpor t0, v0, v0;
+
+/*
+ * IN:
+ * r: byte-sliced AB state in memory
+ * l: byte-sliced CD state in memory
+ * OUT:
+ * x0..x7: new byte-sliced CD state
+ */
+#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
+ tt1, tt2, tt3, kll, klr, krl, krr) \
+ /* \
+ * t0 = kll; \
+ * t0 &= ll; \
+ * lr ^= rol32(t0, 1); \
+ */ \
+ vpxor tt0, tt0, tt0; \
+ vmovd kll, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpand l0, t0, t0; \
+ vpand l1, t1, t1; \
+ vpand l2, t2, t2; \
+ vpand l3, t3, t3; \
+ \
+ rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+ \
+ vpxor l4, t0, l4; \
+ vmovdqu l4, 4 * 16(l); \
+ vpxor l5, t1, l5; \
+ vmovdqu l5, 5 * 16(l); \
+ vpxor l6, t2, l6; \
+ vmovdqu l6, 6 * 16(l); \
+ vpxor l7, t3, l7; \
+ vmovdqu l7, 7 * 16(l); \
+ \
+ /* \
+ * t2 = krr; \
+ * t2 |= rr; \
+ * rl ^= t2; \
+ */ \
+ \
+ vmovd krr, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpor 4 * 16(r), t0, t0; \
+ vpor 5 * 16(r), t1, t1; \
+ vpor 6 * 16(r), t2, t2; \
+ vpor 7 * 16(r), t3, t3; \
+ \
+ vpxor 0 * 16(r), t0, t0; \
+ vpxor 1 * 16(r), t1, t1; \
+ vpxor 2 * 16(r), t2, t2; \
+ vpxor 3 * 16(r), t3, t3; \
+ vmovdqu t0, 0 * 16(r); \
+ vmovdqu t1, 1 * 16(r); \
+ vmovdqu t2, 2 * 16(r); \
+ vmovdqu t3, 3 * 16(r); \
+ \
+ /* \
+ * t2 = krl; \
+ * t2 &= rl; \
+ * rr ^= rol32(t2, 1); \
+ */ \
+ vmovd krl, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpand 0 * 16(r), t0, t0; \
+ vpand 1 * 16(r), t1, t1; \
+ vpand 2 * 16(r), t2, t2; \
+ vpand 3 * 16(r), t3, t3; \
+ \
+ rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+ \
+ vpxor 4 * 16(r), t0, t0; \
+ vpxor 5 * 16(r), t1, t1; \
+ vpxor 6 * 16(r), t2, t2; \
+ vpxor 7 * 16(r), t3, t3; \
+ vmovdqu t0, 4 * 16(r); \
+ vmovdqu t1, 5 * 16(r); \
+ vmovdqu t2, 6 * 16(r); \
+ vmovdqu t3, 7 * 16(r); \
+ \
+ /* \
+ * t0 = klr; \
+ * t0 |= lr; \
+ * ll ^= t0; \
+ */ \
+ \
+ vmovd klr, t0; \
+ vpshufb tt0, t0, t3; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t2; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t1; \
+ vpsrldq $1, t0, t0; \
+ vpshufb tt0, t0, t0; \
+ \
+ vpor l4, t0, t0; \
+ vpor l5, t1, t1; \
+ vpor l6, t2, t2; \
+ vpor l7, t3, t3; \
+ \
+ vpxor l0, t0, l0; \
+ vmovdqu l0, 0 * 16(l); \
+ vpxor l1, t1, l1; \
+ vmovdqu l1, 1 * 16(l); \
+ vpxor l2, t2, l2; \
+ vmovdqu l2, 2 * 16(l); \
+ vpxor l3, t3, l3; \
+ vmovdqu l3, 3 * 16(l);
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+ vpunpckhdq x1, x0, t2; \
+ vpunpckldq x1, x0, x0; \
+ \
+ vpunpckldq x3, x2, t1; \
+ vpunpckhdq x3, x2, x2; \
+ \
+ vpunpckhqdq t1, x0, x1; \
+ vpunpcklqdq t1, x0, x0; \
+ \
+ vpunpckhqdq x2, t2, x3; \
+ vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, a3, \
+ b3, c3, d3, st0, st1) \
+ vmovdqu d2, st0; \
+ vmovdqu d3, st1; \
+ transpose_4x4(a0, a1, a2, a3, d2, d3); \
+ transpose_4x4(b0, b1, b2, b3, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu a0, st0; \
+ vmovdqu a1, st1; \
+ transpose_4x4(c0, c1, c2, c3, a0, a1); \
+ transpose_4x4(d0, d1, d2, d3, a0, a1); \
+ \
+ vmovdqu .Lshufb_16x16b, a0; \
+ vmovdqu st1, a1; \
+ vpshufb a0, a2, a2; \
+ vpshufb a0, a3, a3; \
+ vpshufb a0, b0, b0; \
+ vpshufb a0, b1, b1; \
+ vpshufb a0, b2, b2; \
+ vpshufb a0, b3, b3; \
+ vpshufb a0, a1, a1; \
+ vpshufb a0, c0, c0; \
+ vpshufb a0, c1, c1; \
+ vpshufb a0, c2, c2; \
+ vpshufb a0, c3, c3; \
+ vpshufb a0, d0, d0; \
+ vpshufb a0, d1, d1; \
+ vpshufb a0, d2, d2; \
+ vpshufb a0, d3, d3; \
+ vmovdqu d3, st1; \
+ vmovdqu st0, d3; \
+ vpshufb a0, d3, a0; \
+ vmovdqu d2, st0; \
+ \
+ transpose_4x4(a0, b0, c0, d0, d2, d3); \
+ transpose_4x4(a1, b1, c1, d1, d2, d3); \
+ vmovdqu st0, d2; \
+ vmovdqu st1, d3; \
+ \
+ vmovdqu b0, st0; \
+ vmovdqu b1, st1; \
+ transpose_4x4(a2, b2, c2, d2, b0, b1); \
+ transpose_4x4(a3, b3, c3, d3, b0, b1); \
+ vmovdqu st0, b0; \
+ vmovdqu st1, b1; \
+ /* does not adjust output bytes inside vectors */
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, rio, key) \
+ vmovq key, x0; \
+ vpshufb .Lpack_bswap, x0, x0; \
+ \
+ vpxor 0 * 16(rio), x0, y7; \
+ vpxor 1 * 16(rio), x0, y6; \
+ vpxor 2 * 16(rio), x0, y5; \
+ vpxor 3 * 16(rio), x0, y4; \
+ vpxor 4 * 16(rio), x0, y3; \
+ vpxor 5 * 16(rio), x0, y2; \
+ vpxor 6 * 16(rio), x0, y1; \
+ vpxor 7 * 16(rio), x0, y0; \
+ vpxor 8 * 16(rio), x0, x7; \
+ vpxor 9 * 16(rio), x0, x6; \
+ vpxor 10 * 16(rio), x0, x5; \
+ vpxor 11 * 16(rio), x0, x4; \
+ vpxor 12 * 16(rio), x0, x3; \
+ vpxor 13 * 16(rio), x0, x2; \
+ vpxor 14 * 16(rio), x0, x1; \
+ vpxor 15 * 16(rio), x0, x0;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd) \
+ byteslice_16x16b(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
+ y5, y6, y7, (mem_ab), (mem_cd)); \
+ \
+ vmovdqu x0, 0 * 16(mem_ab); \
+ vmovdqu x1, 1 * 16(mem_ab); \
+ vmovdqu x2, 2 * 16(mem_ab); \
+ vmovdqu x3, 3 * 16(mem_ab); \
+ vmovdqu x4, 4 * 16(mem_ab); \
+ vmovdqu x5, 5 * 16(mem_ab); \
+ vmovdqu x6, 6 * 16(mem_ab); \
+ vmovdqu x7, 7 * 16(mem_ab); \
+ vmovdqu y0, 0 * 16(mem_cd); \
+ vmovdqu y1, 1 * 16(mem_cd); \
+ vmovdqu y2, 2 * 16(mem_cd); \
+ vmovdqu y3, 3 * 16(mem_cd); \
+ vmovdqu y4, 4 * 16(mem_cd); \
+ vmovdqu y5, 5 * 16(mem_cd); \
+ vmovdqu y6, 6 * 16(mem_cd); \
+ vmovdqu y7, 7 * 16(mem_cd);
+
+/* de-byteslice, apply post-whitening and store blocks */
+#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
+ y5, y6, y7, key, stack_tmp0, stack_tmp1) \
+ byteslice_16x16b(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, y3, \
+ y7, x3, x7, stack_tmp0, stack_tmp1); \
+ \
+ vmovdqu x0, stack_tmp0; \
+ \
+ vmovq key, x0; \
+ vpshufb .Lpack_bswap, x0, x0; \
+ \
+ vpxor x0, y7, y7; \
+ vpxor x0, y6, y6; \
+ vpxor x0, y5, y5; \
+ vpxor x0, y4, y4; \
+ vpxor x0, y3, y3; \
+ vpxor x0, y2, y2; \
+ vpxor x0, y1, y1; \
+ vpxor x0, y0, y0; \
+ vpxor x0, x7, x7; \
+ vpxor x0, x6, x6; \
+ vpxor x0, x5, x5; \
+ vpxor x0, x4, x4; \
+ vpxor x0, x3, x3; \
+ vpxor x0, x2, x2; \
+ vpxor x0, x1, x1; \
+ vpxor stack_tmp0, x0, x0;
+
+#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, rio) \
+ vmovdqu x0, 0 * 16(rio); \
+ vmovdqu x1, 1 * 16(rio); \
+ vmovdqu x2, 2 * 16(rio); \
+ vmovdqu x3, 3 * 16(rio); \
+ vmovdqu x4, 4 * 16(rio); \
+ vmovdqu x5, 5 * 16(rio); \
+ vmovdqu x6, 6 * 16(rio); \
+ vmovdqu x7, 7 * 16(rio); \
+ vmovdqu y0, 8 * 16(rio); \
+ vmovdqu y1, 9 * 16(rio); \
+ vmovdqu y2, 10 * 16(rio); \
+ vmovdqu y3, 11 * 16(rio); \
+ vmovdqu y4, 12 * 16(rio); \
+ vmovdqu y5, 13 * 16(rio); \
+ vmovdqu y6, 14 * 16(rio); \
+ vmovdqu y7, 15 * 16(rio);
+
+.data
+.align 16
+
+#define SHUFB_BYTES(idx) \
+ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+
+.Lshufb_16x16b:
+ .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3);
+
+.Lpack_bswap:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x80808080
+ .long 0x80808080
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox1, sbox2, sbox3:
+ * swap_bitendianness(
+ * isom_map_camellia_to_aes(
+ * camellia_f(
+ * swap_bitendianess(in)
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s1:
+ .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
+ .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
+.Lpre_tf_hi_s1:
+ .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
+ .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox4:
+ * swap_bitendianness(
+ * isom_map_camellia_to_aes(
+ * camellia_f(
+ * swap_bitendianess(in <<< 1)
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s4:
+ .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
+ .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
+.Lpre_tf_hi_s4:
+ .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
+ .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox1, sbox4:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * )
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s1:
+ .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
+ .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
+.Lpost_tf_hi_s1:
+ .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
+ .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox2:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) <<< 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s2:
+ .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
+ .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
+.Lpost_tf_hi_s2:
+ .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
+ .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox3:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) >>> 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s3:
+ .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
+ .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
+.Lpost_tf_hi_s3:
+ .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
+ .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* 4-bit mask */
+.align 4
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+.text
+
+.align 8
+.type __camellia_enc_blk16,@function;
+
+__camellia_enc_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 256 bytes
+ * %xmm0..%xmm15: 16 plaintext blocks
+ * output:
+ * %xmm0..%xmm15: 16 encrypted blocks, order swapped:
+ * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+
+ leaq 8 * 16(%rax), %rcx;
+
+ inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx);
+
+ enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 0);
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table + (8) * 8) + 0)(CTX),
+ ((key_table + (8) * 8) + 4)(CTX),
+ ((key_table + (8) * 8) + 8)(CTX),
+ ((key_table + (8) * 8) + 12)(CTX));
+
+ enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 8);
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table + (16) * 8) + 0)(CTX),
+ ((key_table + (16) * 8) + 4)(CTX),
+ ((key_table + (16) * 8) + 8)(CTX),
+ ((key_table + (16) * 8) + 12)(CTX));
+
+ enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 16);
+
+ movl $24, %r8d;
+ cmpl $16, key_length(CTX);
+ jne .Lenc_max32;
+
+.Lenc_done:
+ /* load CD for output */
+ vmovdqu 0 * 16(%rcx), %xmm8;
+ vmovdqu 1 * 16(%rcx), %xmm9;
+ vmovdqu 2 * 16(%rcx), %xmm10;
+ vmovdqu 3 * 16(%rcx), %xmm11;
+ vmovdqu 4 * 16(%rcx), %xmm12;
+ vmovdqu 5 * 16(%rcx), %xmm13;
+ vmovdqu 6 * 16(%rcx), %xmm14;
+ vmovdqu 7 * 16(%rcx), %xmm15;
+
+ outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, (key_table)(CTX, %r8, 8), (%rax), 1 * 16(%rax));
+
+ ret;
+
+.align 8
+.Lenc_max32:
+ movl $32, %r8d;
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table + (24) * 8) + 0)(CTX),
+ ((key_table + (24) * 8) + 4)(CTX),
+ ((key_table + (24) * 8) + 8)(CTX),
+ ((key_table + (24) * 8) + 12)(CTX));
+
+ enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 24);
+
+ jmp .Lenc_done;
+
+.align 8
+.type __camellia_dec_blk16,@function;
+
+__camellia_dec_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 256 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+ * %xmm0..%xmm15: 16 encrypted blocks
+ * output:
+ * %xmm0..%xmm15: 16 plaintext blocks, order swapped:
+ * 7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+
+ leaq 8 * 16(%rax), %rcx;
+
+ inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx);
+
+ cmpl $32, %r8d;
+ je .Ldec_max32;
+
+.Ldec_max24:
+ dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 16);
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table + (16) * 8) + 8)(CTX),
+ ((key_table + (16) * 8) + 12)(CTX),
+ ((key_table + (16) * 8) + 0)(CTX),
+ ((key_table + (16) * 8) + 4)(CTX));
+
+ dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 8);
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table + (8) * 8) + 8)(CTX),
+ ((key_table + (8) * 8) + 12)(CTX),
+ ((key_table + (8) * 8) + 0)(CTX),
+ ((key_table + (8) * 8) + 4)(CTX));
+
+ dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 0);
+
+ /* load CD for output */
+ vmovdqu 0 * 16(%rcx), %xmm8;
+ vmovdqu 1 * 16(%rcx), %xmm9;
+ vmovdqu 2 * 16(%rcx), %xmm10;
+ vmovdqu 3 * 16(%rcx), %xmm11;
+ vmovdqu 4 * 16(%rcx), %xmm12;
+ vmovdqu 5 * 16(%rcx), %xmm13;
+ vmovdqu 6 * 16(%rcx), %xmm14;
+ vmovdqu 7 * 16(%rcx), %xmm15;
+
+ outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
+
+ ret;
+
+.align 8
+.Ldec_max32:
+ dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 24);
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table + (24) * 8) + 8)(CTX),
+ ((key_table + (24) * 8) + 12)(CTX),
+ ((key_table + (24) * 8) + 0)(CTX),
+ ((key_table + (24) * 8) + 4)(CTX));
+
+ jmp .Ldec_max24;
+
+.align 8
+.global camellia_ecb_enc_16way
+.type camellia_ecb_enc_16way,@function;
+
+camellia_ecb_enc_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX));
+
+ /* now dst can be used as temporary buffer (even in src == dst case) */
+ movq %rsi, %rax;
+
+ call __camellia_enc_blk16;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ ret;
+
+.align 8
+.global camellia_ecb_dec_16way
+.type camellia_ecb_dec_16way,@function;
+
+camellia_ecb_dec_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+
+ cmpl $16, key_length(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX, %r8, 8));
+
+ /* now dst can be used as temporary buffer (even in src == dst case) */
+ movq %rsi, %rax;
+
+ call __camellia_dec_blk16;
+
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ ret;
+
+.align 8
+.global camellia_cbc_dec_16way
+.type camellia_cbc_dec_16way,@function;
+
+camellia_cbc_dec_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ */
+
+ cmpl $16, key_length(CTX);
+ movl $32, %r8d;
+ movl $24, %eax;
+ cmovel %eax, %r8d; /* max */
+
+ inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rdx, (key_table)(CTX, %r8, 8));
+
+ /*
+ * dst might still be in-use (in case dst == src), so use stack for
+ * temporary storage.
+ */
+ subq $(16 * 16), %rsp;
+ movq %rsp, %rax;
+
+ call __camellia_dec_blk16;
+
+ addq $(16 * 16), %rsp;
+
+ vpxor (0 * 16)(%rdx), %xmm6, %xmm6;
+ vpxor (1 * 16)(%rdx), %xmm5, %xmm5;
+ vpxor (2 * 16)(%rdx), %xmm4, %xmm4;
+ vpxor (3 * 16)(%rdx), %xmm3, %xmm3;
+ vpxor (4 * 16)(%rdx), %xmm2, %xmm2;
+ vpxor (5 * 16)(%rdx), %xmm1, %xmm1;
+ vpxor (6 * 16)(%rdx), %xmm0, %xmm0;
+ vpxor (7 * 16)(%rdx), %xmm15, %xmm15;
+ vpxor (8 * 16)(%rdx), %xmm14, %xmm14;
+ vpxor (9 * 16)(%rdx), %xmm13, %xmm13;
+ vpxor (10 * 16)(%rdx), %xmm12, %xmm12;
+ vpxor (11 * 16)(%rdx), %xmm11, %xmm11;
+ vpxor (12 * 16)(%rdx), %xmm10, %xmm10;
+ vpxor (13 * 16)(%rdx), %xmm9, %xmm9;
+ vpxor (14 * 16)(%rdx), %xmm8, %xmm8;
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ ret;
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+.align 8
+.global camellia_ctr_16way
+.type camellia_ctr_16way,@function;
+
+camellia_ctr_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst (16 blocks)
+ * %rdx: src (16 blocks)
+ * %rcx: iv (little endian, 128bit)
+ */
+
+ subq $(16 * 16), %rsp;
+ movq %rsp, %rax;
+
+ vmovdqa .Lbswap128_mask, %xmm14;
+
+ /* load IV and byteswap */
+ vmovdqu (%rcx), %xmm0;
+ vpshufb %xmm14, %xmm0, %xmm15;
+ vmovdqu %xmm15, 15 * 16(%rax);
+
+ vpcmpeqd %xmm15, %xmm15, %xmm15;
+ vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */
+
+ /* construct IVs */
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm13;
+ vmovdqu %xmm13, 14 * 16(%rax);
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm13;
+ vmovdqu %xmm13, 13 * 16(%rax);
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm12;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm11;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm10;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm9;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm8;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm7;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm6;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm5;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm4;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm3;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm2;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vpshufb %xmm14, %xmm0, %xmm1;
+ inc_le128(%xmm0, %xmm15, %xmm13);
+ vmovdqa %xmm0, %xmm13;
+ vpshufb %xmm14, %xmm0, %xmm0;
+ inc_le128(%xmm13, %xmm15, %xmm14);
+ vmovdqu %xmm13, (%rcx);
+
+ /* inpack16_pre: */
+ vmovq (key_table)(CTX), %xmm15;
+ vpshufb .Lpack_bswap, %xmm15, %xmm15;
+ vpxor %xmm0, %xmm15, %xmm0;
+ vpxor %xmm1, %xmm15, %xmm1;
+ vpxor %xmm2, %xmm15, %xmm2;
+ vpxor %xmm3, %xmm15, %xmm3;
+ vpxor %xmm4, %xmm15, %xmm4;
+ vpxor %xmm5, %xmm15, %xmm5;
+ vpxor %xmm6, %xmm15, %xmm6;
+ vpxor %xmm7, %xmm15, %xmm7;
+ vpxor %xmm8, %xmm15, %xmm8;
+ vpxor %xmm9, %xmm15, %xmm9;
+ vpxor %xmm10, %xmm15, %xmm10;
+ vpxor %xmm11, %xmm15, %xmm11;
+ vpxor %xmm12, %xmm15, %xmm12;
+ vpxor 13 * 16(%rax), %xmm15, %xmm13;
+ vpxor 14 * 16(%rax), %xmm15, %xmm14;
+ vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+ call __camellia_enc_blk16;
+
+ addq $(16 * 16), %rsp;
+
+ vpxor 0 * 16(%rdx), %xmm7, %xmm7;
+ vpxor 1 * 16(%rdx), %xmm6, %xmm6;
+ vpxor 2 * 16(%rdx), %xmm5, %xmm5;
+ vpxor 3 * 16(%rdx), %xmm4, %xmm4;
+ vpxor 4 * 16(%rdx), %xmm3, %xmm3;
+ vpxor 5 * 16(%rdx), %xmm2, %xmm2;
+ vpxor 6 * 16(%rdx), %xmm1, %xmm1;
+ vpxor 7 * 16(%rdx), %xmm0, %xmm0;
+ vpxor 8 * 16(%rdx), %xmm15, %xmm15;
+ vpxor 9 * 16(%rdx), %xmm14, %xmm14;
+ vpxor 10 * 16(%rdx), %xmm13, %xmm13;
+ vpxor 11 * 16(%rdx), %xmm12, %xmm12;
+ vpxor 12 * 16(%rdx), %xmm11, %xmm11;
+ vpxor 13 * 16(%rdx), %xmm10, %xmm10;
+ vpxor 14 * 16(%rdx), %xmm9, %xmm9;
+ vpxor 15 * 16(%rdx), %xmm8, %xmm8;
+ write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+ %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+ %xmm8, %rsi);
+
+ ret;
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
new file mode 100644
index 00000000000..96cbb6068fc
--- /dev/null
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -0,0 +1,558 @@
+/*
+ * Glue Code for x86_64/AVX/AES-NI assembler optimized version of Camellia
+ *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/camellia.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAMELLIA_AESNI_PARALLEL_BLOCKS 16
+
+/* 16-way AES-NI parallel cipher functions */
+asmlinkage void camellia_ecb_enc_16way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void camellia_ecb_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+asmlinkage void camellia_cbc_dec_16way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void camellia_ctr_16way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, le128 *iv);
+
+static const struct common_glue_ctx camellia_enc = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_enc_16way) }
+ }, {
+ .num_blocks = 2,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_enc_blk) }
+ } }
+};
+
+static const struct common_glue_ctx camellia_ctr = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_ctr_16way) }
+ }, {
+ .num_blocks = 2,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(camellia_crypt_ctr) }
+ } }
+};
+
+static const struct common_glue_ctx camellia_dec = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_ecb_dec_16way) }
+ }, {
+ .num_blocks = 2,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .ecb = GLUE_FUNC_CAST(camellia_dec_blk) }
+ } }
+};
+
+static const struct common_glue_ctx camellia_dec_cbc = {
+ .num_funcs = 3,
+ .fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+
+ .funcs = { {
+ .num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_cbc_dec_16way) }
+ }, {
+ .num_blocks = 2,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_decrypt_cbc_2way) }
+ }, {
+ .num_blocks = 1,
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(camellia_dec_blk) }
+ } }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ecb_crypt_128bit(&camellia_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ecb_crypt_128bit(&camellia_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(camellia_enc_blk), desc,
+ dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_cbc_decrypt_128bit(&camellia_dec_cbc, desc, dst, src,
+ nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ return glue_ctr_crypt_128bit(&camellia_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool camellia_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+ return glue_fpu_begin(CAMELLIA_BLOCK_SIZE,
+ CAMELLIA_AESNI_PARALLEL_BLOCKS, NULL, fpu_enabled,
+ nbytes);
+}
+
+static inline void camellia_fpu_end(bool fpu_enabled)
+{
+ glue_fpu_end(fpu_enabled);
+}
+
+static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return __camellia_setkey(crypto_tfm_ctx(tfm), in_key, key_len,
+ &tfm->crt_flags);
+}
+
+struct crypt_priv {
+ struct camellia_ctx *ctx;
+ bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ struct crypt_priv *ctx = priv;
+ int i;
+
+ ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
+
+ if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
+ camellia_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
+ srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
+ nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
+ }
+
+ while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+ camellia_enc_blk_2way(ctx->ctx, srcdst, srcdst);
+ srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
+ nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ camellia_enc_blk(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+ const unsigned int bsize = CAMELLIA_BLOCK_SIZE;
+ struct crypt_priv *ctx = priv;
+ int i;
+
+ ctx->fpu_enabled = camellia_fpu_begin(ctx->fpu_enabled, nbytes);
+
+ if (nbytes >= CAMELLIA_AESNI_PARALLEL_BLOCKS * bsize) {
+ camellia_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
+ srcdst += bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
+ nbytes -= bsize * CAMELLIA_AESNI_PARALLEL_BLOCKS;
+ }
+
+ while (nbytes >= CAMELLIA_PARALLEL_BLOCKS * bsize) {
+ camellia_dec_blk_2way(ctx->ctx, srcdst, srcdst);
+ srcdst += bsize * CAMELLIA_PARALLEL_BLOCKS;
+ nbytes -= bsize * CAMELLIA_PARALLEL_BLOCKS;
+ }
+
+ for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+ camellia_dec_blk(ctx->ctx, srcdst, srcdst);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->camellia_ctx,
+ .fpu_enabled = false,
+ };
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = lrw_crypt(desc, dst, src, nbytes, &req);
+ camellia_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->camellia_ctx,
+ .fpu_enabled = false,
+ };
+ struct lrw_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .table_ctx = &ctx->lrw_table,
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = lrw_crypt(desc, dst, src, nbytes, &req);
+ camellia_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->crypt_ctx,
+ .fpu_enabled = false,
+ };
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = encrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = xts_crypt(desc, dst, src, nbytes, &req);
+ camellia_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+ struct scatterlist *src, unsigned int nbytes)
+{
+ struct camellia_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ be128 buf[CAMELLIA_AESNI_PARALLEL_BLOCKS];
+ struct crypt_priv crypt_ctx = {
+ .ctx = &ctx->crypt_ctx,
+ .fpu_enabled = false,
+ };
+ struct xts_crypt_req req = {
+ .tbuf = buf,
+ .tbuflen = sizeof(buf),
+
+ .tweak_ctx = &ctx->tweak_ctx,
+ .tweak_fn = XTS_TWEAK_CAST(camellia_enc_blk),
+ .crypt_ctx = &crypt_ctx,
+ .crypt_fn = decrypt_callback,
+ };
+ int ret;
+
+ desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ ret = xts_crypt(desc, dst, src, nbytes, &req);
+ camellia_fpu_end(crypt_ctx.fpu_enabled);
+
+ return ret;
+}
+
+static struct crypto_alg cmll_algs[10] = { {
+ .cra_name = "__ecb-camellia-aesni",
+ .cra_driver_name = "__driver-ecb-camellia-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__cbc-camellia-aesni",
+ .cra_driver_name = "__driver-cbc-camellia-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__ctr-camellia-aesni",
+ .cra_driver_name = "__driver-ctr-camellia-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct camellia_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_setkey,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ },
+ },
+}, {
+ .cra_name = "__lrw-camellia-aesni",
+ .cra_driver_name = "__driver-lrw-camellia-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_lrw_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_exit = lrw_camellia_exit_tfm,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = lrw_camellia_setkey,
+ .encrypt = lrw_encrypt,
+ .decrypt = lrw_decrypt,
+ },
+ },
+}, {
+ .cra_name = "__xts-camellia-aesni",
+ .cra_driver_name = "__driver-xts-camellia-aesni",
+ .cra_priority = 0,
+ .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct camellia_xts_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_blkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .blkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = xts_camellia_setkey,
+ .encrypt = xts_encrypt,
+ .decrypt = xts_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ecb(camellia)",
+ .cra_driver_name = "ecb-camellia-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "cbc(camellia)",
+ .cra_driver_name = "cbc-camellia-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = __ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "ctr(camellia)",
+ .cra_driver_name = "ctr-camellia-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_encrypt,
+ .geniv = "chainiv",
+ },
+ },
+}, {
+ .cra_name = "lrw(camellia)",
+ .cra_driver_name = "lrw-camellia-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE +
+ CAMELLIA_BLOCK_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+}, {
+ .cra_name = "xts(camellia)",
+ .cra_driver_name = "xts-camellia-aesni",
+ .cra_priority = 400,
+ .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+ .cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct async_helper_ctx),
+ .cra_alignmask = 0,
+ .cra_type = &crypto_ablkcipher_type,
+ .cra_module = THIS_MODULE,
+ .cra_init = ablk_init,
+ .cra_exit = ablk_exit,
+ .cra_u = {
+ .ablkcipher = {
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE * 2,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE * 2,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = ablk_set_key,
+ .encrypt = ablk_encrypt,
+ .decrypt = ablk_decrypt,
+ },
+ },
+} };
+
+static int __init camellia_aesni_init(void)
+{
+ u64 xcr0;
+
+ if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ pr_info("AVX or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
+ xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+ if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+ pr_info("AVX detected but unusable.\n");
+ return -ENODEV;
+ }
+
+ return crypto_register_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+}
+
+static void __exit camellia_aesni_fini(void)
+{
+ crypto_unregister_algs(cmll_algs, ARRAY_SIZE(cmll_algs));
+}
+
+module_init(camellia_aesni_init);
+module_exit(camellia_aesni_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX optimized");
+MODULE_ALIAS("camellia");
+MODULE_ALIAS("camellia-asm");
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 42ffd2bbab5..5cb86ccd4ac 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -32,53 +32,24 @@
#include <crypto/algapi.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
+#include <asm/crypto/camellia.h>
#include <asm/crypto/glue_helper.h>
-#define CAMELLIA_MIN_KEY_SIZE 16
-#define CAMELLIA_MAX_KEY_SIZE 32
-#define CAMELLIA_BLOCK_SIZE 16
-#define CAMELLIA_TABLE_BYTE_LEN 272
-
-struct camellia_ctx {
- u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
- u32 key_length;
-};
-
/* regular block cipher functions */
asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
+EXPORT_SYMBOL_GPL(__camellia_enc_blk);
asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
+EXPORT_SYMBOL_GPL(camellia_dec_blk);
/* 2-way parallel cipher functions */
asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
+EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way);
asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
const u8 *src);
-
-static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk(ctx, dst, src, false);
-}
-
-static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk(ctx, dst, src, true);
-}
-
-static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk_2way(ctx, dst, src, false);
-}
-
-static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __camellia_enc_blk_2way(ctx, dst, src, true);
-}
+EXPORT_SYMBOL_GPL(camellia_dec_blk_2way);
static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
@@ -1275,9 +1246,8 @@ static void camellia_setup192(const unsigned char *key, u64 *subkey)
camellia_setup256(kk, subkey);
}
-static int __camellia_setkey(struct camellia_ctx *cctx,
- const unsigned char *key,
- unsigned int key_len, u32 *flags)
+int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key,
+ unsigned int key_len, u32 *flags)
{
if (key_len != 16 && key_len != 24 && key_len != 32) {
*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
@@ -1300,6 +1270,7 @@ static int __camellia_setkey(struct camellia_ctx *cctx,
return 0;
}
+EXPORT_SYMBOL_GPL(__camellia_setkey);
static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
unsigned int key_len)
@@ -1308,7 +1279,7 @@ static int camellia_setkey(struct crypto_tfm *tfm, const u8 *in_key,
&tfm->crt_flags);
}
-static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
+void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
{
u128 iv = *src;
@@ -1316,22 +1287,23 @@ static void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src)
u128_xor(&dst[1], &dst[1], &iv);
}
+EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way);
-static void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
if (dst != src)
*dst = *src;
- u128_to_be128(&ctrblk, iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblk, iv);
+ le128_inc(iv);
camellia_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
}
+EXPORT_SYMBOL_GPL(camellia_crypt_ctr);
-static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
- u128 *iv)
+void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblks[2];
@@ -1340,13 +1312,14 @@ static void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
dst[1] = src[1];
}
- u128_to_be128(&ctrblks[0], iv);
- u128_inc(iv);
- u128_to_be128(&ctrblks[1], iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblks[0], iv);
+ le128_inc(iv);
+ le128_to_be128(&ctrblks[1], iv);
+ le128_inc(iv);
camellia_enc_blk_xor_2way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
+EXPORT_SYMBOL_GPL(camellia_crypt_ctr_2way);
static const struct common_glue_ctx camellia_enc = {
.num_funcs = 2,
@@ -1464,13 +1437,8 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
camellia_dec_blk(ctx, srcdst, srcdst);
}
-struct camellia_lrw_ctx {
- struct lrw_table_ctx lrw_table;
- struct camellia_ctx camellia_ctx;
-};
-
-static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
{
struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
int err;
@@ -1484,6 +1452,7 @@ static int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
return lrw_init_table(&ctx->lrw_table,
key + keylen - CAMELLIA_BLOCK_SIZE);
}
+EXPORT_SYMBOL_GPL(lrw_camellia_setkey);
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
@@ -1519,20 +1488,16 @@ static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
return lrw_crypt(desc, dst, src, nbytes, &req);
}
-static void lrw_exit_tfm(struct crypto_tfm *tfm)
+void lrw_camellia_exit_tfm(struct crypto_tfm *tfm)
{
struct camellia_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
lrw_free_table(&ctx->lrw_table);
}
+EXPORT_SYMBOL_GPL(lrw_camellia_exit_tfm);
-struct camellia_xts_ctx {
- struct camellia_ctx tweak_ctx;
- struct camellia_ctx crypt_ctx;
-};
-
-static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int keylen)
+int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen)
{
struct camellia_xts_ctx *ctx = crypto_tfm_ctx(tfm);
u32 *flags = &tfm->crt_flags;
@@ -1555,6 +1520,7 @@ static int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
return __camellia_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
flags);
}
+EXPORT_SYMBOL_GPL(xts_camellia_setkey);
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
@@ -1679,7 +1645,7 @@ static struct crypto_alg camellia_algs[6] = { {
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
- .cra_exit = lrw_exit_tfm,
+ .cra_exit = lrw_camellia_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = CAMELLIA_MIN_KEY_SIZE +
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index a41a3aaba22..15b00ac7cbd 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -25,10 +25,10 @@
.file "cast5-avx-x86_64-asm_64.S"
-.extern cast5_s1
-.extern cast5_s2
-.extern cast5_s3
-.extern cast5_s4
+.extern cast_s1
+.extern cast_s2
+.extern cast_s3
+.extern cast_s4
/* structure of crypto context */
#define km 0
@@ -36,10 +36,10 @@
#define rr ((16*4)+16)
/* s-boxes */
-#define s1 cast5_s1
-#define s2 cast5_s2
-#define s3 cast5_s3
-#define s4 cast5_s4
+#define s1 cast_s1
+#define s2 cast_s2
+#define s3 cast_s3
+#define s4 cast_s4
/**********************************************************************
16-way AVX cast5
@@ -180,31 +180,17 @@
vpunpcklqdq t1, t0, x0; \
vpunpckhqdq t1, t0, x1;
-#define inpack_blocks(in, x0, x1, t0, t1, rmask) \
- vmovdqu (0*4*4)(in), x0; \
- vmovdqu (1*4*4)(in), x1; \
+#define inpack_blocks(x0, x1, t0, t1, rmask) \
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
\
transpose_2x4(x0, x1, t0, t1)
-#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \
+#define outunpack_blocks(x0, x1, t0, t1, rmask) \
transpose_2x4(x0, x1, t0, t1) \
\
vpshufb rmask, x0, x0; \
- vpshufb rmask, x1, x1; \
- vmovdqu x0, (0*4*4)(out); \
- vmovdqu x1, (1*4*4)(out);
-
-#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \
- transpose_2x4(x0, x1, t0, t1) \
- \
- vpshufb rmask, x0, x0; \
- vpshufb rmask, x1, x1; \
- vpxor (0*4*4)(out), x0, x0; \
- vmovdqu x0, (0*4*4)(out); \
- vpxor (1*4*4)(out), x1, x1; \
- vmovdqu x1, (1*4*4)(out);
+ vpshufb rmask, x1, x1;
.data
@@ -213,6 +199,8 @@
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.Lbswap_iv_mask:
+ .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.L16_mask:
.byte 16, 16, 16, 16
.L32_mask:
@@ -223,35 +211,42 @@
.text
.align 16
-.global __cast5_enc_blk_16way
-.type __cast5_enc_blk_16way,@function;
+.type __cast5_enc_blk16,@function;
-__cast5_enc_blk_16way:
+__cast5_enc_blk16:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
+ * RL1: blocks 1 and 2
+ * RR1: blocks 3 and 4
+ * RL2: blocks 5 and 6
+ * RR2: blocks 7 and 8
+ * RL3: blocks 9 and 10
+ * RR3: blocks 11 and 12
+ * RL4: blocks 13 and 14
+ * RR4: blocks 15 and 16
+ * output:
+ * RL1: encrypted blocks 1 and 2
+ * RR1: encrypted blocks 3 and 4
+ * RL2: encrypted blocks 5 and 6
+ * RR2: encrypted blocks 7 and 8
+ * RL3: encrypted blocks 9 and 10
+ * RR3: encrypted blocks 11 and 12
+ * RL4: encrypted blocks 13 and 14
+ * RR4: encrypted blocks 15 and 16
*/
pushq %rbp;
pushq %rbx;
- pushq %rcx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
enc_preload_rkr();
- leaq 1*(2*4*4)(%rdx), %rax;
- inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
- inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
- leaq 2*(2*4*4)(%rdx), %rax;
- inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
- leaq 3*(2*4*4)(%rdx), %rax;
- inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
-
- movq %rsi, %r11;
+ inpack_blocks(RL1, RR1, RTMP, RX, RKM);
+ inpack_blocks(RL2, RR2, RTMP, RX, RKM);
+ inpack_blocks(RL3, RR3, RTMP, RX, RKM);
+ inpack_blocks(RL4, RR4, RTMP, RX, RKM);
round(RL, RR, 0, 1);
round(RR, RL, 1, 2);
@@ -276,44 +271,41 @@ __cast5_enc_blk_16way:
round(RR, RL, 15, 1);
__skip_enc:
- popq %rcx;
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
- leaq 1*(2*4*4)(%r11), %rax;
- testb %cl, %cl;
- jnz __enc_xor16;
-
- outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
- outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
- leaq 2*(2*4*4)(%r11), %rax;
- outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
- leaq 3*(2*4*4)(%r11), %rax;
- outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
-
- ret;
-
-__enc_xor16:
- outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
- outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
- leaq 2*(2*4*4)(%r11), %rax;
- outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
- leaq 3*(2*4*4)(%r11), %rax;
- outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+ outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
+ outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
+ outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+ outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
.align 16
-.global cast5_dec_blk_16way
-.type cast5_dec_blk_16way,@function;
+.type __cast5_dec_blk16,@function;
-cast5_dec_blk_16way:
+__cast5_dec_blk16:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
+ * RL1: encrypted blocks 1 and 2
+ * RR1: encrypted blocks 3 and 4
+ * RL2: encrypted blocks 5 and 6
+ * RR2: encrypted blocks 7 and 8
+ * RL3: encrypted blocks 9 and 10
+ * RR3: encrypted blocks 11 and 12
+ * RL4: encrypted blocks 13 and 14
+ * RR4: encrypted blocks 15 and 16
+ * output:
+ * RL1: decrypted blocks 1 and 2
+ * RR1: decrypted blocks 3 and 4
+ * RL2: decrypted blocks 5 and 6
+ * RR2: decrypted blocks 7 and 8
+ * RL3: decrypted blocks 9 and 10
+ * RR3: decrypted blocks 11 and 12
+ * RL4: decrypted blocks 13 and 14
+ * RR4: decrypted blocks 15 and 16
*/
pushq %rbp;
@@ -324,15 +316,10 @@ cast5_dec_blk_16way:
vmovd .L32_mask, R32;
dec_preload_rkr();
- leaq 1*(2*4*4)(%rdx), %rax;
- inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
- inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
- leaq 2*(2*4*4)(%rdx), %rax;
- inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
- leaq 3*(2*4*4)(%rdx), %rax;
- inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
-
- movq %rsi, %r11;
+ inpack_blocks(RL1, RR1, RTMP, RX, RKM);
+ inpack_blocks(RL2, RR2, RTMP, RX, RKM);
+ inpack_blocks(RL3, RR3, RTMP, RX, RKM);
+ inpack_blocks(RL4, RR4, RTMP, RX, RKM);
movzbl rr(CTX), %eax;
testl %eax, %eax;
@@ -361,16 +348,211 @@ __dec_tail:
popq %rbx;
popq %rbp;
- leaq 1*(2*4*4)(%r11), %rax;
- outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
- outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
- leaq 2*(2*4*4)(%r11), %rax;
- outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
- leaq 3*(2*4*4)(%r11), %rax;
- outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
+ outunpack_blocks(RR1, RL1, RTMP, RX, RKM);
+ outunpack_blocks(RR2, RL2, RTMP, RX, RKM);
+ outunpack_blocks(RR3, RL3, RTMP, RX, RKM);
+ outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
__skip_dec:
vpsrldq $4, RKR, RKR;
jmp __dec_tail;
+
+.align 16
+.global cast5_ecb_enc_16way
+.type cast5_ecb_enc_16way,@function;
+
+cast5_ecb_enc_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ movq %rsi, %r11;
+
+ vmovdqu (0*4*4)(%rdx), RL1;
+ vmovdqu (1*4*4)(%rdx), RR1;
+ vmovdqu (2*4*4)(%rdx), RL2;
+ vmovdqu (3*4*4)(%rdx), RR2;
+ vmovdqu (4*4*4)(%rdx), RL3;
+ vmovdqu (5*4*4)(%rdx), RR3;
+ vmovdqu (6*4*4)(%rdx), RL4;
+ vmovdqu (7*4*4)(%rdx), RR4;
+
+ call __cast5_enc_blk16;
+
+ vmovdqu RR1, (0*4*4)(%r11);
+ vmovdqu RL1, (1*4*4)(%r11);
+ vmovdqu RR2, (2*4*4)(%r11);
+ vmovdqu RL2, (3*4*4)(%r11);
+ vmovdqu RR3, (4*4*4)(%r11);
+ vmovdqu RL3, (5*4*4)(%r11);
+ vmovdqu RR4, (6*4*4)(%r11);
+ vmovdqu RL4, (7*4*4)(%r11);
+
+ ret;
+
+.align 16
+.global cast5_ecb_dec_16way
+.type cast5_ecb_dec_16way,@function;
+
+cast5_ecb_dec_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ movq %rsi, %r11;
+
+ vmovdqu (0*4*4)(%rdx), RL1;
+ vmovdqu (1*4*4)(%rdx), RR1;
+ vmovdqu (2*4*4)(%rdx), RL2;
+ vmovdqu (3*4*4)(%rdx), RR2;
+ vmovdqu (4*4*4)(%rdx), RL3;
+ vmovdqu (5*4*4)(%rdx), RR3;
+ vmovdqu (6*4*4)(%rdx), RL4;
+ vmovdqu (7*4*4)(%rdx), RR4;
+
+ call __cast5_dec_blk16;
+
+ vmovdqu RR1, (0*4*4)(%r11);
+ vmovdqu RL1, (1*4*4)(%r11);
+ vmovdqu RR2, (2*4*4)(%r11);
+ vmovdqu RL2, (3*4*4)(%r11);
+ vmovdqu RR3, (4*4*4)(%r11);
+ vmovdqu RL3, (5*4*4)(%r11);
+ vmovdqu RR4, (6*4*4)(%r11);
+ vmovdqu RL4, (7*4*4)(%r11);
+
+ ret;
+
+.align 16
+.global cast5_cbc_dec_16way
+.type cast5_cbc_dec_16way,@function;
+
+cast5_cbc_dec_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ pushq %r12;
+
+ movq %rsi, %r11;
+ movq %rdx, %r12;
+
+ vmovdqu (0*16)(%rdx), RL1;
+ vmovdqu (1*16)(%rdx), RR1;
+ vmovdqu (2*16)(%rdx), RL2;
+ vmovdqu (3*16)(%rdx), RR2;
+ vmovdqu (4*16)(%rdx), RL3;
+ vmovdqu (5*16)(%rdx), RR3;
+ vmovdqu (6*16)(%rdx), RL4;
+ vmovdqu (7*16)(%rdx), RR4;
+
+ call __cast5_dec_blk16;
+
+ /* xor with src */
+ vmovq (%r12), RX;
+ vpshufd $0x4f, RX, RX;
+ vpxor RX, RR1, RR1;
+ vpxor 0*16+8(%r12), RL1, RL1;
+ vpxor 1*16+8(%r12), RR2, RR2;
+ vpxor 2*16+8(%r12), RL2, RL2;
+ vpxor 3*16+8(%r12), RR3, RR3;
+ vpxor 4*16+8(%r12), RL3, RL3;
+ vpxor 5*16+8(%r12), RR4, RR4;
+ vpxor 6*16+8(%r12), RL4, RL4;
+
+ vmovdqu RR1, (0*16)(%r11);
+ vmovdqu RL1, (1*16)(%r11);
+ vmovdqu RR2, (2*16)(%r11);
+ vmovdqu RL2, (3*16)(%r11);
+ vmovdqu RR3, (4*16)(%r11);
+ vmovdqu RL3, (5*16)(%r11);
+ vmovdqu RR4, (6*16)(%r11);
+ vmovdqu RL4, (7*16)(%r11);
+
+ popq %r12;
+
+ ret;
+
+.align 16
+.global cast5_ctr_16way
+.type cast5_ctr_16way,@function;
+
+cast5_ctr_16way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (big endian, 64bit)
+ */
+
+ pushq %r12;
+
+ movq %rsi, %r11;
+ movq %rdx, %r12;
+
+ vpcmpeqd RTMP, RTMP, RTMP;
+ vpsrldq $8, RTMP, RTMP; /* low: -1, high: 0 */
+
+ vpcmpeqd RKR, RKR, RKR;
+ vpaddq RKR, RKR, RKR; /* low: -2, high: -2 */
+ vmovdqa .Lbswap_iv_mask, R1ST;
+ vmovdqa .Lbswap128_mask, RKM;
+
+ /* load IV and byteswap */
+ vmovq (%rcx), RX;
+ vpshufb R1ST, RX, RX;
+
+ /* construct IVs */
+ vpsubq RTMP, RX, RX; /* le: IV1, IV0 */
+ vpshufb RKM, RX, RL1; /* be: IV0, IV1 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RR1; /* be: IV2, IV3 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RL2; /* be: IV4, IV5 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RR2; /* be: IV6, IV7 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RL3; /* be: IV8, IV9 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RR3; /* be: IV10, IV11 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RL4; /* be: IV12, IV13 */
+ vpsubq RKR, RX, RX;
+ vpshufb RKM, RX, RR4; /* be: IV14, IV15 */
+
+ /* store last IV */
+ vpsubq RTMP, RX, RX; /* le: IV16, IV14 */
+ vpshufb R1ST, RX, RX; /* be: IV16, IV16 */
+ vmovq RX, (%rcx);
+
+ call __cast5_enc_blk16;
+
+ /* dst = src ^ iv */
+ vpxor (0*16)(%r12), RR1, RR1;
+ vpxor (1*16)(%r12), RL1, RL1;
+ vpxor (2*16)(%r12), RR2, RR2;
+ vpxor (3*16)(%r12), RL2, RL2;
+ vpxor (4*16)(%r12), RR3, RR3;
+ vpxor (5*16)(%r12), RL3, RL3;
+ vpxor (6*16)(%r12), RR4, RR4;
+ vpxor (7*16)(%r12), RL4, RL4;
+ vmovdqu RR1, (0*16)(%r11);
+ vmovdqu RL1, (1*16)(%r11);
+ vmovdqu RR2, (2*16)(%r11);
+ vmovdqu RL2, (3*16)(%r11);
+ vmovdqu RR3, (4*16)(%r11);
+ vmovdqu RL3, (5*16)(%r11);
+ vmovdqu RR4, (6*16)(%r11);
+ vmovdqu RL4, (7*16)(%r11);
+
+ popq %r12;
+
+ ret;
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index e0ea14f9547..c6631813dc1 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -37,29 +37,14 @@
#define CAST5_PARALLEL_BLOCKS 16
-asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
const u8 *src);
-
-static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __cast5_enc_blk_16way(ctx, dst, src, false);
-}
-
-static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __cast5_enc_blk_16way(ctx, dst, src, true);
-}
-
-static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- cast5_dec_blk_16way(ctx, dst, src);
-}
-
+asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
+ __be64 *iv);
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
@@ -79,8 +64,11 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = CAST5_BLOCK_SIZE;
unsigned int nbytes;
+ void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
int err;
+ fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
+
err = blkcipher_walk_virt(desc, walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -93,10 +81,7 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
- if (enc)
- cast5_enc_blk_xway(ctx, wdst, wsrc);
- else
- cast5_dec_blk_xway(ctx, wdst, wsrc);
+ fn(ctx, wdst, wsrc);
wsrc += bsize * CAST5_PARALLEL_BLOCKS;
wdst += bsize * CAST5_PARALLEL_BLOCKS;
@@ -107,12 +92,11 @@ static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
goto done;
}
+ fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
+
/* Handle leftovers */
do {
- if (enc)
- __cast5_encrypt(ctx, wdst, wsrc);
- else
- __cast5_decrypt(ctx, wdst, wsrc);
+ fn(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
@@ -194,9 +178,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
u64 last_iv;
- int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
@@ -211,13 +193,7 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
src -= CAST5_PARALLEL_BLOCKS - 1;
dst -= CAST5_PARALLEL_BLOCKS - 1;
- for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
- ivs[i] = src[i];
-
- cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
- *(dst + (i + 1)) ^= *(ivs + i);
+ cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
@@ -298,23 +274,12 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
- u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
- __be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
- int i;
/* Process multi-block batch */
if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
do {
- /* create ctrblks for parallel encrypt */
- for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- ctrblocks[i] = cpu_to_be64(ctrblk++);
- }
-
- cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
- (u8 *)ctrblocks);
+ cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
+ (__be64 *)walk->iv);
src += CAST5_PARALLEL_BLOCKS;
dst += CAST5_PARALLEL_BLOCKS;
@@ -327,13 +292,16 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
/* Handle leftovers */
do {
+ u64 ctrblk;
+
if (dst != src)
*dst = *src;
- ctrblocks[0] = cpu_to_be64(ctrblk++);
+ ctrblk = *(u64 *)walk->iv;
+ be64_add_cpu((__be64 *)walk->iv, 1);
- __cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
- *dst ^= ctrblocks[0];
+ __cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+ *dst ^= ctrblk;
src += 1;
dst += 1;
@@ -341,7 +309,6 @@ static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
} while (nbytes >= bsize);
done:
- *(__be64 *)walk->iv = cpu_to_be64(ctrblk);
return nbytes;
}
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 218d283772f..2569d0da841 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -23,22 +23,24 @@
*
*/
+#include "glue_helper-asm-avx.S"
+
.file "cast6-avx-x86_64-asm_64.S"
-.extern cast6_s1
-.extern cast6_s2
-.extern cast6_s3
-.extern cast6_s4
+.extern cast_s1
+.extern cast_s2
+.extern cast_s3
+.extern cast_s4
/* structure of crypto context */
#define km 0
#define kr (12*4*4)
/* s-boxes */
-#define s1 cast6_s1
-#define s2 cast6_s2
-#define s3 cast6_s3
-#define s4 cast6_s4
+#define s1 cast_s1
+#define s2 cast_s2
+#define s3 cast_s3
+#define s4 cast_s4
/**********************************************************************
8-way AVX cast6
@@ -205,11 +207,7 @@
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
-#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \
- vmovdqu (0*4*4)(in), x0; \
- vmovdqu (1*4*4)(in), x1; \
- vmovdqu (2*4*4)(in), x2; \
- vmovdqu (3*4*4)(in), x3; \
+#define inpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
@@ -217,39 +215,21 @@
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
+#define outunpack_blocks(x0, x1, x2, x3, t0, t1, t2, rmask) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
vpshufb rmask, x0, x0; \
vpshufb rmask, x1, x1; \
vpshufb rmask, x2, x2; \
- vpshufb rmask, x3, x3; \
- vmovdqu x0, (0*4*4)(out); \
- vmovdqu x1, (1*4*4)(out); \
- vmovdqu x2, (2*4*4)(out); \
- vmovdqu x3, (3*4*4)(out);
-
-#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- vpshufb rmask, x0, x0; \
- vpshufb rmask, x1, x1; \
- vpshufb rmask, x2, x2; \
- vpshufb rmask, x3, x3; \
- vpxor (0*4*4)(out), x0, x0; \
- vmovdqu x0, (0*4*4)(out); \
- vpxor (1*4*4)(out), x1, x1; \
- vmovdqu x1, (1*4*4)(out); \
- vpxor (2*4*4)(out), x2, x2; \
- vmovdqu x2, (2*4*4)(out); \
- vpxor (3*4*4)(out), x3, x3; \
- vmovdqu x3, (3*4*4)(out);
+ vpshufb rmask, x3, x3;
.data
.align 16
.Lbswap_mask:
.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lrkr_enc_Q_Q_QBAR_QBAR:
.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
@@ -269,31 +249,26 @@
.text
-.align 16
-.global __cast6_enc_blk_8way
-.type __cast6_enc_blk_8way,@function;
+.align 8
+.type __cast6_enc_blk8,@function;
-__cast6_enc_blk_8way:
+__cast6_enc_blk8:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
+ * output:
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
pushq %rbp;
pushq %rbx;
- pushq %rcx;
vmovdqa .Lbswap_mask, RKM;
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
- leaq (4*4*4)(%rdx), %rax;
- inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
- inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
-
- movq %rsi, %r11;
+ inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
preload_rkr(0, dummy, none);
Q(0);
@@ -311,36 +286,25 @@ __cast6_enc_blk_8way:
QBAR(10);
QBAR(11);
- popq %rcx;
popq %rbx;
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
- leaq (4*4*4)(%r11), %rax;
-
- testb %cl, %cl;
- jnz __enc_xor8;
-
- outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
- outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
-
- ret;
-__enc_xor8:
- outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
- outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
-.align 16
-.global cast6_dec_blk_8way
-.type cast6_dec_blk_8way,@function;
+.align 8
+.type __cast6_dec_blk8,@function;
-cast6_dec_blk_8way:
+__cast6_dec_blk8:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
+ * output:
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
pushq %rbp;
@@ -350,11 +314,8 @@ cast6_dec_blk_8way:
vmovd .Lfirst_mask, R1ST;
vmovd .L32_mask, R32;
- leaq (4*4*4)(%rdx), %rax;
- inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
- inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
-
- movq %rsi, %r11;
+ inpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ inpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
Q(11);
@@ -376,8 +337,103 @@ cast6_dec_blk_8way:
popq %rbp;
vmovdqa .Lbswap_mask, RKM;
- leaq (4*4*4)(%r11), %rax;
- outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
- outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
+
+ ret;
+
+.align 8
+.global cast6_ecb_enc_8way
+.type cast6_ecb_enc_8way,@function;
+
+cast6_ecb_enc_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ movq %rsi, %r11;
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __cast6_enc_blk8;
+
+ store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ ret;
+
+.align 8
+.global cast6_ecb_dec_8way
+.type cast6_ecb_dec_8way,@function;
+
+cast6_ecb_dec_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ movq %rsi, %r11;
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __cast6_dec_blk8;
+
+ store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ ret;
+
+.align 8
+.global cast6_cbc_dec_8way
+.type cast6_cbc_dec_8way,@function;
+
+cast6_cbc_dec_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ pushq %r12;
+
+ movq %rsi, %r11;
+ movq %rdx, %r12;
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __cast6_dec_blk8;
+
+ store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ popq %r12;
+
+ ret;
+
+.align 8
+.global cast6_ctr_8way
+.type cast6_ctr_8way,@function;
+
+cast6_ctr_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (little endian, 128bit)
+ */
+
+ pushq %r12;
+
+ movq %rsi, %r11;
+ movq %rdx, %r12;
+
+ load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
+ RD2, RX, RKR, RKM);
+
+ call __cast6_enc_blk8;
+
+ store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ popq %r12;
ret;
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 15e5f85a501..92f7ca24790 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -40,79 +40,34 @@
#define CAST6_PARALLEL_BLOCKS 8
-asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+asmlinkage void cast6_ecb_enc_8way(struct cast6_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void cast6_ecb_dec_8way(struct cast6_ctx *ctx, u8 *dst,
const u8 *src);
-static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __cast6_enc_blk_8way(ctx, dst, src, false);
-}
-
-static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __cast6_enc_blk_8way(ctx, dst, src, true);
-}
-
-static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- cast6_dec_blk_8way(ctx, dst, src);
-}
-
-
-static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
-{
- u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
- unsigned int j;
-
- for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
- ivs[j] = src[j];
-
- cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
- u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
-}
+asmlinkage void cast6_cbc_dec_8way(struct cast6_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void cast6_ctr_8way(struct cast6_ctx *ctx, u8 *dst, const u8 *src,
+ le128 *iv);
-static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
- u128_to_be128(&ctrblk, iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblk, iv);
+ le128_inc(iv);
__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
-static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
- u128 *iv)
-{
- be128 ctrblks[CAST6_PARALLEL_BLOCKS];
- unsigned int i;
-
- for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- u128_to_be128(&ctrblks[i], iv);
- u128_inc(iv);
- }
-
- cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
-}
-
static const struct common_glue_ctx cast6_enc = {
.num_funcs = 2,
.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) }
+ .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
@@ -125,7 +80,7 @@ static const struct common_glue_ctx cast6_ctr = {
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) }
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_ctr_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
@@ -138,7 +93,7 @@ static const struct common_glue_ctx cast6_dec = {
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) }
+ .fn_u = { .ecb = GLUE_FUNC_CAST(cast6_ecb_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
@@ -151,7 +106,7 @@ static const struct common_glue_ctx cast6_dec_cbc = {
.funcs = { {
.num_blocks = CAST6_PARALLEL_BLOCKS,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) }
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_cbc_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
@@ -215,7 +170,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
- cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+ cast6_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
return;
}
@@ -232,7 +187,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
- cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+ cast6_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
return;
}
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel_glue.c
index 493f959261f..6812ad98355 100644
--- a/arch/x86/crypto/crc32c-intel.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -32,6 +32,8 @@
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
+#include <asm/i387.h>
+#include <asm/fpu-internal.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
@@ -44,6 +46,31 @@
#define REX_PRE
#endif
+#ifdef CONFIG_X86_64
+/*
+ * use carryless multiply version of crc32c when buffer
+ * size is >= 512 (when eager fpu is enabled) or
+ * >= 1024 (when eager fpu is disabled) to account
+ * for fpu state save/restore overhead.
+ */
+#define CRC32C_PCL_BREAKEVEN_EAGERFPU 512
+#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU 1024
+
+asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
+ unsigned int crc_init);
+static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
+#if defined(X86_FEATURE_EAGER_FPU)
+#define set_pcl_breakeven_point() \
+do { \
+ if (!use_eager_fpu()) \
+ crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU; \
+} while (0)
+#else
+#define set_pcl_breakeven_point() \
+ (crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU)
+#endif
+#endif /* CONFIG_X86_64 */
+
static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
{
while (length--) {
@@ -154,6 +181,52 @@ static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
return 0;
}
+#ifdef CONFIG_X86_64
+static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
+ unsigned int len)
+{
+ u32 *crcp = shash_desc_ctx(desc);
+
+ /*
+ * use faster PCL version if datasize is large enough to
+ * overcome kernel fpu state save/restore overhead
+ */
+ if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ kernel_fpu_begin();
+ *crcp = crc_pcl(data, len, *crcp);
+ kernel_fpu_end();
+ } else
+ *crcp = crc32c_intel_le_hw(*crcp, data, len);
+ return 0;
+}
+
+static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
+ u8 *out)
+{
+ if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+ kernel_fpu_begin();
+ *(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
+ kernel_fpu_end();
+ } else
+ *(__le32 *)out =
+ ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
+ return 0;
+}
+
+static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
+}
+
+static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
+ unsigned int len, u8 *out)
+{
+ return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
+ out);
+}
+#endif /* CONFIG_X86_64 */
+
static struct shash_alg alg = {
.setkey = crc32c_intel_setkey,
.init = crc32c_intel_init,
@@ -184,6 +257,14 @@ static int __init crc32c_intel_mod_init(void)
{
if (!x86_match_cpu(crc32c_cpu_id))
return -ENODEV;
+#ifdef CONFIG_X86_64
+ if (cpu_has_pclmulqdq) {
+ alg.update = crc32c_pcl_intel_update;
+ alg.finup = crc32c_pcl_intel_finup;
+ alg.digest = crc32c_pcl_intel_digest;
+ set_pcl_breakeven_point();
+ }
+#endif
return crypto_register_shash(&alg);
}
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
new file mode 100644
index 00000000000..93c6d39237a
--- /dev/null
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -0,0 +1,460 @@
+/*
+ * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
+ *
+ * The white paper on CRC32C calculations with PCLMULQDQ instruction can be
+ * downloaded from:
+ * http://download.intel.com/design/intarch/papers/323405.pdf
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ *
+ * Authors:
+ * Wajdi Feghali <wajdi.k.feghali@intel.com>
+ * James Guilford <james.guilford@intel.com>
+ * David Cote <david.m.cote@intel.com>
+ * Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
+
+.macro LABEL prefix n
+\prefix\n\():
+.endm
+
+.macro JMPTBL_ENTRY i
+.word crc_\i - crc_array
+.endm
+
+.macro JNC_LESS_THAN j
+ jnc less_than_\j
+.endm
+
+# Define threshold where buffers are considered "small" and routed to more
+# efficient "by-1" code. This "by-1" code only handles up to 255 bytes, so
+# SMALL_SIZE can be no larger than 255.
+
+#define SMALL_SIZE 200
+
+.if (SMALL_SIZE > 255)
+.error "SMALL_ SIZE must be < 256"
+.endif
+
+# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
+
+.global crc_pcl
+crc_pcl:
+#define bufp %rdi
+#define bufp_dw %edi
+#define bufp_w %di
+#define bufp_b %dil
+#define bufptmp %rcx
+#define block_0 %rcx
+#define block_1 %rdx
+#define block_2 %r11
+#define len %rsi
+#define len_dw %esi
+#define len_w %si
+#define len_b %sil
+#define crc_init_arg %rdx
+#define tmp %rbx
+#define crc_init %r8
+#define crc_init_dw %r8d
+#define crc1 %r9
+#define crc2 %r10
+
+ pushq %rbx
+ pushq %rdi
+ pushq %rsi
+
+ ## Move crc_init for Linux to a different
+ mov crc_init_arg, crc_init
+
+ ################################################################
+ ## 1) ALIGN:
+ ################################################################
+
+ mov bufp, bufptmp # rdi = *buf
+ neg bufp
+ and $7, bufp # calculate the unalignment amount of
+ # the address
+ je proc_block # Skip if aligned
+
+ ## If len is less than 8 and we're unaligned, we need to jump
+ ## to special code to avoid reading beyond the end of the buffer
+ cmp $8, len
+ jae do_align
+ # less_than_8 expects length in upper 3 bits of len_dw
+ # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
+ shl $32-3+1, len_dw
+ jmp less_than_8_post_shl1
+
+do_align:
+ #### Calculate CRC of unaligned bytes of the buffer (if any)
+ movq (bufptmp), tmp # load a quadward from the buffer
+ add bufp, bufptmp # align buffer pointer for quadword
+ # processing
+ sub bufp, len # update buffer length
+align_loop:
+ crc32b %bl, crc_init_dw # compute crc32 of 1-byte
+ shr $8, tmp # get next byte
+ dec bufp
+ jne align_loop
+
+proc_block:
+
+ ################################################################
+ ## 2) PROCESS BLOCKS:
+ ################################################################
+
+ ## compute num of bytes to be processed
+ movq len, tmp # save num bytes in tmp
+
+ cmpq $128*24, len
+ jae full_block
+
+continue_block:
+ cmpq $SMALL_SIZE, len
+ jb small
+
+ ## len < 128*24
+ movq $2731, %rax # 2731 = ceil(2^16 / 24)
+ mul len_dw
+ shrq $16, %rax
+
+ ## eax contains floor(bytes / 24) = num 24-byte chunks to do
+
+ ## process rax 24-byte chunks (128 >= rax >= 0)
+
+ ## compute end address of each block
+ ## block 0 (base addr + RAX * 8)
+ ## block 1 (base addr + RAX * 16)
+ ## block 2 (base addr + RAX * 24)
+ lea (bufptmp, %rax, 8), block_0
+ lea (block_0, %rax, 8), block_1
+ lea (block_1, %rax, 8), block_2
+
+ xor crc1, crc1
+ xor crc2, crc2
+
+ ## branch into array
+ lea jump_table(%rip), bufp
+ movzxw (bufp, %rax, 2), len
+ offset=crc_array-jump_table
+ lea offset(bufp, len, 1), bufp
+ jmp *bufp
+
+ ################################################################
+ ## 2a) PROCESS FULL BLOCKS:
+ ################################################################
+full_block:
+ movq $128,%rax
+ lea 128*8*2(block_0), block_1
+ lea 128*8*3(block_0), block_2
+ add $128*8*1, block_0
+
+ xor crc1,crc1
+ xor crc2,crc2
+
+ # Fall thruogh into top of crc array (crc_128)
+
+ ################################################################
+ ## 3) CRC Array:
+ ################################################################
+
+crc_array:
+ i=128
+.rept 128-1
+.altmacro
+LABEL crc_ %i
+.noaltmacro
+ crc32q -i*8(block_0), crc_init
+ crc32q -i*8(block_1), crc1
+ crc32q -i*8(block_2), crc2
+ i=(i-1)
+.endr
+
+.altmacro
+LABEL crc_ %i
+.noaltmacro
+ crc32q -i*8(block_0), crc_init
+ crc32q -i*8(block_1), crc1
+# SKIP crc32 -i*8(block_2), crc2 ; Don't do this one yet
+
+ mov block_2, block_0
+
+ ################################################################
+ ## 4) Combine three results:
+ ################################################################
+
+ lea (K_table-16)(%rip), bufp # first entry is for idx 1
+ shlq $3, %rax # rax *= 8
+ subq %rax, tmp # tmp -= rax*8
+ shlq $1, %rax
+ subq %rax, tmp # tmp -= rax*16
+ # (total tmp -= rax*24)
+ addq %rax, bufp
+
+ movdqa (bufp), %xmm0 # 2 consts: K1:K2
+
+ movq crc_init, %xmm1 # CRC for block 1
+ pclmulqdq $0x00,%xmm0,%xmm1 # Multiply by K2
+
+ movq crc1, %xmm2 # CRC for block 2
+ pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1
+
+ pxor %xmm2,%xmm1
+ movq %xmm1, %rax
+ xor -i*8(block_2), %rax
+ mov crc2, crc_init
+ crc32 %rax, crc_init
+
+################################################################
+## 5) Check for end:
+################################################################
+
+LABEL crc_ 0
+ mov tmp, len
+ cmp $128*24, tmp
+ jae full_block
+ cmp $24, tmp
+ jae continue_block
+
+less_than_24:
+ shl $32-4, len_dw # less_than_16 expects length
+ # in upper 4 bits of len_dw
+ jnc less_than_16
+ crc32q (bufptmp), crc_init
+ crc32q 8(bufptmp), crc_init
+ jz do_return
+ add $16, bufptmp
+ # len is less than 8 if we got here
+ # less_than_8 expects length in upper 3 bits of len_dw
+ # less_than_8_post_shl1 expects length = carryflag * 8 + len_dw[31:30]
+ shl $2, len_dw
+ jmp less_than_8_post_shl1
+
+ #######################################################################
+ ## 6) LESS THAN 256-bytes REMAIN AT THIS POINT (8-bits of len are full)
+ #######################################################################
+small:
+ shl $32-8, len_dw # Prepare len_dw for less_than_256
+ j=256
+.rept 5 # j = {256, 128, 64, 32, 16}
+.altmacro
+LABEL less_than_ %j # less_than_j: Length should be in
+ # upper lg(j) bits of len_dw
+ j=(j/2)
+ shl $1, len_dw # Get next MSB
+ JNC_LESS_THAN %j
+.noaltmacro
+ i=0
+.rept (j/8)
+ crc32q i(bufptmp), crc_init # Compute crc32 of 8-byte data
+ i=i+8
+.endr
+ jz do_return # Return if remaining length is zero
+ add $j, bufptmp # Advance buf
+.endr
+
+less_than_8: # Length should be stored in
+ # upper 3 bits of len_dw
+ shl $1, len_dw
+less_than_8_post_shl1:
+ jnc less_than_4
+ crc32l (bufptmp), crc_init_dw # CRC of 4 bytes
+ jz do_return # return if remaining data is zero
+ add $4, bufptmp
+less_than_4: # Length should be stored in
+ # upper 2 bits of len_dw
+ shl $1, len_dw
+ jnc less_than_2
+ crc32w (bufptmp), crc_init_dw # CRC of 2 bytes
+ jz do_return # return if remaining data is zero
+ add $2, bufptmp
+less_than_2: # Length should be stored in the MSB
+ # of len_dw
+ shl $1, len_dw
+ jnc less_than_1
+ crc32b (bufptmp), crc_init_dw # CRC of 1 byte
+less_than_1: # Length should be zero
+do_return:
+ movq crc_init, %rax
+ popq %rsi
+ popq %rdi
+ popq %rbx
+ ret
+
+ ################################################################
+ ## jump table Table is 129 entries x 2 bytes each
+ ################################################################
+.align 4
+jump_table:
+ i=0
+.rept 129
+.altmacro
+JMPTBL_ENTRY %i
+.noaltmacro
+ i=i+1
+.endr
+ ################################################################
+ ## PCLMULQDQ tables
+ ## Table is 128 entries x 2 quad words each
+ ################################################################
+.data
+.align 64
+K_table:
+ .quad 0x14cd00bd6,0x105ec76f0
+ .quad 0x0ba4fc28e,0x14cd00bd6
+ .quad 0x1d82c63da,0x0f20c0dfe
+ .quad 0x09e4addf8,0x0ba4fc28e
+ .quad 0x039d3b296,0x1384aa63a
+ .quad 0x102f9b8a2,0x1d82c63da
+ .quad 0x14237f5e6,0x01c291d04
+ .quad 0x00d3b6092,0x09e4addf8
+ .quad 0x0c96cfdc0,0x0740eef02
+ .quad 0x18266e456,0x039d3b296
+ .quad 0x0daece73e,0x0083a6eec
+ .quad 0x0ab7aff2a,0x102f9b8a2
+ .quad 0x1248ea574,0x1c1733996
+ .quad 0x083348832,0x14237f5e6
+ .quad 0x12c743124,0x02ad91c30
+ .quad 0x0b9e02b86,0x00d3b6092
+ .quad 0x018b33a4e,0x06992cea2
+ .quad 0x1b331e26a,0x0c96cfdc0
+ .quad 0x17d35ba46,0x07e908048
+ .quad 0x1bf2e8b8a,0x18266e456
+ .quad 0x1a3e0968a,0x11ed1f9d8
+ .quad 0x0ce7f39f4,0x0daece73e
+ .quad 0x061d82e56,0x0f1d0f55e
+ .quad 0x0d270f1a2,0x0ab7aff2a
+ .quad 0x1c3f5f66c,0x0a87ab8a8
+ .quad 0x12ed0daac,0x1248ea574
+ .quad 0x065863b64,0x08462d800
+ .quad 0x11eef4f8e,0x083348832
+ .quad 0x1ee54f54c,0x071d111a8
+ .quad 0x0b3e32c28,0x12c743124
+ .quad 0x0064f7f26,0x0ffd852c6
+ .quad 0x0dd7e3b0c,0x0b9e02b86
+ .quad 0x0f285651c,0x0dcb17aa4
+ .quad 0x010746f3c,0x018b33a4e
+ .quad 0x1c24afea4,0x0f37c5aee
+ .quad 0x0271d9844,0x1b331e26a
+ .quad 0x08e766a0c,0x06051d5a2
+ .quad 0x093a5f730,0x17d35ba46
+ .quad 0x06cb08e5c,0x11d5ca20e
+ .quad 0x06b749fb2,0x1bf2e8b8a
+ .quad 0x1167f94f2,0x021f3d99c
+ .quad 0x0cec3662e,0x1a3e0968a
+ .quad 0x19329634a,0x08f158014
+ .quad 0x0e6fc4e6a,0x0ce7f39f4
+ .quad 0x08227bb8a,0x1a5e82106
+ .quad 0x0b0cd4768,0x061d82e56
+ .quad 0x13c2b89c4,0x188815ab2
+ .quad 0x0d7a4825c,0x0d270f1a2
+ .quad 0x10f5ff2ba,0x105405f3e
+ .quad 0x00167d312,0x1c3f5f66c
+ .quad 0x0f6076544,0x0e9adf796
+ .quad 0x026f6a60a,0x12ed0daac
+ .quad 0x1a2adb74e,0x096638b34
+ .quad 0x19d34af3a,0x065863b64
+ .quad 0x049c3cc9c,0x1e50585a0
+ .quad 0x068bce87a,0x11eef4f8e
+ .quad 0x1524fa6c6,0x19f1c69dc
+ .quad 0x16cba8aca,0x1ee54f54c
+ .quad 0x042d98888,0x12913343e
+ .quad 0x1329d9f7e,0x0b3e32c28
+ .quad 0x1b1c69528,0x088f25a3a
+ .quad 0x02178513a,0x0064f7f26
+ .quad 0x0e0ac139e,0x04e36f0b0
+ .quad 0x0170076fa,0x0dd7e3b0c
+ .quad 0x141a1a2e2,0x0bd6f81f8
+ .quad 0x16ad828b4,0x0f285651c
+ .quad 0x041d17b64,0x19425cbba
+ .quad 0x1fae1cc66,0x010746f3c
+ .quad 0x1a75b4b00,0x18db37e8a
+ .quad 0x0f872e54c,0x1c24afea4
+ .quad 0x01e41e9fc,0x04c144932
+ .quad 0x086d8e4d2,0x0271d9844
+ .quad 0x160f7af7a,0x052148f02
+ .quad 0x05bb8f1bc,0x08e766a0c
+ .quad 0x0a90fd27a,0x0a3c6f37a
+ .quad 0x0b3af077a,0x093a5f730
+ .quad 0x04984d782,0x1d22c238e
+ .quad 0x0ca6ef3ac,0x06cb08e5c
+ .quad 0x0234e0b26,0x063ded06a
+ .quad 0x1d88abd4a,0x06b749fb2
+ .quad 0x04597456a,0x04d56973c
+ .quad 0x0e9e28eb4,0x1167f94f2
+ .quad 0x07b3ff57a,0x19385bf2e
+ .quad 0x0c9c8b782,0x0cec3662e
+ .quad 0x13a9cba9e,0x0e417f38a
+ .quad 0x093e106a4,0x19329634a
+ .quad 0x167001a9c,0x14e727980
+ .quad 0x1ddffc5d4,0x0e6fc4e6a
+ .quad 0x00df04680,0x0d104b8fc
+ .quad 0x02342001e,0x08227bb8a
+ .quad 0x00a2a8d7e,0x05b397730
+ .quad 0x168763fa6,0x0b0cd4768
+ .quad 0x1ed5a407a,0x0e78eb416
+ .quad 0x0d2c3ed1a,0x13c2b89c4
+ .quad 0x0995a5724,0x1641378f0
+ .quad 0x19b1afbc4,0x0d7a4825c
+ .quad 0x109ffedc0,0x08d96551c
+ .quad 0x0f2271e60,0x10f5ff2ba
+ .quad 0x00b0bf8ca,0x00bf80dd2
+ .quad 0x123888b7a,0x00167d312
+ .quad 0x1e888f7dc,0x18dcddd1c
+ .quad 0x002ee03b2,0x0f6076544
+ .quad 0x183e8d8fe,0x06a45d2b2
+ .quad 0x133d7a042,0x026f6a60a
+ .quad 0x116b0f50c,0x1dd3e10e8
+ .quad 0x05fabe670,0x1a2adb74e
+ .quad 0x130004488,0x0de87806c
+ .quad 0x000bcf5f6,0x19d34af3a
+ .quad 0x18f0c7078,0x014338754
+ .quad 0x017f27698,0x049c3cc9c
+ .quad 0x058ca5f00,0x15e3e77ee
+ .quad 0x1af900c24,0x068bce87a
+ .quad 0x0b5cfca28,0x0dd07448e
+ .quad 0x0ded288f8,0x1524fa6c6
+ .quad 0x059f229bc,0x1d8048348
+ .quad 0x06d390dec,0x16cba8aca
+ .quad 0x037170390,0x0a3e3e02c
+ .quad 0x06353c1cc,0x042d98888
+ .quad 0x0c4584f5c,0x0d73c7bea
+ .quad 0x1f16a3418,0x1329d9f7e
+ .quad 0x0531377e2,0x185137662
+ .quad 0x1d8d9ca7c,0x1b1c69528
+ .quad 0x0b25b29f2,0x18a08b5bc
+ .quad 0x19fb2a8b0,0x02178513a
+ .quad 0x1a08fe6ac,0x1da758ae0
+ .quad 0x045cddf4e,0x0e0ac139e
+ .quad 0x1a91647f2,0x169cf9eb0
+ .quad 0x1a0f717c4,0x0170076fa
diff --git a/arch/x86/crypto/glue_helper-asm-avx.S b/arch/x86/crypto/glue_helper-asm-avx.S
new file mode 100644
index 00000000000..f7b6ea2ddfd
--- /dev/null
+++ b/arch/x86/crypto/glue_helper-asm-avx.S
@@ -0,0 +1,91 @@
+/*
+ * Shared glue code for 128bit block ciphers, AVX assembler macros
+ *
+ * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define load_8way(src, x0, x1, x2, x3, x4, x5, x6, x7) \
+ vmovdqu (0*16)(src), x0; \
+ vmovdqu (1*16)(src), x1; \
+ vmovdqu (2*16)(src), x2; \
+ vmovdqu (3*16)(src), x3; \
+ vmovdqu (4*16)(src), x4; \
+ vmovdqu (5*16)(src), x5; \
+ vmovdqu (6*16)(src), x6; \
+ vmovdqu (7*16)(src), x7;
+
+#define store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7) \
+ vmovdqu x0, (0*16)(dst); \
+ vmovdqu x1, (1*16)(dst); \
+ vmovdqu x2, (2*16)(dst); \
+ vmovdqu x3, (3*16)(dst); \
+ vmovdqu x4, (4*16)(dst); \
+ vmovdqu x5, (5*16)(dst); \
+ vmovdqu x6, (6*16)(dst); \
+ vmovdqu x7, (7*16)(dst);
+
+#define store_cbc_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
+ vpxor (0*16)(src), x1, x1; \
+ vpxor (1*16)(src), x2, x2; \
+ vpxor (2*16)(src), x3, x3; \
+ vpxor (3*16)(src), x4, x4; \
+ vpxor (4*16)(src), x5, x5; \
+ vpxor (5*16)(src), x6, x6; \
+ vpxor (6*16)(src), x7, x7; \
+ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
+
+#define inc_le128(x, minus_one, tmp) \
+ vpcmpeqq minus_one, x, tmp; \
+ vpsubq minus_one, x, x; \
+ vpslldq $8, tmp, tmp; \
+ vpsubq tmp, x, x;
+
+#define load_ctr_8way(iv, bswap, x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2) \
+ vpcmpeqd t0, t0, t0; \
+ vpsrldq $8, t0, t0; /* low: -1, high: 0 */ \
+ vmovdqa bswap, t1; \
+ \
+ /* load IV and byteswap */ \
+ vmovdqu (iv), x7; \
+ vpshufb t1, x7, x0; \
+ \
+ /* construct IVs */ \
+ inc_le128(x7, t0, t2); \
+ vpshufb t1, x7, x1; \
+ inc_le128(x7, t0, t2); \
+ vpshufb t1, x7, x2; \
+ inc_le128(x7, t0, t2); \
+ vpshufb t1, x7, x3; \
+ inc_le128(x7, t0, t2); \
+ vpshufb t1, x7, x4; \
+ inc_le128(x7, t0, t2); \
+ vpshufb t1, x7, x5; \
+ inc_le128(x7, t0, t2); \
+ vpshufb t1, x7, x6; \
+ inc_le128(x7, t0, t2); \
+ vmovdqa x7, t2; \
+ vpshufb t1, x7, x7; \
+ inc_le128(t2, t0, t1); \
+ vmovdqu t2, (iv);
+
+#define store_ctr_8way(src, dst, x0, x1, x2, x3, x4, x5, x6, x7) \
+ vpxor (0*16)(src), x0, x0; \
+ vpxor (1*16)(src), x1, x1; \
+ vpxor (2*16)(src), x2, x2; \
+ vpxor (3*16)(src), x3, x3; \
+ vpxor (4*16)(src), x4, x4; \
+ vpxor (5*16)(src), x5, x5; \
+ vpxor (6*16)(src), x6, x6; \
+ vpxor (7*16)(src), x7, x7; \
+ store_8way(dst, x0, x1, x2, x3, x4, x5, x6, x7);
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 30b3927bd73..22ce4f683e5 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -221,16 +221,16 @@ static void glue_ctr_crypt_final_128bit(const common_glue_ctr_func_t fn_ctr,
u8 *src = (u8 *)walk->src.virt.addr;
u8 *dst = (u8 *)walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
- u128 ctrblk;
+ le128 ctrblk;
u128 tmp;
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
+ be128_to_le128(&ctrblk, (be128 *)walk->iv);
memcpy(&tmp, src, nbytes);
fn_ctr(ctx, &tmp, &tmp, &ctrblk);
memcpy(dst, &tmp, nbytes);
- u128_to_be128((be128 *)walk->iv, &ctrblk);
+ le128_to_be128((be128 *)walk->iv, &ctrblk);
}
EXPORT_SYMBOL_GPL(glue_ctr_crypt_final_128bit);
@@ -243,11 +243,11 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
unsigned int nbytes = walk->nbytes;
u128 *src = (u128 *)walk->src.virt.addr;
u128 *dst = (u128 *)walk->dst.virt.addr;
- u128 ctrblk;
+ le128 ctrblk;
unsigned int num_blocks, func_bytes;
unsigned int i;
- be128_to_u128(&ctrblk, (be128 *)walk->iv);
+ be128_to_le128(&ctrblk, (be128 *)walk->iv);
/* Process multi-block batch */
for (i = 0; i < gctx->num_funcs; i++) {
@@ -269,7 +269,7 @@ static unsigned int __glue_ctr_crypt_128bit(const struct common_glue_ctx *gctx,
}
done:
- u128_to_be128((be128 *)walk->iv, &ctrblk);
+ le128_to_be128((be128 *)walk->iv, &ctrblk);
return nbytes;
}
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 504106bf04a..02b0e9fe997 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -24,7 +24,16 @@
*
*/
+#include "glue_helper-asm-avx.S"
+
.file "serpent-avx-x86_64-asm_64.S"
+
+.data
+.align 16
+
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
.text
#define CTX %rdi
@@ -550,51 +559,27 @@
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
-#define read_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
- vmovdqu (0*4*4)(in), x0; \
- vmovdqu (1*4*4)(in), x1; \
- vmovdqu (2*4*4)(in), x2; \
- vmovdqu (3*4*4)(in), x3; \
- \
+#define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-#define write_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- vmovdqu x0, (0*4*4)(out); \
- vmovdqu x1, (1*4*4)(out); \
- vmovdqu x2, (2*4*4)(out); \
- vmovdqu x3, (3*4*4)(out);
-
-#define xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- vpxor (0*4*4)(out), x0, x0; \
- vmovdqu x0, (0*4*4)(out); \
- vpxor (1*4*4)(out), x1, x1; \
- vmovdqu x1, (1*4*4)(out); \
- vpxor (2*4*4)(out), x2, x2; \
- vmovdqu x2, (2*4*4)(out); \
- vpxor (3*4*4)(out), x3, x3; \
- vmovdqu x3, (3*4*4)(out);
+#define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
+ transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
-.global __serpent_enc_blk_8way_avx
-.type __serpent_enc_blk_8way_avx,@function;
+.type __serpent_enc_blk8_avx,@function;
-__serpent_enc_blk_8way_avx:
+__serpent_enc_blk8_avx:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
+ * output:
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
*/
vpcmpeqd RNOT, RNOT, RNOT;
- leaq (4*4*4)(%rdx), %rax;
- read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+ read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 0);
S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
@@ -630,38 +615,26 @@ __serpent_enc_blk_8way_avx:
S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
- leaq (4*4*4)(%rsi), %rax;
-
- testb %cl, %cl;
- jnz __enc_xor8;
-
- write_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- write_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
-
- ret;
-
-__enc_xor8:
- xor_blocks(%rsi, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+ write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
.align 8
-.global serpent_dec_blk_8way_avx
-.type serpent_dec_blk_8way_avx,@function;
+.type __serpent_dec_blk8_avx,@function;
-serpent_dec_blk_8way_avx:
+__serpent_dec_blk8_avx:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
+ * output:
+ * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: decrypted blocks
*/
vpcmpeqd RNOT, RNOT, RNOT;
- leaq (4*4*4)(%rdx), %rax;
- read_blocks(%rdx, RA1, RB1, RC1, RD1, RK0, RK1, RK2);
- read_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
+ read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
+ read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
K2(RA, RB, RC, RD, RE, 32);
SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
@@ -697,8 +670,85 @@ serpent_dec_blk_8way_avx:
SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
- leaq (4*4*4)(%rsi), %rax;
- write_blocks(%rsi, RC1, RD1, RB1, RE1, RK0, RK1, RK2);
- write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+ write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
+ write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
+
+ ret;
+
+.align 8
+.global serpent_ecb_enc_8way_avx
+.type serpent_ecb_enc_8way_avx,@function;
+
+serpent_ecb_enc_8way_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __serpent_enc_blk8_avx;
+
+ store_8way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ ret;
+
+.align 8
+.global serpent_ecb_dec_8way_avx
+.type serpent_ecb_dec_8way_avx,@function;
+
+serpent_ecb_dec_8way_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __serpent_dec_blk8_avx;
+
+ store_8way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ ret;
+
+.align 8
+.global serpent_cbc_dec_8way_avx
+.type serpent_cbc_dec_8way_avx,@function;
+
+serpent_cbc_dec_8way_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __serpent_dec_blk8_avx;
+
+ store_cbc_8way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
+
+ ret;
+
+.align 8
+.global serpent_ctr_8way_avx
+.type serpent_ctr_8way_avx,@function;
+
+serpent_ctr_8way_avx:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (little endian, 128bit)
+ */
+
+ load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
+ RD2, RK0, RK1, RK2);
+
+ call __serpent_enc_blk8_avx;
+
+ store_ctr_8way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
ret;
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 3f543a04cf1..52abaaf28e7 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -42,55 +42,24 @@
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
-static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
-{
- u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
- unsigned int j;
-
- for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
- ivs[j] = src[j];
-
- serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (j = 0; j < SERPENT_PARALLEL_BLOCKS - 1; j++)
- u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
-}
-
-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
- u128_to_be128(&ctrblk, iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblk, iv);
+ le128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
-static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
- u128 *iv)
-{
- be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
- unsigned int i;
-
- for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- u128_to_be128(&ctrblks[i], iv);
- u128_inc(iv);
- }
-
- serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
-}
-
static const struct common_glue_ctx serpent_enc = {
.num_funcs = 2,
.fpu_blocks_limit = SERPENT_PARALLEL_BLOCKS,
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_enc_blk_xway) }
+ .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_enc_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_encrypt) }
@@ -103,7 +72,7 @@ static const struct common_glue_ctx serpent_ctr = {
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr_xway) }
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_ctr_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(serpent_crypt_ctr) }
@@ -116,7 +85,7 @@ static const struct common_glue_ctx serpent_dec = {
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_dec_blk_xway) }
+ .fn_u = { .ecb = GLUE_FUNC_CAST(serpent_ecb_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(__serpent_decrypt) }
@@ -129,7 +98,7 @@ static const struct common_glue_ctx serpent_dec_cbc = {
.funcs = { {
.num_blocks = SERPENT_PARALLEL_BLOCKS,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_decrypt_cbc_xway) }
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(serpent_cbc_dec_8way_avx) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__serpent_decrypt) }
@@ -193,7 +162,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+ serpent_ecb_enc_8way_avx(ctx->ctx, srcdst, srcdst);
return;
}
@@ -210,7 +179,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
- serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+ serpent_ecb_dec_8way_avx(ctx->ctx, srcdst, srcdst);
return;
}
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index 9107a9908c4..97a356ece24 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -59,19 +59,19 @@ static void serpent_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
}
-static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+static void serpent_crypt_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
- u128_to_be128(&ctrblk, iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblk, iv);
+ le128_inc(iv);
__serpent_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, src, (u128 *)&ctrblk);
}
static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
- u128 *iv)
+ le128 *iv)
{
be128 ctrblks[SERPENT_PARALLEL_BLOCKS];
unsigned int i;
@@ -80,8 +80,8 @@ static void serpent_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
if (dst != src)
dst[i] = src[i];
- u128_to_be128(&ctrblks[i], iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblks[i], iv);
+ le128_inc(iv);
}
serpent_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 1585abb13dd..ebac16bfa83 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -23,7 +23,16 @@
*
*/
+#include "glue_helper-asm-avx.S"
+
.file "twofish-avx-x86_64-asm_64.S"
+
+.data
+.align 16
+
+.Lbswap128_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
.text
/* structure of crypto context */
@@ -217,69 +226,45 @@
vpunpcklqdq x3, t2, x2; \
vpunpckhqdq x3, t2, x3;
-#define inpack_blocks(in, x0, x1, x2, x3, wkey, t0, t1, t2) \
- vpxor (0*4*4)(in), wkey, x0; \
- vpxor (1*4*4)(in), wkey, x1; \
- vpxor (2*4*4)(in), wkey, x2; \
- vpxor (3*4*4)(in), wkey, x3; \
+#define inpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
+ vpxor x0, wkey, x0; \
+ vpxor x1, wkey, x1; \
+ vpxor x2, wkey, x2; \
+ vpxor x3, wkey, x3; \
\
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
-#define outunpack_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
- transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
- \
- vpxor x0, wkey, x0; \
- vmovdqu x0, (0*4*4)(out); \
- vpxor x1, wkey, x1; \
- vmovdqu x1, (1*4*4)(out); \
- vpxor x2, wkey, x2; \
- vmovdqu x2, (2*4*4)(out); \
- vpxor x3, wkey, x3; \
- vmovdqu x3, (3*4*4)(out);
-
-#define outunpack_xor_blocks(out, x0, x1, x2, x3, wkey, t0, t1, t2) \
+#define outunpack_blocks(x0, x1, x2, x3, wkey, t0, t1, t2) \
transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
\
- vpxor x0, wkey, x0; \
- vpxor (0*4*4)(out), x0, x0; \
- vmovdqu x0, (0*4*4)(out); \
- vpxor x1, wkey, x1; \
- vpxor (1*4*4)(out), x1, x1; \
- vmovdqu x1, (1*4*4)(out); \
- vpxor x2, wkey, x2; \
- vpxor (2*4*4)(out), x2, x2; \
- vmovdqu x2, (2*4*4)(out); \
- vpxor x3, wkey, x3; \
- vpxor (3*4*4)(out), x3, x3; \
- vmovdqu x3, (3*4*4)(out);
+ vpxor x0, wkey, x0; \
+ vpxor x1, wkey, x1; \
+ vpxor x2, wkey, x2; \
+ vpxor x3, wkey, x3;
.align 8
-.global __twofish_enc_blk_8way
-.type __twofish_enc_blk_8way,@function;
+.type __twofish_enc_blk8,@function;
-__twofish_enc_blk_8way:
+__twofish_enc_blk8:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
- * %rcx: bool, if true: xor output
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
+ * output:
+ * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
*/
+ vmovdqu w(CTX), RK1;
+
pushq %rbp;
pushq %rbx;
pushq %rcx;
- vmovdqu w(CTX), RK1;
-
- leaq (4*4*4)(%rdx), %rax;
- inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+ inpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
preload_rgi(RA1);
rotate_1l(RD1);
- inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+ inpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
rotate_1l(RD2);
- movq %rsi, %r11;
-
encrypt_cycle(0);
encrypt_cycle(1);
encrypt_cycle(2);
@@ -295,47 +280,33 @@ __twofish_enc_blk_8way:
popq %rbx;
popq %rbp;
- leaq (4*4*4)(%r11), %rax;
-
- testb %cl, %cl;
- jnz __enc_xor8;
-
- outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
- outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
-
- ret;
-
-__enc_xor8:
- outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
- outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
ret;
.align 8
-.global twofish_dec_blk_8way
-.type twofish_dec_blk_8way,@function;
+.type __twofish_dec_blk8,@function;
-twofish_dec_blk_8way:
+__twofish_dec_blk8:
/* input:
* %rdi: ctx, CTX
- * %rsi: dst
- * %rdx: src
+ * RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
+ * output:
+ * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks
*/
+ vmovdqu (w+4*4)(CTX), RK1;
+
pushq %rbp;
pushq %rbx;
- vmovdqu (w+4*4)(CTX), RK1;
-
- leaq (4*4*4)(%rdx), %rax;
- inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+ inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
preload_rgi(RC1);
rotate_1l(RA1);
- inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+ inpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
rotate_1l(RA2);
- movq %rsi, %r11;
-
decrypt_cycle(7);
decrypt_cycle(6);
decrypt_cycle(5);
@@ -350,8 +321,103 @@ twofish_dec_blk_8way:
popq %rbx;
popq %rbp;
- leaq (4*4*4)(%r11), %rax;
- outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
- outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+ outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+
+ ret;
+
+.align 8
+.global twofish_ecb_enc_8way
+.type twofish_ecb_enc_8way,@function;
+
+twofish_ecb_enc_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ movq %rsi, %r11;
+
+ load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ call __twofish_enc_blk8;
+
+ store_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ ret;
+
+.align 8
+.global twofish_ecb_dec_8way
+.type twofish_ecb_dec_8way,@function;
+
+twofish_ecb_dec_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ movq %rsi, %r11;
+
+ load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ call __twofish_dec_blk8;
+
+ store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ ret;
+
+.align 8
+.global twofish_cbc_dec_8way
+.type twofish_cbc_dec_8way,@function;
+
+twofish_cbc_dec_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ */
+
+ pushq %r12;
+
+ movq %rsi, %r11;
+ movq %rdx, %r12;
+
+ load_8way(%rdx, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ call __twofish_dec_blk8;
+
+ store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+ popq %r12;
+
+ ret;
+
+.align 8
+.global twofish_ctr_8way
+.type twofish_ctr_8way,@function;
+
+twofish_ctr_8way:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (little endian, 128bit)
+ */
+
+ pushq %r12;
+
+ movq %rsi, %r11;
+ movq %rdx, %r12;
+
+ load_ctr_8way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
+ RD2, RX0, RX1, RY0);
+
+ call __twofish_enc_blk8;
+
+ store_ctr_8way(%r12, %r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+ popq %r12;
ret;
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index e7708b5442e..94ac91d26e4 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -45,66 +45,23 @@
#define TWOFISH_PARALLEL_BLOCKS 8
-static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_3way(ctx, dst, src, false);
-}
-
/* 8-way parallel cipher functions */
-asmlinkage void __twofish_enc_blk_8way(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void twofish_dec_blk_8way(struct twofish_ctx *ctx, u8 *dst,
+asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
-static inline void twofish_enc_blk_xway(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_8way(ctx, dst, src, false);
-}
-
-static inline void twofish_enc_blk_xway_xor(struct twofish_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __twofish_enc_blk_8way(ctx, dst, src, true);
-}
+asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
+ const u8 *src, le128 *iv);
-static inline void twofish_dec_blk_xway(struct twofish_ctx *ctx, u8 *dst,
+static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
- twofish_dec_blk_8way(ctx, dst, src);
-}
-
-static void twofish_dec_blk_cbc_xway(void *ctx, u128 *dst, const u128 *src)
-{
- u128 ivs[TWOFISH_PARALLEL_BLOCKS - 1];
- unsigned int j;
-
- for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
- ivs[j] = src[j];
-
- twofish_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
-
- for (j = 0; j < TWOFISH_PARALLEL_BLOCKS - 1; j++)
- u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+ __twofish_enc_blk_3way(ctx, dst, src, false);
}
-static void twofish_enc_blk_ctr_xway(void *ctx, u128 *dst, const u128 *src,
- u128 *iv)
-{
- be128 ctrblks[TWOFISH_PARALLEL_BLOCKS];
- unsigned int i;
-
- for (i = 0; i < TWOFISH_PARALLEL_BLOCKS; i++) {
- if (dst != src)
- dst[i] = src[i];
-
- u128_to_be128(&ctrblks[i], iv);
- u128_inc(iv);
- }
-
- twofish_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
-}
static const struct common_glue_ctx twofish_enc = {
.num_funcs = 3,
@@ -112,7 +69,7 @@ static const struct common_glue_ctx twofish_enc = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_xway) }
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
@@ -128,7 +85,7 @@ static const struct common_glue_ctx twofish_ctr = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_xway) }
+ .fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
@@ -144,7 +101,7 @@ static const struct common_glue_ctx twofish_dec = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_xway) }
+ .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
@@ -160,7 +117,7 @@ static const struct common_glue_ctx twofish_dec_cbc = {
.funcs = { {
.num_blocks = TWOFISH_PARALLEL_BLOCKS,
- .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_xway) }
+ .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
@@ -227,7 +184,7 @@ static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
- twofish_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+ twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
return;
}
@@ -249,7 +206,7 @@ static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
if (nbytes == bsize * TWOFISH_PARALLEL_BLOCKS) {
- twofish_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+ twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
return;
}
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index aa3eb358b7e..13e63b3e1df 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -62,15 +62,15 @@ void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src)
}
EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
-void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
be128 ctrblk;
if (dst != src)
*dst = *src;
- u128_to_be128(&ctrblk, iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblk, iv);
+ le128_inc(iv);
twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
u128_xor(dst, dst, (u128 *)&ctrblk);
@@ -78,7 +78,7 @@ void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
- u128 *iv)
+ le128 *iv)
{
be128 ctrblks[3];
@@ -88,12 +88,12 @@ void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
dst[2] = src[2];
}
- u128_to_be128(&ctrblks[0], iv);
- u128_inc(iv);
- u128_to_be128(&ctrblks[1], iv);
- u128_inc(iv);
- u128_to_be128(&ctrblks[2], iv);
- u128_inc(iv);
+ le128_to_be128(&ctrblks[0], iv);
+ le128_inc(iv);
+ le128_to_be128(&ctrblks[1], iv);
+ le128_inc(iv);
+ le128_to_be128(&ctrblks[2], iv);
+ le128_inc(iv);
twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
}
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 07b3a68d2d2..a703af19c28 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -35,7 +35,7 @@
#undef WARN_OLD
#undef CORE_DUMP /* definitely broken */
-static int load_aout_binary(struct linux_binprm *, struct pt_regs *regs);
+static int load_aout_binary(struct linux_binprm *);
static int load_aout_library(struct file *);
#ifdef CORE_DUMP
@@ -260,9 +260,10 @@ static u32 __user *create_aout_tables(char __user *p, struct linux_binprm *bprm)
* These are the functions used to load a.out style executables and shared
* libraries. There is no binary dependent code anywhere else.
*/
-static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+static int load_aout_binary(struct linux_binprm *bprm)
{
unsigned long error, fd_offset, rlim;
+ struct pt_regs *regs = current_pt_regs();
struct exec ex;
int retval;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index efc6a958b71..a1daf4a6500 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -136,52 +136,6 @@ asmlinkage long sys32_sigsuspend(int history0, int history1, old_sigset_t mask)
return sigsuspend(&blocked);
}
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr,
- stack_ia32_t __user *uoss_ptr,
- struct pt_regs *regs)
-{
- stack_t uss, uoss;
- int ret, err = 0;
- mm_segment_t seg;
-
- if (uss_ptr) {
- u32 ptr;
-
- memset(&uss, 0, sizeof(stack_t));
- if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)))
- return -EFAULT;
-
- get_user_try {
- get_user_ex(ptr, &uss_ptr->ss_sp);
- get_user_ex(uss.ss_flags, &uss_ptr->ss_flags);
- get_user_ex(uss.ss_size, &uss_ptr->ss_size);
- } get_user_catch(err);
-
- if (err)
- return -EFAULT;
- uss.ss_sp = compat_ptr(ptr);
- }
- seg = get_fs();
- set_fs(KERNEL_DS);
- ret = do_sigaltstack((stack_t __force __user *) (uss_ptr ? &uss : NULL),
- (stack_t __force __user *) &uoss, regs->sp);
- set_fs(seg);
- if (ret >= 0 && uoss_ptr) {
- if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)))
- return -EFAULT;
-
- put_user_try {
- put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp);
- put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags);
- put_user_ex(uoss.ss_size, &uoss_ptr->ss_size);
- } put_user_catch(err);
-
- if (err)
- ret = -EFAULT;
- }
- return ret;
-}
-
/*
* Do a signal return; undo the signal stack.
*/
@@ -292,7 +246,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
unsigned int ax;
- struct pt_regs tregs;
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
@@ -306,8 +259,7 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
- tregs = *regs;
- if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+ if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
@@ -515,10 +467,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- put_user_ex(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 076745fc804..142c4ceff11 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -207,7 +207,7 @@ sysexit_from_sys_call:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
- sti
+ ENABLE_INTERRUPTS(CLBR_NONE)
movl %eax,%esi /* second arg, syscall return value */
cmpl $-MAX_ERRNO,%eax /* is it an error ? */
jbe 1f
@@ -217,7 +217,7 @@ sysexit_from_sys_call:
call __audit_syscall_exit
movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
- cli
+ DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
jz \exit
@@ -464,14 +464,18 @@ GLOBAL(\label)
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
- PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
PTREGSCALL stub32_execve, compat_sys_execve, %rcx
PTREGSCALL stub32_fork, sys_fork, %rdi
- PTREGSCALL stub32_clone, sys32_clone, %rdx
PTREGSCALL stub32_vfork, sys_vfork, %rdi
PTREGSCALL stub32_iopl, sys_iopl, %rsi
ALIGN
+GLOBAL(stub32_clone)
+ leaq sys_clone(%rip),%rax
+ mov %r8, %rcx
+ jmp ia32_ptregs_common
+
+ ALIGN
ia32_ptregs_common:
popq %r11
CFI_ENDPROC
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 86d68d1c880..d0b689ba7be 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,17 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
return ret;
}
-asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
- struct pt_regs *regs)
-{
- void __user *parent_tid = (void __user *)regs->dx;
- void __user *child_tid = (void __user *)regs->di;
-
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
/*
* Some system calls that need sign extended arguments. This could be
* done by a generic wrapper.
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 79fd8a3418f..7f669853317 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -1,30 +1,4 @@
-include include/asm-generic/Kbuild.asm
-header-y += boot.h
-header-y += bootparam.h
-header-y += debugreg.h
-header-y += e820.h
-header-y += hw_breakpoint.h
-header-y += hyperv.h
-header-y += ist.h
-header-y += ldt.h
-header-y += mce.h
-header-y += msr-index.h
-header-y += msr.h
-header-y += mtrr.h
-header-y += perf_regs.h
-header-y += posix_types_32.h
-header-y += posix_types_64.h
-header-y += posix_types_x32.h
-header-y += prctl.h
-header-y += processor-flags.h
-header-y += ptrace-abi.h
-header-y += sigcontext32.h
-header-y += svm.h
-header-y += ucontext.h
-header-y += vm86.h
-header-y += vmx.h
-header-y += vsyscall.h
genhdr-y += unistd_32.h
genhdr-y += unistd_64.h
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b6c3b821acf..722aa3b0462 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -172,23 +172,7 @@ static inline int atomic_add_negative(int i, atomic_t *v)
*/
static inline int atomic_add_return(int i, atomic_t *v)
{
-#ifdef CONFIG_M386
- int __i;
- unsigned long flags;
- if (unlikely(boot_cpu_data.x86 <= 3))
- goto no_xadd;
-#endif
- /* Modern 486+ processor */
return i + xadd(&v->counter, i);
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
- raw_local_irq_save(flags);
- __i = atomic_read(v);
- atomic_set(v, i + __i);
- raw_local_irq_restore(flags);
- return i + __i;
-#endif
}
/**
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index b13fe63bdc5..4fa687a47a6 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -1,14 +1,9 @@
#ifndef _ASM_X86_BOOT_H
#define _ASM_X86_BOOT_H
-/* Internal svga startup constants */
-#define NORMAL_VGA 0xffff /* 80x25 mode */
-#define EXTENDED_VGA 0xfffe /* 80x50 mode */
-#define ASK_VGA 0xfffd /* ask for it at bootup */
-
-#ifdef __KERNEL__
#include <asm/pgtable_types.h>
+#include <uapi/asm/boot.h>
/* Physical address where kernel should be loaded. */
#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
@@ -42,6 +37,4 @@
#define BOOT_STACK_SIZE 0x1000
#endif
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
new file mode 100644
index 00000000000..5b5e9cb774b
--- /dev/null
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -0,0 +1,38 @@
+#ifndef _ASM_X86_BOOTPARAM_UTILS_H
+#define _ASM_X86_BOOTPARAM_UTILS_H
+
+#include <asm/bootparam.h>
+
+/*
+ * This file is included from multiple environments. Do not
+ * add completing #includes to make it standalone.
+ */
+
+/*
+ * Deal with bootloaders which fail to initialize unknown fields in
+ * boot_params to zero. The list fields in this list are taken from
+ * analysis of kexec-tools; if other broken bootloaders initialize a
+ * different set of fields we will need to figure out how to disambiguate.
+ *
+ */
+static void sanitize_boot_params(struct boot_params *boot_params)
+{
+ if (boot_params->sentinel) {
+ /*fields in boot_params are not valid, clear them */
+ memset(&boot_params->olpc_ofw_header, 0,
+ (char *)&boot_params->alt_mem_k -
+ (char *)&boot_params->olpc_ofw_header);
+ memset(&boot_params->kbd_status, 0,
+ (char *)&boot_params->hdr -
+ (char *)&boot_params->kbd_status);
+ memset(&boot_params->_pad7[0], 0,
+ (char *)&boot_params->edd_mbr_sig_buffer[0] -
+ (char *)&boot_params->_pad7[0]);
+ memset(&boot_params->_pad8[0], 0,
+ (char *)&boot_params->eddbuf[0] -
+ (char *)&boot_params->_pad8[0]);
+ memset(&boot_params->_pad9[0], 0, sizeof(boot_params->_pad9));
+ }
+}
+
+#endif /* _ASM_X86_BOOTPARAM_UTILS_H */
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h
index 0bdbbb3b9ce..16a57f4ed64 100644
--- a/arch/x86/include/asm/clocksource.h
+++ b/arch/x86/include/asm/clocksource.h
@@ -8,6 +8,7 @@
#define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */
+#define VCLOCK_PVCLOCK 3 /* vDSO should use vread_pvclock. */
struct arch_clocksource_data {
int vclock_mode;
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 53f4b219336..f8bf2eecab8 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -34,9 +34,7 @@ static inline void set_64bit(volatile u64 *ptr, u64 value)
: "memory");
}
-#ifdef CONFIG_X86_CMPXCHG
#define __HAVE_ARCH_CMPXCHG 1
-#endif
#ifdef CONFIG_X86_CMPXCHG64
#define cmpxchg64(ptr, o, n) \
@@ -73,59 +71,6 @@ static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
return prev;
}
-#ifndef CONFIG_X86_CMPXCHG
-/*
- * Building a kernel capable running on 80386. It may be necessary to
- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
- * a function for each of the sizes we support.
- */
-
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
-
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
- unsigned long new, int size)
-{
- switch (size) {
- case 1:
- return cmpxchg_386_u8(ptr, old, new);
- case 2:
- return cmpxchg_386_u16(ptr, old, new);
- case 4:
- return cmpxchg_386_u32(ptr, old, new);
- }
- return old;
-}
-
-#define cmpxchg(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- if (likely(boot_cpu_data.x86 > 3)) \
- __ret = (__typeof__(*(ptr)))__cmpxchg((ptr), \
- (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
- else \
- __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \
- (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
- __ret; \
-})
-#define cmpxchg_local(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- if (likely(boot_cpu_data.x86 > 3)) \
- __ret = (__typeof__(*(ptr)))__cmpxchg_local((ptr), \
- (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
- else \
- __ret = (__typeof__(*(ptr)))cmpxchg_386((ptr), \
- (unsigned long)(o), (unsigned long)(n), \
- sizeof(*(ptr))); \
- __ret; \
-})
-#endif
-
#ifndef CONFIG_X86_CMPXCHG64
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/context_tracking.h
index d1ac07a2397..1616562683e 100644
--- a/arch/x86/include/asm/rcu.h
+++ b/arch/x86/include/asm/context_tracking.h
@@ -1,27 +1,26 @@
-#ifndef _ASM_X86_RCU_H
-#define _ASM_X86_RCU_H
+#ifndef _ASM_X86_CONTEXT_TRACKING_H
+#define _ASM_X86_CONTEXT_TRACKING_H
#ifndef __ASSEMBLY__
-
-#include <linux/rcupdate.h>
+#include <linux/context_tracking.h>
#include <asm/ptrace.h>
static inline void exception_enter(struct pt_regs *regs)
{
- rcu_user_exit();
+ user_exit();
}
static inline void exception_exit(struct pt_regs *regs)
{
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_CONTEXT_TRACKING
if (user_mode(regs))
- rcu_user_enter();
+ user_enter();
#endif
}
#else /* __ASSEMBLY__ */
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_CONTEXT_TRACKING
# define SCHEDULE_USER call schedule_user
#else
# define SCHEDULE_USER call schedule
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index 4564c8e28a3..5f9a1243190 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -28,6 +28,10 @@ struct x86_cpu {
#ifdef CONFIG_HOTPLUG_CPU
extern int arch_register_cpu(int num);
extern void arch_unregister_cpu(int);
+extern void __cpuinit start_cpu0(void);
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
+extern int _debug_hotplug_cpu(int cpu, int action);
+#endif
#endif
DECLARE_PER_CPU(int, cpu_state);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 8c297aa53ee..93fe929d1ce 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -167,6 +167,7 @@
#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB (6*32+24) /* NB performance counter extensions */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -202,6 +203,7 @@
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
+#define X86_FEATURE_TSC_ADJUST (9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1 (9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE (9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2 (9*32+ 5) /* AVX2 instructions */
@@ -308,15 +310,11 @@ extern const char * const x86_power_flags[32];
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE)
+#define cpu_has_perfctr_nb boot_cpu_has(X86_FEATURE_PERFCTR_NB)
#define cpu_has_cx8 boot_cpu_has(X86_FEATURE_CX8)
#define cpu_has_cx16 boot_cpu_has(X86_FEATURE_CX16)
#define cpu_has_eager_fpu boot_cpu_has(X86_FEATURE_EAGER_FPU)
-
-#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
-# define cpu_has_invlpg 1
-#else
-# define cpu_has_invlpg (boot_cpu_data.x86 > 3)
-#endif
+#define cpu_has_topoext boot_cpu_has(X86_FEATURE_TOPOEXT)
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/crypto/camellia.h b/arch/x86/include/asm/crypto/camellia.h
new file mode 100644
index 00000000000..98038add801
--- /dev/null
+++ b/arch/x86/include/asm/crypto/camellia.h
@@ -0,0 +1,82 @@
+#ifndef ASM_X86_CAMELLIA_H
+#define ASM_X86_CAMELLIA_H
+
+#include <linux/kernel.h>
+#include <linux/crypto.h>
+
+#define CAMELLIA_MIN_KEY_SIZE 16
+#define CAMELLIA_MAX_KEY_SIZE 32
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+#define CAMELLIA_PARALLEL_BLOCKS 2
+
+struct camellia_ctx {
+ u64 key_table[CAMELLIA_TABLE_BYTE_LEN / sizeof(u64)];
+ u32 key_length;
+};
+
+struct camellia_lrw_ctx {
+ struct lrw_table_ctx lrw_table;
+ struct camellia_ctx camellia_ctx;
+};
+
+struct camellia_xts_ctx {
+ struct camellia_ctx tweak_ctx;
+ struct camellia_ctx crypt_ctx;
+};
+
+extern int __camellia_setkey(struct camellia_ctx *cctx,
+ const unsigned char *key,
+ unsigned int key_len, u32 *flags);
+
+extern int lrw_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen);
+extern void lrw_camellia_exit_tfm(struct crypto_tfm *tfm);
+
+extern int xts_camellia_setkey(struct crypto_tfm *tfm, const u8 *key,
+ unsigned int keylen);
+
+/* regular block cipher functions */
+asmlinkage void __camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+/* 2-way parallel cipher functions */
+asmlinkage void __camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src, bool xor);
+asmlinkage void camellia_dec_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src);
+
+static inline void camellia_enc_blk(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk(ctx, dst, src, true);
+}
+
+static inline void camellia_enc_blk_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, false);
+}
+
+static inline void camellia_enc_blk_xor_2way(struct camellia_ctx *ctx, u8 *dst,
+ const u8 *src)
+{
+ __camellia_enc_blk_2way(ctx, dst, src, true);
+}
+
+/* glue helpers */
+extern void camellia_decrypt_cbc_2way(void *ctx, u128 *dst, const u128 *src);
+extern void camellia_crypt_ctr(void *ctx, u128 *dst, const u128 *src,
+ le128 *iv);
+extern void camellia_crypt_ctr_2way(void *ctx, u128 *dst, const u128 *src,
+ le128 *iv);
+
+#endif /* ASM_X86_CAMELLIA_H */
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 3e408bddc96..e2d65b061d2 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -13,7 +13,7 @@
typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
typedef void (*common_glue_cbc_func_t)(void *ctx, u128 *dst, const u128 *src);
typedef void (*common_glue_ctr_func_t)(void *ctx, u128 *dst, const u128 *src,
- u128 *iv);
+ le128 *iv);
#define GLUE_FUNC_CAST(fn) ((common_glue_func_t)(fn))
#define GLUE_CBC_FUNC_CAST(fn) ((common_glue_cbc_func_t)(fn))
@@ -71,23 +71,29 @@ static inline void glue_fpu_end(bool fpu_enabled)
kernel_fpu_end();
}
-static inline void u128_to_be128(be128 *dst, const u128 *src)
+static inline void le128_to_be128(be128 *dst, const le128 *src)
{
- dst->a = cpu_to_be64(src->a);
- dst->b = cpu_to_be64(src->b);
+ dst->a = cpu_to_be64(le64_to_cpu(src->a));
+ dst->b = cpu_to_be64(le64_to_cpu(src->b));
}
-static inline void be128_to_u128(u128 *dst, const be128 *src)
+static inline void be128_to_le128(le128 *dst, const be128 *src)
{
- dst->a = be64_to_cpu(src->a);
- dst->b = be64_to_cpu(src->b);
+ dst->a = cpu_to_le64(be64_to_cpu(src->a));
+ dst->b = cpu_to_le64(be64_to_cpu(src->b));
}
-static inline void u128_inc(u128 *i)
+static inline void le128_inc(le128 *i)
{
- i->b++;
- if (!i->b)
- i->a++;
+ u64 a = le64_to_cpu(i->a);
+ u64 b = le64_to_cpu(i->b);
+
+ b++;
+ if (!b)
+ a++;
+
+ i->a = cpu_to_le64(a);
+ i->b = cpu_to_le64(b);
}
extern int glue_ecb_crypt_128bit(const struct common_glue_ctx *gctx,
diff --git a/arch/x86/include/asm/crypto/serpent-avx.h b/arch/x86/include/asm/crypto/serpent-avx.h
index 432deedd294..0da1d3e2a55 100644
--- a/arch/x86/include/asm/crypto/serpent-avx.h
+++ b/arch/x86/include/asm/crypto/serpent-avx.h
@@ -6,27 +6,14 @@
#define SERPENT_PARALLEL_BLOCKS 8
-asmlinkage void __serpent_enc_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src, bool xor);
-asmlinkage void serpent_dec_blk_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+asmlinkage void serpent_ecb_enc_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void serpent_ecb_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
const u8 *src);
-static inline void serpent_enc_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __serpent_enc_blk_8way_avx(ctx, dst, src, false);
-}
-
-static inline void serpent_enc_blk_xway_xor(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- __serpent_enc_blk_8way_avx(ctx, dst, src, true);
-}
-
-static inline void serpent_dec_blk_xway(struct serpent_ctx *ctx, u8 *dst,
- const u8 *src)
-{
- serpent_dec_blk_8way_avx(ctx, dst, src);
-}
+asmlinkage void serpent_cbc_dec_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src);
+asmlinkage void serpent_ctr_8way_avx(struct serpent_ctx *ctx, u8 *dst,
+ const u8 *src, le128 *iv);
#endif
diff --git a/arch/x86/include/asm/crypto/twofish.h b/arch/x86/include/asm/crypto/twofish.h
index 9d2c514bd5f..878c51ceebb 100644
--- a/arch/x86/include/asm/crypto/twofish.h
+++ b/arch/x86/include/asm/crypto/twofish.h
@@ -31,9 +31,9 @@ asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
/* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
- u128 *iv);
+ le128 *iv);
extern void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src,
- u128 *iv);
+ le128 *iv);
extern int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 2d91580bf22..4b528a970bd 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -2,83 +2,8 @@
#define _ASM_X86_DEBUGREG_H
-/* Indicate the register numbers for a number of the specific
- debug registers. Registers 0-3 contain the addresses we wish to trap on */
-#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
-#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
-
-#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
-#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
-
-/* Define a few things for the status register. We can use this to determine
- which debugging register was responsible for the trap. The other bits
- are either reserved or not of interest to us. */
-
-/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED (0xFFFF0FF0)
-
-#define DR_TRAP0 (0x1) /* db0 */
-#define DR_TRAP1 (0x2) /* db1 */
-#define DR_TRAP2 (0x4) /* db2 */
-#define DR_TRAP3 (0x8) /* db3 */
-#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
-
-#define DR_STEP (0x4000) /* single-step */
-#define DR_SWITCH (0x8000) /* task switch */
-
-/* Now define a bunch of things for manipulating the control register.
- The top two bytes of the control register consist of 4 fields of 4
- bits - each field corresponds to one of the four debug registers,
- and indicates what types of access we trap on, and how large the data
- field is that we are looking at */
-
-#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
-#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
-
-#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
-#define DR_RW_WRITE (0x1)
-#define DR_RW_READ (0x3)
-
-#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
-#define DR_LEN_2 (0x4)
-#define DR_LEN_4 (0xC)
-#define DR_LEN_8 (0x8)
-
-/* The low byte to the control register determine which registers are
- enabled. There are 4 fields of two bits. One bit is "local", meaning
- that the processor will reset the bit after a task switch and the other
- is global meaning that we have to explicitly reset the bit. With linux,
- you can use either one, since we explicitly zero the register when we enter
- kernel mode. */
-
-#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
-#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
-#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
-#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
-#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
-
-#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
-#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
-
-/* The second byte to the control register has a few special things.
- We can slow the instruction pipeline for instructions coming via the
- gdt or the ldt if we want to. I am not sure why this is an advantage */
-
-#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
-#else
-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
-#endif
-
-#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
-
-/*
- * HW breakpoint additions
- */
-#ifdef __KERNEL__
-
#include <linux/bug.h>
+#include <uapi/asm/debugreg.h>
DECLARE_PER_CPU(unsigned long, cpu_dr7);
@@ -190,6 +115,4 @@ static inline void debug_stack_usage_dec(void) { }
#endif /* X86_64 */
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h
index 93e1c55f14a..03dd72957d2 100644
--- a/arch/x86/include/asm/device.h
+++ b/arch/x86/include/asm/device.h
@@ -2,9 +2,6 @@
#define _ASM_X86_DEVICE_H
struct dev_archdata {
-#ifdef CONFIG_ACPI
- void *acpi_handle;
-#endif
#ifdef CONFIG_X86_DEV_DMA_OPS
struct dma_map_ops *dma_ops;
#endif
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index f7b4c7903e7..808dae63eee 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -47,6 +47,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
struct dma_map_ops *ops = get_dma_ops(dev);
+ debug_dma_mapping_error(dev, dma_addr);
if (ops->mapping_error)
return ops->mapping_error(dev, dma_addr);
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 37782566af2..cccd07fa5e3 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -1,81 +1,14 @@
#ifndef _ASM_X86_E820_H
#define _ASM_X86_E820_H
-#define E820MAP 0x2d0 /* our map */
-#define E820MAX 128 /* number of entries in E820MAP */
-/*
- * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
- * constrained space in the zeropage. If we have more nodes than
- * that, and if we've booted off EFI firmware, then the EFI tables
- * passed us from the EFI firmware can list more nodes. Size our
- * internal memory map tables to have room for these additional
- * nodes, based on up to three entries per node for which the
- * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
- * plus E820MAX, allowing space for the possible duplicate E820
- * entries that might need room in the same arrays, prior to the
- * call to sanitize_e820_map() to remove duplicates. The allowance
- * of three memory map entries per node is "enough" entries for
- * the initial hardware platform motivating this mechanism to make
- * use of additional EFI map entries. Future platforms may want
- * to allow more than three entries per node or otherwise refine
- * this size.
- */
-
-/*
- * Odd: 'make headers_check' complains about numa.h if I try
- * to collapse the next two #ifdef lines to a single line:
- * #if defined(__KERNEL__) && defined(CONFIG_EFI)
- */
-#ifdef __KERNEL__
#ifdef CONFIG_EFI
#include <linux/numa.h>
#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
#else /* ! CONFIG_EFI */
#define E820_X_MAX E820MAX
#endif
-#else /* ! __KERNEL__ */
-#define E820_X_MAX E820MAX
-#endif
-
-#define E820NR 0x1e8 /* # entries in E820MAP */
-
-#define E820_RAM 1
-#define E820_RESERVED 2
-#define E820_ACPI 3
-#define E820_NVS 4
-#define E820_UNUSABLE 5
-
-/*
- * reserved RAM used by kernel itself
- * if CONFIG_INTEL_TXT is enabled, memory of this type will be
- * included in the S3 integrity calculation and so should not include
- * any memory that BIOS might alter over the S3 transition
- */
-#define E820_RESERVED_KERN 128
-
+#include <uapi/asm/e820.h>
#ifndef __ASSEMBLY__
-#include <linux/types.h>
-struct e820entry {
- __u64 addr; /* start of memory segment */
- __u64 size; /* size of memory segment */
- __u32 type; /* type of memory segment */
-} __attribute__((packed));
-
-struct e820map {
- __u32 nr_map;
- struct e820entry map[E820_X_MAX];
-};
-
-#define ISA_START_ADDRESS 0xa0000
-#define ISA_END_ADDRESS 0x100000
-
-#define BIOS_BEGIN 0x000a0000
-#define BIOS_END 0x00100000
-
-#define BIOS_ROM_BASE 0xffe00000
-#define BIOS_ROM_END 0xffffffff
-
-#ifdef __KERNEL__
/* see comment in arch/x86/kernel/e820.c */
extern struct e820map e820;
extern struct e820map e820_saved;
@@ -137,13 +70,8 @@ static inline bool is_ISA_range(u64 s, u64 e)
return s >= ISA_START_ADDRESS && e <= ISA_END_ADDRESS;
}
-#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
-
-#ifdef __KERNEL__
#include <linux/ioport.h>
#define HIGH_MEMORY (1024*1024)
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_E820_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 6e8fdf5ad11..28677c55113 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -94,6 +94,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size,
#endif /* CONFIG_X86_32 */
extern int add_efi_memmap;
+extern unsigned long x86_efi_facility;
extern void efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int efi_memblock_x86_reserve_range(void);
extern void efi_call_phys_prelog(void);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 5939f44fe0c..9c999c1674f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -354,12 +354,10 @@ static inline int mmap_is_ia32(void)
return 0;
}
-/* The first two values are special, do not change. See align_addr() */
+/* Do not change the values. See get_align_mask() */
enum align_flags {
ALIGN_VA_32 = BIT(0),
ALIGN_VA_64 = BIT(1),
- ALIGN_VDSO = BIT(2),
- ALIGN_TOPDOWN = BIT(3),
};
struct va_alignment {
@@ -368,5 +366,5 @@ struct va_alignment {
} ____cacheline_aligned;
extern struct va_alignment va_align;
-extern unsigned long align_addr(unsigned long, struct file *, enum align_flags);
+extern unsigned long align_vdso_addr(unsigned long);
#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 4da3c0c4c97..a09c2857106 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -19,6 +19,7 @@
#include <asm/acpi.h>
#include <asm/apicdef.h>
#include <asm/page.h>
+#include <asm/pvclock.h>
#ifdef CONFIG_X86_32
#include <linux/threads.h>
#include <asm/kmap_types.h>
@@ -81,6 +82,10 @@ enum fixed_addresses {
VVAR_PAGE,
VSYSCALL_HPET,
#endif
+#ifdef CONFIG_PARAVIRT_CLOCK
+ PVCLOCK_FIXMAP_BEGIN,
+ PVCLOCK_FIXMAP_END = PVCLOCK_FIXMAP_BEGIN+PVCLOCK_VSYSCALL_NR_PAGES-1,
+#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 9a25b522d37..86cb51e1ca9 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -44,7 +44,6 @@
#ifdef CONFIG_DYNAMIC_FTRACE
#define ARCH_SUPPORTS_FTRACE_OPS 1
-#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
#endif
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index f373046e63e..be27ba1e947 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -55,12 +55,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
- /* Real i386 machines can only support FUTEX_OP_SET */
- if (op != FUTEX_OP_SET && boot_cpu_data.x86 == 3)
- return -ENOSYS;
-#endif
-
pagefault_disable();
switch (op) {
@@ -118,12 +112,6 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
{
int ret = 0;
-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP)
- /* Real i386 machines have no cmpxchg instruction */
- if (boot_cpu_data.x86 == 3)
- return -ENOSYS;
-#endif
-
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 434e2106cc8..b18df579c0e 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -80,9 +80,9 @@ extern void hpet_msi_write(struct hpet_dev *hdev, struct msi_msg *msg);
extern void hpet_msi_read(struct hpet_dev *hdev, struct msi_msg *msg);
#ifdef CONFIG_PCI_MSI
-extern int arch_setup_hpet_msi(unsigned int irq, unsigned int id);
+extern int default_setup_hpet_msi(unsigned int irq, unsigned int id);
#else
-static inline int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+static inline int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
return -EINVAL;
}
@@ -111,6 +111,7 @@ extern void hpet_unregister_irq_handler(rtc_irq_handler handler);
static inline int hpet_enable(void) { return 0; }
static inline int is_hpet_enabled(void) { return 0; }
#define hpet_readl(a) 0
+#define default_setup_hpet_msi NULL
#endif
#endif /* _ASM_X86_HPET_H */
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 824ca07860d..ef1c4d2d41e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -1,7 +1,8 @@
#ifndef _I386_HW_BREAKPOINT_H
#define _I386_HW_BREAKPOINT_H
-#ifdef __KERNEL__
+#include <uapi/asm/hw_breakpoint.h>
+
#define __ARCH_HW_BREAKPOINT_H
/*
@@ -71,6 +72,4 @@ extern int arch_bp_generic_fields(int x86_len, int x86_type,
extern struct pmu perf_ops_bp;
-#endif /* __KERNEL__ */
#endif /* _I386_HW_BREAKPOINT_H */
-
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index eb92a6ed2be..10a78c3d3d5 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -101,6 +101,7 @@ static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
irq_attr->polarity = polarity;
}
+/* Intel specific interrupt remapping information */
struct irq_2_iommu {
struct intel_iommu *iommu;
u16 irte_index;
@@ -108,6 +109,12 @@ struct irq_2_iommu {
u8 irte_mask;
};
+/* AMD specific interrupt remapping information */
+struct irq_2_irte {
+ u16 devid; /* Device ID for IRTE table */
+ u16 index; /* Index into IRTE table*/
+};
+
/*
* This is performance-critical, we want to do it O(1)
*
@@ -120,7 +127,11 @@ struct irq_cfg {
u8 vector;
u8 move_in_progress : 1;
#ifdef CONFIG_IRQ_REMAP
- struct irq_2_iommu irq_2_iommu;
+ u8 remapped : 1;
+ union {
+ struct irq_2_iommu irq_2_iommu;
+ struct irq_2_irte irq_2_irte;
+ };
#endif
};
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h
index b518c750993..86095ed1413 100644
--- a/arch/x86/include/asm/hypervisor.h
+++ b/arch/x86/include/asm/hypervisor.h
@@ -25,6 +25,7 @@
extern void init_hypervisor(struct cpuinfo_x86 *c);
extern void init_hypervisor_platform(void);
+extern bool hypervisor_x2apic_available(void);
/*
* x86 hypervisor information
@@ -41,6 +42,9 @@ struct hypervisor_x86 {
/* Platform setup (run once per boot) */
void (*init_platform)(void);
+
+ /* X2APIC detection (run once per boot) */
+ bool (*x2apic_available)(void);
};
extern const struct hypervisor_x86 *x86_hyper;
@@ -51,13 +55,4 @@ extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
extern const struct hypervisor_x86 x86_hyper_kvm;
-static inline bool hypervisor_x2apic_available(void)
-{
- if (kvm_para_available())
- return true;
- if (xen_x2apic_para_available())
- return true;
- return false;
-}
-
#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index e6232773ce4..4c6da2e4bb1 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -29,16 +29,10 @@ struct old_sigaction32 {
unsigned int sa_restorer; /* Another 32 bit pointer */
};
-typedef struct sigaltstack_ia32 {
- unsigned int ss_sp;
- int ss_flags;
- unsigned int ss_size;
-} stack_ia32_t;
-
struct ucontext_ia32 {
unsigned int uc_flags;
unsigned int uc_link;
- stack_ia32_t uc_stack;
+ compat_stack_t uc_stack;
struct sigcontext_ia32 uc_mcontext;
compat_sigset_t uc_sigmask; /* mask last for extensibility */
};
@@ -46,7 +40,7 @@ struct ucontext_ia32 {
struct ucontext_x32 {
unsigned int uc_flags;
unsigned int uc_link;
- stack_ia32_t uc_stack;
+ compat_stack_t uc_stack;
unsigned int uc__pad0; /* needed for alignment */
struct sigcontext uc_mcontext; /* the 64-bit sigcontext type */
compat_sigset_t uc_sigmask; /* mask last for extensibility */
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 73d8c5398ea..459e50a424d 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -144,11 +144,24 @@ extern int timer_through_8259;
(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
struct io_apic_irq_attr;
+struct irq_cfg;
extern int io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr);
void setup_IO_APIC_irq_extra(u32 gsi);
extern void ioapic_insert_resources(void);
+extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
+ unsigned int, int,
+ struct io_apic_irq_attr *);
+extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
+ unsigned int, int,
+ struct io_apic_irq_attr *);
+extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
+
+extern void native_compose_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id);
+extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
int io_apic_setup_irq_pin_once(unsigned int irq, int node, struct io_apic_irq_attr *attr);
extern int save_ioapic_entries(void);
@@ -179,6 +192,12 @@ extern void __init native_io_apic_init_mappings(void);
extern unsigned int native_io_apic_read(unsigned int apic, unsigned int reg);
extern void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int val);
extern void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int val);
+extern void native_disable_io_apic(void);
+extern void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
+extern void intel_ir_io_apic_print_entries(unsigned int apic, unsigned int nr_entries);
+extern int native_ioapic_set_affinity(struct irq_data *,
+ const struct cpumask *,
+ bool);
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
@@ -193,6 +212,9 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned
{
x86_io_apic_ops.modify(apic, reg, value);
}
+
+extern void io_apic_eoi(unsigned int apic, unsigned int vector);
+
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
@@ -223,6 +245,12 @@ static inline void disable_ioapic_support(void) { }
#define native_io_apic_read NULL
#define native_io_apic_write NULL
#define native_io_apic_modify NULL
+#define native_disable_io_apic NULL
+#define native_io_apic_print_entries NULL
+#define native_ioapic_set_affinity NULL
+#define native_setup_ioapic_entry NULL
+#define native_compose_msi_msg NULL
+#define native_eoi_ioapic_pin NULL
#endif
#endif /* _ASM_X86_IO_APIC_H */
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 5fb9bbbd2f1..95fd3527f63 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -26,8 +26,6 @@
#ifdef CONFIG_IRQ_REMAP
-extern int irq_remapping_enabled;
-
extern void setup_irq_remapping_ops(void);
extern int irq_remapping_supported(void);
extern int irq_remapping_prepare(void);
@@ -40,21 +38,19 @@ extern int setup_ioapic_remapped_entry(int irq,
unsigned int destination,
int vector,
struct io_apic_irq_attr *attr);
-extern int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force);
extern void free_remapped_irq(int irq);
extern void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id);
-extern int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec);
-extern int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle);
extern int setup_hpet_msi_remapped(unsigned int irq, unsigned int id);
+extern void panic_if_irq_remap(const char *msg);
+extern bool setup_remapped_irq(int irq,
+ struct irq_cfg *cfg,
+ struct irq_chip *chip);
-#else /* CONFIG_IRQ_REMAP */
+void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-#define irq_remapping_enabled 0
+#else /* CONFIG_IRQ_REMAP */
static inline void setup_irq_remapping_ops(void) { }
static inline int irq_remapping_supported(void) { return 0; }
@@ -71,30 +67,30 @@ static inline int setup_ioapic_remapped_entry(int irq,
{
return -ENODEV;
}
-static inline int set_remapped_irq_affinity(struct irq_data *data,
- const struct cpumask *mask,
- bool force)
-{
- return 0;
-}
static inline void free_remapped_irq(int irq) { }
static inline void compose_remapped_msi_msg(struct pci_dev *pdev,
unsigned int irq, unsigned int dest,
struct msi_msg *msg, u8 hpet_id)
{
}
-static inline int msi_alloc_remapped_irq(struct pci_dev *pdev, int irq, int nvec)
+static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
{
return -ENODEV;
}
-static inline int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq,
- int index, int sub_handle)
+
+static inline void panic_if_irq_remap(const char *msg)
+{
+}
+
+static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
{
- return -ENODEV;
}
-static inline int setup_hpet_msi_remapped(unsigned int irq, unsigned int id)
+
+static inline bool setup_remapped_irq(int irq,
+ struct irq_cfg *cfg,
+ struct irq_chip *chip)
{
- return -ENODEV;
+ return false;
}
#endif /* CONFIG_IRQ_REMAP */
diff --git a/arch/x86/include/asm/ist.h b/arch/x86/include/asm/ist.h
index 7e5dff1de0e..c9803f1a203 100644
--- a/arch/x86/include/asm/ist.h
+++ b/arch/x86/include/asm/ist.h
@@ -1,6 +1,3 @@
-#ifndef _ASM_X86_IST_H
-#define _ASM_X86_IST_H
-
/*
* Include file for the interface to IST BIOS
* Copyright 2002 Andy Grover <andrew.grover@intel.com>
@@ -15,20 +12,12 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
+#ifndef _ASM_X86_IST_H
+#define _ASM_X86_IST_H
+#include <uapi/asm/ist.h>
-#include <linux/types.h>
-
-struct ist_info {
- __u32 signature;
- __u32 command;
- __u32 event;
- __u32 perf_level;
-};
-
-#ifdef __KERNEL__
extern struct ist_info ist_info;
-#endif /* __KERNEL__ */
#endif /* _ASM_X86_IST_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 317ff1703d0..6080d2694ba 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -163,6 +163,9 @@ struct kimage_arch {
};
#endif
+typedef void crash_vmclear_fn(void);
+extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/kvm_guest.h b/arch/x86/include/asm/kvm_guest.h
new file mode 100644
index 00000000000..a92b1763c41
--- /dev/null
+++ b/arch/x86/include/asm/kvm_guest.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_X86_KVM_GUEST_H
+#define _ASM_X86_KVM_GUEST_H
+
+int kvm_setup_vsyscall_timeinfo(void);
+
+#endif /* _ASM_X86_KVM_GUEST_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b2e11f45243..dc87b65e9c3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -22,6 +22,8 @@
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <linux/perf_event.h>
+#include <linux/pvclock_gtod.h>
+#include <linux/clocksource.h>
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
@@ -442,6 +444,7 @@ struct kvm_vcpu_arch {
s8 virtual_tsc_shift;
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
+ s64 ia32_tsc_adjust_msr;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
@@ -559,6 +562,12 @@ struct kvm_arch {
u64 cur_tsc_write;
u64 cur_tsc_offset;
u8 cur_tsc_generation;
+ int nr_vcpus_matched_tsc;
+
+ spinlock_t pvclock_gtod_sync_lock;
+ bool use_master_clock;
+ u64 master_kernel_ns;
+ cycle_t master_cycle_now;
struct kvm_xen_hvm_config xen_hvm_config;
@@ -612,6 +621,12 @@ struct kvm_vcpu_stat {
struct x86_instruction_info;
+struct msr_data {
+ bool host_initiated;
+ u32 index;
+ u64 data;
+};
+
struct kvm_x86_ops {
int (*cpu_has_kvm_support)(void); /* __init */
int (*disabled_by_bios)(void); /* __init */
@@ -634,7 +649,7 @@ struct kvm_x86_ops {
void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
- int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
+ int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
@@ -697,10 +712,11 @@ struct kvm_x86_ops {
bool (*has_wbinvd_exit)(void);
void (*set_tsc_khz)(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale);
+ u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
u64 (*compute_tsc_offset)(struct kvm_vcpu *vcpu, u64 target_tsc);
- u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu);
+ u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2);
@@ -785,7 +801,7 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
void kvm_enable_efer_bits(u64);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *data);
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
+int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
struct x86_emulate_ctxt;
@@ -812,7 +828,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data);
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index eb3e9d85e1f..65231e173ba 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -1,103 +1,8 @@
#ifndef _ASM_X86_KVM_PARA_H
#define _ASM_X86_KVM_PARA_H
-#include <linux/types.h>
-#include <asm/hyperv.h>
-
-/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
- * should be used to determine that a VM is running under KVM.
- */
-#define KVM_CPUID_SIGNATURE 0x40000000
-
-/* This CPUID returns a feature bitmap in eax. Before enabling a particular
- * paravirtualization, the appropriate feature bit should be checked.
- */
-#define KVM_CPUID_FEATURES 0x40000001
-#define KVM_FEATURE_CLOCKSOURCE 0
-#define KVM_FEATURE_NOP_IO_DELAY 1
-#define KVM_FEATURE_MMU_OP 2
-/* This indicates that the new set of kvmclock msrs
- * are available. The use of 0x11 and 0x12 is deprecated
- */
-#define KVM_FEATURE_CLOCKSOURCE2 3
-#define KVM_FEATURE_ASYNC_PF 4
-#define KVM_FEATURE_STEAL_TIME 5
-#define KVM_FEATURE_PV_EOI 6
-
-/* The last 8 bits are used to indicate how to interpret the flags field
- * in pvclock structure. If no bits are set, all flags are ignored.
- */
-#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
-
-#define MSR_KVM_WALL_CLOCK 0x11
-#define MSR_KVM_SYSTEM_TIME 0x12
-
-#define KVM_MSR_ENABLED 1
-/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
-#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
-#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
-#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
-#define MSR_KVM_STEAL_TIME 0x4b564d03
-#define MSR_KVM_PV_EOI_EN 0x4b564d04
-
-struct kvm_steal_time {
- __u64 steal;
- __u32 version;
- __u32 flags;
- __u32 pad[12];
-};
-
-#define KVM_STEAL_ALIGNMENT_BITS 5
-#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
-#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
-
-#define KVM_MAX_MMU_OP_BATCH 32
-
-#define KVM_ASYNC_PF_ENABLED (1 << 0)
-#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
-
-/* Operations for KVM_HC_MMU_OP */
-#define KVM_MMU_OP_WRITE_PTE 1
-#define KVM_MMU_OP_FLUSH_TLB 2
-#define KVM_MMU_OP_RELEASE_PT 3
-
-/* Payload for KVM_HC_MMU_OP */
-struct kvm_mmu_op_header {
- __u32 op;
- __u32 pad;
-};
-
-struct kvm_mmu_op_write_pte {
- struct kvm_mmu_op_header header;
- __u64 pte_phys;
- __u64 pte_val;
-};
-
-struct kvm_mmu_op_flush_tlb {
- struct kvm_mmu_op_header header;
-};
-
-struct kvm_mmu_op_release_pt {
- struct kvm_mmu_op_header header;
- __u64 pt_phys;
-};
-
-#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
-#define KVM_PV_REASON_PAGE_READY 2
-
-struct kvm_vcpu_pv_apf_data {
- __u32 reason;
- __u8 pad[60];
- __u32 enabled;
-};
-
-#define KVM_PV_EOI_BIT 0
-#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
-#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
-#define KVM_PV_EOI_DISABLED 0x0
-
-#ifdef __KERNEL__
#include <asm/processor.h>
+#include <uapi/asm/kvm_para.h>
extern void kvmclock_init(void);
extern int kvm_register_clock(char *txt);
@@ -180,13 +85,13 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
-static inline int kvm_para_available(void)
+static inline bool kvm_para_available(void)
{
unsigned int eax, ebx, ecx, edx;
char signature[13];
if (boot_cpu_data.cpuid_level < 0)
- return 0; /* So we don't blow up on old processors */
+ return false; /* So we don't blow up on old processors */
if (cpu_has_hypervisor) {
cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
@@ -196,10 +101,10 @@ static inline int kvm_para_available(void)
signature[12] = 0;
if (strcmp(signature, "KVMKVMKVM") == 0)
- return 1;
+ return true;
}
- return 0;
+ return false;
}
static inline unsigned int kvm_arch_para_features(void)
@@ -228,6 +133,4 @@ static inline void kvm_disable_steal_time(void)
}
#endif
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 48142971b25..79327e9483a 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -27,20 +27,20 @@
#define __asmlinkage_protect0(ret) \
__asmlinkage_protect_n(ret)
#define __asmlinkage_protect1(ret, arg1) \
- __asmlinkage_protect_n(ret, "g" (arg1))
+ __asmlinkage_protect_n(ret, "m" (arg1))
#define __asmlinkage_protect2(ret, arg1, arg2) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2))
#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3))
#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4))
#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4), "g" (arg5))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4), "m" (arg5))
#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \
- __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \
- "g" (arg4), "g" (arg5), "g" (arg6))
+ __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \
+ "m" (arg4), "m" (arg5), "m" (arg6))
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h
index c8bed0da434..2d89e3980cb 100644
--- a/arch/x86/include/asm/local.h
+++ b/arch/x86/include/asm/local.h
@@ -124,27 +124,11 @@ static inline int local_add_negative(long i, local_t *l)
*/
static inline long local_add_return(long i, local_t *l)
{
- long __i;
-#ifdef CONFIG_M386
- unsigned long flags;
- if (unlikely(boot_cpu_data.x86 <= 3))
- goto no_xadd;
-#endif
- /* Modern 486+ processor */
- __i = i;
+ long __i = i;
asm volatile(_ASM_XADD "%0, %1;"
: "+r" (i), "+m" (l->a.counter)
: : "memory");
return i + __i;
-
-#ifdef CONFIG_M386
-no_xadd: /* Legacy 386 processor */
- local_irq_save(flags);
- __i = local_read(l);
- local_set(l, i + __i);
- local_irq_restore(flags);
- return i + __i;
-#endif
}
static inline long local_sub_return(long i, local_t *l)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 54d73b1f00a..f4076af1f4e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -1,8 +1,7 @@
#ifndef _ASM_X86_MCE_H
#define _ASM_X86_MCE_H
-#include <linux/types.h>
-#include <asm/ioctls.h>
+#include <uapi/asm/mce.h>
/*
* Machine Check support for x86
@@ -16,7 +15,7 @@
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
#define MCG_EXT_CNT_SHIFT 16
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
-#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
+#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
/* MCG_STATUS register defines */
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
@@ -64,28 +63,15 @@
#define MCJ_EXCEPTION 0x8 /* raise as exception */
#define MCJ_IRQ_BRAODCAST 0x10 /* do IRQ broadcasting */
-/* Fields are zero when not available */
-struct mce {
- __u64 status;
- __u64 misc;
- __u64 addr;
- __u64 mcgstatus;
- __u64 ip;
- __u64 tsc; /* cpu time stamp counter */
- __u64 time; /* wall time_t when error was detected */
- __u8 cpuvendor; /* cpu vendor as encoded in system.h */
- __u8 inject_flags; /* software inject flags */
- __u16 pad;
- __u32 cpuid; /* CPUID 1 EAX */
- __u8 cs; /* code segment */
- __u8 bank; /* machine check bank */
- __u8 cpu; /* cpu number; obsolete; use extcpu now */
- __u8 finished; /* entry is valid */
- __u32 extcpu; /* linux cpu number that detected the error */
- __u32 socketid; /* CPU socket ID */
- __u32 apicid; /* CPU initial apic ID */
- __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
-};
+#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
+
+/* Software defined banks */
+#define MCE_EXTENDED_BANK 128
+#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0)
+#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
+
+#define MCE_LOG_LEN 32
+#define MCE_LOG_SIGNATURE "MACHINECHECK"
/*
* This structure contains all data related to the MCE log. Also
@@ -93,9 +79,6 @@ struct mce {
* debugging tools. Each entry is only valid when its finished flag
* is set.
*/
-
-#define MCE_LOG_LEN 32
-
struct mce_log {
char signature[12]; /* "MACHINECHECK" */
unsigned len; /* = MCE_LOG_LEN */
@@ -105,20 +88,22 @@ struct mce_log {
struct mce entry[MCE_LOG_LEN];
};
-#define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */
-
-#define MCE_LOG_SIGNATURE "MACHINECHECK"
-
-#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
-#define MCE_GET_LOG_LEN _IOR('M', 2, int)
-#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
-
-/* Software defined banks */
-#define MCE_EXTENDED_BANK 128
-#define MCE_THERMAL_BANK MCE_EXTENDED_BANK + 0
-#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1)
+struct mca_config {
+ bool dont_log_ce;
+ bool cmci_disabled;
+ bool ignore_ce;
+ bool disabled;
+ bool ser;
+ bool bios_cmci_threshold;
+ u8 banks;
+ s8 bootlog;
+ int tolerant;
+ int monarch_timeout;
+ int panic_timeout;
+ u32 rip_msr;
+};
-#ifdef __KERNEL__
+extern struct mca_config mca_cfg;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -126,7 +111,6 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
#include <linux/init.h>
#include <linux/atomic.h>
-extern int mce_disabled;
extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
@@ -159,9 +143,6 @@ DECLARE_PER_CPU(struct device *, mce_device);
#define MAX_NR_BANKS 32
#ifdef CONFIG_X86_MCE_INTEL
-extern int mce_cmci_disabled;
-extern int mce_ignore_ce;
-extern int mce_bios_cmci_threshold;
void mce_intel_feature_init(struct cpuinfo_x86 *c);
void cmci_clear(void);
void cmci_reenable(void);
@@ -247,5 +228,4 @@ struct cper_sec_mem_err;
extern void apei_mce_report_mem_error(int corrected,
struct cper_sec_mem_err *mem_err);
-#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 9eae7752ae9..e3b7819caee 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,8 +5,6 @@
#ifdef CONFIG_X86_64
/* X86_64 does not define MODULE_PROC_FAMILY */
-#elif defined CONFIG_M386
-#define MODULE_PROC_FAMILY "386 "
#elif defined CONFIG_M486
#define MODULE_PROC_FAMILY "486 "
#elif defined CONFIG_M586
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index 813ed103f45..9264802e282 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -1,18 +1,10 @@
#ifndef _ASM_X86_MSR_H
#define _ASM_X86_MSR_H
-#include <asm/msr-index.h>
+#include <uapi/asm/msr.h>
#ifndef __ASSEMBLY__
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
-#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
-
-#ifdef __KERNEL__
-
#include <asm/asm.h>
#include <asm/errno.h>
#include <asm/cpumask.h>
@@ -271,6 +263,5 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
return wrmsr_safe_regs(regs);
}
#endif /* CONFIG_SMP */
-#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 7e3f17f92c6..e235582f993 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -23,97 +23,8 @@
#ifndef _ASM_X86_MTRR_H
#define _ASM_X86_MTRR_H
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <linux/errno.h>
+#include <uapi/asm/mtrr.h>
-#define MTRR_IOCTL_BASE 'M'
-
-/* Warning: this structure has a different order from i386
- on x86-64. The 32bit emulation code takes care of that.
- But you need to use this for 64bit, otherwise your X server
- will break. */
-
-#ifdef __i386__
-struct mtrr_sentry {
- unsigned long base; /* Base address */
- unsigned int size; /* Size of region */
- unsigned int type; /* Type of region */
-};
-
-struct mtrr_gentry {
- unsigned int regnum; /* Register number */
- unsigned long base; /* Base address */
- unsigned int size; /* Size of region */
- unsigned int type; /* Type of region */
-};
-
-#else /* __i386__ */
-
-struct mtrr_sentry {
- __u64 base; /* Base address */
- __u32 size; /* Size of region */
- __u32 type; /* Type of region */
-};
-
-struct mtrr_gentry {
- __u64 base; /* Base address */
- __u32 size; /* Size of region */
- __u32 regnum; /* Register number */
- __u32 type; /* Type of region */
- __u32 _pad; /* Unused */
-};
-
-#endif /* !__i386__ */
-
-struct mtrr_var_range {
- __u32 base_lo;
- __u32 base_hi;
- __u32 mask_lo;
- __u32 mask_hi;
-};
-
-/* In the Intel processor's MTRR interface, the MTRR type is always held in
- an 8 bit field: */
-typedef __u8 mtrr_type;
-
-#define MTRR_NUM_FIXED_RANGES 88
-#define MTRR_MAX_VAR_RANGES 256
-
-struct mtrr_state_type {
- struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
- mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
- unsigned char enabled;
- unsigned char have_fixed;
- mtrr_type def_type;
-};
-
-#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
-#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
-
-/* These are the various ioctls */
-#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
-#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
-#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry)
-#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry)
-#define MTRRIOC_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry)
-#define MTRRIOC_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry)
-#define MTRRIOC_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry)
-#define MTRRIOC_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry)
-#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
-#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
-
-/* These are the region types */
-#define MTRR_TYPE_UNCACHABLE 0
-#define MTRR_TYPE_WRCOMB 1
-/*#define MTRR_TYPE_ 2*/
-/*#define MTRR_TYPE_ 3*/
-#define MTRR_TYPE_WRTHROUGH 4
-#define MTRR_TYPE_WRPROT 5
-#define MTRR_TYPE_WRBACK 6
-#define MTRR_NUM_TYPES 7
-
-#ifdef __KERNEL__
/* The following functions are for use by other drivers */
# ifdef CONFIG_MTRR
@@ -208,6 +119,4 @@ struct mtrr_gentry32 {
_IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry32)
#endif /* CONFIG_COMPAT */
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_MTRR_H */
diff --git a/arch/x86/include/asm/numachip/numachip.h b/arch/x86/include/asm/numachip/numachip.h
new file mode 100644
index 00000000000..1c6f7f6212c
--- /dev/null
+++ b/arch/x86/include/asm/numachip/numachip.h
@@ -0,0 +1,19 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific header file
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#ifndef _ASM_X86_NUMACHIP_NUMACHIP_H
+#define _ASM_X86_NUMACHIP_NUMACHIP_H
+
+extern int __init pci_numachip_init(void);
+
+#endif /* _ASM_X86_NUMACHIP_NUMACHIP_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index a0facf3908d..5edd1742cfd 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -528,7 +528,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL4(pv_mmu_ops.set_pte_at, mm, addr, ptep, pte.pte);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmd)
{
@@ -539,7 +538,6 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
PVOP_VCALL4(pv_mmu_ops.set_pmd_at, mm, addr, pmdp,
native_pmd_val(pmd));
}
-#endif
static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
diff --git a/arch/x86/include/asm/parport.h b/arch/x86/include/asm/parport.h
index 3c4ffeb467e..0d2d3b29118 100644
--- a/arch/x86/include/asm/parport.h
+++ b/arch/x86/include/asm/parport.h
@@ -1,8 +1,8 @@
#ifndef _ASM_X86_PARPORT_H
#define _ASM_X86_PARPORT_H
-static int __devinit parport_pc_find_isa_ports(int autoirq, int autodma);
-static int __devinit parport_pc_find_nonpci_ports(int autoirq, int autodma)
+static int parport_pc_find_isa_ports(int autoirq, int autodma);
+static int parport_pc_find_nonpci_ports(int autoirq, int autodma)
{
return parport_pc_find_isa_ports(autoirq, autodma);
}
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 6e41b934392..c28fd02f4bf 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -121,9 +121,12 @@ static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
#define arch_teardown_msi_irq x86_teardown_msi_irq
#define arch_restore_msi_irqs x86_restore_msi_irqs
/* implemented in arch/x86/kernel/apic/io_apic. */
+struct msi_desc;
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
void native_restore_msi_irqs(struct pci_dev *dev, int irq);
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ unsigned int irq_base, unsigned int irq_offset);
/* default to the implementation in drivers/lib/msi.c */
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
#define HAVE_DEFAULT_MSI_RESTORE_IRQS
@@ -171,4 +174,16 @@ cpumask_of_pcibus(const struct pci_bus *bus)
}
#endif
+struct pci_setup_rom {
+ struct setup_data data;
+ uint16_t vendor;
+ uint16_t devid;
+ uint64_t pcilen;
+ unsigned long segment;
+ unsigned long bus;
+ unsigned long device;
+ unsigned long function;
+ uint8_t romdata[0];
+};
+
#endif /* _ASM_X86_PCI_H */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index 73e8eeff22e..747e5a38b59 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -140,11 +140,10 @@ struct pci_mmcfg_region {
extern int __init pci_mmcfg_arch_init(void);
extern void __init pci_mmcfg_arch_free(void);
-extern int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
+extern int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg);
extern void pci_mmcfg_arch_unmap(struct pci_mmcfg_region *cfg);
-extern int __devinit pci_mmconfig_insert(struct device *dev,
- u16 seg, u8 start,
- u8 end, phys_addr_t addr);
+extern int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
+ phys_addr_t addr);
extern int pci_mmconfig_delete(u16 seg, u8 start, u8 end);
extern struct pci_mmcfg_region *pci_mmconfig_lookup(int segment, int bus);
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1104afaba52..0da5200ee79 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -406,7 +406,6 @@ do { \
#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval)
#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval)
-#ifndef CONFIG_M386
#define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val)
#define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val)
@@ -421,8 +420,6 @@ do { \
#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval)
-#endif /* !CONFIG_M386 */
-
#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2) \
({ \
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4fabcdf1cfa..57cb6340221 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -29,8 +29,13 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
-#define AMD_PERFMON_EVENTSEL_GUESTONLY (1ULL << 40)
-#define AMD_PERFMON_EVENTSEL_HOSTONLY (1ULL << 41)
+#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
+#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
+#define AMD64_EVENTSEL_HOSTONLY (1ULL << 41)
+
+#define AMD64_EVENTSEL_INT_CORE_SEL_SHIFT 37
+#define AMD64_EVENTSEL_INT_CORE_SEL_MASK \
+ (0xFULL << AMD64_EVENTSEL_INT_CORE_SEL_SHIFT)
#define AMD64_EVENTSEL_EVENT \
(ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32))
@@ -46,8 +51,12 @@
#define AMD64_RAW_EVENT_MASK \
(X86_RAW_EVENT_MASK | \
AMD64_EVENTSEL_EVENT)
+#define AMD64_RAW_EVENT_MASK_NB \
+ (AMD64_EVENTSEL_EVENT | \
+ ARCH_PERFMON_EVENTSEL_UMASK)
#define AMD64_NUM_COUNTERS 4
#define AMD64_NUM_COUNTERS_CORE 6
+#define AMD64_NUM_COUNTERS_NB 4
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c
#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8)
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a1f780d45f7..fc304279b55 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -142,6 +142,11 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
}
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return (pud_val(pud) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -404,7 +409,14 @@ static inline int pte_same(pte_t a, pte_t b)
static inline int pte_present(pte_t a)
{
- return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE |
+ _PAGE_NUMA);
+}
+
+#define pte_accessible pte_accessible
+static inline int pte_accessible(pte_t a)
+{
+ return pte_flags(a) & _PAGE_PRESENT;
}
static inline int pte_hidden(pte_t pte)
@@ -420,7 +432,8 @@ static inline int pmd_present(pmd_t pmd)
* the _PAGE_PSE flag will remain set at all times while the
* _PAGE_PRESENT bit is clear).
*/
- return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
+ return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE |
+ _PAGE_NUMA);
}
static inline int pmd_none(pmd_t pmd)
@@ -479,6 +492,11 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
static inline int pmd_bad(pmd_t pmd)
{
+#ifdef CONFIG_NUMA_BALANCING
+ /* pmd_numa check */
+ if ((pmd_flags(pmd) & (_PAGE_NUMA|_PAGE_PRESENT)) == _PAGE_NUMA)
+ return 0;
+#endif
return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
}
@@ -768,6 +786,18 @@ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
memcpy(dst, src, count * sizeof(pgd_t));
}
+/*
+ * The x86 doesn't have any external MMU info: the kernel page
+ * tables contain all the necessary information.
+ */
+static inline void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep)
+{
+}
+static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd)
+{
+}
#include <asm-generic/pgtable.h>
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 8faa215a503..9ee322103c6 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -66,13 +66,6 @@ do { \
__flush_tlb_one((vaddr)); \
} while (0)
-/*
- * The i386 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
- */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
#endif /* !__ASSEMBLY__ */
/*
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 47356f9df82..615b0c78449 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -142,9 +142,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))/* NOP */
-#define update_mmu_cache(vma, address, ptep) do { } while (0)
-#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
-
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index ec8a1fc9505..3c32db8c539 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -64,6 +64,26 @@
#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+/*
+ * _PAGE_NUMA indicates that this page will trigger a numa hinting
+ * minor page fault to gather numa placement statistics (see
+ * pte_numa()). The bit picked (8) is within the range between
+ * _PAGE_FILE (6) and _PAGE_PROTNONE (8) bits. Therefore, it doesn't
+ * require changes to the swp entry format because that bit is always
+ * zero when the pte is not present.
+ *
+ * The bit picked must be always zero when the pmd is present and not
+ * present, so that we don't lose information when we set it while
+ * atomically clearing the present bit.
+ *
+ * Because we shared the same bit (8) with _PAGE_PROTNONE this can be
+ * interpreted as _PAGE_NUMA only in places that _PAGE_PROTNONE
+ * couldn't reach, like handle_mm_fault() (see access_error in
+ * arch/x86/mm/fault.c, the vma protection must not be PROT_NONE for
+ * handle_mm_fault() to be invoked).
+ */
+#define _PAGE_NUMA _PAGE_PROTNONE
+
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index bad3665c25f..f565f6dd59d 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -1,15 +1,5 @@
-#ifdef __KERNEL__
# ifdef CONFIG_X86_32
# include <asm/posix_types_32.h>
# else
# include <asm/posix_types_64.h>
# endif
-#else
-# ifdef __i386__
-# include <asm/posix_types_32.h>
-# elif defined(__ILP32__)
-# include <asm/posix_types_x32.h>
-# else
-# include <asm/posix_types_64.h>
-# endif
-#endif
diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 680cf09ed10..39fb618e221 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -1,106 +1,11 @@
#ifndef _ASM_X86_PROCESSOR_FLAGS_H
#define _ASM_X86_PROCESSOR_FLAGS_H
-/* Various flags defined: can be included from assembler. */
-/*
- * EFLAGS bits
- */
-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
-#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
-#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+#include <uapi/asm/processor-flags.h>
-/*
- * Basic CPU control in CR0
- */
-#define X86_CR0_PE 0x00000001 /* Protection Enable */
-#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
-#define X86_CR0_EM 0x00000004 /* Emulation */
-#define X86_CR0_TS 0x00000008 /* Task Switched */
-#define X86_CR0_ET 0x00000010 /* Extension Type */
-#define X86_CR0_NE 0x00000020 /* Numeric Error */
-#define X86_CR0_WP 0x00010000 /* Write Protect */
-#define X86_CR0_AM 0x00040000 /* Alignment Mask */
-#define X86_CR0_NW 0x20000000 /* Not Write-through */
-#define X86_CR0_CD 0x40000000 /* Cache Disable */
-#define X86_CR0_PG 0x80000000 /* Paging */
-
-/*
- * Paging options in CR3
- */
-#define X86_CR3_PWT 0x00000008 /* Page Write Through */
-#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
-#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
-
-/*
- * Intel CPU features in CR4
- */
-#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
-#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */
-#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */
-#define X86_CR4_DE 0x00000008 /* enable debugging extensions */
-#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
-#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
-#define X86_CR4_MCE 0x00000040 /* Machine check enable */
-#define X86_CR4_PGE 0x00000080 /* enable global pages */
-#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */
-#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
-#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
-#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
-#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
-#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
-#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
-#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
-#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */
-
-/*
- * x86-64 Task Priority Register, CR8
- */
-#define X86_CR8_TPR 0x0000000F /* task priority register */
-
-/*
- * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
- */
-
-/*
- * NSC/Cyrix CPU configuration register indexes
- */
-#define CX86_PCR0 0x20
-#define CX86_GCR 0xb8
-#define CX86_CCR0 0xc0
-#define CX86_CCR1 0xc1
-#define CX86_CCR2 0xc2
-#define CX86_CCR3 0xc3
-#define CX86_CCR4 0xe8
-#define CX86_CCR5 0xe9
-#define CX86_CCR6 0xea
-#define CX86_CCR7 0xeb
-#define CX86_PCR1 0xf0
-#define CX86_DIR0 0xfe
-#define CX86_DIR1 0xff
-#define CX86_ARR_BASE 0xc4
-#define CX86_RCR_BASE 0xdc
-
-#ifdef __KERNEL__
#ifdef CONFIG_VM86
#define X86_VM_MASK X86_EFLAGS_VM
#else
#define X86_VM_MASK 0 /* No VM86 support */
#endif
-#endif
-
#endif /* _ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ad1fc851167..888184b2fc8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -178,8 +178,6 @@ static inline int hlt_works(int cpu)
extern void cpu_detect(struct cpuinfo_x86 *c);
-extern struct pt_regs *idle_regs(struct pt_regs *);
-
extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
@@ -187,7 +185,7 @@ extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
+extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void detect_extended_topology(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
@@ -672,18 +670,29 @@ static inline void sync_core(void)
{
int tmp;
-#if defined(CONFIG_M386) || defined(CONFIG_M486)
- if (boot_cpu_data.x86 < 5)
- /* There is no speculative execution.
- * jmp is a barrier to prefetching. */
- asm volatile("jmp 1f\n1:\n" ::: "memory");
- else
+#ifdef CONFIG_M486
+ /*
+ * Do a CPUID if available, otherwise do a jump. The jump
+ * can conveniently enough be the jump around CPUID.
+ */
+ asm volatile("cmpl %2,%1\n\t"
+ "jl 1f\n\t"
+ "cpuid\n"
+ "1:"
+ : "=a" (tmp)
+ : "rm" (boot_cpu_data.cpuid_level), "ri" (0), "0" (1)
+ : "ebx", "ecx", "edx", "memory");
+#else
+ /*
+ * CPUID is a barrier to speculative execution.
+ * Prefetched instructions are automatically
+ * invalidated when modified.
+ */
+ asm volatile("cpuid"
+ : "=a" (tmp)
+ : "0" (1)
+ : "ebx", "ecx", "edx", "memory");
#endif
- /* cpuid is a barrier to speculative execution.
- * Prefetched instructions are automatically
- * invalidated when modified. */
- asm volatile("cpuid" : "=a" (tmp) : "0" (1)
- : "ebx", "ecx", "edx", "memory");
}
static inline void __monitor(const void *eax, unsigned long ecx,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 19f16ebaf4f..942a08623a1 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -1,44 +1,12 @@
#ifndef _ASM_X86_PTRACE_H
#define _ASM_X86_PTRACE_H
-#include <linux/compiler.h> /* For __user */
-#include <asm/ptrace-abi.h>
-#include <asm/processor-flags.h>
-
-#ifdef __KERNEL__
#include <asm/segment.h>
#include <asm/page_types.h>
-#endif
+#include <uapi/asm/ptrace.h>
#ifndef __ASSEMBLY__
-
#ifdef __i386__
-/* this struct defines the way the registers are stored on the
- stack during a system call. */
-
-#ifndef __KERNEL__
-
-struct pt_regs {
- long ebx;
- long ecx;
- long edx;
- long esi;
- long edi;
- long ebp;
- long eax;
- int xds;
- int xes;
- int xfs;
- int xgs;
- long orig_eax;
- long eip;
- int xcs;
- long eflags;
- long esp;
- int xss;
-};
-
-#else /* __KERNEL__ */
struct pt_regs {
unsigned long bx;
@@ -60,42 +28,8 @@ struct pt_regs {
unsigned long ss;
};
-#endif /* __KERNEL__ */
-
#else /* __i386__ */
-#ifndef __KERNEL__
-
-struct pt_regs {
- unsigned long r15;
- unsigned long r14;
- unsigned long r13;
- unsigned long r12;
- unsigned long rbp;
- unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
- unsigned long r11;
- unsigned long r10;
- unsigned long r9;
- unsigned long r8;
- unsigned long rax;
- unsigned long rcx;
- unsigned long rdx;
- unsigned long rsi;
- unsigned long rdi;
- unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
- unsigned long rip;
- unsigned long cs;
- unsigned long eflags;
- unsigned long rsp;
- unsigned long ss;
-/* top of stack page */
-};
-
-#else /* __KERNEL__ */
-
struct pt_regs {
unsigned long r15;
unsigned long r14;
@@ -124,12 +58,8 @@ struct pt_regs {
/* top of stack page */
};
-#endif /* __KERNEL__ */
#endif /* !__i386__ */
-
-#ifdef __KERNEL__
-
#include <linux/init.h>
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt_types.h>
@@ -203,6 +133,13 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
#endif
}
+
+#define current_user_stack_pointer() this_cpu_read(old_rsp)
+/* ia32 vs. x32 difference */
+#define compat_user_stack_pointer() \
+ (test_thread_flag(TIF_IA32) \
+ ? current_pt_regs()->sp \
+ : this_cpu_read(old_rsp))
#endif
#ifdef CONFIG_X86_32
@@ -239,6 +176,15 @@ static inline unsigned long regs_get_register(struct pt_regs *regs,
{
if (unlikely(offset > MAX_REG_OFFSET))
return 0;
+#ifdef CONFIG_X86_32
+ /*
+ * Traps from the kernel do not save sp and ss.
+ * Use the helper function to retrieve sp.
+ */
+ if (offset == offsetof(struct pt_regs, sp) &&
+ regs->cs == __KERNEL_CS)
+ return kernel_stack_pointer(regs);
+#endif
return *(unsigned long *)((unsigned long)regs + offset);
}
@@ -292,8 +238,5 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
-#endif /* __KERNEL__ */
-
#endif /* !__ASSEMBLY__ */
-
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index c59cc97fe6c..109a9dd5d45 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -6,6 +6,7 @@
/* some helper functions for xen and kvm pv clock sources */
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
void pvclock_set_flags(u8 flags);
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
@@ -56,4 +57,50 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
return product;
}
+static __always_inline
+u64 pvclock_get_nsec_offset(const struct pvclock_vcpu_time_info *src)
+{
+ u64 delta = __native_read_tsc() - src->tsc_timestamp;
+ return pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+}
+
+static __always_inline
+unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
+ cycle_t *cycles, u8 *flags)
+{
+ unsigned version;
+ cycle_t ret, offset;
+ u8 ret_flags;
+
+ version = src->version;
+ /* Note: emulated platforms which do not advertise SSE2 support
+ * result in kvmclock not using the necessary RDTSC barriers.
+ * Without barriers, it is possible that RDTSC instruction reads from
+ * the time stamp counter outside rdtsc_barrier protected section
+ * below, resulting in violation of monotonicity.
+ */
+ rdtsc_barrier();
+ offset = pvclock_get_nsec_offset(src);
+ ret = src->system_time + offset;
+ ret_flags = src->flags;
+ rdtsc_barrier();
+
+ *cycles = ret;
+ *flags = ret_flags;
+ return version;
+}
+
+struct pvclock_vsyscall_time_info {
+ struct pvclock_vcpu_time_info pvti;
+ u32 migrate_count;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
+#define PVCLOCK_VSYSCALL_NR_PAGES (((NR_CPUS-1)/(PAGE_SIZE/PVTI_SIZE))+1)
+
+int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
+ int size);
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu);
+
#endif /* _ASM_X86_PVCLOCK_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 6c7fc25f2c3..5c6e4fb370f 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -47,6 +47,12 @@
# define NEED_NOPL 0
#endif
+#ifdef CONFIG_MATOM
+# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
+#else
+# define NEED_MOVBE 0
+#endif
+
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
@@ -80,7 +86,7 @@
#define REQUIRED_MASK2 0
#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 0
+#define REQUIRED_MASK4 (NEED_MOVBE)
#define REQUIRED_MASK5 0
#define REQUIRED_MASK6 0
#define REQUIRED_MASK7 0
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index d0f19f9fb84..b7bf3505e1e 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -1,7 +1,8 @@
#ifndef _ASM_X86_SETUP_H
#define _ASM_X86_SETUP_H
-#ifdef __KERNEL__
+#include <uapi/asm/setup.h>
+
#define COMMAND_LINE_SIZE 2048
@@ -123,6 +124,4 @@ void __init x86_64_start_reservations(char *real_mode_data);
.size .brk.name,.-1b; \
.popsection
#endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_SETUP_H */
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 5ca71c065ee..9dfce4e0417 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -1,104 +1,9 @@
#ifndef _ASM_X86_SIGCONTEXT_H
#define _ASM_X86_SIGCONTEXT_H
-#include <linux/compiler.h>
-#include <linux/types.h>
-
-#define FP_XSTATE_MAGIC1 0x46505853U
-#define FP_XSTATE_MAGIC2 0x46505845U
-#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
-
-/*
- * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
- * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
- * are used to extended the fpstate pointer in the sigcontext, which now
- * includes the extended state information along with fpstate information.
- *
- * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
- * area and FP_XSTATE_MAGIC2 at the end of memory layout
- * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
- * extended state information in the memory layout pointed by the fpstate
- * pointer in sigcontext.
- */
-struct _fpx_sw_bytes {
- __u32 magic1; /* FP_XSTATE_MAGIC1 */
- __u32 extended_size; /* total size of the layout referred by
- * fpstate pointer in the sigcontext.
- */
- __u64 xstate_bv;
- /* feature bit mask (including fp/sse/extended
- * state) that is present in the memory
- * layout.
- */
- __u32 xstate_size; /* actual xsave state size, based on the
- * features saved in the layout.
- * 'extended_size' will be greater than
- * 'xstate_size'.
- */
- __u32 padding[7]; /* for future use. */
-};
+#include <uapi/asm/sigcontext.h>
#ifdef __i386__
-/*
- * As documented in the iBCS2 standard..
- *
- * The first part of "struct _fpstate" is just the normal i387
- * hardware setup, the extra "status" word is used to save the
- * coprocessor status word before entering the handler.
- *
- * Pentium III FXSR, SSE support
- * Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * The FPU state data structure has had to grow to accommodate the
- * extended FPU state required by the Streaming SIMD Extensions.
- * There is no documented standard to accomplish this at the moment.
- */
-struct _fpreg {
- unsigned short significand[4];
- unsigned short exponent;
-};
-
-struct _fpxreg {
- unsigned short significand[4];
- unsigned short exponent;
- unsigned short padding[3];
-};
-
-struct _xmmreg {
- unsigned long element[4];
-};
-
-struct _fpstate {
- /* Regular FPU environment */
- unsigned long cw;
- unsigned long sw;
- unsigned long tag;
- unsigned long ipoff;
- unsigned long cssel;
- unsigned long dataoff;
- unsigned long datasel;
- struct _fpreg _st[8];
- unsigned short status;
- unsigned short magic; /* 0xffff = regular FPU data only */
-
- /* FXSR FPU environment */
- unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */
- unsigned long mxcsr;
- unsigned long reserved;
- struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
- struct _xmmreg _xmm[8];
- unsigned long padding1[44];
-
- union {
- unsigned long padding2[12];
- struct _fpx_sw_bytes sw_reserved; /* represents the extended
- * state info */
- };
-};
-
-#define X86_FXSR_MAGIC 0x0000
-
-#ifdef __KERNEL__
struct sigcontext {
unsigned short gs, __gsh;
unsigned short fs, __fsh;
@@ -131,62 +36,7 @@ struct sigcontext {
unsigned long oldmask;
unsigned long cr2;
};
-#else /* __KERNEL__ */
-/*
- * User-space might still rely on the old definition:
- */
-struct sigcontext {
- unsigned short gs, __gsh;
- unsigned short fs, __fsh;
- unsigned short es, __esh;
- unsigned short ds, __dsh;
- unsigned long edi;
- unsigned long esi;
- unsigned long ebp;
- unsigned long esp;
- unsigned long ebx;
- unsigned long edx;
- unsigned long ecx;
- unsigned long eax;
- unsigned long trapno;
- unsigned long err;
- unsigned long eip;
- unsigned short cs, __csh;
- unsigned long eflags;
- unsigned long esp_at_signal;
- unsigned short ss, __ssh;
- struct _fpstate __user *fpstate;
- unsigned long oldmask;
- unsigned long cr2;
-};
-#endif /* !__KERNEL__ */
-
#else /* __i386__ */
-
-/* FXSAVE frame */
-/* Note: reserved1/2 may someday contain valuable data. Always save/restore
- them when you change signal frames. */
-struct _fpstate {
- __u16 cwd;
- __u16 swd;
- __u16 twd; /* Note this is not the same as the
- 32bit/x87/FSAVE twd */
- __u16 fop;
- __u64 rip;
- __u64 rdp;
- __u32 mxcsr;
- __u32 mxcsr_mask;
- __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
- __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
- __u32 reserved2[12];
- union {
- __u32 reserved3[12];
- struct _fpx_sw_bytes sw_reserved; /* represents the extended
- * state information */
- };
-};
-
-#ifdef __KERNEL__
struct sigcontext {
unsigned long r8;
unsigned long r9;
@@ -225,69 +75,5 @@ struct sigcontext {
void __user *fpstate; /* zero when no FPU/extended context */
unsigned long reserved1[8];
};
-#else /* __KERNEL__ */
-/*
- * User-space might still rely on the old definition:
- */
-struct sigcontext {
- __u64 r8;
- __u64 r9;
- __u64 r10;
- __u64 r11;
- __u64 r12;
- __u64 r13;
- __u64 r14;
- __u64 r15;
- __u64 rdi;
- __u64 rsi;
- __u64 rbp;
- __u64 rbx;
- __u64 rdx;
- __u64 rax;
- __u64 rcx;
- __u64 rsp;
- __u64 rip;
- __u64 eflags; /* RFLAGS */
- __u16 cs;
- __u16 gs;
- __u16 fs;
- __u16 __pad0;
- __u64 err;
- __u64 trapno;
- __u64 oldmask;
- __u64 cr2;
- struct _fpstate __user *fpstate; /* zero when no FPU context */
-#ifdef __ILP32__
- __u32 __fpstate_pad;
-#endif
- __u64 reserved1[8];
-};
-#endif /* !__KERNEL__ */
-
#endif /* !__i386__ */
-
-struct _xsave_hdr {
- __u64 xstate_bv;
- __u64 reserved1[2];
- __u64 reserved2[5];
-};
-
-struct _ymmh_state {
- /* 16 * 16 bytes for each YMMH-reg */
- __u32 ymmh_space[64];
-};
-
-/*
- * Extended state pointed by the fpstate pointer in the sigcontext.
- * In addition to the fpstate, information encoded in the xstate_hdr
- * indicates the presence of other extended state information
- * supported by the processor and OS.
- */
-struct _xstate {
- struct _fpstate fpstate;
- struct _xsave_hdr xstate_hdr;
- struct _ymmh_state ymmh;
- /* new processor state extensions go here */
-};
-
#endif /* _ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 323973f4abf..216bf364a7e 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -2,14 +2,6 @@
#define _ASM_X86_SIGNAL_H
#ifndef __ASSEMBLY__
-#include <linux/types.h>
-#include <linux/time.h>
-#include <linux/compiler.h>
-
-/* Avoid too many header ordering problems. */
-struct siginfo;
-
-#ifdef __KERNEL__
#include <linux/linkage.h>
/* Most things should be clean enough to redefine this at will, if care
@@ -35,102 +27,11 @@ typedef struct {
typedef sigset_t compat_sigset_t;
#endif
-#else
-/* Here we must cater to libcs that poke about in kernel headers. */
-
-#define NSIG 32
-typedef unsigned long sigset_t;
-
-#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
-
-#define SIGHUP 1
-#define SIGINT 2
-#define SIGQUIT 3
-#define SIGILL 4
-#define SIGTRAP 5
-#define SIGABRT 6
-#define SIGIOT 6
-#define SIGBUS 7
-#define SIGFPE 8
-#define SIGKILL 9
-#define SIGUSR1 10
-#define SIGSEGV 11
-#define SIGUSR2 12
-#define SIGPIPE 13
-#define SIGALRM 14
-#define SIGTERM 15
-#define SIGSTKFLT 16
-#define SIGCHLD 17
-#define SIGCONT 18
-#define SIGSTOP 19
-#define SIGTSTP 20
-#define SIGTTIN 21
-#define SIGTTOU 22
-#define SIGURG 23
-#define SIGXCPU 24
-#define SIGXFSZ 25
-#define SIGVTALRM 26
-#define SIGPROF 27
-#define SIGWINCH 28
-#define SIGIO 29
-#define SIGPOLL SIGIO
-/*
-#define SIGLOST 29
-*/
-#define SIGPWR 30
-#define SIGSYS 31
-#define SIGUNUSED 31
-
-/* These should not be considered constants from userland. */
-#define SIGRTMIN 32
-#define SIGRTMAX _NSIG
-
-/*
- * SA_FLAGS values:
- *
- * SA_ONSTACK indicates that a registered stack_t will be used.
- * SA_RESTART flag to get restarting signals (which were the default long ago)
- * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
- * SA_RESETHAND clears the handler when the signal is delivered.
- * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
- * SA_NODEFER prevents the current signal from being masked in the handler.
- *
- * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
- * Unix names RESETHAND and NODEFER respectively.
- */
-#define SA_NOCLDSTOP 0x00000001u
-#define SA_NOCLDWAIT 0x00000002u
-#define SA_SIGINFO 0x00000004u
-#define SA_ONSTACK 0x08000000u
-#define SA_RESTART 0x10000000u
-#define SA_NODEFER 0x40000000u
-#define SA_RESETHAND 0x80000000u
-
-#define SA_NOMASK SA_NODEFER
-#define SA_ONESHOT SA_RESETHAND
-
-#define SA_RESTORER 0x04000000
-
-/*
- * sigaltstack controls
- */
-#define SS_ONSTACK 1
-#define SS_DISABLE 2
-
-#define MINSIGSTKSZ 2048
-#define SIGSTKSZ 8192
-
-#include <asm-generic/signal-defs.h>
-
+#include <uapi/asm/signal.h>
#ifndef __ASSEMBLY__
-
-# ifdef __KERNEL__
extern void do_notify_resume(struct pt_regs *, void *, __u32);
-# endif /* __KERNEL__ */
-
#ifdef __i386__
-# ifdef __KERNEL__
struct old_sigaction {
__sighandler_t sa_handler;
old_sigset_t sa_mask;
@@ -149,45 +50,8 @@ struct k_sigaction {
struct sigaction sa;
};
-# else /* __KERNEL__ */
-/* Here we must cater to libcs that poke about in kernel headers. */
-
-struct sigaction {
- union {
- __sighandler_t _sa_handler;
- void (*_sa_sigaction)(int, struct siginfo *, void *);
- } _u;
- sigset_t sa_mask;
- unsigned long sa_flags;
- void (*sa_restorer)(void);
-};
-
-#define sa_handler _u._sa_handler
-#define sa_sigaction _u._sa_sigaction
-
-# endif /* ! __KERNEL__ */
#else /* __i386__ */
-
-struct sigaction {
- __sighandler_t sa_handler;
- unsigned long sa_flags;
- __sigrestore_t sa_restorer;
- sigset_t sa_mask; /* mask last for extensibility */
-};
-
-struct k_sigaction {
- struct sigaction sa;
-};
-
#endif /* !__i386__ */
-
-typedef struct sigaltstack {
- void __user *ss_sp;
- int ss_flags;
- size_t ss_size;
-} stack_t;
-
-#ifdef __KERNEL__
#include <asm/sigcontext.h>
#ifdef __i386__
@@ -260,9 +124,5 @@ struct pt_regs;
#endif /* !__i386__ */
-#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-
-#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4f19a152603..b073aaea747 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -166,6 +166,7 @@ void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
void x86_idle_thread_init(unsigned int cpu, struct task_struct *idle);
+void smp_store_boot_cpu_info(void);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cdf5674dd23..6136d99f537 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,134 +1,8 @@
#ifndef __SVM_H
#define __SVM_H
-#define SVM_EXIT_READ_CR0 0x000
-#define SVM_EXIT_READ_CR3 0x003
-#define SVM_EXIT_READ_CR4 0x004
-#define SVM_EXIT_READ_CR8 0x008
-#define SVM_EXIT_WRITE_CR0 0x010
-#define SVM_EXIT_WRITE_CR3 0x013
-#define SVM_EXIT_WRITE_CR4 0x014
-#define SVM_EXIT_WRITE_CR8 0x018
-#define SVM_EXIT_READ_DR0 0x020
-#define SVM_EXIT_READ_DR1 0x021
-#define SVM_EXIT_READ_DR2 0x022
-#define SVM_EXIT_READ_DR3 0x023
-#define SVM_EXIT_READ_DR4 0x024
-#define SVM_EXIT_READ_DR5 0x025
-#define SVM_EXIT_READ_DR6 0x026
-#define SVM_EXIT_READ_DR7 0x027
-#define SVM_EXIT_WRITE_DR0 0x030
-#define SVM_EXIT_WRITE_DR1 0x031
-#define SVM_EXIT_WRITE_DR2 0x032
-#define SVM_EXIT_WRITE_DR3 0x033
-#define SVM_EXIT_WRITE_DR4 0x034
-#define SVM_EXIT_WRITE_DR5 0x035
-#define SVM_EXIT_WRITE_DR6 0x036
-#define SVM_EXIT_WRITE_DR7 0x037
-#define SVM_EXIT_EXCP_BASE 0x040
-#define SVM_EXIT_INTR 0x060
-#define SVM_EXIT_NMI 0x061
-#define SVM_EXIT_SMI 0x062
-#define SVM_EXIT_INIT 0x063
-#define SVM_EXIT_VINTR 0x064
-#define SVM_EXIT_CR0_SEL_WRITE 0x065
-#define SVM_EXIT_IDTR_READ 0x066
-#define SVM_EXIT_GDTR_READ 0x067
-#define SVM_EXIT_LDTR_READ 0x068
-#define SVM_EXIT_TR_READ 0x069
-#define SVM_EXIT_IDTR_WRITE 0x06a
-#define SVM_EXIT_GDTR_WRITE 0x06b
-#define SVM_EXIT_LDTR_WRITE 0x06c
-#define SVM_EXIT_TR_WRITE 0x06d
-#define SVM_EXIT_RDTSC 0x06e
-#define SVM_EXIT_RDPMC 0x06f
-#define SVM_EXIT_PUSHF 0x070
-#define SVM_EXIT_POPF 0x071
-#define SVM_EXIT_CPUID 0x072
-#define SVM_EXIT_RSM 0x073
-#define SVM_EXIT_IRET 0x074
-#define SVM_EXIT_SWINT 0x075
-#define SVM_EXIT_INVD 0x076
-#define SVM_EXIT_PAUSE 0x077
-#define SVM_EXIT_HLT 0x078
-#define SVM_EXIT_INVLPG 0x079
-#define SVM_EXIT_INVLPGA 0x07a
-#define SVM_EXIT_IOIO 0x07b
-#define SVM_EXIT_MSR 0x07c
-#define SVM_EXIT_TASK_SWITCH 0x07d
-#define SVM_EXIT_FERR_FREEZE 0x07e
-#define SVM_EXIT_SHUTDOWN 0x07f
-#define SVM_EXIT_VMRUN 0x080
-#define SVM_EXIT_VMMCALL 0x081
-#define SVM_EXIT_VMLOAD 0x082
-#define SVM_EXIT_VMSAVE 0x083
-#define SVM_EXIT_STGI 0x084
-#define SVM_EXIT_CLGI 0x085
-#define SVM_EXIT_SKINIT 0x086
-#define SVM_EXIT_RDTSCP 0x087
-#define SVM_EXIT_ICEBP 0x088
-#define SVM_EXIT_WBINVD 0x089
-#define SVM_EXIT_MONITOR 0x08a
-#define SVM_EXIT_MWAIT 0x08b
-#define SVM_EXIT_MWAIT_COND 0x08c
-#define SVM_EXIT_XSETBV 0x08d
-#define SVM_EXIT_NPF 0x400
-
-#define SVM_EXIT_ERR -1
-
-#define SVM_EXIT_REASONS \
- { SVM_EXIT_READ_CR0, "read_cr0" }, \
- { SVM_EXIT_READ_CR3, "read_cr3" }, \
- { SVM_EXIT_READ_CR4, "read_cr4" }, \
- { SVM_EXIT_READ_CR8, "read_cr8" }, \
- { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
- { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
- { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
- { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
- { SVM_EXIT_READ_DR0, "read_dr0" }, \
- { SVM_EXIT_READ_DR1, "read_dr1" }, \
- { SVM_EXIT_READ_DR2, "read_dr2" }, \
- { SVM_EXIT_READ_DR3, "read_dr3" }, \
- { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
- { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
- { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
- { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
- { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
- { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
- { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
- { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
- { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
- { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
- { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
- { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
- { SVM_EXIT_INTR, "interrupt" }, \
- { SVM_EXIT_NMI, "nmi" }, \
- { SVM_EXIT_SMI, "smi" }, \
- { SVM_EXIT_INIT, "init" }, \
- { SVM_EXIT_VINTR, "vintr" }, \
- { SVM_EXIT_CPUID, "cpuid" }, \
- { SVM_EXIT_INVD, "invd" }, \
- { SVM_EXIT_HLT, "hlt" }, \
- { SVM_EXIT_INVLPG, "invlpg" }, \
- { SVM_EXIT_INVLPGA, "invlpga" }, \
- { SVM_EXIT_IOIO, "io" }, \
- { SVM_EXIT_MSR, "msr" }, \
- { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
- { SVM_EXIT_SHUTDOWN, "shutdown" }, \
- { SVM_EXIT_VMRUN, "vmrun" }, \
- { SVM_EXIT_VMMCALL, "hypercall" }, \
- { SVM_EXIT_VMLOAD, "vmload" }, \
- { SVM_EXIT_VMSAVE, "vmsave" }, \
- { SVM_EXIT_STGI, "stgi" }, \
- { SVM_EXIT_CLGI, "clgi" }, \
- { SVM_EXIT_SKINIT, "skinit" }, \
- { SVM_EXIT_WBINVD, "wbinvd" }, \
- { SVM_EXIT_MONITOR, "monitor" }, \
- { SVM_EXIT_MWAIT, "mwait" }, \
- { SVM_EXIT_XSETBV, "xsetbv" }, \
- { SVM_EXIT_NPF, "npf" }
-
-#ifdef __KERNEL__
+#include <uapi/asm/svm.h>
+
enum {
INTERCEPT_INTR,
@@ -403,5 +277,3 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
#endif
-
-#endif
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index a9a8cf3da49..31f61f96e0f 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
asmlinkage long sys32_personality(unsigned long);
asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
-asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
-
long sys32_lseek(unsigned int, int, unsigned int);
long sys32_kill(int, int);
long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
@@ -71,8 +69,6 @@ asmlinkage long sys32_fallocate(int, int, unsigned,
/* ia32/ia32_signal.c */
asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
-asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
- stack_ia32_t __user *, struct pt_regs *);
asmlinkage long sys32_sigreturn(struct pt_regs *);
asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 2be0b880417..58b7e3eac0a 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -20,23 +20,11 @@
asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
long sys_iopl(unsigned int, struct pt_regs *);
-/* kernel/process.c */
-int sys_fork(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
-long sys_execve(const char __user *,
- const char __user *const __user *,
- const char __user *const __user *);
-long sys_clone(unsigned long, unsigned long, void __user *,
- void __user *, struct pt_regs *);
-
/* kernel/ldt.c */
asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
/* kernel/signal.c */
long sys_rt_sigreturn(struct pt_regs *);
-long sys_sigaltstack(const stack_t __user *, stack_t __user *,
- struct pt_regs *);
-
/* kernel/tls.c */
asmlinkage int sys_set_thread_area(struct user_desc __user *);
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 74a44333545..0fee48e279c 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -56,10 +56,7 @@ static inline void __flush_tlb_all(void)
static inline void __flush_tlb_one(unsigned long addr)
{
- if (cpu_has_invlpg)
__flush_tlb_single(addr);
- else
- __flush_tlb();
}
#define TLB_FLUSH_ALL -1UL
diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
new file mode 100644
index 00000000000..beab86cc282
--- /dev/null
+++ b/arch/x86/include/asm/trace_clock.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_TRACE_CLOCK_H
+#define _ASM_X86_TRACE_CLOCK_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_X86_TSC
+
+extern u64 notrace trace_clock_x86_tsc(void);
+
+# define ARCH_TRACE_CLOCKS \
+ { trace_clock_x86_tsc, "x86-tsc", .in_ns = 0 },
+
+#else /* !CONFIG_X86_TSC */
+
+#define ARCH_TRACE_CLOCKS
+
+#endif
+
+#endif /* _ASM_X86_TRACE_CLOCK_H */
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 7ccf8d13153..1709801d18e 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -237,8 +237,6 @@ extern void __put_user_2(void);
extern void __put_user_4(void);
extern void __put_user_8(void);
-#ifdef CONFIG_X86_WP_WORKS_OK
-
/**
* put_user: - Write a simple value into user space.
* @x: Value to copy to user space.
@@ -326,29 +324,6 @@ do { \
} \
} while (0)
-#else
-
-#define __put_user_size(x, ptr, size, retval, errret) \
-do { \
- __typeof__(*(ptr))__pus_tmp = x; \
- retval = 0; \
- \
- if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
- retval = errret; \
-} while (0)
-
-#define put_user(x, ptr) \
-({ \
- int __ret_pu; \
- __typeof__(*(ptr))__pus_tmp = x; \
- __ret_pu = 0; \
- if (unlikely(__copy_to_user_ll(ptr, &__pus_tmp, \
- sizeof(*(ptr))) != 0)) \
- __ret_pu = -EFAULT; \
- __ret_pu; \
-})
-#endif
-
#ifdef CONFIG_X86_32
#define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad()
#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad()
@@ -543,29 +518,12 @@ struct __large_struct { unsigned long buf[100]; };
(x) = (__force __typeof__(*(ptr)))__gue_val; \
} while (0)
-#ifdef CONFIG_X86_WP_WORKS_OK
-
#define put_user_try uaccess_try
#define put_user_catch(err) uaccess_catch(err)
#define put_user_ex(x, ptr) \
__put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)))
-#else /* !CONFIG_X86_WP_WORKS_OK */
-
-#define put_user_try do { \
- int __uaccess_err = 0;
-
-#define put_user_catch(err) \
- (err) |= __uaccess_err; \
-} while (0)
-
-#define put_user_ex(x, ptr) do { \
- __uaccess_err |= __put_user(x, ptr); \
-} while (0)
-
-#endif /* CONFIG_X86_WP_WORKS_OK */
-
extern unsigned long
copy_from_user_nmi(void *to, const void __user *from, unsigned long n);
extern __must_check long
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 16f3fc6ebf2..a0790e07ba6 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -1,10 +1,8 @@
#ifndef _ASM_X86_UNISTD_H
#define _ASM_X86_UNISTD_H 1
-/* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT 0x40000000
+#include <uapi/asm/unistd.h>
-#ifdef __KERNEL__
# ifdef CONFIG_X86_X32_ABI
# define __SYSCALL_MASK (~(__X32_SYSCALL_BIT))
@@ -50,7 +48,9 @@
# define __ARCH_WANT_SYS_TIME
# define __ARCH_WANT_SYS_UTIME
# define __ARCH_WANT_SYS_WAITPID
-# define __ARCH_WANT_SYS_EXECVE
+# define __ARCH_WANT_SYS_FORK
+# define __ARCH_WANT_SYS_VFORK
+# define __ARCH_WANT_SYS_CLONE
/*
* "Conditional" syscalls
@@ -60,14 +60,4 @@
*/
# define cond_syscall(x) asm(".weak\t" #x "\n\t.set\t" #x ",sys_ni_syscall")
-#else
-# ifdef __i386__
-# include <asm/unistd_32.h>
-# elif defined(__ILP32__)
-# include <asm/unistd_x32.h>
-# else
-# include <asm/unistd_64.h>
-# endif
-#endif
-
#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index b47c2a82ff1..062921ef34e 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -16,7 +16,7 @@ extern void uv_system_init(void);
extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm,
unsigned long start,
- unsigned end,
+ unsigned long end,
unsigned int cpu);
#else /* X86_UV */
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h
index f9303602fbc..1d8de3f3fec 100644
--- a/arch/x86/include/asm/vm86.h
+++ b/arch/x86/include/asm/vm86.h
@@ -1,133 +1,9 @@
#ifndef _ASM_X86_VM86_H
#define _ASM_X86_VM86_H
-/*
- * I'm guessing at the VIF/VIP flag usage, but hope that this is how
- * the Pentium uses them. Linux will return from vm86 mode when both
- * VIF and VIP is set.
- *
- * On a Pentium, we could probably optimize the virtual flags directly
- * in the eflags register instead of doing it "by hand" in vflags...
- *
- * Linus
- */
-
-#include <asm/processor-flags.h>
-
-#define BIOSSEG 0x0f000
-
-#define CPU_086 0
-#define CPU_186 1
-#define CPU_286 2
-#define CPU_386 3
-#define CPU_486 4
-#define CPU_586 5
-
-/*
- * Return values for the 'vm86()' system call
- */
-#define VM86_TYPE(retval) ((retval) & 0xff)
-#define VM86_ARG(retval) ((retval) >> 8)
-
-#define VM86_SIGNAL 0 /* return due to signal */
-#define VM86_UNKNOWN 1 /* unhandled GP fault
- - IO-instruction or similar */
-#define VM86_INTx 2 /* int3/int x instruction (ARG = x) */
-#define VM86_STI 3 /* sti/popf/iret instruction enabled
- virtual interrupts */
-
-/*
- * Additional return values when invoking new vm86()
- */
-#define VM86_PICRETURN 4 /* return due to pending PIC request */
-#define VM86_TRAP 6 /* return due to DOS-debugger request */
-
-/*
- * function codes when invoking new vm86()
- */
-#define VM86_PLUS_INSTALL_CHECK 0
-#define VM86_ENTER 1
-#define VM86_ENTER_NO_BYPASS 2
-#define VM86_REQUEST_IRQ 3
-#define VM86_FREE_IRQ 4
-#define VM86_GET_IRQ_BITS 5
-#define VM86_GET_AND_RESET_IRQ 6
-
-/*
- * This is the stack-layout seen by the user space program when we have
- * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout
- * is 'kernel_vm86_regs' (see below).
- */
-
-struct vm86_regs {
-/*
- * normal regs, with special meaning for the segment descriptors..
- */
- long ebx;
- long ecx;
- long edx;
- long esi;
- long edi;
- long ebp;
- long eax;
- long __null_ds;
- long __null_es;
- long __null_fs;
- long __null_gs;
- long orig_eax;
- long eip;
- unsigned short cs, __csh;
- long eflags;
- long esp;
- unsigned short ss, __ssh;
-/*
- * these are specific to v86 mode:
- */
- unsigned short es, __esh;
- unsigned short ds, __dsh;
- unsigned short fs, __fsh;
- unsigned short gs, __gsh;
-};
-
-struct revectored_struct {
- unsigned long __map[8]; /* 256 bits */
-};
-
-struct vm86_struct {
- struct vm86_regs regs;
- unsigned long flags;
- unsigned long screen_bitmap;
- unsigned long cpu_type;
- struct revectored_struct int_revectored;
- struct revectored_struct int21_revectored;
-};
-
-/*
- * flags masks
- */
-#define VM86_SCREEN_BITMAP 0x0001
-
-struct vm86plus_info_struct {
- unsigned long force_return_for_pic:1;
- unsigned long vm86dbg_active:1; /* for debugger */
- unsigned long vm86dbg_TFpendig:1; /* for debugger */
- unsigned long unused:28;
- unsigned long is_vm86pus:1; /* for vm86 internal use */
- unsigned char vm86dbg_intxxtab[32]; /* for debugger */
-};
-struct vm86plus_struct {
- struct vm86_regs regs;
- unsigned long flags;
- unsigned long screen_bitmap;
- unsigned long cpu_type;
- struct revectored_struct int_revectored;
- struct revectored_struct int21_revectored;
- struct vm86plus_info_struct vm86plus;
-};
-
-#ifdef __KERNEL__
#include <asm/ptrace.h>
+#include <uapi/asm/vm86.h>
/*
* This is the (kernel) stack-layout when we have done a "SAVE_ALL" from vm86
@@ -203,6 +79,4 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
#endif /* CONFIG_VM86 */
-#endif /* __KERNEL__ */
-
#endif /* _ASM_X86_VM86_H */
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 36ec21c36d6..235b49fa554 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -1,6 +1,3 @@
-#ifndef VMX_H
-#define VMX_H
-
/*
* vmx.h: VMX Architecture related definitions
* Copyright (c) 2004, Intel Corporation.
@@ -24,90 +21,12 @@
* Yaniv Kamay <yaniv@qumranet.com>
*
*/
+#ifndef VMX_H
+#define VMX_H
-#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI 0
-#define EXIT_REASON_EXTERNAL_INTERRUPT 1
-#define EXIT_REASON_TRIPLE_FAULT 2
-
-#define EXIT_REASON_PENDING_INTERRUPT 7
-#define EXIT_REASON_NMI_WINDOW 8
-#define EXIT_REASON_TASK_SWITCH 9
-#define EXIT_REASON_CPUID 10
-#define EXIT_REASON_HLT 12
-#define EXIT_REASON_INVD 13
-#define EXIT_REASON_INVLPG 14
-#define EXIT_REASON_RDPMC 15
-#define EXIT_REASON_RDTSC 16
-#define EXIT_REASON_VMCALL 18
-#define EXIT_REASON_VMCLEAR 19
-#define EXIT_REASON_VMLAUNCH 20
-#define EXIT_REASON_VMPTRLD 21
-#define EXIT_REASON_VMPTRST 22
-#define EXIT_REASON_VMREAD 23
-#define EXIT_REASON_VMRESUME 24
-#define EXIT_REASON_VMWRITE 25
-#define EXIT_REASON_VMOFF 26
-#define EXIT_REASON_VMON 27
-#define EXIT_REASON_CR_ACCESS 28
-#define EXIT_REASON_DR_ACCESS 29
-#define EXIT_REASON_IO_INSTRUCTION 30
-#define EXIT_REASON_MSR_READ 31
-#define EXIT_REASON_MSR_WRITE 32
-#define EXIT_REASON_INVALID_STATE 33
-#define EXIT_REASON_MWAIT_INSTRUCTION 36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION 40
-#define EXIT_REASON_MCE_DURING_VMENTRY 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS 44
-#define EXIT_REASON_EPT_VIOLATION 48
-#define EXIT_REASON_EPT_MISCONFIG 49
-#define EXIT_REASON_WBINVD 54
-#define EXIT_REASON_XSETBV 55
-#define EXIT_REASON_INVPCID 58
-
-#define VMX_EXIT_REASONS \
- { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
- { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
- { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
- { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
- { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
- { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
- { EXIT_REASON_CPUID, "CPUID" }, \
- { EXIT_REASON_HLT, "HLT" }, \
- { EXIT_REASON_INVLPG, "INVLPG" }, \
- { EXIT_REASON_RDPMC, "RDPMC" }, \
- { EXIT_REASON_RDTSC, "RDTSC" }, \
- { EXIT_REASON_VMCALL, "VMCALL" }, \
- { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
- { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
- { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
- { EXIT_REASON_VMPTRST, "VMPTRST" }, \
- { EXIT_REASON_VMREAD, "VMREAD" }, \
- { EXIT_REASON_VMRESUME, "VMRESUME" }, \
- { EXIT_REASON_VMWRITE, "VMWRITE" }, \
- { EXIT_REASON_VMOFF, "VMOFF" }, \
- { EXIT_REASON_VMON, "VMON" }, \
- { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
- { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
- { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
- { EXIT_REASON_MSR_READ, "MSR_READ" }, \
- { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
- { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
- { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
- { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
- { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
- { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
- { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
- { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
- { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
- { EXIT_REASON_WBINVD, "WBINVD" }
-
-#ifdef __KERNEL__
#include <linux/types.h>
+#include <uapi/asm/vmx.h>
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
@@ -445,8 +364,7 @@ enum vmcs_field {
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
-#define VMX_EPT_AD_BIT (1ull << 21)
-#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
+#define VMX_EPT_AD_BIT (1ull << 21)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
@@ -527,5 +445,3 @@ enum vm_instruction_error_number {
};
#endif
-
-#endif
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index eaea1d31f75..2a46ca720af 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -1,20 +1,8 @@
#ifndef _ASM_X86_VSYSCALL_H
#define _ASM_X86_VSYSCALL_H
-enum vsyscall_num {
- __NR_vgettimeofday,
- __NR_vtime,
- __NR_vgetcpu,
-};
-
-#define VSYSCALL_START (-10UL << 20)
-#define VSYSCALL_SIZE 1024
-#define VSYSCALL_END (-2UL << 20)
-#define VSYSCALL_MAPPED_PAGES 1
-#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
-
-#ifdef __KERNEL__
#include <linux/seqlock.h>
+#include <uapi/asm/vsyscall.h>
#define VGETCPU_RDTSCP 1
#define VGETCPU_LSL 2
@@ -33,6 +21,24 @@ extern void map_vsyscall(void);
*/
extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
-#endif /* __KERNEL__ */
+#ifdef CONFIG_X86_64
+
+#define VGETCPU_CPU_MASK 0xfff
+
+static inline unsigned int __getcpu(void)
+{
+ unsigned int p;
+
+ if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
+ /* Load per CPU data from RDTSCP */
+ native_read_tscp(&p);
+ } else {
+ /* Load per CPU data from GDT */
+ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
+ }
+
+ return p;
+}
+#endif /* CONFIG_X86_64 */
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 57693498519..7669941cc9d 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -181,19 +181,38 @@ struct x86_platform_ops {
};
struct pci_dev;
+struct msi_msg;
struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
+ void (*compose_msi_msg)(struct pci_dev *dev, unsigned int irq,
+ unsigned int dest, struct msi_msg *msg,
+ u8 hpet_id);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
+ int (*setup_hpet_msi)(unsigned int irq, unsigned int id);
};
+struct IO_APIC_route_entry;
+struct io_apic_irq_attr;
+struct irq_data;
+struct cpumask;
+
struct x86_io_apic_ops {
- void (*init) (void);
- unsigned int (*read) (unsigned int apic, unsigned int reg);
- void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
- void (*modify)(unsigned int apic, unsigned int reg, unsigned int value);
+ void (*init) (void);
+ unsigned int (*read) (unsigned int apic, unsigned int reg);
+ void (*write) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*modify) (unsigned int apic, unsigned int reg, unsigned int value);
+ void (*disable)(void);
+ void (*print_entries)(unsigned int apic, unsigned int nr_entries);
+ int (*set_affinity)(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force);
+ int (*setup_entry)(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr);
+ void (*eoi_ioapic_pin)(int apic, int pin, int vector);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 54d52ff1304..fd9cb7695b5 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -63,6 +63,7 @@ DEFINE_GUEST_HANDLE(void);
DEFINE_GUEST_HANDLE(uint64_t);
DEFINE_GUEST_HANDLE(uint32_t);
DEFINE_GUEST_HANDLE(xen_pfn_t);
+DEFINE_GUEST_HANDLE(xen_ulong_t);
#endif
#ifndef HYPERVISOR_VIRT_START
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index f8fde90bc45..d8829751b3f 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -1,10 +1,499 @@
#ifdef CONFIG_KMEMCHECK
/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
# include <asm-generic/xor.h>
+#elif !defined(_ASM_X86_XOR_H)
+#define _ASM_X86_XOR_H
+
+/*
+ * Optimized RAID-5 checksumming functions for SSE.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * You should have received a copy of the GNU General Public License
+ * (for example /usr/src/linux/COPYING); if not, write to the Free
+ * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Cache avoiding checksumming functions utilizing KNI instructions
+ * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
+ */
+
+/*
+ * Based on
+ * High-speed RAID5 checksumming functions utilizing SSE instructions.
+ * Copyright (C) 1998 Ingo Molnar.
+ */
+
+/*
+ * x86-64 changes / gcc fixes from Andi Kleen.
+ * Copyright 2002 Andi Kleen, SuSE Labs.
+ *
+ * This hasn't been optimized for the hammer yet, but there are likely
+ * no advantages to be gotten from x86-64 here anyways.
+ */
+
+#include <asm/i387.h>
+
+#ifdef CONFIG_X86_32
+/* reduce register pressure */
+# define XOR_CONSTANT_CONSTRAINT "i"
#else
+# define XOR_CONSTANT_CONSTRAINT "re"
+#endif
+
+#define OFFS(x) "16*("#x")"
+#define PF_OFFS(x) "256+16*("#x")"
+#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
+#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
+#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
+#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
+#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
+#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
+#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
+#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
+#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
+#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
+#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
+#define NOP(x)
+
+#define BLK64(pf, op, i) \
+ pf(i) \
+ op(i, 0) \
+ op(i + 1, 1) \
+ op(i + 2, 2) \
+ op(i + 3, 3)
+
+static void
+xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines),
+ [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ PF3(i) \
+ PF3(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ XO3(i, 0) \
+ XO3(i + 1, 1) \
+ XO3(i + 2, 2) \
+ XO3(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1),
+ [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(PF3, XO3, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1),
+ [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ PF1(i) \
+ PF1(i + 2) \
+ LD(i, 0) \
+ LD(i + 1, 1) \
+ LD(i + 2, 2) \
+ LD(i + 3, 3) \
+ PF2(i) \
+ PF2(i + 2) \
+ XO1(i, 0) \
+ XO1(i + 1, 1) \
+ XO1(i + 2, 2) \
+ XO1(i + 3, 3) \
+ PF3(i) \
+ PF3(i + 2) \
+ XO2(i, 0) \
+ XO2(i + 1, 1) \
+ XO2(i + 2, 2) \
+ XO2(i + 3, 3) \
+ PF4(i) \
+ PF4(i + 2) \
+ PF0(i + 4) \
+ PF0(i + 6) \
+ XO3(i, 0) \
+ XO3(i + 1, 1) \
+ XO3(i + 2, 2) \
+ XO3(i + 3, 3) \
+ XO4(i, 0) \
+ XO4(i + 1, 1) \
+ XO4(i + 2, 2) \
+ XO4(i + 3, 3) \
+ ST(i, 0) \
+ ST(i + 1, 1) \
+ ST(i + 2, 2) \
+ ST(i + 3, 3) \
+
+
+ PF0(0)
+ PF0(2)
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " add %[inc], %[p5] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+ [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static void
+xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ unsigned long lines = bytes >> 8;
+
+ kernel_fpu_begin();
+
+ asm volatile(
+#undef BLOCK
+#define BLOCK(i) \
+ BLK64(PF0, LD, i) \
+ BLK64(PF1, XO1, i) \
+ BLK64(PF2, XO2, i) \
+ BLK64(PF3, XO3, i) \
+ BLK64(PF4, XO4, i) \
+ BLK64(NOP, ST, i) \
+
+ " .align 32 ;\n"
+ " 1: ;\n"
+
+ BLOCK(0)
+ BLOCK(4)
+ BLOCK(8)
+ BLOCK(12)
+
+ " add %[inc], %[p1] ;\n"
+ " add %[inc], %[p2] ;\n"
+ " add %[inc], %[p3] ;\n"
+ " add %[inc], %[p4] ;\n"
+ " add %[inc], %[p5] ;\n"
+ " dec %[cnt] ;\n"
+ " jnz 1b ;\n"
+ : [cnt] "+r" (lines), [p1] "+r" (p1), [p2] "+r" (p2),
+ [p3] "+r" (p3), [p4] "+r" (p4), [p5] "+r" (p5)
+ : [inc] XOR_CONSTANT_CONSTRAINT (256UL)
+ : "memory");
+
+ kernel_fpu_end();
+}
+
+static struct xor_block_template xor_block_sse_pf64 = {
+ .name = "prefetch64-sse",
+ .do_2 = xor_sse_2_pf64,
+ .do_3 = xor_sse_3_pf64,
+ .do_4 = xor_sse_4_pf64,
+ .do_5 = xor_sse_5_pf64,
+};
+
+#undef LD
+#undef XO1
+#undef XO2
+#undef XO3
+#undef XO4
+#undef ST
+#undef NOP
+#undef BLK64
+#undef BLOCK
+
+#undef XOR_CONSTANT_CONSTRAINT
+
#ifdef CONFIG_X86_32
# include <asm/xor_32.h>
#else
# include <asm/xor_64.h>
#endif
-#endif
+
+#define XOR_SELECT_TEMPLATE(FASTEST) \
+ AVX_SELECT(FASTEST)
+
+#endif /* _ASM_X86_XOR_H */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index f79cb7ec0e0..ce05722e3c6 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -2,7 +2,7 @@
#define _ASM_X86_XOR_32_H
/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
+ * Optimized RAID-5 checksumming functions for MMX.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -529,290 +529,6 @@ static struct xor_block_template xor_block_p5_mmx = {
.do_5 = xor_p5_mmx_5,
};
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x, y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2)
- :
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r"(p2), "+r"(p3)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i,0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4)
- :
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned long lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- /* Make sure GCC forgets anything it knows about p4 or p5,
- such that it won't pass to the asm volatile below a
- register that is shared with any other variable. That's
- because we modify p4 and p5 there, but we can't mark them
- as read/write, otherwise we'd overflow the 10-asm-operands
- limit of GCC < 3.1. */
- asm("" : "+r" (p4), "+r" (p5));
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i,0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i,0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i,0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i,0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i,0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i,0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addl $256, %1 ;\n"
- " addl $256, %2 ;\n"
- " addl $256, %3 ;\n"
- " addl $256, %4 ;\n"
- " addl $256, %5 ;\n"
- " decl %0 ;\n"
- " jnz 1b ;\n"
- : "+r" (lines),
- "+r" (p1), "+r" (p2), "+r" (p3)
- : "r" (p4), "r" (p5)
- : "memory");
-
- /* p4 and p5 were modified, and now the variables are dead.
- Clobber them just to be sure nobody does something stupid
- like assuming they have some legal value. */
- asm("" : "=r" (p4), "=r" (p5));
-
- kernel_fpu_end();
-}
-
static struct xor_block_template xor_block_pIII_sse = {
.name = "pIII_sse",
.do_2 = xor_sse_2,
@@ -827,26 +543,25 @@ static struct xor_block_template xor_block_pIII_sse = {
/* Also try the generic routines. */
#include <asm-generic/xor.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
- xor_speed(&xor_block_8regs); \
- xor_speed(&xor_block_8regs_p); \
- xor_speed(&xor_block_32regs); \
- xor_speed(&xor_block_32regs_p); \
AVX_XOR_SPEED; \
- if (cpu_has_xmm) \
+ if (cpu_has_xmm) { \
xor_speed(&xor_block_pIII_sse); \
- if (cpu_has_mmx) { \
+ xor_speed(&xor_block_sse_pf64); \
+ } else if (cpu_has_mmx) { \
xor_speed(&xor_block_pII_mmx); \
xor_speed(&xor_block_p5_mmx); \
+ } else { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_8regs_p); \
+ xor_speed(&xor_block_32regs); \
+ xor_speed(&xor_block_32regs_p); \
} \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
-
#endif /* _ASM_X86_XOR_32_H */
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 87ac522c4af..546f1e3b87c 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -1,301 +1,6 @@
#ifndef _ASM_X86_XOR_64_H
#define _ASM_X86_XOR_64_H
-/*
- * Optimized RAID-5 checksumming functions for MMX and SSE.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * You should have received a copy of the GNU General Public License
- * (for example /usr/src/linux/COPYING); if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
-/*
- * Cache avoiding checksumming functions utilizing KNI instructions
- * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
- */
-
-/*
- * Based on
- * High-speed RAID5 checksumming functions utilizing SSE instructions.
- * Copyright (C) 1998 Ingo Molnar.
- */
-
-/*
- * x86-64 changes / gcc fixes from Andi Kleen.
- * Copyright 2002 Andi Kleen, SuSE Labs.
- *
- * This hasn't been optimized for the hammer yet, but there are likely
- * no advantages to be gotten from x86-64 here anyways.
- */
-
-#include <asm/i387.h>
-
-#define OFFS(x) "16*("#x")"
-#define PF_OFFS(x) "256+16*("#x")"
-#define PF0(x) " prefetchnta "PF_OFFS(x)"(%[p1]) ;\n"
-#define LD(x, y) " movaps "OFFS(x)"(%[p1]), %%xmm"#y" ;\n"
-#define ST(x, y) " movaps %%xmm"#y", "OFFS(x)"(%[p1]) ;\n"
-#define PF1(x) " prefetchnta "PF_OFFS(x)"(%[p2]) ;\n"
-#define PF2(x) " prefetchnta "PF_OFFS(x)"(%[p3]) ;\n"
-#define PF3(x) " prefetchnta "PF_OFFS(x)"(%[p4]) ;\n"
-#define PF4(x) " prefetchnta "PF_OFFS(x)"(%[p5]) ;\n"
-#define PF5(x) " prefetchnta "PF_OFFS(x)"(%[p6]) ;\n"
-#define XO1(x, y) " xorps "OFFS(x)"(%[p2]), %%xmm"#y" ;\n"
-#define XO2(x, y) " xorps "OFFS(x)"(%[p3]), %%xmm"#y" ;\n"
-#define XO3(x, y) " xorps "OFFS(x)"(%[p4]), %%xmm"#y" ;\n"
-#define XO4(x, y) " xorps "OFFS(x)"(%[p5]), %%xmm"#y" ;\n"
-#define XO5(x, y) " xorps "OFFS(x)"(%[p6]), %%xmm"#y" ;\n"
-
-
-static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- LD(i, 0) \
- LD(i + 1, 1) \
- PF1(i) \
- PF1(i + 2) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [p1] "+r" (p1), [p2] "+r" (p2), [cnt] "+r" (lines)
- : [inc] "r" (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+r" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3)
- : [inc] "r" (256UL)
- : "memory");
- kernel_fpu_end();
-}
-
-static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " addq %[inc], %[p4] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+c" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4)
- : [inc] "r" (256UL)
- : "memory" );
-
- kernel_fpu_end();
-}
-
-static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
- unsigned long *p3, unsigned long *p4, unsigned long *p5)
-{
- unsigned int lines = bytes >> 8;
-
- kernel_fpu_begin();
-
- asm volatile(
-#undef BLOCK
-#define BLOCK(i) \
- PF1(i) \
- PF1(i + 2) \
- LD(i, 0) \
- LD(i + 1, 1) \
- LD(i + 2, 2) \
- LD(i + 3, 3) \
- PF2(i) \
- PF2(i + 2) \
- XO1(i, 0) \
- XO1(i + 1, 1) \
- XO1(i + 2, 2) \
- XO1(i + 3, 3) \
- PF3(i) \
- PF3(i + 2) \
- XO2(i, 0) \
- XO2(i + 1, 1) \
- XO2(i + 2, 2) \
- XO2(i + 3, 3) \
- PF4(i) \
- PF4(i + 2) \
- PF0(i + 4) \
- PF0(i + 6) \
- XO3(i, 0) \
- XO3(i + 1, 1) \
- XO3(i + 2, 2) \
- XO3(i + 3, 3) \
- XO4(i, 0) \
- XO4(i + 1, 1) \
- XO4(i + 2, 2) \
- XO4(i + 3, 3) \
- ST(i, 0) \
- ST(i + 1, 1) \
- ST(i + 2, 2) \
- ST(i + 3, 3) \
-
-
- PF0(0)
- PF0(2)
-
- " .align 32 ;\n"
- " 1: ;\n"
-
- BLOCK(0)
- BLOCK(4)
- BLOCK(8)
- BLOCK(12)
-
- " addq %[inc], %[p1] ;\n"
- " addq %[inc], %[p2] ;\n"
- " addq %[inc], %[p3] ;\n"
- " addq %[inc], %[p4] ;\n"
- " addq %[inc], %[p5] ;\n"
- " decl %[cnt] ; jnz 1b"
- : [cnt] "+c" (lines),
- [p1] "+r" (p1), [p2] "+r" (p2), [p3] "+r" (p3), [p4] "+r" (p4),
- [p5] "+r" (p5)
- : [inc] "r" (256UL)
- : "memory");
-
- kernel_fpu_end();
-}
-
static struct xor_block_template xor_block_sse = {
.name = "generic_sse",
.do_2 = xor_sse_2,
@@ -308,17 +13,15 @@ static struct xor_block_template xor_block_sse = {
/* Also try the AVX routines */
#include <asm/xor_avx.h>
+/* We force the use of the SSE xor block because it can write around L2.
+ We may also be able to load into the L1 only depending on how the cpu
+ deals with a load to a line that is being prefetched. */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES \
do { \
AVX_XOR_SPEED; \
+ xor_speed(&xor_block_sse_pf64); \
xor_speed(&xor_block_sse); \
} while (0)
-/* We force the use of the SSE xor block because it can write around L2.
- We may also be able to load into the L1 only depending on how the cpu
- deals with a load to a line that is being prefetched. */
-#define XOR_SELECT_TEMPLATE(FASTEST) \
- AVX_SELECT(&xor_block_sse)
-
#endif /* _ASM_X86_XOR_64_H */
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
index 83b6e9a0dce..09409c44f9a 100644
--- a/arch/x86/include/uapi/asm/Kbuild
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -4,3 +4,61 @@ include include/uapi/asm-generic/Kbuild.asm
genhdr-y += unistd_32.h
genhdr-y += unistd_64.h
genhdr-y += unistd_x32.h
+header-y += a.out.h
+header-y += auxvec.h
+header-y += bitsperlong.h
+header-y += boot.h
+header-y += bootparam.h
+header-y += byteorder.h
+header-y += debugreg.h
+header-y += e820.h
+header-y += errno.h
+header-y += fcntl.h
+header-y += hw_breakpoint.h
+header-y += hyperv.h
+header-y += ioctl.h
+header-y += ioctls.h
+header-y += ipcbuf.h
+header-y += ist.h
+header-y += kvm.h
+header-y += kvm_para.h
+header-y += ldt.h
+header-y += mce.h
+header-y += mman.h
+header-y += msgbuf.h
+header-y += msr-index.h
+header-y += msr.h
+header-y += mtrr.h
+header-y += param.h
+header-y += perf_regs.h
+header-y += poll.h
+header-y += posix_types.h
+header-y += posix_types_32.h
+header-y += posix_types_64.h
+header-y += posix_types_x32.h
+header-y += prctl.h
+header-y += processor-flags.h
+header-y += ptrace-abi.h
+header-y += ptrace.h
+header-y += resource.h
+header-y += sembuf.h
+header-y += setup.h
+header-y += shmbuf.h
+header-y += sigcontext.h
+header-y += sigcontext32.h
+header-y += siginfo.h
+header-y += signal.h
+header-y += socket.h
+header-y += sockios.h
+header-y += stat.h
+header-y += statfs.h
+header-y += svm.h
+header-y += swab.h
+header-y += termbits.h
+header-y += termios.h
+header-y += types.h
+header-y += ucontext.h
+header-y += unistd.h
+header-y += vm86.h
+header-y += vmx.h
+header-y += vsyscall.h
diff --git a/arch/x86/include/asm/a.out.h b/arch/x86/include/uapi/asm/a.out.h
index 4684f97a5bb..4684f97a5bb 100644
--- a/arch/x86/include/asm/a.out.h
+++ b/arch/x86/include/uapi/asm/a.out.h
diff --git a/arch/x86/include/asm/auxvec.h b/arch/x86/include/uapi/asm/auxvec.h
index 77203ac352d..77203ac352d 100644
--- a/arch/x86/include/asm/auxvec.h
+++ b/arch/x86/include/uapi/asm/auxvec.h
diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h
index b0ae1c4dc79..b0ae1c4dc79 100644
--- a/arch/x86/include/asm/bitsperlong.h
+++ b/arch/x86/include/uapi/asm/bitsperlong.h
diff --git a/arch/x86/include/uapi/asm/boot.h b/arch/x86/include/uapi/asm/boot.h
new file mode 100644
index 00000000000..94292c4c812
--- /dev/null
+++ b/arch/x86/include/uapi/asm/boot.h
@@ -0,0 +1,10 @@
+#ifndef _UAPI_ASM_X86_BOOT_H
+#define _UAPI_ASM_X86_BOOT_H
+
+/* Internal svga startup constants */
+#define NORMAL_VGA 0xffff /* 80x25 mode */
+#define EXTENDED_VGA 0xfffe /* 80x50 mode */
+#define ASK_VGA 0xfffd /* ask for it at bootup */
+
+
+#endif /* _UAPI_ASM_X86_BOOT_H */
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 2ad874cb661..c15ddaf9071 100644
--- a/arch/x86/include/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -1,6 +1,31 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H
+/* setup_data types */
+#define SETUP_NONE 0
+#define SETUP_E820_EXT 1
+#define SETUP_DTB 2
+#define SETUP_PCI 3
+
+/* ram_size flags */
+#define RAMDISK_IMAGE_START_MASK 0x07FF
+#define RAMDISK_PROMPT_FLAG 0x8000
+#define RAMDISK_LOAD_FLAG 0x4000
+
+/* loadflags */
+#define LOADED_HIGH (1<<0)
+#define QUIET_FLAG (1<<5)
+#define KEEP_SEGMENTS (1<<6)
+#define CAN_USE_HEAP (1<<7)
+
+/* xloadflags */
+#define XLF_KERNEL_64 (1<<0)
+#define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1)
+#define XLF_EFI_HANDOVER_32 (1<<2)
+#define XLF_EFI_HANDOVER_64 (1<<3)
+
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <linux/screen_info.h>
#include <linux/apm_bios.h>
@@ -9,11 +34,6 @@
#include <asm/ist.h>
#include <video/edid.h>
-/* setup data types */
-#define SETUP_NONE 0
-#define SETUP_E820_EXT 1
-#define SETUP_DTB 2
-
/* extensible setup data list node */
struct setup_data {
__u64 next;
@@ -27,9 +47,6 @@ struct setup_header {
__u16 root_flags;
__u32 syssize;
__u16 ram_size;
-#define RAMDISK_IMAGE_START_MASK 0x07FF
-#define RAMDISK_PROMPT_FLAG 0x8000
-#define RAMDISK_LOAD_FLAG 0x4000
__u16 vid_mode;
__u16 root_dev;
__u16 boot_flag;
@@ -41,10 +58,6 @@ struct setup_header {
__u16 kernel_version;
__u8 type_of_loader;
__u8 loadflags;
-#define LOADED_HIGH (1<<0)
-#define QUIET_FLAG (1<<5)
-#define KEEP_SEGMENTS (1<<6)
-#define CAN_USE_HEAP (1<<7)
__u16 setup_move_size;
__u32 code32_start;
__u32 ramdisk_image;
@@ -57,7 +70,8 @@ struct setup_header {
__u32 initrd_addr_max;
__u32 kernel_alignment;
__u8 relocatable_kernel;
- __u8 _pad2[3];
+ __u8 min_alignment;
+ __u16 xloadflags;
__u32 cmdline_size;
__u32 hardware_subarch;
__u64 hardware_subarch_data;
@@ -105,7 +119,10 @@ struct boot_params {
__u8 hd1_info[16]; /* obsolete! */ /* 0x090 */
struct sys_desc_table sys_desc_table; /* 0x0a0 */
struct olpc_ofw_header olpc_ofw_header; /* 0x0b0 */
- __u8 _pad4[128]; /* 0x0c0 */
+ __u32 ext_ramdisk_image; /* 0x0c0 */
+ __u32 ext_ramdisk_size; /* 0x0c4 */
+ __u32 ext_cmd_line_ptr; /* 0x0c8 */
+ __u8 _pad4[116]; /* 0x0cc */
struct edid_info edid_info; /* 0x140 */
struct efi_info efi_info; /* 0x1c0 */
__u32 alt_mem_k; /* 0x1e0 */
@@ -114,7 +131,20 @@ struct boot_params {
__u8 eddbuf_entries; /* 0x1e9 */
__u8 edd_mbr_sig_buf_entries; /* 0x1ea */
__u8 kbd_status; /* 0x1eb */
- __u8 _pad6[5]; /* 0x1ec */
+ __u8 _pad5[3]; /* 0x1ec */
+ /*
+ * The sentinel is set to a nonzero value (0xff) in header.S.
+ *
+ * A bootloader is supposed to only take setup_header and put
+ * it into a clean boot_params buffer. If it turns out that
+ * it is clumsy or too generous with the buffer, it most
+ * probably will pick up the sentinel variable too. The fact
+ * that this variable then is still 0xff will let kernel
+ * know that some variables in boot_params are invalid and
+ * kernel should zero out certain portions of boot_params.
+ */
+ __u8 sentinel; /* 0x1ef */
+ __u8 _pad6[1]; /* 0x1f0 */
struct setup_header hdr; /* setup header */ /* 0x1f1 */
__u8 _pad7[0x290-0x1f1-sizeof(struct setup_header)];
__u32 edd_mbr_sig_buffer[EDD_MBR_SIG_MAX]; /* 0x290 */
@@ -133,6 +163,6 @@ enum {
X86_NR_SUBARCHS,
};
-
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/uapi/asm/byteorder.h
index b13a7a88f3e..b13a7a88f3e 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/uapi/asm/byteorder.h
diff --git a/arch/x86/include/uapi/asm/debugreg.h b/arch/x86/include/uapi/asm/debugreg.h
new file mode 100644
index 00000000000..3c0874dd986
--- /dev/null
+++ b/arch/x86/include/uapi/asm/debugreg.h
@@ -0,0 +1,80 @@
+#ifndef _UAPI_ASM_X86_DEBUGREG_H
+#define _UAPI_ASM_X86_DEBUGREG_H
+
+
+/* Indicate the register numbers for a number of the specific
+ debug registers. Registers 0-3 contain the addresses we wish to trap on */
+#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
+#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
+
+#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
+#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
+
+/* Define a few things for the status register. We can use this to determine
+ which debugging register was responsible for the trap. The other bits
+ are either reserved or not of interest to us. */
+
+/* Define reserved bits in DR6 which are always set to 1 */
+#define DR6_RESERVED (0xFFFF0FF0)
+
+#define DR_TRAP0 (0x1) /* db0 */
+#define DR_TRAP1 (0x2) /* db1 */
+#define DR_TRAP2 (0x4) /* db2 */
+#define DR_TRAP3 (0x8) /* db3 */
+#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
+
+#define DR_STEP (0x4000) /* single-step */
+#define DR_SWITCH (0x8000) /* task switch */
+
+/* Now define a bunch of things for manipulating the control register.
+ The top two bytes of the control register consist of 4 fields of 4
+ bits - each field corresponds to one of the four debug registers,
+ and indicates what types of access we trap on, and how large the data
+ field is that we are looking at */
+
+#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
+#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
+
+#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
+#define DR_RW_WRITE (0x1)
+#define DR_RW_READ (0x3)
+
+#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
+#define DR_LEN_2 (0x4)
+#define DR_LEN_4 (0xC)
+#define DR_LEN_8 (0x8)
+
+/* The low byte to the control register determine which registers are
+ enabled. There are 4 fields of two bits. One bit is "local", meaning
+ that the processor will reset the bit after a task switch and the other
+ is global meaning that we have to explicitly reset the bit. With linux,
+ you can use either one, since we explicitly zero the register when we enter
+ kernel mode. */
+
+#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
+#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
+#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
+
+#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
+#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
+
+/* The second byte to the control register has a few special things.
+ We can slow the instruction pipeline for instructions coming via the
+ gdt or the ldt if we want to. I am not sure why this is an advantage */
+
+#ifdef __i386__
+#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
+#else
+#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
+#endif
+
+#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
+#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+
+/*
+ * HW breakpoint additions
+ */
+
+#endif /* _UAPI_ASM_X86_DEBUGREG_H */
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
new file mode 100644
index 00000000000..bbae0247070
--- /dev/null
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -0,0 +1,75 @@
+#ifndef _UAPI_ASM_X86_E820_H
+#define _UAPI_ASM_X86_E820_H
+#define E820MAP 0x2d0 /* our map */
+#define E820MAX 128 /* number of entries in E820MAP */
+
+/*
+ * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
+ * constrained space in the zeropage. If we have more nodes than
+ * that, and if we've booted off EFI firmware, then the EFI tables
+ * passed us from the EFI firmware can list more nodes. Size our
+ * internal memory map tables to have room for these additional
+ * nodes, based on up to three entries per node for which the
+ * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
+ * plus E820MAX, allowing space for the possible duplicate E820
+ * entries that might need room in the same arrays, prior to the
+ * call to sanitize_e820_map() to remove duplicates. The allowance
+ * of three memory map entries per node is "enough" entries for
+ * the initial hardware platform motivating this mechanism to make
+ * use of additional EFI map entries. Future platforms may want
+ * to allow more than three entries per node or otherwise refine
+ * this size.
+ */
+
+/*
+ * Odd: 'make headers_check' complains about numa.h if I try
+ * to collapse the next two #ifdef lines to a single line:
+ * #if defined(__KERNEL__) && defined(CONFIG_EFI)
+ */
+#ifndef __KERNEL__
+#define E820_X_MAX E820MAX
+#endif
+
+#define E820NR 0x1e8 /* # entries in E820MAP */
+
+#define E820_RAM 1
+#define E820_RESERVED 2
+#define E820_ACPI 3
+#define E820_NVS 4
+#define E820_UNUSABLE 5
+
+
+/*
+ * reserved RAM used by kernel itself
+ * if CONFIG_INTEL_TXT is enabled, memory of this type will be
+ * included in the S3 integrity calculation and so should not include
+ * any memory that BIOS might alter over the S3 transition
+ */
+#define E820_RESERVED_KERN 128
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+struct e820entry {
+ __u64 addr; /* start of memory segment */
+ __u64 size; /* size of memory segment */
+ __u32 type; /* type of memory segment */
+} __attribute__((packed));
+
+struct e820map {
+ __u32 nr_map;
+ struct e820entry map[E820_X_MAX];
+};
+
+#define ISA_START_ADDRESS 0xa0000
+#define ISA_END_ADDRESS 0x100000
+
+#define BIOS_BEGIN 0x000a0000
+#define BIOS_END 0x00100000
+
+#define BIOS_ROM_BASE 0xffe00000
+#define BIOS_ROM_END 0xffffffff
+
+#endif /* __ASSEMBLY__ */
+
+
+#endif /* _UAPI_ASM_X86_E820_H */
diff --git a/arch/x86/include/asm/errno.h b/arch/x86/include/uapi/asm/errno.h
index 4c82b503d92..4c82b503d92 100644
--- a/arch/x86/include/asm/errno.h
+++ b/arch/x86/include/uapi/asm/errno.h
diff --git a/arch/x86/include/asm/fcntl.h b/arch/x86/include/uapi/asm/fcntl.h
index 46ab12db573..46ab12db573 100644
--- a/arch/x86/include/asm/fcntl.h
+++ b/arch/x86/include/uapi/asm/fcntl.h
diff --git a/arch/x86/include/uapi/asm/hw_breakpoint.h b/arch/x86/include/uapi/asm/hw_breakpoint.h
new file mode 100644
index 00000000000..79a9626b550
--- /dev/null
+++ b/arch/x86/include/uapi/asm/hw_breakpoint.h
@@ -0,0 +1 @@
+/* */
diff --git a/arch/x86/include/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b80420bcd09..b80420bcd09 100644
--- a/arch/x86/include/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
diff --git a/arch/x86/include/asm/ioctl.h b/arch/x86/include/uapi/asm/ioctl.h
index b279fe06dfe..b279fe06dfe 100644
--- a/arch/x86/include/asm/ioctl.h
+++ b/arch/x86/include/uapi/asm/ioctl.h
diff --git a/arch/x86/include/asm/ioctls.h b/arch/x86/include/uapi/asm/ioctls.h
index ec34c760665..ec34c760665 100644
--- a/arch/x86/include/asm/ioctls.h
+++ b/arch/x86/include/uapi/asm/ioctls.h
diff --git a/arch/x86/include/asm/ipcbuf.h b/arch/x86/include/uapi/asm/ipcbuf.h
index 84c7e51cb6d..84c7e51cb6d 100644
--- a/arch/x86/include/asm/ipcbuf.h
+++ b/arch/x86/include/uapi/asm/ipcbuf.h
diff --git a/arch/x86/include/uapi/asm/ist.h b/arch/x86/include/uapi/asm/ist.h
new file mode 100644
index 00000000000..bad9f5ea407
--- /dev/null
+++ b/arch/x86/include/uapi/asm/ist.h
@@ -0,0 +1,29 @@
+/*
+ * Include file for the interface to IST BIOS
+ * Copyright 2002 Andy Grover <andrew.grover@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef _UAPI_ASM_X86_IST_H
+#define _UAPI_ASM_X86_IST_H
+
+
+
+#include <linux/types.h>
+
+struct ist_info {
+ __u32 signature;
+ __u32 command;
+ __u32 event;
+ __u32 perf_level;
+};
+
+#endif /* _UAPI_ASM_X86_IST_H */
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a65ec29e6ff..a65ec29e6ff 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
new file mode 100644
index 00000000000..06fdbd987e9
--- /dev/null
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -0,0 +1,100 @@
+#ifndef _UAPI_ASM_X86_KVM_PARA_H
+#define _UAPI_ASM_X86_KVM_PARA_H
+
+#include <linux/types.h>
+#include <asm/hyperv.h>
+
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
+ * should be used to determine that a VM is running under KVM.
+ */
+#define KVM_CPUID_SIGNATURE 0x40000000
+
+/* This CPUID returns a feature bitmap in eax. Before enabling a particular
+ * paravirtualization, the appropriate feature bit should be checked.
+ */
+#define KVM_CPUID_FEATURES 0x40000001
+#define KVM_FEATURE_CLOCKSOURCE 0
+#define KVM_FEATURE_NOP_IO_DELAY 1
+#define KVM_FEATURE_MMU_OP 2
+/* This indicates that the new set of kvmclock msrs
+ * are available. The use of 0x11 and 0x12 is deprecated
+ */
+#define KVM_FEATURE_CLOCKSOURCE2 3
+#define KVM_FEATURE_ASYNC_PF 4
+#define KVM_FEATURE_STEAL_TIME 5
+#define KVM_FEATURE_PV_EOI 6
+
+/* The last 8 bits are used to indicate how to interpret the flags field
+ * in pvclock structure. If no bits are set, all flags are ignored.
+ */
+#define KVM_FEATURE_CLOCKSOURCE_STABLE_BIT 24
+
+#define MSR_KVM_WALL_CLOCK 0x11
+#define MSR_KVM_SYSTEM_TIME 0x12
+
+#define KVM_MSR_ENABLED 1
+/* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
+#define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00
+#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
+#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_STEAL_TIME 0x4b564d03
+#define MSR_KVM_PV_EOI_EN 0x4b564d04
+
+struct kvm_steal_time {
+ __u64 steal;
+ __u32 version;
+ __u32 flags;
+ __u32 pad[12];
+};
+
+#define KVM_STEAL_ALIGNMENT_BITS 5
+#define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1)))
+#define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1)
+
+#define KVM_MAX_MMU_OP_BATCH 32
+
+#define KVM_ASYNC_PF_ENABLED (1 << 0)
+#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
+
+/* Operations for KVM_HC_MMU_OP */
+#define KVM_MMU_OP_WRITE_PTE 1
+#define KVM_MMU_OP_FLUSH_TLB 2
+#define KVM_MMU_OP_RELEASE_PT 3
+
+/* Payload for KVM_HC_MMU_OP */
+struct kvm_mmu_op_header {
+ __u32 op;
+ __u32 pad;
+};
+
+struct kvm_mmu_op_write_pte {
+ struct kvm_mmu_op_header header;
+ __u64 pte_phys;
+ __u64 pte_val;
+};
+
+struct kvm_mmu_op_flush_tlb {
+ struct kvm_mmu_op_header header;
+};
+
+struct kvm_mmu_op_release_pt {
+ struct kvm_mmu_op_header header;
+ __u64 pt_phys;
+};
+
+#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
+#define KVM_PV_REASON_PAGE_READY 2
+
+struct kvm_vcpu_pv_apf_data {
+ __u32 reason;
+ __u8 pad[60];
+ __u32 enabled;
+};
+
+#define KVM_PV_EOI_BIT 0
+#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
+#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
+#define KVM_PV_EOI_DISABLED 0x0
+
+
+#endif /* _UAPI_ASM_X86_KVM_PARA_H */
diff --git a/arch/x86/include/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index 46727eb37bf..46727eb37bf 100644
--- a/arch/x86/include/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
new file mode 100644
index 00000000000..a0eab85ce7b
--- /dev/null
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -0,0 +1,34 @@
+#ifndef _UAPI_ASM_X86_MCE_H
+#define _UAPI_ASM_X86_MCE_H
+
+#include <linux/types.h>
+#include <asm/ioctls.h>
+
+/* Fields are zero when not available */
+struct mce {
+ __u64 status;
+ __u64 misc;
+ __u64 addr;
+ __u64 mcgstatus;
+ __u64 ip;
+ __u64 tsc; /* cpu time stamp counter */
+ __u64 time; /* wall time_t when error was detected */
+ __u8 cpuvendor; /* cpu vendor as encoded in system.h */
+ __u8 inject_flags; /* software inject flags */
+ __u16 pad;
+ __u32 cpuid; /* CPUID 1 EAX */
+ __u8 cs; /* code segment */
+ __u8 bank; /* machine check bank */
+ __u8 cpu; /* cpu number; obsolete; use extcpu now */
+ __u8 finished; /* entry is valid */
+ __u32 extcpu; /* linux cpu number that detected the error */
+ __u32 socketid; /* CPU socket ID */
+ __u32 apicid; /* CPU initial apic ID */
+ __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */
+};
+
+#define MCE_GET_RECORD_LEN _IOR('M', 1, int)
+#define MCE_GET_LOG_LEN _IOR('M', 2, int)
+#define MCE_GETCLEAR_FLAGS _IOR('M', 3, int)
+
+#endif /* _UAPI_ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 593e51d4643..513b05f15bb 100644
--- a/arch/x86/include/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -3,6 +3,9 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
+
#include <asm-generic/mman.h>
#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/asm/msgbuf.h b/arch/x86/include/uapi/asm/msgbuf.h
index 809134c644a..809134c644a 100644
--- a/arch/x86/include/asm/msgbuf.h
+++ b/arch/x86/include/uapi/asm/msgbuf.h
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 7f0edceb756..075a4025559 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -35,11 +35,14 @@
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
+#define MSR_NHM_PLATFORM_INFO 0x000000ce
#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
@@ -55,6 +58,8 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
@@ -103,6 +108,38 @@
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY 0x000003f8
+#define MSR_PKG_C6_RESIDENCY 0x000003f9
+#define MSR_PKG_C7_RESIDENCY 0x000003fa
+#define MSR_CORE_C3_RESIDENCY 0x000003fc
+#define MSR_CORE_C6_RESIDENCY 0x000003fd
+#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_PKG_C2_RESIDENCY 0x0000060d
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT 0x00000606
+
+#define MSR_PKG_POWER_LIMIT 0x00000610
+#define MSR_PKG_ENERGY_STATUS 0x00000611
+#define MSR_PKG_PERF_STATUS 0x00000613
+#define MSR_PKG_POWER_INFO 0x00000614
+
+#define MSR_DRAM_POWER_LIMIT 0x00000618
+#define MSR_DRAM_ENERGY_STATUS 0x00000619
+#define MSR_DRAM_PERF_STATUS 0x0000061b
+#define MSR_DRAM_POWER_INFO 0x0000061c
+
+#define MSR_PP0_POWER_LIMIT 0x00000638
+#define MSR_PP0_ENERGY_STATUS 0x00000639
+#define MSR_PP0_POLICY 0x0000063a
+#define MSR_PP0_PERF_STATUS 0x0000063b
+
+#define MSR_PP1_POWER_LIMIT 0x00000640
+#define MSR_PP1_ENERGY_STATUS 0x00000641
+#define MSR_PP1_POLICY 0x00000642
+
#define MSR_AMD64_MC0_MASK 0xc0010044
#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
@@ -157,6 +194,8 @@
/* Fam 15h MSRs */
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
/* Fam 10h MSRs */
#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
@@ -236,6 +275,7 @@
#define MSR_IA32_EBL_CR_POWERON 0x0000002a
#define MSR_EBC_FREQUENCY_ID 0x0000002c
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_TSC_ADJUST 0x0000003b
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
@@ -337,6 +377,8 @@
#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38)
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39)
+#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
/* P4/Xeon+ specific */
#define MSR_IA32_MCG_EAX 0x00000180
#define MSR_IA32_MCG_EBX 0x00000181
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
new file mode 100644
index 00000000000..155e51048fa
--- /dev/null
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -0,0 +1,15 @@
+#ifndef _UAPI_ASM_X86_MSR_H
+#define _UAPI_ASM_X86_MSR_H
+
+#include <asm/msr-index.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
+#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
+
+#endif /* __ASSEMBLY__ */
+#endif /* _UAPI_ASM_X86_MSR_H */
diff --git a/arch/x86/include/uapi/asm/mtrr.h b/arch/x86/include/uapi/asm/mtrr.h
new file mode 100644
index 00000000000..d0acb658c8f
--- /dev/null
+++ b/arch/x86/include/uapi/asm/mtrr.h
@@ -0,0 +1,117 @@
+/* Generic MTRR (Memory Type Range Register) ioctls.
+
+ Copyright (C) 1997-1999 Richard Gooch
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with this library; if not, write to the Free
+ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+*/
+#ifndef _UAPI_ASM_X86_MTRR_H
+#define _UAPI_ASM_X86_MTRR_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <linux/errno.h>
+
+#define MTRR_IOCTL_BASE 'M'
+
+/* Warning: this structure has a different order from i386
+ on x86-64. The 32bit emulation code takes care of that.
+ But you need to use this for 64bit, otherwise your X server
+ will break. */
+
+#ifdef __i386__
+struct mtrr_sentry {
+ unsigned long base; /* Base address */
+ unsigned int size; /* Size of region */
+ unsigned int type; /* Type of region */
+};
+
+struct mtrr_gentry {
+ unsigned int regnum; /* Register number */
+ unsigned long base; /* Base address */
+ unsigned int size; /* Size of region */
+ unsigned int type; /* Type of region */
+};
+
+#else /* __i386__ */
+
+struct mtrr_sentry {
+ __u64 base; /* Base address */
+ __u32 size; /* Size of region */
+ __u32 type; /* Type of region */
+};
+
+struct mtrr_gentry {
+ __u64 base; /* Base address */
+ __u32 size; /* Size of region */
+ __u32 regnum; /* Register number */
+ __u32 type; /* Type of region */
+ __u32 _pad; /* Unused */
+};
+
+#endif /* !__i386__ */
+
+struct mtrr_var_range {
+ __u32 base_lo;
+ __u32 base_hi;
+ __u32 mask_lo;
+ __u32 mask_hi;
+};
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef __u8 mtrr_type;
+
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
+struct mtrr_state_type {
+ struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+ mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+ unsigned char enabled;
+ unsigned char have_fixed;
+ mtrr_type def_type;
+};
+
+#define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg))
+#define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1)
+
+/* These are the various ioctls */
+#define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry)
+#define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry)
+#define MTRRIOC_DEL_ENTRY _IOW(MTRR_IOCTL_BASE, 2, struct mtrr_sentry)
+#define MTRRIOC_GET_ENTRY _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry)
+#define MTRRIOC_KILL_ENTRY _IOW(MTRR_IOCTL_BASE, 4, struct mtrr_sentry)
+#define MTRRIOC_ADD_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 5, struct mtrr_sentry)
+#define MTRRIOC_SET_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 6, struct mtrr_sentry)
+#define MTRRIOC_DEL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 7, struct mtrr_sentry)
+#define MTRRIOC_GET_PAGE_ENTRY _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry)
+#define MTRRIOC_KILL_PAGE_ENTRY _IOW(MTRR_IOCTL_BASE, 9, struct mtrr_sentry)
+
+/* These are the region types */
+#define MTRR_TYPE_UNCACHABLE 0
+#define MTRR_TYPE_WRCOMB 1
+/*#define MTRR_TYPE_ 2*/
+/*#define MTRR_TYPE_ 3*/
+#define MTRR_TYPE_WRTHROUGH 4
+#define MTRR_TYPE_WRPROT 5
+#define MTRR_TYPE_WRBACK 6
+#define MTRR_NUM_TYPES 7
+
+
+#endif /* _UAPI_ASM_X86_MTRR_H */
diff --git a/arch/x86/include/asm/param.h b/arch/x86/include/uapi/asm/param.h
index 965d4542797..965d4542797 100644
--- a/arch/x86/include/asm/param.h
+++ b/arch/x86/include/uapi/asm/param.h
diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index 3f2207bfd17..3f2207bfd17 100644
--- a/arch/x86/include/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
diff --git a/arch/x86/include/asm/poll.h b/arch/x86/include/uapi/asm/poll.h
index c98509d3149..c98509d3149 100644
--- a/arch/x86/include/asm/poll.h
+++ b/arch/x86/include/uapi/asm/poll.h
diff --git a/arch/x86/include/uapi/asm/posix_types.h b/arch/x86/include/uapi/asm/posix_types.h
new file mode 100644
index 00000000000..85506b38362
--- /dev/null
+++ b/arch/x86/include/uapi/asm/posix_types.h
@@ -0,0 +1,9 @@
+#ifndef __KERNEL__
+# ifdef __i386__
+# include <asm/posix_types_32.h>
+# elif defined(__ILP32__)
+# include <asm/posix_types_x32.h>
+# else
+# include <asm/posix_types_64.h>
+# endif
+#endif
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/uapi/asm/posix_types_32.h
index 8e525059e7d..8e525059e7d 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/uapi/asm/posix_types_32.h
diff --git a/arch/x86/include/asm/posix_types_64.h b/arch/x86/include/uapi/asm/posix_types_64.h
index cba0c1ead16..cba0c1ead16 100644
--- a/arch/x86/include/asm/posix_types_64.h
+++ b/arch/x86/include/uapi/asm/posix_types_64.h
diff --git a/arch/x86/include/asm/posix_types_x32.h b/arch/x86/include/uapi/asm/posix_types_x32.h
index 85f9bdafa93..85f9bdafa93 100644
--- a/arch/x86/include/asm/posix_types_x32.h
+++ b/arch/x86/include/uapi/asm/posix_types_x32.h
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 3ac5032fae0..3ac5032fae0 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
new file mode 100644
index 00000000000..54991a74604
--- /dev/null
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -0,0 +1,99 @@
+#ifndef _UAPI_ASM_X86_PROCESSOR_FLAGS_H
+#define _UAPI_ASM_X86_PROCESSOR_FLAGS_H
+/* Various flags defined: can be included from assembler. */
+
+/*
+ * EFLAGS bits
+ */
+#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
+#define X86_EFLAGS_BIT1 0x00000002 /* Bit 1 - always on */
+#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
+#define X86_EFLAGS_AF 0x00000010 /* Auxiliary carry Flag */
+#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
+#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
+#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
+#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
+#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
+#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
+#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
+#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
+#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
+#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
+#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
+#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
+#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
+#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE 0x00000001 /* Protection Enable */
+#define X86_CR0_MP 0x00000002 /* Monitor Coprocessor */
+#define X86_CR0_EM 0x00000004 /* Emulation */
+#define X86_CR0_TS 0x00000008 /* Task Switched */
+#define X86_CR0_ET 0x00000010 /* Extension Type */
+#define X86_CR0_NE 0x00000020 /* Numeric Error */
+#define X86_CR0_WP 0x00010000 /* Write Protect */
+#define X86_CR0_AM 0x00040000 /* Alignment Mask */
+#define X86_CR0_NW 0x20000000 /* Not Write-through */
+#define X86_CR0_CD 0x40000000 /* Cache Disable */
+#define X86_CR0_PG 0x80000000 /* Paging */
+
+/*
+ * Paging options in CR3
+ */
+#define X86_CR3_PWT 0x00000008 /* Page Write Through */
+#define X86_CR3_PCD 0x00000010 /* Page Cache Disable */
+#define X86_CR3_PCID_MASK 0x00000fff /* PCID Mask */
+
+/*
+ * Intel CPU features in CR4
+ */
+#define X86_CR4_VME 0x00000001 /* enable vm86 extensions */
+#define X86_CR4_PVI 0x00000002 /* virtual interrupts flag enable */
+#define X86_CR4_TSD 0x00000004 /* disable time stamp at ipl 3 */
+#define X86_CR4_DE 0x00000008 /* enable debugging extensions */
+#define X86_CR4_PSE 0x00000010 /* enable page size extensions */
+#define X86_CR4_PAE 0x00000020 /* enable physical address extensions */
+#define X86_CR4_MCE 0x00000040 /* Machine check enable */
+#define X86_CR4_PGE 0x00000080 /* enable global pages */
+#define X86_CR4_PCE 0x00000100 /* enable performance counters at ipl 3 */
+#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
+#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
+#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
+#define X86_CR4_RDWRGSFS 0x00010000 /* enable RDWRGSFS support */
+#define X86_CR4_PCIDE 0x00020000 /* enable PCID support */
+#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
+#define X86_CR4_SMEP 0x00100000 /* enable SMEP support */
+#define X86_CR4_SMAP 0x00200000 /* enable SMAP support */
+
+/*
+ * x86-64 Task Priority Register, CR8
+ */
+#define X86_CR8_TPR 0x0000000F /* task priority register */
+
+/*
+ * AMD and Transmeta use MSRs for configuration; see <asm/msr-index.h>
+ */
+
+/*
+ * NSC/Cyrix CPU configuration register indexes
+ */
+#define CX86_PCR0 0x20
+#define CX86_GCR 0xb8
+#define CX86_CCR0 0xc0
+#define CX86_CCR1 0xc1
+#define CX86_CCR2 0xc2
+#define CX86_CCR3 0xc3
+#define CX86_CCR4 0xe8
+#define CX86_CCR5 0xe9
+#define CX86_CCR6 0xea
+#define CX86_CCR7 0xeb
+#define CX86_PCR1 0xf0
+#define CX86_DIR0 0xfe
+#define CX86_DIR1 0xff
+#define CX86_ARR_BASE 0xc4
+#define CX86_RCR_BASE 0xdc
+
+
+#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a8885..7b0a55a8885 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
new file mode 100644
index 00000000000..ac4b9aa4d99
--- /dev/null
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -0,0 +1,78 @@
+#ifndef _UAPI_ASM_X86_PTRACE_H
+#define _UAPI_ASM_X86_PTRACE_H
+
+#include <linux/compiler.h> /* For __user */
+#include <asm/ptrace-abi.h>
+#include <asm/processor-flags.h>
+
+
+#ifndef __ASSEMBLY__
+
+#ifdef __i386__
+/* this struct defines the way the registers are stored on the
+ stack during a system call. */
+
+#ifndef __KERNEL__
+
+struct pt_regs {
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ int xds;
+ int xes;
+ int xfs;
+ int xgs;
+ long orig_eax;
+ long eip;
+ int xcs;
+ long eflags;
+ long esp;
+ int xss;
+};
+
+#endif /* __KERNEL__ */
+
+#else /* __i386__ */
+
+#ifndef __KERNEL__
+
+struct pt_regs {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long rbp;
+ unsigned long rbx;
+/* arguments: non interrupts/non tracing syscalls only save up to here*/
+ unsigned long r11;
+ unsigned long r10;
+ unsigned long r9;
+ unsigned long r8;
+ unsigned long rax;
+ unsigned long rcx;
+ unsigned long rdx;
+ unsigned long rsi;
+ unsigned long rdi;
+ unsigned long orig_rax;
+/* end of arguments */
+/* cpu exception frame or undefined */
+ unsigned long rip;
+ unsigned long cs;
+ unsigned long eflags;
+ unsigned long rsp;
+ unsigned long ss;
+/* top of stack page */
+};
+
+#endif /* __KERNEL__ */
+#endif /* !__i386__ */
+
+
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/resource.h b/arch/x86/include/uapi/asm/resource.h
index 04bc4db8921..04bc4db8921 100644
--- a/arch/x86/include/asm/resource.h
+++ b/arch/x86/include/uapi/asm/resource.h
diff --git a/arch/x86/include/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h
index ee50c801f7b..ee50c801f7b 100644
--- a/arch/x86/include/asm/sembuf.h
+++ b/arch/x86/include/uapi/asm/sembuf.h
diff --git a/arch/x86/include/uapi/asm/setup.h b/arch/x86/include/uapi/asm/setup.h
new file mode 100644
index 00000000000..79a9626b550
--- /dev/null
+++ b/arch/x86/include/uapi/asm/setup.h
@@ -0,0 +1 @@
+/* */
diff --git a/arch/x86/include/asm/shmbuf.h b/arch/x86/include/uapi/asm/shmbuf.h
index 83c05fc2de3..83c05fc2de3 100644
--- a/arch/x86/include/asm/shmbuf.h
+++ b/arch/x86/include/uapi/asm/shmbuf.h
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
new file mode 100644
index 00000000000..d8b9f9081e8
--- /dev/null
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -0,0 +1,221 @@
+#ifndef _UAPI_ASM_X86_SIGCONTEXT_H
+#define _UAPI_ASM_X86_SIGCONTEXT_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#define FP_XSTATE_MAGIC1 0x46505853U
+#define FP_XSTATE_MAGIC2 0x46505845U
+#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
+
+/*
+ * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
+ * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
+ * are used to extended the fpstate pointer in the sigcontext, which now
+ * includes the extended state information along with fpstate information.
+ *
+ * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
+ * area and FP_XSTATE_MAGIC2 at the end of memory layout
+ * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
+ * extended state information in the memory layout pointed by the fpstate
+ * pointer in sigcontext.
+ */
+struct _fpx_sw_bytes {
+ __u32 magic1; /* FP_XSTATE_MAGIC1 */
+ __u32 extended_size; /* total size of the layout referred by
+ * fpstate pointer in the sigcontext.
+ */
+ __u64 xstate_bv;
+ /* feature bit mask (including fp/sse/extended
+ * state) that is present in the memory
+ * layout.
+ */
+ __u32 xstate_size; /* actual xsave state size, based on the
+ * features saved in the layout.
+ * 'extended_size' will be greater than
+ * 'xstate_size'.
+ */
+ __u32 padding[7]; /* for future use. */
+};
+
+#ifdef __i386__
+/*
+ * As documented in the iBCS2 standard..
+ *
+ * The first part of "struct _fpstate" is just the normal i387
+ * hardware setup, the extra "status" word is used to save the
+ * coprocessor status word before entering the handler.
+ *
+ * Pentium III FXSR, SSE support
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ *
+ * The FPU state data structure has had to grow to accommodate the
+ * extended FPU state required by the Streaming SIMD Extensions.
+ * There is no documented standard to accomplish this at the moment.
+ */
+struct _fpreg {
+ unsigned short significand[4];
+ unsigned short exponent;
+};
+
+struct _fpxreg {
+ unsigned short significand[4];
+ unsigned short exponent;
+ unsigned short padding[3];
+};
+
+struct _xmmreg {
+ unsigned long element[4];
+};
+
+struct _fpstate {
+ /* Regular FPU environment */
+ unsigned long cw;
+ unsigned long sw;
+ unsigned long tag;
+ unsigned long ipoff;
+ unsigned long cssel;
+ unsigned long dataoff;
+ unsigned long datasel;
+ struct _fpreg _st[8];
+ unsigned short status;
+ unsigned short magic; /* 0xffff = regular FPU data only */
+
+ /* FXSR FPU environment */
+ unsigned long _fxsr_env[6]; /* FXSR FPU env is ignored */
+ unsigned long mxcsr;
+ unsigned long reserved;
+ struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
+ struct _xmmreg _xmm[8];
+ unsigned long padding1[44];
+
+ union {
+ unsigned long padding2[12];
+ struct _fpx_sw_bytes sw_reserved; /* represents the extended
+ * state info */
+ };
+};
+
+#define X86_FXSR_MAGIC 0x0000
+
+#ifndef __KERNEL__
+/*
+ * User-space might still rely on the old definition:
+ */
+struct sigcontext {
+ unsigned short gs, __gsh;
+ unsigned short fs, __fsh;
+ unsigned short es, __esh;
+ unsigned short ds, __dsh;
+ unsigned long edi;
+ unsigned long esi;
+ unsigned long ebp;
+ unsigned long esp;
+ unsigned long ebx;
+ unsigned long edx;
+ unsigned long ecx;
+ unsigned long eax;
+ unsigned long trapno;
+ unsigned long err;
+ unsigned long eip;
+ unsigned short cs, __csh;
+ unsigned long eflags;
+ unsigned long esp_at_signal;
+ unsigned short ss, __ssh;
+ struct _fpstate __user *fpstate;
+ unsigned long oldmask;
+ unsigned long cr2;
+};
+#endif /* !__KERNEL__ */
+
+#else /* __i386__ */
+
+/* FXSAVE frame */
+/* Note: reserved1/2 may someday contain valuable data. Always save/restore
+ them when you change signal frames. */
+struct _fpstate {
+ __u16 cwd;
+ __u16 swd;
+ __u16 twd; /* Note this is not the same as the
+ 32bit/x87/FSAVE twd */
+ __u16 fop;
+ __u64 rip;
+ __u64 rdp;
+ __u32 mxcsr;
+ __u32 mxcsr_mask;
+ __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
+ __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
+ __u32 reserved2[12];
+ union {
+ __u32 reserved3[12];
+ struct _fpx_sw_bytes sw_reserved; /* represents the extended
+ * state information */
+ };
+};
+
+#ifndef __KERNEL__
+/*
+ * User-space might still rely on the old definition:
+ */
+struct sigcontext {
+ __u64 r8;
+ __u64 r9;
+ __u64 r10;
+ __u64 r11;
+ __u64 r12;
+ __u64 r13;
+ __u64 r14;
+ __u64 r15;
+ __u64 rdi;
+ __u64 rsi;
+ __u64 rbp;
+ __u64 rbx;
+ __u64 rdx;
+ __u64 rax;
+ __u64 rcx;
+ __u64 rsp;
+ __u64 rip;
+ __u64 eflags; /* RFLAGS */
+ __u16 cs;
+ __u16 gs;
+ __u16 fs;
+ __u16 __pad0;
+ __u64 err;
+ __u64 trapno;
+ __u64 oldmask;
+ __u64 cr2;
+ struct _fpstate __user *fpstate; /* zero when no FPU context */
+#ifdef __ILP32__
+ __u32 __fpstate_pad;
+#endif
+ __u64 reserved1[8];
+};
+#endif /* !__KERNEL__ */
+
+#endif /* !__i386__ */
+
+struct _xsave_hdr {
+ __u64 xstate_bv;
+ __u64 reserved1[2];
+ __u64 reserved2[5];
+};
+
+struct _ymmh_state {
+ /* 16 * 16 bytes for each YMMH-reg */
+ __u32 ymmh_space[64];
+};
+
+/*
+ * Extended state pointed by the fpstate pointer in the sigcontext.
+ * In addition to the fpstate, information encoded in the xstate_hdr
+ * indicates the presence of other extended state information
+ * supported by the processor and OS.
+ */
+struct _xstate {
+ struct _fpstate fpstate;
+ struct _xsave_hdr xstate_hdr;
+ struct _ymmh_state ymmh;
+ /* new processor state extensions go here */
+};
+
+#endif /* _UAPI_ASM_X86_SIGCONTEXT_H */
diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/uapi/asm/sigcontext32.h
index ad1478c4ae1..ad1478c4ae1 100644
--- a/arch/x86/include/asm/sigcontext32.h
+++ b/arch/x86/include/uapi/asm/sigcontext32.h
diff --git a/arch/x86/include/asm/siginfo.h b/arch/x86/include/uapi/asm/siginfo.h
index 34c47b3341c..34c47b3341c 100644
--- a/arch/x86/include/asm/siginfo.h
+++ b/arch/x86/include/uapi/asm/siginfo.h
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
new file mode 100644
index 00000000000..aa7d6ae39e0
--- /dev/null
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -0,0 +1,139 @@
+#ifndef _UAPI_ASM_X86_SIGNAL_H
+#define _UAPI_ASM_X86_SIGNAL_H
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/compiler.h>
+
+/* Avoid too many header ordering problems. */
+struct siginfo;
+
+#ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+#define NSIG 32
+typedef unsigned long sigset_t;
+
+#endif /* __KERNEL__ */
+#endif /* __ASSEMBLY__ */
+
+
+#define SIGHUP 1
+#define SIGINT 2
+#define SIGQUIT 3
+#define SIGILL 4
+#define SIGTRAP 5
+#define SIGABRT 6
+#define SIGIOT 6
+#define SIGBUS 7
+#define SIGFPE 8
+#define SIGKILL 9
+#define SIGUSR1 10
+#define SIGSEGV 11
+#define SIGUSR2 12
+#define SIGPIPE 13
+#define SIGALRM 14
+#define SIGTERM 15
+#define SIGSTKFLT 16
+#define SIGCHLD 17
+#define SIGCONT 18
+#define SIGSTOP 19
+#define SIGTSTP 20
+#define SIGTTIN 21
+#define SIGTTOU 22
+#define SIGURG 23
+#define SIGXCPU 24
+#define SIGXFSZ 25
+#define SIGVTALRM 26
+#define SIGPROF 27
+#define SIGWINCH 28
+#define SIGIO 29
+#define SIGPOLL SIGIO
+/*
+#define SIGLOST 29
+*/
+#define SIGPWR 30
+#define SIGSYS 31
+#define SIGUNUSED 31
+
+/* These should not be considered constants from userland. */
+#define SIGRTMIN 32
+#define SIGRTMAX _NSIG
+
+/*
+ * SA_FLAGS values:
+ *
+ * SA_ONSTACK indicates that a registered stack_t will be used.
+ * SA_RESTART flag to get restarting signals (which were the default long ago)
+ * SA_NOCLDSTOP flag to turn off SIGCHLD when children stop.
+ * SA_RESETHAND clears the handler when the signal is delivered.
+ * SA_NOCLDWAIT flag on SIGCHLD to inhibit zombies.
+ * SA_NODEFER prevents the current signal from being masked in the handler.
+ *
+ * SA_ONESHOT and SA_NOMASK are the historical Linux names for the Single
+ * Unix names RESETHAND and NODEFER respectively.
+ */
+#define SA_NOCLDSTOP 0x00000001u
+#define SA_NOCLDWAIT 0x00000002u
+#define SA_SIGINFO 0x00000004u
+#define SA_ONSTACK 0x08000000u
+#define SA_RESTART 0x10000000u
+#define SA_NODEFER 0x40000000u
+#define SA_RESETHAND 0x80000000u
+
+#define SA_NOMASK SA_NODEFER
+#define SA_ONESHOT SA_RESETHAND
+
+#define SA_RESTORER 0x04000000
+
+#define MINSIGSTKSZ 2048
+#define SIGSTKSZ 8192
+
+#include <asm-generic/signal-defs.h>
+
+#ifndef __ASSEMBLY__
+
+
+#ifdef __i386__
+# ifndef __KERNEL__
+/* Here we must cater to libcs that poke about in kernel headers. */
+
+struct sigaction {
+ union {
+ __sighandler_t _sa_handler;
+ void (*_sa_sigaction)(int, struct siginfo *, void *);
+ } _u;
+ sigset_t sa_mask;
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+};
+
+#define sa_handler _u._sa_handler
+#define sa_sigaction _u._sa_sigaction
+
+# endif /* ! __KERNEL__ */
+#else /* __i386__ */
+
+struct sigaction {
+ __sighandler_t sa_handler;
+ unsigned long sa_flags;
+ __sigrestore_t sa_restorer;
+ sigset_t sa_mask; /* mask last for extensibility */
+};
+
+struct k_sigaction {
+ struct sigaction sa;
+};
+
+#endif /* !__i386__ */
+
+typedef struct sigaltstack {
+ void __user *ss_sp;
+ int ss_flags;
+ size_t ss_size;
+} stack_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/asm/socket.h b/arch/x86/include/uapi/asm/socket.h
index 6b71384b9d8..6b71384b9d8 100644
--- a/arch/x86/include/asm/socket.h
+++ b/arch/x86/include/uapi/asm/socket.h
diff --git a/arch/x86/include/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h
index def6d4746ee..def6d4746ee 100644
--- a/arch/x86/include/asm/sockios.h
+++ b/arch/x86/include/uapi/asm/sockios.h
diff --git a/arch/x86/include/asm/stat.h b/arch/x86/include/uapi/asm/stat.h
index 7b3ddc34858..7b3ddc34858 100644
--- a/arch/x86/include/asm/stat.h
+++ b/arch/x86/include/uapi/asm/stat.h
diff --git a/arch/x86/include/asm/statfs.h b/arch/x86/include/uapi/asm/statfs.h
index 2d0adbf99a8..2d0adbf99a8 100644
--- a/arch/x86/include/asm/statfs.h
+++ b/arch/x86/include/uapi/asm/statfs.h
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
new file mode 100644
index 00000000000..b5d7640abc5
--- /dev/null
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -0,0 +1,132 @@
+#ifndef _UAPI__SVM_H
+#define _UAPI__SVM_H
+
+#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR3 0x003
+#define SVM_EXIT_READ_CR4 0x004
+#define SVM_EXIT_READ_CR8 0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0 0x020
+#define SVM_EXIT_READ_DR1 0x021
+#define SVM_EXIT_READ_DR2 0x022
+#define SVM_EXIT_READ_DR3 0x023
+#define SVM_EXIT_READ_DR4 0x024
+#define SVM_EXIT_READ_DR5 0x025
+#define SVM_EXIT_READ_DR6 0x026
+#define SVM_EXIT_READ_DR7 0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR 0x060
+#define SVM_EXIT_NMI 0x061
+#define SVM_EXIT_SMI 0x062
+#define SVM_EXIT_INIT 0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ 0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE 0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF 0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM 0x073
+#define SVM_EXIT_IRET 0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD 0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT 0x078
+#define SVM_EXIT_INVLPG 0x079
+#define SVM_EXIT_INVLPGA 0x07a
+#define SVM_EXIT_IOIO 0x07b
+#define SVM_EXIT_MSR 0x07c
+#define SVM_EXIT_TASK_SWITCH 0x07d
+#define SVM_EXIT_FERR_FREEZE 0x07e
+#define SVM_EXIT_SHUTDOWN 0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL 0x081
+#define SVM_EXIT_VMLOAD 0x082
+#define SVM_EXIT_VMSAVE 0x083
+#define SVM_EXIT_STGI 0x084
+#define SVM_EXIT_CLGI 0x085
+#define SVM_EXIT_SKINIT 0x086
+#define SVM_EXIT_RDTSCP 0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD 0x089
+#define SVM_EXIT_MONITOR 0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_MWAIT_COND 0x08c
+#define SVM_EXIT_XSETBV 0x08d
+#define SVM_EXIT_NPF 0x400
+
+#define SVM_EXIT_ERR -1
+
+#define SVM_EXIT_REASONS \
+ { SVM_EXIT_READ_CR0, "read_cr0" }, \
+ { SVM_EXIT_READ_CR3, "read_cr3" }, \
+ { SVM_EXIT_READ_CR4, "read_cr4" }, \
+ { SVM_EXIT_READ_CR8, "read_cr8" }, \
+ { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
+ { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
+ { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
+ { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
+ { SVM_EXIT_READ_DR0, "read_dr0" }, \
+ { SVM_EXIT_READ_DR1, "read_dr1" }, \
+ { SVM_EXIT_READ_DR2, "read_dr2" }, \
+ { SVM_EXIT_READ_DR3, "read_dr3" }, \
+ { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
+ { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
+ { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
+ { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
+ { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
+ { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
+ { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
+ { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
+ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
+ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
+ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
+ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
+ { SVM_EXIT_INTR, "interrupt" }, \
+ { SVM_EXIT_NMI, "nmi" }, \
+ { SVM_EXIT_SMI, "smi" }, \
+ { SVM_EXIT_INIT, "init" }, \
+ { SVM_EXIT_VINTR, "vintr" }, \
+ { SVM_EXIT_CPUID, "cpuid" }, \
+ { SVM_EXIT_INVD, "invd" }, \
+ { SVM_EXIT_HLT, "hlt" }, \
+ { SVM_EXIT_INVLPG, "invlpg" }, \
+ { SVM_EXIT_INVLPGA, "invlpga" }, \
+ { SVM_EXIT_IOIO, "io" }, \
+ { SVM_EXIT_MSR, "msr" }, \
+ { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
+ { SVM_EXIT_SHUTDOWN, "shutdown" }, \
+ { SVM_EXIT_VMRUN, "vmrun" }, \
+ { SVM_EXIT_VMMCALL, "hypercall" }, \
+ { SVM_EXIT_VMLOAD, "vmload" }, \
+ { SVM_EXIT_VMSAVE, "vmsave" }, \
+ { SVM_EXIT_STGI, "stgi" }, \
+ { SVM_EXIT_CLGI, "clgi" }, \
+ { SVM_EXIT_SKINIT, "skinit" }, \
+ { SVM_EXIT_WBINVD, "wbinvd" }, \
+ { SVM_EXIT_MONITOR, "monitor" }, \
+ { SVM_EXIT_MWAIT, "mwait" }, \
+ { SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_NPF, "npf" }
+
+
+#endif /* _UAPI__SVM_H */
diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/uapi/asm/swab.h
index 557cd9f0066..7f235c7105c 100644
--- a/arch/x86/include/asm/swab.h
+++ b/arch/x86/include/uapi/asm/swab.h
@@ -6,22 +6,7 @@
static inline __attribute_const__ __u32 __arch_swab32(__u32 val)
{
-#ifdef __i386__
-# ifdef CONFIG_X86_BSWAP
- asm("bswap %0" : "=r" (val) : "0" (val));
-# else
- asm("xchgb %b0,%h0\n\t" /* swap lower bytes */
- "rorl $16,%0\n\t" /* swap words */
- "xchgb %b0,%h0" /* swap higher bytes */
- : "=q" (val)
- : "0" (val));
-# endif
-
-#else /* __i386__ */
- asm("bswapl %0"
- : "=r" (val)
- : "0" (val));
-#endif
+ asm("bswapl %0" : "=r" (val) : "0" (val));
return val;
}
#define __arch_swab32 __arch_swab32
@@ -37,22 +22,12 @@ static inline __attribute_const__ __u64 __arch_swab64(__u64 val)
__u64 u;
} v;
v.u = val;
-# ifdef CONFIG_X86_BSWAP
asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1"
: "=r" (v.s.a), "=r" (v.s.b)
: "0" (v.s.a), "1" (v.s.b));
-# else
- v.s.a = __arch_swab32(v.s.a);
- v.s.b = __arch_swab32(v.s.b);
- asm("xchgl %0,%1"
- : "=r" (v.s.a), "=r" (v.s.b)
- : "0" (v.s.a), "1" (v.s.b));
-# endif
return v.u;
#else /* __i386__ */
- asm("bswapq %0"
- : "=r" (val)
- : "0" (val));
+ asm("bswapq %0" : "=r" (val) : "0" (val));
return val;
#endif
}
diff --git a/arch/x86/include/asm/termbits.h b/arch/x86/include/uapi/asm/termbits.h
index 3935b106de7..3935b106de7 100644
--- a/arch/x86/include/asm/termbits.h
+++ b/arch/x86/include/uapi/asm/termbits.h
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/uapi/asm/termios.h
index 280d78a9d96..280d78a9d96 100644
--- a/arch/x86/include/asm/termios.h
+++ b/arch/x86/include/uapi/asm/termios.h
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/uapi/asm/types.h
index 8e8c23fef08..8e8c23fef08 100644
--- a/arch/x86/include/asm/types.h
+++ b/arch/x86/include/uapi/asm/types.h
diff --git a/arch/x86/include/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f..b7c29c8017f 100644
--- a/arch/x86/include/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
diff --git a/arch/x86/include/uapi/asm/unistd.h b/arch/x86/include/uapi/asm/unistd.h
new file mode 100644
index 00000000000..a26df0d75cd
--- /dev/null
+++ b/arch/x86/include/uapi/asm/unistd.h
@@ -0,0 +1,17 @@
+#ifndef _UAPI_ASM_X86_UNISTD_H
+#define _UAPI_ASM_X86_UNISTD_H
+
+/* x32 syscall flag bit */
+#define __X32_SYSCALL_BIT 0x40000000
+
+#ifndef __KERNEL__
+# ifdef __i386__
+# include <asm/unistd_32.h>
+# elif defined(__ILP32__)
+# include <asm/unistd_x32.h>
+# else
+# include <asm/unistd_64.h>
+# endif
+#endif
+
+#endif /* _UAPI_ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/uapi/asm/vm86.h b/arch/x86/include/uapi/asm/vm86.h
new file mode 100644
index 00000000000..e0b243e9d85
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vm86.h
@@ -0,0 +1,129 @@
+#ifndef _UAPI_ASM_X86_VM86_H
+#define _UAPI_ASM_X86_VM86_H
+
+/*
+ * I'm guessing at the VIF/VIP flag usage, but hope that this is how
+ * the Pentium uses them. Linux will return from vm86 mode when both
+ * VIF and VIP is set.
+ *
+ * On a Pentium, we could probably optimize the virtual flags directly
+ * in the eflags register instead of doing it "by hand" in vflags...
+ *
+ * Linus
+ */
+
+#include <asm/processor-flags.h>
+
+#define BIOSSEG 0x0f000
+
+#define CPU_086 0
+#define CPU_186 1
+#define CPU_286 2
+#define CPU_386 3
+#define CPU_486 4
+#define CPU_586 5
+
+/*
+ * Return values for the 'vm86()' system call
+ */
+#define VM86_TYPE(retval) ((retval) & 0xff)
+#define VM86_ARG(retval) ((retval) >> 8)
+
+#define VM86_SIGNAL 0 /* return due to signal */
+#define VM86_UNKNOWN 1 /* unhandled GP fault
+ - IO-instruction or similar */
+#define VM86_INTx 2 /* int3/int x instruction (ARG = x) */
+#define VM86_STI 3 /* sti/popf/iret instruction enabled
+ virtual interrupts */
+
+/*
+ * Additional return values when invoking new vm86()
+ */
+#define VM86_PICRETURN 4 /* return due to pending PIC request */
+#define VM86_TRAP 6 /* return due to DOS-debugger request */
+
+/*
+ * function codes when invoking new vm86()
+ */
+#define VM86_PLUS_INSTALL_CHECK 0
+#define VM86_ENTER 1
+#define VM86_ENTER_NO_BYPASS 2
+#define VM86_REQUEST_IRQ 3
+#define VM86_FREE_IRQ 4
+#define VM86_GET_IRQ_BITS 5
+#define VM86_GET_AND_RESET_IRQ 6
+
+/*
+ * This is the stack-layout seen by the user space program when we have
+ * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout
+ * is 'kernel_vm86_regs' (see below).
+ */
+
+struct vm86_regs {
+/*
+ * normal regs, with special meaning for the segment descriptors..
+ */
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ long __null_ds;
+ long __null_es;
+ long __null_fs;
+ long __null_gs;
+ long orig_eax;
+ long eip;
+ unsigned short cs, __csh;
+ long eflags;
+ long esp;
+ unsigned short ss, __ssh;
+/*
+ * these are specific to v86 mode:
+ */
+ unsigned short es, __esh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+};
+
+struct revectored_struct {
+ unsigned long __map[8]; /* 256 bits */
+};
+
+struct vm86_struct {
+ struct vm86_regs regs;
+ unsigned long flags;
+ unsigned long screen_bitmap;
+ unsigned long cpu_type;
+ struct revectored_struct int_revectored;
+ struct revectored_struct int21_revectored;
+};
+
+/*
+ * flags masks
+ */
+#define VM86_SCREEN_BITMAP 0x0001
+
+struct vm86plus_info_struct {
+ unsigned long force_return_for_pic:1;
+ unsigned long vm86dbg_active:1; /* for debugger */
+ unsigned long vm86dbg_TFpendig:1; /* for debugger */
+ unsigned long unused:28;
+ unsigned long is_vm86pus:1; /* for vm86 internal use */
+ unsigned char vm86dbg_intxxtab[32]; /* for debugger */
+};
+struct vm86plus_struct {
+ struct vm86_regs regs;
+ unsigned long flags;
+ unsigned long screen_bitmap;
+ unsigned long cpu_type;
+ struct revectored_struct int_revectored;
+ struct revectored_struct int21_revectored;
+ struct vm86plus_info_struct vm86plus;
+};
+
+
+#endif /* _UAPI_ASM_X86_VM86_H */
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
new file mode 100644
index 00000000000..979d03bce13
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -0,0 +1,109 @@
+/*
+ * vmx.h: VMX Architecture related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * A few random additions are:
+ * Copyright (C) 2006 Qumranet
+ * Avi Kivity <avi@qumranet.com>
+ * Yaniv Kamay <yaniv@qumranet.com>
+ *
+ */
+#ifndef _UAPIVMX_H
+#define _UAPIVMX_H
+
+
+#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_INVPCID 58
+
+#define VMX_EXIT_REASONS \
+ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
+ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
+ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
+ { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
+ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
+ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
+ { EXIT_REASON_CPUID, "CPUID" }, \
+ { EXIT_REASON_HLT, "HLT" }, \
+ { EXIT_REASON_INVLPG, "INVLPG" }, \
+ { EXIT_REASON_RDPMC, "RDPMC" }, \
+ { EXIT_REASON_RDTSC, "RDTSC" }, \
+ { EXIT_REASON_VMCALL, "VMCALL" }, \
+ { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
+ { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
+ { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
+ { EXIT_REASON_VMPTRST, "VMPTRST" }, \
+ { EXIT_REASON_VMREAD, "VMREAD" }, \
+ { EXIT_REASON_VMRESUME, "VMRESUME" }, \
+ { EXIT_REASON_VMWRITE, "VMWRITE" }, \
+ { EXIT_REASON_VMOFF, "VMOFF" }, \
+ { EXIT_REASON_VMON, "VMON" }, \
+ { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
+ { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
+ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
+ { EXIT_REASON_MSR_READ, "MSR_READ" }, \
+ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
+ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
+ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
+ { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
+ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
+ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
+ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
+ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
+ { EXIT_REASON_WBINVD, "WBINVD" }
+
+
+#endif /* _UAPIVMX_H */
diff --git a/arch/x86/include/uapi/asm/vsyscall.h b/arch/x86/include/uapi/asm/vsyscall.h
new file mode 100644
index 00000000000..85dc1b3825a
--- /dev/null
+++ b/arch/x86/include/uapi/asm/vsyscall.h
@@ -0,0 +1,17 @@
+#ifndef _UAPI_ASM_X86_VSYSCALL_H
+#define _UAPI_ASM_X86_VSYSCALL_H
+
+enum vsyscall_num {
+ __NR_vgettimeofday,
+ __NR_vtime,
+ __NR_vgetcpu,
+};
+
+#define VSYSCALL_START (-10UL << 20)
+#define VSYSCALL_SIZE 1024
+#define VSYSCALL_END (-2UL << 20)
+#define VSYSCALL_MAPPED_PAGES 1
+#define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr))
+
+
+#endif /* _UAPI_ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 91ce48f05f9..ac3b3d00283 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -9,7 +9,6 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
-CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
CFLAGS_REMOVE_kvmclock.o = -pg
@@ -62,11 +61,11 @@ obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_X86_TSC) += trace_clock.o
obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
-obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_OPTPROBES) += kprobes-opt.o
+obj-y += kprobes/
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index e651f7a589a..bacf4b0d91f 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -574,6 +574,12 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
return irq;
}
+EXPORT_SYMBOL_GPL(acpi_register_gsi);
+
+void acpi_unregister_gsi(u32 gsi)
+{
+}
+EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
void __init acpi_set_irq_model_pic(void)
{
@@ -1700,3 +1706,9 @@ int __acpi_release_global_lock(unsigned int *lock)
} while (unlikely (val != old));
return old & 0x1;
}
+
+void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size)
+{
+ e820_add_region(addr, size, E820_ACPI);
+ update_e820();
+}
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 11676cf65ae..d5e0d717005 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str)
#endif
if (strncmp(str, "nonvs", 5) == 0)
acpi_nvs_nosave();
+ if (strncmp(str, "nonvs_s3", 8) == 0)
+ acpi_nvs_nosave_s3();
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
str = strchr(str, ',');
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b17416e72fb..a5b4dce1b7a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -90,21 +90,6 @@ EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
*/
DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
-/*
- * Knob to control our willingness to enable the local APIC.
- *
- * +1=force-enable
- */
-static int force_enable_local_apic __initdata;
-/*
- * APIC command line parameters
- */
-static int __init parse_lapic(char *arg)
-{
- force_enable_local_apic = 1;
- return 0;
-}
-early_param("lapic", parse_lapic);
/* Local APIC was disabled by the BIOS and enabled by the kernel */
static int enabled_via_apicbase;
@@ -133,6 +118,25 @@ static inline void imcr_apic_to_pic(void)
}
#endif
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic __initdata;
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+ if (config_enabled(CONFIG_X86_32) && !arg)
+ force_enable_local_apic = 1;
+ else if (!strncmp(arg, "notscdeadline", 13))
+ setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+ return 0;
+}
+early_param("lapic", parse_lapic);
+
#ifdef CONFIG_X86_64
static int apic_calibrate_pmtmr __initdata;
static __init int setup_apicpmtimer(char *s)
@@ -315,6 +319,7 @@ int lapic_get_maxlvt(void)
/* Clock divisor */
#define APIC_DIVISOR 16
+#define TSC_DIVISOR 32
/*
* This function sets up the local APIC timer, with a timeout of
@@ -333,6 +338,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
lvtt_value = LOCAL_TIMER_VECTOR;
if (!oneshot)
lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+ else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
+
if (!lapic_is_integrated())
lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
@@ -341,6 +349,11 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
apic_write(APIC_LVTT, lvtt_value);
+ if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+ printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
+ return;
+ }
+
/*
* Divide PICLK by 16
*/
@@ -453,6 +466,16 @@ static int lapic_next_event(unsigned long delta,
return 0;
}
+static int lapic_next_deadline(unsigned long delta,
+ struct clock_event_device *evt)
+{
+ u64 tsc;
+
+ rdtscll(tsc);
+ wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+ return 0;
+}
+
/*
* Setup the lapic timer in periodic or oneshot mode
*/
@@ -533,7 +556,15 @@ static void __cpuinit setup_APIC_timer(void)
memcpy(levt, &lapic_clockevent, sizeof(*levt));
levt->cpumask = cpumask_of(smp_processor_id());
- clockevents_register_device(levt);
+ if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+ levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
+ CLOCK_EVT_FEAT_DUMMY);
+ levt->set_next_event = lapic_next_deadline;
+ clockevents_config_and_register(levt,
+ (tsc_khz / TSC_DIVISOR) * 1000,
+ 0xF, ~0UL);
+ } else
+ clockevents_register_device(levt);
}
/*
@@ -661,7 +692,9 @@ static int __init calibrate_APIC_clock(void)
* in the clockevent structure and return.
*/
- if (lapic_timer_frequency) {
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+ return 0;
+ } else if (lapic_timer_frequency) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
lapic_timer_frequency);
lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
@@ -674,6 +707,9 @@ static int __init calibrate_APIC_clock(void)
return 0;
}
+ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+ "calibrating APIC timer ...\n");
+
local_irq_disable();
/* Replace the global interrupt handler */
@@ -811,9 +847,6 @@ void __init setup_boot_APIC_clock(void)
return;
}
- apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
- "calibrating APIC timer ...\n");
-
if (calibrate_APIC_clock()) {
/* No broadcast on UP ! */
if (num_possible_cpus() > 1)
@@ -1444,8 +1477,7 @@ void __init bsp_end_local_APIC_setup(void)
* Now that local APIC setup is completed for BP, configure the fault
* handling for interrupt remapping.
*/
- if (irq_remapping_enabled)
- irq_remap_enable_fault_handling();
+ irq_remap_enable_fault_handling();
}
@@ -2218,8 +2250,7 @@ static int lapic_suspend(void)
local_irq_save(flags);
disable_local_APIC();
- if (irq_remapping_enabled)
- irq_remapping_disable();
+ irq_remapping_disable();
local_irq_restore(flags);
return 0;
@@ -2235,16 +2266,15 @@ static void lapic_resume(void)
return;
local_irq_save(flags);
- if (irq_remapping_enabled) {
- /*
- * IO-APIC and PIC have their own resume routines.
- * We just mask them here to make sure the interrupt
- * subsystem is completely quiet while we enable x2apic
- * and interrupt-remapping.
- */
- mask_ioapic_entries();
- legacy_pic->mask_all();
- }
+
+ /*
+ * IO-APIC and PIC have their own resume routines.
+ * We just mask them here to make sure the interrupt
+ * subsystem is completely quiet while we enable x2apic
+ * and interrupt-remapping.
+ */
+ mask_ioapic_entries();
+ legacy_pic->mask_all();
if (x2apic_mode)
enable_x2apic();
@@ -2287,8 +2317,7 @@ static void lapic_resume(void)
apic_write(APIC_ESR, 0);
apic_read(APIC_ESR);
- if (irq_remapping_enabled)
- irq_remapping_reenable(x2apic_mode);
+ irq_remapping_reenable(x2apic_mode);
local_irq_restore(flags);
}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index a65829ac2b9..9c2aa89a11c 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -22,6 +22,7 @@
#include <linux/hardirq.h>
#include <linux/delay.h>
+#include <asm/numachip/numachip.h>
#include <asm/numachip/numachip_csr.h>
#include <asm/smp.h>
#include <asm/apic.h>
@@ -179,6 +180,7 @@ static int __init numachip_system_init(void)
return 0;
x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+ x86_init.pci.arch_init = pci_numachip_init;
map_csrs();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 1817fa91102..9ed796ccc32 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -68,22 +68,6 @@
#define for_each_irq_pin(entry, head) \
for (entry = head; entry; entry = entry->next)
-#ifdef CONFIG_IRQ_REMAP
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip);
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
- return cfg->irq_2_iommu.iommu != NULL;
-}
-#else
-static inline bool irq_remapped(struct irq_cfg *cfg)
-{
- return false;
-}
-static inline void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
-}
-#endif
-
/*
* Is the SiS APIC rmw bug present ?
* -1 = don't know, 0 = no, 1 = yes
@@ -234,11 +218,11 @@ int __init arch_early_irq_init(void)
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
* For legacy IRQ's, start with assigning irq0 to irq15 to
- * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
+ * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
*/
if (i < legacy_pic->nr_legacy_irqs) {
cfg[i].vector = IRQ0_VECTOR + i;
- cpumask_set_cpu(0, cfg[i].domain);
+ cpumask_setall(cfg[i].domain);
}
}
@@ -300,9 +284,9 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
return cfg;
}
-static int alloc_irq_from(unsigned int from, int node)
+static int alloc_irqs_from(unsigned int from, unsigned int count, int node)
{
- return irq_alloc_desc_from(from, node);
+ return irq_alloc_descs_from(from, count, node);
}
static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
@@ -326,7 +310,7 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
}
-static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
+void io_apic_eoi(unsigned int apic, unsigned int vector)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(vector, &io_apic->eoi);
@@ -573,19 +557,10 @@ static void unmask_ioapic_irq(struct irq_data *data)
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
+void native_eoi_ioapic_pin(int apic, int pin, int vector)
{
if (mpc_ioapic_ver(apic) >= 0x20) {
- /*
- * Intr-remapping uses pin number as the virtual vector
- * in the RTE. Actual vector is programmed in
- * intr-remapping table entry. Hence for the io-apic
- * EOI we use the pin number.
- */
- if (cfg && irq_remapped(cfg))
- io_apic_eoi(apic, pin);
- else
- io_apic_eoi(apic, vector);
+ io_apic_eoi(apic, vector);
} else {
struct IO_APIC_route_entry entry, entry1;
@@ -606,14 +581,15 @@ static void __eoi_ioapic_pin(int apic, int pin, int vector, struct irq_cfg *cfg)
}
}
-static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin)
- __eoi_ioapic_pin(entry->apic, entry->pin, cfg->vector, cfg);
+ x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
+ cfg->vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -650,7 +626,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
}
raw_spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_pin(apic, pin, entry.vector, NULL);
+ x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
@@ -1141,7 +1117,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
* allocation for the members that are not used anymore.
*/
cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
- cfg->move_in_progress = 1;
+ cfg->move_in_progress =
+ cpumask_intersects(cfg->old_domain, cpu_online_mask);
cpumask_and(cfg->domain, cfg->domain, tmp_mask);
break;
}
@@ -1172,8 +1149,9 @@ next:
current_vector = vector;
current_offset = offset;
if (cfg->vector) {
- cfg->move_in_progress = 1;
cpumask_copy(cfg->old_domain, cfg->domain);
+ cfg->move_in_progress =
+ cpumask_intersects(cfg->old_domain, cpu_online_mask);
}
for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
per_cpu(vector_irq, new_cpu)[vector] = irq;
@@ -1241,12 +1219,6 @@ void __setup_vector_irq(int cpu)
cfg = irq_get_chip_data(irq);
if (!cfg)
continue;
- /*
- * If it is a legacy IRQ handled by the legacy PIC, this cpu
- * will be part of the irq_cfg's domain.
- */
- if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
- cpumask_set_cpu(cpu, cfg->domain);
if (!cpumask_test_cpu(cpu, cfg->domain))
continue;
@@ -1308,25 +1280,18 @@ static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
fasteoi = false;
}
- if (irq_remapped(cfg)) {
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
+ if (setup_remapped_irq(irq, cfg, chip))
fasteoi = trigger != 0;
- }
hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
irq_set_chip_and_handler_name(irq, chip, hdl,
fasteoi ? "fasteoi" : "edge");
}
-static int setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
- unsigned int destination, int vector,
- struct io_apic_irq_attr *attr)
+int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+ unsigned int destination, int vector,
+ struct io_apic_irq_attr *attr)
{
- if (irq_remapping_enabled)
- return setup_ioapic_remapped_entry(irq, entry, destination,
- vector, attr);
-
memset(entry, 0, sizeof(*entry));
entry->delivery_mode = apic->irq_delivery_mode;
@@ -1356,16 +1321,6 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
if (!IO_APIC_IRQ(irq))
return;
- /*
- * For legacy irqs, cfg->domain starts with cpu 0. Now that IO-APIC
- * can handle this irq and the apic driver is finialized at this point,
- * update the cfg->domain.
- */
- if (irq < legacy_pic->nr_legacy_irqs &&
- cpumask_equal(cfg->domain, cpumask_of(0)))
- apic->vector_allocation_domain(0, cfg->domain,
- apic->target_cpus());
-
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
@@ -1384,8 +1339,8 @@ static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
cfg->vector, irq, attr->trigger, attr->polarity, dest);
- if (setup_ioapic_entry(irq, &entry, dest, cfg->vector, attr)) {
- pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
+ if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
+ pr_warn("Failed to setup ioapic entry for ioapic %d, pin %d\n",
mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
__clear_irq_vector(irq, cfg);
@@ -1493,9 +1448,6 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
struct IO_APIC_route_entry entry;
unsigned int dest;
- if (irq_remapping_enabled)
- return;
-
memset(&entry, 0, sizeof(entry));
/*
@@ -1527,9 +1479,63 @@ static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
ioapic_write_entry(ioapic_idx, pin, entry);
}
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
{
int i;
+
+ pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
+
+ for (i = 0; i <= nr_entries; i++) {
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ pr_debug(" %02x %02X ", i, entry.dest);
+ pr_cont("%1d %1d %1d %1d %1d "
+ "%1d %1d %02X\n",
+ entry.mask,
+ entry.trigger,
+ entry.irr,
+ entry.polarity,
+ entry.delivery_status,
+ entry.dest_mode,
+ entry.delivery_mode,
+ entry.vector);
+ }
+}
+
+void intel_ir_io_apic_print_entries(unsigned int apic,
+ unsigned int nr_entries)
+{
+ int i;
+
+ pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
+
+ for (i = 0; i <= nr_entries; i++) {
+ struct IR_IO_APIC_route_entry *ir_entry;
+ struct IO_APIC_route_entry entry;
+
+ entry = ioapic_read_entry(apic, i);
+
+ ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
+
+ pr_debug(" %02x %04X ", i, ir_entry->index);
+ pr_cont("%1d %1d %1d %1d %1d "
+ "%1d %1d %X %02X\n",
+ ir_entry->format,
+ ir_entry->mask,
+ ir_entry->trigger,
+ ir_entry->irr,
+ ir_entry->polarity,
+ ir_entry->delivery_status,
+ ir_entry->index2,
+ ir_entry->zero,
+ ir_entry->vector);
+ }
+}
+
+__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+{
union IO_APIC_reg_00 reg_00;
union IO_APIC_reg_01 reg_01;
union IO_APIC_reg_02 reg_02;
@@ -1582,58 +1588,7 @@ __apicdebuginit(void) print_IO_APIC(int ioapic_idx)
printk(KERN_DEBUG ".... IRQ redirection table:\n");
- if (irq_remapping_enabled) {
- printk(KERN_DEBUG " NR Indx Fmt Mask Trig IRR"
- " Pol Stat Indx2 Zero Vect:\n");
- } else {
- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
- " Stat Dmod Deli Vect:\n");
- }
-
- for (i = 0; i <= reg_01.bits.entries; i++) {
- if (irq_remapping_enabled) {
- struct IO_APIC_route_entry entry;
- struct IR_IO_APIC_route_entry *ir_entry;
-
- entry = ioapic_read_entry(ioapic_idx, i);
- ir_entry = (struct IR_IO_APIC_route_entry *) &entry;
- printk(KERN_DEBUG " %02x %04X ",
- i,
- ir_entry->index
- );
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %X %02X\n",
- ir_entry->format,
- ir_entry->mask,
- ir_entry->trigger,
- ir_entry->irr,
- ir_entry->polarity,
- ir_entry->delivery_status,
- ir_entry->index2,
- ir_entry->zero,
- ir_entry->vector
- );
- } else {
- struct IO_APIC_route_entry entry;
-
- entry = ioapic_read_entry(ioapic_idx, i);
- printk(KERN_DEBUG " %02x %02X ",
- i,
- entry.dest
- );
- pr_cont("%1d %1d %1d %1d %1d "
- "%1d %1d %02X\n",
- entry.mask,
- entry.trigger,
- entry.irr,
- entry.polarity,
- entry.delivery_status,
- entry.dest_mode,
- entry.delivery_mode,
- entry.vector
- );
- }
- }
+ x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
}
__apicdebuginit(void) print_IO_APICs(void)
@@ -1935,30 +1890,14 @@ void __init enable_IO_APIC(void)
clear_IO_APIC();
}
-/*
- * Not an __init, needed by the reboot code
- */
-void disable_IO_APIC(void)
+void native_disable_io_apic(void)
{
/*
- * Clear the IO-APIC before rebooting:
- */
- clear_IO_APIC();
-
- if (!legacy_pic->nr_legacy_irqs)
- return;
-
- /*
* If the i8259 is routed through an IOAPIC
* Put that IOAPIC in virtual wire mode
* so legacy interrupts can be delivered.
- *
- * With interrupt-remapping, for now we will use virtual wire A mode,
- * as virtual wire B is little complex (need to configure both
- * IOAPIC RTE as well as interrupt-remapping table entry).
- * As this gets called during crash dump, keep this simple for now.
*/
- if (ioapic_i8259.pin != -1 && !irq_remapping_enabled) {
+ if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
memset(&entry, 0, sizeof(entry));
@@ -1978,12 +1917,25 @@ void disable_IO_APIC(void)
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
+ if (cpu_has_apic || apic_from_smp_config())
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
/*
- * Use virtual wire A mode when interrupt remapping is enabled.
+ * Clear the IO-APIC before rebooting:
*/
- if (cpu_has_apic || apic_from_smp_config())
- disconnect_bsp_APIC(!irq_remapping_enabled &&
- ioapic_i8259.pin != -1);
+ clear_IO_APIC();
+
+ if (!legacy_pic->nr_legacy_irqs)
+ return;
+
+ x86_io_apic_ops.disable();
}
#ifdef CONFIG_X86_32
@@ -2199,9 +2151,11 @@ static int ioapic_retrigger_irq(struct irq_data *data)
{
struct irq_cfg *cfg = data->chip_data;
unsigned long flags;
+ int cpu;
raw_spin_lock_irqsave(&vector_lock, flags);
- apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
+ cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+ apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
@@ -2334,12 +2288,8 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
apic = entry->apic;
pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(cfg))
- io_apic_write(apic, 0x11 + pin*2, dest);
+
+ io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
@@ -2381,9 +2331,10 @@ int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
return 0;
}
-static int
-ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
- bool force)
+
+int native_ioapic_set_affinity(struct irq_data *data,
+ const struct cpumask *mask,
+ bool force)
{
unsigned int dest, irq = data->irq;
unsigned long flags;
@@ -2560,33 +2511,6 @@ static void ack_apic_level(struct irq_data *data)
ioapic_irqd_unmask(data, cfg, masked);
}
-#ifdef CONFIG_IRQ_REMAP
-static void ir_ack_apic_edge(struct irq_data *data)
-{
- ack_APIC_irq();
-}
-
-static void ir_ack_apic_level(struct irq_data *data)
-{
- ack_APIC_irq();
- eoi_ioapic_irq(data->irq, data->chip_data);
-}
-
-static void ir_print_prefix(struct irq_data *data, struct seq_file *p)
-{
- seq_printf(p, " IR-%s", data->chip->name);
-}
-
-static void irq_remap_modify_chip_defaults(struct irq_chip *chip)
-{
- chip->irq_print_chip = ir_print_prefix;
- chip->irq_ack = ir_ack_apic_edge;
- chip->irq_eoi = ir_ack_apic_level;
-
- chip->irq_set_affinity = set_remapped_irq_affinity;
-}
-#endif /* CONFIG_IRQ_REMAP */
-
static struct irq_chip ioapic_chip __read_mostly = {
.name = "IO-APIC",
.irq_startup = startup_ioapic_irq,
@@ -2594,7 +2518,7 @@ static struct irq_chip ioapic_chip __read_mostly = {
.irq_unmask = unmask_ioapic_irq,
.irq_ack = ack_apic_edge,
.irq_eoi = ack_apic_level,
- .irq_set_affinity = ioapic_set_affinity,
+ .irq_set_affinity = native_ioapic_set_affinity,
.irq_retrigger = ioapic_retrigger_irq,
};
@@ -2793,8 +2717,7 @@ static inline void __init check_timer(void)
* 8259A.
*/
if (pin1 == -1) {
- if (irq_remapping_enabled)
- panic("BIOS bug: timer not connected to IO-APIC");
+ panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
pin1 = pin2;
apic1 = apic2;
no_pin1 = 1;
@@ -2826,8 +2749,7 @@ static inline void __init check_timer(void)
clear_IO_APIC_pin(0, pin1);
goto out;
}
- if (irq_remapping_enabled)
- panic("timer doesn't work through Interrupt-remapped IO-APIC");
+ panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
local_irq_disable();
clear_IO_APIC_pin(apic1, pin1);
if (!no_pin1)
@@ -2994,37 +2916,58 @@ device_initcall(ioapic_init_ops);
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int from, int node)
+unsigned int __create_irqs(unsigned int from, unsigned int count, int node)
{
- struct irq_cfg *cfg;
+ struct irq_cfg **cfg;
unsigned long flags;
- unsigned int ret = 0;
- int irq;
+ int irq, i;
if (from < nr_irqs_gsi)
from = nr_irqs_gsi;
- irq = alloc_irq_from(from, node);
- if (irq < 0)
- return 0;
- cfg = alloc_irq_cfg(irq, node);
- if (!cfg) {
- free_irq_at(irq, NULL);
+ cfg = kzalloc_node(count * sizeof(cfg[0]), GFP_KERNEL, node);
+ if (!cfg)
return 0;
+
+ irq = alloc_irqs_from(from, count, node);
+ if (irq < 0)
+ goto out_cfgs;
+
+ for (i = 0; i < count; i++) {
+ cfg[i] = alloc_irq_cfg(irq + i, node);
+ if (!cfg[i])
+ goto out_irqs;
}
raw_spin_lock_irqsave(&vector_lock, flags);
- if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
- ret = irq;
+ for (i = 0; i < count; i++)
+ if (__assign_irq_vector(irq + i, cfg[i], apic->target_cpus()))
+ goto out_vecs;
raw_spin_unlock_irqrestore(&vector_lock, flags);
- if (ret) {
- irq_set_chip_data(irq, cfg);
- irq_clear_status_flags(irq, IRQ_NOREQUEST);
- } else {
- free_irq_at(irq, cfg);
+ for (i = 0; i < count; i++) {
+ irq_set_chip_data(irq + i, cfg[i]);
+ irq_clear_status_flags(irq + i, IRQ_NOREQUEST);
}
- return ret;
+
+ kfree(cfg);
+ return irq;
+
+out_vecs:
+ for (i--; i >= 0; i--)
+ __clear_irq_vector(irq + i, cfg[i]);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+out_irqs:
+ for (i = 0; i < count; i++)
+ free_irq_at(irq + i, cfg[i]);
+out_cfgs:
+ kfree(cfg);
+ return 0;
+}
+
+unsigned int create_irq_nr(unsigned int from, int node)
+{
+ return __create_irqs(from, 1, node);
}
int create_irq(void)
@@ -3049,48 +2992,35 @@ void destroy_irq(unsigned int irq)
irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
- if (irq_remapped(cfg))
- free_remapped_irq(irq);
+ free_remapped_irq(irq);
+
raw_spin_lock_irqsave(&vector_lock, flags);
__clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
free_irq_at(irq, cfg);
}
+void destroy_irqs(unsigned int irq, unsigned int count)
+{
+ unsigned int i;
+
+ for (i = 0; i < count; i++)
+ destroy_irq(irq + i);
+}
+
/*
* MSI message composition
*/
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
- struct msi_msg *msg, u8 hpet_id)
+void native_compose_msi_msg(struct pci_dev *pdev,
+ unsigned int irq, unsigned int dest,
+ struct msi_msg *msg, u8 hpet_id)
{
- struct irq_cfg *cfg;
- int err;
- unsigned dest;
-
- if (disable_apic)
- return -ENXIO;
+ struct irq_cfg *cfg = irq_cfg(irq);
- cfg = irq_cfg(irq);
- err = assign_irq_vector(irq, cfg, apic->target_cpus());
- if (err)
- return err;
-
- err = apic->cpu_mask_to_apicid_and(cfg->domain,
- apic->target_cpus(), &dest);
- if (err)
- return err;
-
- if (irq_remapped(cfg)) {
- compose_remapped_msi_msg(pdev, irq, dest, msg, hpet_id);
- return err;
- }
+ msg->address_hi = MSI_ADDR_BASE_HI;
if (x2apic_enabled())
- msg->address_hi = MSI_ADDR_BASE_HI |
- MSI_ADDR_EXT_DEST_ID(dest);
- else
- msg->address_hi = MSI_ADDR_BASE_HI;
+ msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
msg->address_lo =
MSI_ADDR_BASE_LO |
@@ -3109,8 +3039,32 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
MSI_DATA_DELIVERY_FIXED:
MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(cfg->vector);
+}
- return err;
+#ifdef CONFIG_PCI_MSI
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
+{
+ struct irq_cfg *cfg;
+ int err;
+ unsigned dest;
+
+ if (disable_apic)
+ return -ENXIO;
+
+ cfg = irq_cfg(irq);
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
+ if (err)
+ return err;
+
+ err = apic->cpu_mask_to_apicid_and(cfg->domain,
+ apic->target_cpus(), &dest);
+ if (err)
+ return err;
+
+ x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+ return 0;
}
static int
@@ -3148,23 +3102,28 @@ static struct irq_chip msi_chip = {
.irq_retrigger = ioapic_retrigger_irq,
};
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+ unsigned int irq_base, unsigned int irq_offset)
{
struct irq_chip *chip = &msi_chip;
struct msi_msg msg;
+ unsigned int irq = irq_base + irq_offset;
int ret;
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
- irq_set_msi_desc(irq, msidesc);
- write_msi_msg(irq, &msg);
+ irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
- if (irq_remapped(irq_get_chip_data(irq))) {
- irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- irq_remap_modify_chip_defaults(chip);
- }
+ /*
+ * MSI-X message is written per-IRQ, the offset is always 0.
+ * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+ */
+ if (!irq_offset)
+ write_msi_msg(irq, &msg);
+
+ setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
@@ -3175,46 +3134,26 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- int node, ret, sub_handle, index = 0;
unsigned int irq, irq_want;
struct msi_desc *msidesc;
+ int node, ret;
- /* x86 doesn't support multiple MSI yet */
+ /* Multiple MSI vectors only supported with interrupt remapping */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
node = dev_to_node(&dev->dev);
irq_want = nr_irqs_gsi;
- sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
irq = create_irq_nr(irq_want, node);
if (irq == 0)
- return -1;
+ return -ENOSPC;
+
irq_want = irq + 1;
- if (!irq_remapping_enabled)
- goto no_ir;
- if (!sub_handle) {
- /*
- * allocate the consecutive block of IRTE's
- * for 'nvec'
- */
- index = msi_alloc_remapped_irq(dev, irq, nvec);
- if (index < 0) {
- ret = index;
- goto error;
- }
- } else {
- ret = msi_setup_remapped_irq(dev, irq, index,
- sub_handle);
- if (ret < 0)
- goto error;
- }
-no_ir:
- ret = setup_msi_irq(dev, msidesc, irq);
+ ret = setup_msi_irq(dev, msidesc, irq, 0);
if (ret < 0)
goto error;
- sub_handle++;
}
return 0;
@@ -3310,25 +3249,19 @@ static struct irq_chip hpet_msi_type = {
.irq_retrigger = ioapic_retrigger_irq,
};
-int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
{
struct irq_chip *chip = &hpet_msi_type;
struct msi_msg msg;
int ret;
- if (irq_remapping_enabled) {
- if (!setup_hpet_msi_remapped(irq, id))
- return -1;
- }
-
ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
hpet_msi_write(irq_get_handler_data(irq), &msg);
irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
- if (irq_remapped(irq_get_chip_data(irq)))
- irq_remap_modify_chip_defaults(chip);
+ setup_remapped_irq(irq, irq_get_chip_data(irq), chip);
irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
return 0;
@@ -3694,10 +3627,7 @@ void __init setup_ioapic_dest(void)
else
mask = apic->target_cpus();
- if (irq_remapping_enabled)
- set_remapped_irq_affinity(idata, mask, false);
- else
- ioapic_set_affinity(idata, mask, false);
+ x86_io_apic_ops.set_affinity(idata, mask, false);
}
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index cce91bf2667..7434d8556d0 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -106,7 +106,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
unsigned long mask = cpumask_bits(cpumask)[0];
unsigned long flags;
- if (WARN_ONCE(!mask, "empty IPI mask"))
+ if (!mask)
return;
local_irq_save(flags);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index e03a1e180e8..562a76d433c 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -20,18 +20,19 @@ static int set_x2apic_phys_mode(char *arg)
}
early_param("x2apic_phys", set_x2apic_phys_mode);
-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static bool x2apic_fadt_phys(void)
{
- if (x2apic_phys)
- return x2apic_enabled();
- else if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) &&
- x2apic_enabled()) {
+ if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
printk(KERN_DEBUG "System requires x2apic physical mode\n");
- return 1;
+ return true;
}
- else
- return 0;
+ return false;
+}
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+ return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
}
static void
@@ -82,7 +83,7 @@ static void init_x2apic_ldr(void)
static int x2apic_phys_probe(void)
{
- if (x2apic_mode && x2apic_phys)
+ if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
return 1;
return apic == &apic_x2apic_phys;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index d65464e4350..8d7012b7f40 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -899,6 +899,7 @@ static void apm_cpu_idle(void)
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
+ cputime_t stime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
@@ -906,23 +907,23 @@ static void apm_cpu_idle(void)
WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012");
recalc:
+ task_cputime(current, NULL, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
} else if (jiffies_since_last_check > idle_period) {
unsigned int idle_percentage;
- idle_percentage = current->stime - last_stime;
+ idle_percentage = stime - last_stime;
idle_percentage *= 100;
idle_percentage /= jiffies_since_last_check;
use_apm_idle = (idle_percentage > idle_threshold);
if (apm_info.forbid_idle)
use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
}
+ last_jiffies = jiffies;
+ last_stime = stime;
+
bucket = IDLE_LEAKY_MAX;
while (!need_resched()) {
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1b7d1656a04..15239fffd6f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -304,7 +304,7 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();
/* get information required for multi-node processors */
- if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ if (cpu_has_topoext) {
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -657,12 +657,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
detect_ht(c);
#endif
- if (c->extended_cpuid_level >= 0x80000006) {
- if (cpuid_edx(0x80000006) & 0xf000)
- num_cache_leaves = 4;
- else
- num_cache_leaves = 3;
- }
+ init_amd_cacheinfo(c);
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
@@ -753,9 +748,6 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
{
- if (!cpu_has_invlpg)
- return;
-
tlb_flushall_shift = 5;
if (c->x86 <= 0x11)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d0e910da16c..92dfec986a4 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -107,53 +107,17 @@ static void __init check_hlt(void)
}
/*
- * Most 386 processors have a bug where a POPAD can lock the
- * machine even from user space.
- */
-
-static void __init check_popad(void)
-{
-#ifndef CONFIG_X86_POPAD_OK
- int res, inp = (int) &res;
-
- pr_info("Checking for popad bug... ");
- __asm__ __volatile__(
- "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
- : "=&a" (res)
- : "d" (inp)
- : "ecx", "edi");
- /*
- * If this fails, it means that any user program may lock the
- * CPU hard. Too bad.
- */
- if (res != 12345678)
- pr_cont("Buggy\n");
- else
- pr_cont("OK\n");
-#endif
-}
-
-/*
* Check whether we are able to run this kernel safely on SMP.
*
- * - In order to run on a i386, we need to be compiled for i386
- * (for due to lack of "invlpg" and working WP on a i386)
+ * - i386 is no longer supported.
* - In order to run on anything without a TSC, we need to be
* compiled for a i486.
*/
static void __init check_config(void)
{
-/*
- * We'd better not be a i386 if we're configured to use some
- * i486+ only features! (WP works in supervisor mode and the
- * new "invlpg" and "bswap" instructions)
- */
-#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || \
- defined(CONFIG_X86_BSWAP)
- if (boot_cpu_data.x86 == 3)
+ if (boot_cpu_data.x86 < 4)
panic("Kernel requires i486+ for 'invlpg' and other features");
-#endif
}
@@ -166,7 +130,6 @@ void __init check_bugs(void)
#endif
check_config();
check_hlt();
- check_popad();
init_utsname()->machine[1] =
'0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7505f7b13e7..9c3ab43a695 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1173,15 +1173,6 @@ DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
-/* Make sure %fs and %gs are initialized properly in idle threads */
-struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
- regs->fs = __KERNEL_PERCPU;
- regs->gs = __KERNEL_STACK_CANARY;
-
- return regs;
-}
#endif /* CONFIG_X86_64 */
/*
@@ -1237,7 +1228,7 @@ void __cpuinit cpu_init(void)
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
- if (cpu != 0 && this_cpu_read(numa_node) == 0 &&
+ if (this_cpu_read(numa_node) == 0 &&
early_cpu_to_node(cpu) != NUMA_NO_NODE)
set_numa_node(early_cpu_to_node(cpu));
#endif
@@ -1269,8 +1260,7 @@ void __cpuinit cpu_init(void)
barrier();
x86_configure_nx();
- if (cpu != 0)
- enable_x2apic();
+ enable_x2apic();
/*
* set up and load the per-CPU TSS
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index a8f8fa9769d..1e7e84a02eb 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -79,3 +79,10 @@ void __init init_hypervisor_platform(void)
if (x86_hyper->init_platform)
x86_hyper->init_platform();
}
+
+bool __init hypervisor_x2apic_available(void)
+{
+ return x86_hyper &&
+ x86_hyper->x2apic_available &&
+ x86_hyper->x2apic_available();
+}
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 198e019a531..fcaabd0432c 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -612,10 +612,6 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
{
- if (!cpu_has_invlpg) {
- tlb_flushall_shift = -1;
- return;
- }
switch ((c->x86 << 8) + c->x86_model) {
case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 93c5451bdd5..84c1309c4c0 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -298,8 +298,7 @@ struct _cache_attr {
unsigned int);
};
-#ifdef CONFIG_AMD_NB
-
+#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
/*
* L3 cache descriptors
*/
@@ -524,9 +523,9 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
static struct _cache_attr subcaches =
__ATTR(subcaches, 0644, show_subcaches, store_subcaches);
-#else /* CONFIG_AMD_NB */
+#else
#define amd_init_l3_cache(x, y)
-#endif /* CONFIG_AMD_NB */
+#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
@@ -538,7 +537,11 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
unsigned edx;
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- amd_cpuid4(index, &eax, &ebx, &ecx);
+ if (cpu_has_topoext)
+ cpuid_count(0x8000001d, index, &eax.full,
+ &ebx.full, &ecx.full, &edx);
+ else
+ amd_cpuid4(index, &eax, &ebx, &ecx);
amd_init_l3_cache(this_leaf, index);
} else {
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
@@ -557,21 +560,39 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
return 0;
}
-static int __cpuinit find_num_cache_leaves(void)
+static int __cpuinit find_num_cache_leaves(struct cpuinfo_x86 *c)
{
- unsigned int eax, ebx, ecx, edx;
+ unsigned int eax, ebx, ecx, edx, op;
union _cpuid4_leaf_eax cache_eax;
int i = -1;
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ op = 0x8000001d;
+ else
+ op = 4;
+
do {
++i;
- /* Do cpuid(4) loop to find out num_cache_leaves */
- cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
+ /* Do cpuid(op) loop to find out num_cache_leaves */
+ cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
cache_eax.full = eax;
} while (cache_eax.split.type != CACHE_TYPE_NULL);
return i;
}
+void __cpuinit init_amd_cacheinfo(struct cpuinfo_x86 *c)
+{
+
+ if (cpu_has_topoext) {
+ num_cache_leaves = find_num_cache_leaves(c);
+ } else if (c->extended_cpuid_level >= 0x80000006) {
+ if (cpuid_edx(0x80000006) & 0xf000)
+ num_cache_leaves = 4;
+ else
+ num_cache_leaves = 3;
+ }
+}
+
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
@@ -588,7 +609,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
if (is_initialized == 0) {
/* Init num_cache_leaves from boot CPU */
- num_cache_leaves = find_num_cache_leaves();
+ num_cache_leaves = find_num_cache_leaves(c);
is_initialized++;
}
@@ -728,37 +749,50 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index)
{
struct _cpuid4_info *this_leaf;
- int ret, i, sibling;
- struct cpuinfo_x86 *c = &cpu_data(cpu);
+ int i, sibling;
- ret = 0;
- if (index == 3) {
- ret = 1;
- for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
+ if (cpu_has_topoext) {
+ unsigned int apicid, nshared, first, last;
+
+ if (!per_cpu(ici_cpuid4_info, cpu))
+ return 0;
+
+ this_leaf = CPUID4_INFO_IDX(cpu, index);
+ nshared = this_leaf->base.eax.split.num_threads_sharing + 1;
+ apicid = cpu_data(cpu).apicid;
+ first = apicid - (apicid % nshared);
+ last = first + nshared - 1;
+
+ for_each_online_cpu(i) {
+ apicid = cpu_data(i).apicid;
+ if ((apicid < first) || (apicid > last))
+ continue;
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
- if (!cpu_online(sibling))
+
+ for_each_online_cpu(sibling) {
+ apicid = cpu_data(sibling).apicid;
+ if ((apicid < first) || (apicid > last))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
- } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) {
- ret = 1;
- for_each_cpu(i, cpu_sibling_mask(cpu)) {
+ } else if (index == 3) {
+ for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
if (!per_cpu(ici_cpuid4_info, i))
continue;
this_leaf = CPUID4_INFO_IDX(i, index);
- for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
+ for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
if (!cpu_online(sibling))
continue;
set_bit(sibling, this_leaf->shared_cpu_map);
}
}
- }
+ } else
+ return 0;
- return ret;
+ return 1;
}
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 6a05c1d327a..5b7d4fa5d3b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -24,8 +24,6 @@ struct mce_bank {
int mce_severity(struct mce *a, int tolerant, char **msg);
struct dentry *mce_get_debugfs_dir(void);
-extern int mce_ser;
-
extern struct mce_bank *mce_banks;
#ifdef CONFIG_X86_MCE_INTEL
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 13017626f9a..beb1f1689e5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -193,9 +193,9 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
continue;
if ((m->mcgstatus & s->mcgmask) != s->mcgres)
continue;
- if (s->ser == SER_REQUIRED && !mce_ser)
+ if (s->ser == SER_REQUIRED && !mca_cfg.ser)
continue;
- if (s->ser == NO_SER && mce_ser)
+ if (s->ser == NO_SER && mca_cfg.ser)
continue;
if (s->context && ctx != s->context)
continue;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 46cbf868969..80dbda84f1c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -58,34 +58,26 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
-int mce_disabled __read_mostly;
-
#define SPINUNIT 100 /* 100ns */
atomic_t mce_entry;
DEFINE_PER_CPU(unsigned, mce_exception_count);
-/*
- * Tolerant levels:
- * 0: always panic on uncorrected errors, log corrected errors
- * 1: panic or SIGBUS on uncorrected errors, log corrected errors
- * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
- * 3: never panic or SIGBUS, log all errors (for testing only)
- */
-static int tolerant __read_mostly = 1;
-static int banks __read_mostly;
-static int rip_msr __read_mostly;
-static int mce_bootlog __read_mostly = -1;
-static int monarch_timeout __read_mostly = -1;
-static int mce_panic_timeout __read_mostly;
-static int mce_dont_log_ce __read_mostly;
-int mce_cmci_disabled __read_mostly;
-int mce_ignore_ce __read_mostly;
-int mce_ser __read_mostly;
-int mce_bios_cmci_threshold __read_mostly;
-
-struct mce_bank *mce_banks __read_mostly;
+struct mce_bank *mce_banks __read_mostly;
+
+struct mca_config mca_cfg __read_mostly = {
+ .bootlog = -1,
+ /*
+ * Tolerant levels:
+ * 0: always panic on uncorrected errors, log corrected errors
+ * 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ * 2: SIGBUS or log uncorrected errors (if possible), log corr. errors
+ * 3: never panic or SIGBUS, log all errors (for testing only)
+ */
+ .tolerant = 1,
+ .monarch_timeout = -1
+};
/* User mode helper program triggered by machine check event */
static unsigned long mce_need_notify;
@@ -302,7 +294,7 @@ static void wait_for_panic(void)
while (timeout-- > 0)
udelay(1);
if (panic_timeout == 0)
- panic_timeout = mce_panic_timeout;
+ panic_timeout = mca_cfg.panic_timeout;
panic("Panicing machine check CPU died");
}
@@ -360,7 +352,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
pr_emerg(HW_ERR "Machine check: %s\n", exp);
if (!fake_panic) {
if (panic_timeout == 0)
- panic_timeout = mce_panic_timeout;
+ panic_timeout = mca_cfg.panic_timeout;
panic(msg);
} else
pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg);
@@ -372,7 +364,7 @@ static int msr_to_offset(u32 msr)
{
unsigned bank = __this_cpu_read(injectm.bank);
- if (msr == rip_msr)
+ if (msr == mca_cfg.rip_msr)
return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank))
return offsetof(struct mce, status);
@@ -451,8 +443,8 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
m->cs |= 3;
}
/* Use accurate RIP reporting if available. */
- if (rip_msr)
- m->ip = mce_rdmsrl(rip_msr);
+ if (mca_cfg.rip_msr)
+ m->ip = mce_rdmsrl(mca_cfg.rip_msr);
}
}
@@ -513,7 +505,7 @@ static int mce_ring_add(unsigned long pfn)
int mce_available(struct cpuinfo_x86 *c)
{
- if (mce_disabled)
+ if (mca_cfg.disabled)
return 0;
return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
}
@@ -565,7 +557,7 @@ static void mce_read_aux(struct mce *m, int i)
/*
* Mask the reported address by the reported granularity.
*/
- if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+ if (mca_cfg.ser && (m->status & MCI_STATUS_MISCV)) {
u8 shift = MCI_MISC_ADDR_LSB(m->misc);
m->addr >>= shift;
m->addr <<= shift;
@@ -599,7 +591,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
mce_gather_info(&m, NULL);
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
if (!mce_banks[i].ctl || !test_bit(i, *b))
continue;
@@ -620,7 +612,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* TBD do the same check for MCI_STATUS_EN here?
*/
if (!(flags & MCP_UC) &&
- (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
+ (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
mce_read_aux(&m, i);
@@ -631,7 +623,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
*/
- if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
+ if (!(flags & MCP_DONTLOG) && !mca_cfg.dont_log_ce)
mce_log(&m);
/*
@@ -658,14 +650,14 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
{
int i, ret = 0;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
if (m->status & MCI_STATUS_VAL) {
__set_bit(i, validp);
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
}
- if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
+ if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
ret = 1;
}
return ret;
@@ -696,11 +688,11 @@ static int mce_timed_out(u64 *t)
rmb();
if (atomic_read(&mce_paniced))
wait_for_panic();
- if (!monarch_timeout)
+ if (!mca_cfg.monarch_timeout)
goto out;
if ((s64)*t < SPINUNIT) {
/* CHECKME: Make panic default for 1 too? */
- if (tolerant < 1)
+ if (mca_cfg.tolerant < 1)
mce_panic("Timeout synchronizing machine check over CPUs",
NULL, NULL);
cpu_missing = 1;
@@ -750,7 +742,8 @@ static void mce_reign(void)
* Grade the severity of the errors of all the CPUs.
*/
for_each_possible_cpu(cpu) {
- int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant,
+ int severity = mce_severity(&per_cpu(mces_seen, cpu),
+ mca_cfg.tolerant,
&nmsg);
if (severity > global_worst) {
msg = nmsg;
@@ -764,7 +757,7 @@ static void mce_reign(void)
* This dumps all the mces in the log buffer and stops the
* other CPUs.
*/
- if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3)
+ if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Fatal Machine check", m, msg);
/*
@@ -777,7 +770,7 @@ static void mce_reign(void)
* No machine check event found. Must be some external
* source or one CPU is hung. Panic.
*/
- if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
+ if (global_worst <= MCE_KEEP_SEVERITY && mca_cfg.tolerant < 3)
mce_panic("Machine check from unknown source", NULL, NULL);
/*
@@ -801,7 +794,7 @@ static int mce_start(int *no_way_out)
{
int order;
int cpus = num_online_cpus();
- u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+ u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
if (!timeout)
return -1;
@@ -865,7 +858,7 @@ static int mce_start(int *no_way_out)
static int mce_end(int order)
{
int ret = -1;
- u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC;
+ u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
if (!timeout)
goto reset;
@@ -946,7 +939,7 @@ static void mce_clear_state(unsigned long *toclear)
{
int i;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
if (test_bit(i, toclear))
mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
}
@@ -1011,6 +1004,7 @@ static void mce_clear_info(struct mce_info *mi)
*/
void do_machine_check(struct pt_regs *regs, long error_code)
{
+ struct mca_config *cfg = &mca_cfg;
struct mce m, *final;
int i;
int worst = 0;
@@ -1022,7 +1016,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int order;
/*
* If no_way_out gets set, there is no safe way to recover from this
- * MCE. If tolerant is cranked up, we'll try anyway.
+ * MCE. If mca_cfg.tolerant is cranked up, we'll try anyway.
*/
int no_way_out = 0;
/*
@@ -1038,7 +1032,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
this_cpu_inc(mce_exception_count);
- if (!banks)
+ if (!cfg->banks)
goto out;
mce_gather_info(&m, regs);
@@ -1065,7 +1059,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* because the first one to see it will clear it.
*/
order = mce_start(&no_way_out);
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < cfg->banks; i++) {
__clear_bit(i, toclear);
if (!test_bit(i, valid_banks))
continue;
@@ -1084,7 +1078,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* Non uncorrected or non signaled errors are handled by
* machine_check_poll. Leave them alone, unless this panics.
*/
- if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
+ if (!(m.status & (cfg->ser ? MCI_STATUS_S : MCI_STATUS_UC)) &&
!no_way_out)
continue;
@@ -1093,7 +1087,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
add_taint(TAINT_MACHINE_CHECK);
- severity = mce_severity(&m, tolerant, NULL);
+ severity = mce_severity(&m, cfg->tolerant, NULL);
/*
* When machine check was for corrected handler don't touch,
@@ -1117,7 +1111,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* When the ring overflows we just ignore the AO error.
* RED-PEN add some logging mechanism when
* usable_address or mce_add_ring fails.
- * RED-PEN don't ignore overflow for tolerant == 0
+ * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
*/
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
@@ -1149,7 +1143,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
* issues we try to recover, or limit damage to the current
* process.
*/
- if (tolerant < 3) {
+ if (cfg->tolerant < 3) {
if (no_way_out)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst == MCE_AR_SEVERITY) {
@@ -1377,11 +1371,13 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
+ u8 num_banks = mca_cfg.banks;
- mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
+ mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
- for (i = 0; i < banks; i++) {
+
+ for (i = 0; i < num_banks; i++) {
struct mce_bank *b = &mce_banks[i];
b->ctl = -1ULL;
@@ -1401,7 +1397,7 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- if (!banks)
+ if (!mca_cfg.banks)
pr_info("CPU supports %d MCE banks\n", b);
if (b > MAX_NR_BANKS) {
@@ -1411,8 +1407,9 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
}
/* Don't support asymmetric configurations today */
- WARN_ON(banks != 0 && b != banks);
- banks = b;
+ WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
+ mca_cfg.banks = b;
+
if (!mce_banks) {
int err = __mcheck_cpu_mce_banks_init();
@@ -1422,25 +1419,29 @@ static int __cpuinit __mcheck_cpu_cap_init(void)
/* Use accurate RIP reporting if available. */
if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9)
- rip_msr = MSR_IA32_MCG_EIP;
+ mca_cfg.rip_msr = MSR_IA32_MCG_EIP;
if (cap & MCG_SER_P)
- mce_ser = 1;
+ mca_cfg.ser = true;
return 0;
}
static void __mcheck_cpu_init_generic(void)
{
+ enum mcp_flags m_fl = 0;
mce_banks_t all_banks;
u64 cap;
int i;
+ if (!mca_cfg.bootlog)
+ m_fl = MCP_DONTLOG;
+
/*
* Log the machine checks left over from the previous reset.
*/
bitmap_fill(all_banks, MAX_NR_BANKS);
- machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks);
+ machine_check_poll(MCP_UC | m_fl, &all_banks);
set_in_cr4(X86_CR4_MCE);
@@ -1448,7 +1449,7 @@ static void __mcheck_cpu_init_generic(void)
if (cap & MCG_CTL_P)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (!b->init)
@@ -1489,6 +1490,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
/* Add per CPU specific workarounds here */
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
+ struct mca_config *cfg = &mca_cfg;
+
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("unknown CPU type - not enabling MCE support\n");
return -EOPNOTSUPP;
@@ -1496,7 +1499,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
/* This should be disabled by the BIOS, but isn't always */
if (c->x86_vendor == X86_VENDOR_AMD) {
- if (c->x86 == 15 && banks > 4) {
+ if (c->x86 == 15 && cfg->banks > 4) {
/*
* disable GART TBL walk error reporting, which
* trips off incorrectly with the IOMMU & 3ware
@@ -1504,18 +1507,18 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
*/
clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
}
- if (c->x86 <= 17 && mce_bootlog < 0) {
+ if (c->x86 <= 17 && cfg->bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
* by default and leave crap in there. Don't log:
*/
- mce_bootlog = 0;
+ cfg->bootlog = 0;
}
/*
* Various K7s with broken bank 0 around. Always disable
* by default.
*/
- if (c->x86 == 6 && banks > 0)
+ if (c->x86 == 6 && cfg->banks > 0)
mce_banks[0].ctl = 0;
/*
@@ -1566,7 +1569,7 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* valid event later, merely don't write CTL0.
*/
- if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
+ if (c->x86 == 6 && c->x86_model < 0x1A && cfg->banks > 0)
mce_banks[0].init = 0;
/*
@@ -1574,23 +1577,23 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
* synchronization with a one second timeout.
*/
if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) &&
- monarch_timeout < 0)
- monarch_timeout = USEC_PER_SEC;
+ cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
/*
* There are also broken BIOSes on some Pentium M and
* earlier systems:
*/
- if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
- mce_bootlog = 0;
+ if (c->x86 == 6 && c->x86_model <= 13 && cfg->bootlog < 0)
+ cfg->bootlog = 0;
if (c->x86 == 6 && c->x86_model == 45)
quirk_no_way_out = quirk_sandybridge_ifu;
}
- if (monarch_timeout < 0)
- monarch_timeout = 0;
- if (mce_bootlog != 0)
- mce_panic_timeout = 30;
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = 0;
+ if (cfg->bootlog != 0)
+ cfg->panic_timeout = 30;
return 0;
}
@@ -1635,7 +1638,7 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
__this_cpu_write(mce_next_interval, iv);
- if (mce_ignore_ce || !iv)
+ if (mca_cfg.ignore_ce || !iv)
return;
t->expires = round_jiffies(jiffies + iv);
@@ -1668,7 +1671,7 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
*/
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
- if (mce_disabled)
+ if (mca_cfg.disabled)
return;
if (__mcheck_cpu_ancient_init(c))
@@ -1678,7 +1681,7 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
return;
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
- mce_disabled = 1;
+ mca_cfg.disabled = true;
return;
}
@@ -1951,6 +1954,8 @@ static struct miscdevice mce_chrdev_device = {
*/
static int __init mcheck_enable(char *str)
{
+ struct mca_config *cfg = &mca_cfg;
+
if (*str == 0) {
enable_p5_mce();
return 1;
@@ -1958,22 +1963,22 @@ static int __init mcheck_enable(char *str)
if (*str == '=')
str++;
if (!strcmp(str, "off"))
- mce_disabled = 1;
+ cfg->disabled = true;
else if (!strcmp(str, "no_cmci"))
- mce_cmci_disabled = 1;
+ cfg->cmci_disabled = true;
else if (!strcmp(str, "dont_log_ce"))
- mce_dont_log_ce = 1;
+ cfg->dont_log_ce = true;
else if (!strcmp(str, "ignore_ce"))
- mce_ignore_ce = 1;
+ cfg->ignore_ce = true;
else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
- mce_bootlog = (str[0] == 'b');
+ cfg->bootlog = (str[0] == 'b');
else if (!strcmp(str, "bios_cmci_threshold"))
- mce_bios_cmci_threshold = 1;
+ cfg->bios_cmci_threshold = true;
else if (isdigit(str[0])) {
- get_option(&str, &tolerant);
+ get_option(&str, &(cfg->tolerant));
if (*str == ',') {
++str;
- get_option(&str, &monarch_timeout);
+ get_option(&str, &(cfg->monarch_timeout));
}
} else {
pr_info("mce argument %s ignored. Please use /sys\n", str);
@@ -2002,7 +2007,7 @@ static int mce_disable_error_reporting(void)
{
int i;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2142,15 +2147,15 @@ static ssize_t set_ignore_ce(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
- if (mce_ignore_ce ^ !!new) {
+ if (mca_cfg.ignore_ce ^ !!new) {
if (new) {
/* disable ce features */
mce_timer_delete_all();
on_each_cpu(mce_disable_cmci, NULL, 1);
- mce_ignore_ce = 1;
+ mca_cfg.ignore_ce = true;
} else {
/* enable ce features */
- mce_ignore_ce = 0;
+ mca_cfg.ignore_ce = false;
on_each_cpu(mce_enable_ce, (void *)1, 1);
}
}
@@ -2166,14 +2171,14 @@ static ssize_t set_cmci_disabled(struct device *s,
if (strict_strtoull(buf, 0, &new) < 0)
return -EINVAL;
- if (mce_cmci_disabled ^ !!new) {
+ if (mca_cfg.cmci_disabled ^ !!new) {
if (new) {
/* disable cmci */
on_each_cpu(mce_disable_cmci, NULL, 1);
- mce_cmci_disabled = 1;
+ mca_cfg.cmci_disabled = true;
} else {
/* enable cmci */
- mce_cmci_disabled = 0;
+ mca_cfg.cmci_disabled = false;
on_each_cpu(mce_enable_ce, NULL, 1);
}
}
@@ -2190,9 +2195,9 @@ static ssize_t store_int_with_restart(struct device *s,
}
static DEVICE_ATTR(trigger, 0644, show_trigger, set_trigger);
-static DEVICE_INT_ATTR(tolerant, 0644, tolerant);
-static DEVICE_INT_ATTR(monarch_timeout, 0644, monarch_timeout);
-static DEVICE_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce);
+static DEVICE_INT_ATTR(tolerant, 0644, mca_cfg.tolerant);
+static DEVICE_INT_ATTR(monarch_timeout, 0644, mca_cfg.monarch_timeout);
+static DEVICE_BOOL_ATTR(dont_log_ce, 0644, mca_cfg.dont_log_ce);
static struct dev_ext_attribute dev_attr_check_interval = {
__ATTR(check_interval, 0644, device_show_int, store_int_with_restart),
@@ -2200,13 +2205,13 @@ static struct dev_ext_attribute dev_attr_check_interval = {
};
static struct dev_ext_attribute dev_attr_ignore_ce = {
- __ATTR(ignore_ce, 0644, device_show_int, set_ignore_ce),
- &mce_ignore_ce
+ __ATTR(ignore_ce, 0644, device_show_bool, set_ignore_ce),
+ &mca_cfg.ignore_ce
};
static struct dev_ext_attribute dev_attr_cmci_disabled = {
- __ATTR(cmci_disabled, 0644, device_show_int, set_cmci_disabled),
- &mce_cmci_disabled
+ __ATTR(cmci_disabled, 0644, device_show_bool, set_cmci_disabled),
+ &mca_cfg.cmci_disabled
};
static struct device_attribute *mce_device_attrs[] = {
@@ -2253,7 +2258,7 @@ static __cpuinit int mce_device_create(unsigned int cpu)
if (err)
goto error;
}
- for (j = 0; j < banks; j++) {
+ for (j = 0; j < mca_cfg.banks; j++) {
err = device_create_file(dev, &mce_banks[j].attr);
if (err)
goto error2;
@@ -2285,7 +2290,7 @@ static __cpuinit void mce_device_remove(unsigned int cpu)
for (i = 0; mce_device_attrs[i]; i++)
device_remove_file(dev, mce_device_attrs[i]);
- for (i = 0; i < banks; i++)
+ for (i = 0; i < mca_cfg.banks; i++)
device_remove_file(dev, &mce_banks[i].attr);
device_unregister(dev);
@@ -2304,7 +2309,7 @@ static void __cpuinit mce_disable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2322,7 +2327,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
if (!(action & CPU_TASKS_FROZEN))
cmci_reenable();
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
if (b->init)
@@ -2375,7 +2380,7 @@ static __init void mce_init_banks(void)
{
int i;
- for (i = 0; i < banks; i++) {
+ for (i = 0; i < mca_cfg.banks; i++) {
struct mce_bank *b = &mce_banks[i];
struct device_attribute *a = &b->attr;
@@ -2426,7 +2431,7 @@ device_initcall_sync(mcheck_init_device);
*/
static int __init mcheck_disable(char *str)
{
- mce_disabled = 1;
+ mca_cfg.disabled = true;
return 1;
}
__setup("nomce", mcheck_disable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 4f9a3cbfc4a..402c454fbff 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -53,7 +53,7 @@ static int cmci_supported(int *banks)
{
u64 cap;
- if (mce_cmci_disabled || mce_ignore_ce)
+ if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
return 0;
/*
@@ -200,7 +200,7 @@ static void cmci_discover(int banks)
continue;
}
- if (!mce_bios_cmci_threshold) {
+ if (!mca_cfg.bios_cmci_threshold) {
val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
val |= CMCI_THRESHOLD;
} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
@@ -227,7 +227,7 @@ static void cmci_discover(int banks)
* set the thresholds properly or does not work with
* this boot option. Note down now and report later.
*/
- if (mce_bios_cmci_threshold && bios_zero_thresh &&
+ if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
bios_wrong_thresh = 1;
} else {
@@ -235,7 +235,7 @@ static void cmci_discover(int banks)
}
}
raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
- if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+ if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
pr_info_once(
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 6b96110bb0c..726bf963c22 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -606,7 +606,7 @@ void __init mtrr_bp_init(void)
/*
* This is an AMD specific MSR, but we assume(hope?) that
- * Intel will implement it to when they extend the address
+ * Intel will implement it too when they extend the address
* bus of the Xeon.
*/
if (cpuid_eax(0x80000000) >= 0x80000008) {
@@ -695,11 +695,16 @@ void mtrr_ap_init(void)
}
/**
- * Save current fixed-range MTRR state of the BSP
+ * Save current fixed-range MTRR state of the first cpu in cpu_online_mask.
*/
void mtrr_save_state(void)
{
- smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
+ int first_cpu;
+
+ get_online_cpus();
+ first_cpu = cpumask_first(cpu_online_mask);
+ smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
+ put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4a3374e61a9..bf0f01aea99 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -340,9 +340,6 @@ int x86_setup_perfctr(struct perf_event *event)
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
-
- if (!attr->exclude_guest)
- return -EOPNOTSUPP;
}
hwc->config |= config;
@@ -385,9 +382,6 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
int precise = 0;
- if (!event->attr.exclude_guest)
- return -EOPNOTSUPP;
-
/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
@@ -835,7 +829,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
} else {
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
- hwc->event_base_rdpmc = hwc->idx;
+ hwc->event_base_rdpmc = x86_pmu_rdpmc_index(hwc->idx);
}
}
@@ -1316,6 +1310,114 @@ static struct attribute_group x86_pmu_format_group = {
.attrs = NULL,
};
+/*
+ * Remove all undefined events (x86_pmu.event_map(id) == 0)
+ * out of events_attr attributes.
+ */
+static void __init filter_events(struct attribute **attrs)
+{
+ int i, j;
+
+ for (i = 0; attrs[i]; i++) {
+ if (x86_pmu.event_map(i))
+ continue;
+
+ for (j = i; attrs[j]; j++)
+ attrs[j] = attrs[j + 1];
+
+ /* Check the shifted attr. */
+ i--;
+ }
+}
+
+static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr,
+ char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr = \
+ container_of(attr, struct perf_pmu_events_attr, attr);
+
+ u64 config = x86_pmu.event_map(pmu_attr->id);
+ return x86_pmu.events_sysfs_show(page, config);
+}
+
+#define EVENT_VAR(_id) event_attr_##_id
+#define EVENT_PTR(_id) &event_attr_##_id.attr.attr
+
+#define EVENT_ATTR(_name, _id) \
+ PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \
+ events_sysfs_show)
+
+EVENT_ATTR(cpu-cycles, CPU_CYCLES );
+EVENT_ATTR(instructions, INSTRUCTIONS );
+EVENT_ATTR(cache-references, CACHE_REFERENCES );
+EVENT_ATTR(cache-misses, CACHE_MISSES );
+EVENT_ATTR(branch-instructions, BRANCH_INSTRUCTIONS );
+EVENT_ATTR(branch-misses, BRANCH_MISSES );
+EVENT_ATTR(bus-cycles, BUS_CYCLES );
+EVENT_ATTR(stalled-cycles-frontend, STALLED_CYCLES_FRONTEND );
+EVENT_ATTR(stalled-cycles-backend, STALLED_CYCLES_BACKEND );
+EVENT_ATTR(ref-cycles, REF_CPU_CYCLES );
+
+static struct attribute *empty_attrs;
+
+static struct attribute *events_attr[] = {
+ EVENT_PTR(CPU_CYCLES),
+ EVENT_PTR(INSTRUCTIONS),
+ EVENT_PTR(CACHE_REFERENCES),
+ EVENT_PTR(CACHE_MISSES),
+ EVENT_PTR(BRANCH_INSTRUCTIONS),
+ EVENT_PTR(BRANCH_MISSES),
+ EVENT_PTR(BUS_CYCLES),
+ EVENT_PTR(STALLED_CYCLES_FRONTEND),
+ EVENT_PTR(STALLED_CYCLES_BACKEND),
+ EVENT_PTR(REF_CPU_CYCLES),
+ NULL,
+};
+
+static struct attribute_group x86_pmu_events_group = {
+ .name = "events",
+ .attrs = events_attr,
+};
+
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event)
+{
+ u64 umask = (config & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
+ u64 cmask = (config & ARCH_PERFMON_EVENTSEL_CMASK) >> 24;
+ bool edge = (config & ARCH_PERFMON_EVENTSEL_EDGE);
+ bool pc = (config & ARCH_PERFMON_EVENTSEL_PIN_CONTROL);
+ bool any = (config & ARCH_PERFMON_EVENTSEL_ANY);
+ bool inv = (config & ARCH_PERFMON_EVENTSEL_INV);
+ ssize_t ret;
+
+ /*
+ * We have whole page size to spend and just little data
+ * to write, so we can safely use sprintf.
+ */
+ ret = sprintf(page, "event=0x%02llx", event);
+
+ if (umask)
+ ret += sprintf(page + ret, ",umask=0x%02llx", umask);
+
+ if (edge)
+ ret += sprintf(page + ret, ",edge");
+
+ if (pc)
+ ret += sprintf(page + ret, ",pc");
+
+ if (any)
+ ret += sprintf(page + ret, ",any");
+
+ if (inv)
+ ret += sprintf(page + ret, ",inv");
+
+ if (cmask)
+ ret += sprintf(page + ret, ",cmask=0x%02llx", cmask);
+
+ ret += sprintf(page + ret, "\n");
+
+ return ret;
+}
+
static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -1362,6 +1464,11 @@ static int __init init_hw_perf_events(void)
x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
x86_pmu_format_group.attrs = x86_pmu.format_attrs;
+ if (!x86_pmu.events_sysfs_show)
+ x86_pmu_events_group.attrs = &empty_attrs;
+ else
+ filter_events(x86_pmu_events_group.attrs);
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1651,6 +1758,7 @@ static struct attribute_group x86_pmu_attr_group = {
static const struct attribute_group *x86_pmu_attr_groups[] = {
&x86_pmu_attr_group,
&x86_pmu_format_group,
+ &x86_pmu_events_group,
NULL,
};
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 271d2570029..7f5c75c2afd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -325,6 +325,8 @@ struct x86_pmu {
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
unsigned perfctr;
+ int (*addr_offset)(int index, bool eventsel);
+ int (*rdpmc_index)(int index);
u64 (*event_map)(int);
int max_events;
int num_counters;
@@ -354,6 +356,8 @@ struct x86_pmu {
int attr_rdpmc;
struct attribute **format_attrs;
+ ssize_t (*events_sysfs_show)(char *page, u64 config);
+
/*
* CPU Hotplug hooks
*/
@@ -444,28 +448,21 @@ extern u64 __read_mostly hw_cache_extra_regs
u64 x86_perf_event_update(struct perf_event *event);
-static inline int x86_pmu_addr_offset(int index)
+static inline unsigned int x86_pmu_config_addr(int index)
{
- int offset;
-
- /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
- alternative_io(ASM_NOP2,
- "shll $1, %%eax",
- X86_FEATURE_PERFCTR_CORE,
- "=a" (offset),
- "a" (index));
-
- return offset;
+ return x86_pmu.eventsel + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, true) : index);
}
-static inline unsigned int x86_pmu_config_addr(int index)
+static inline unsigned int x86_pmu_event_addr(int index)
{
- return x86_pmu.eventsel + x86_pmu_addr_offset(index);
+ return x86_pmu.perfctr + (x86_pmu.addr_offset ?
+ x86_pmu.addr_offset(index, false) : index);
}
-static inline unsigned int x86_pmu_event_addr(int index)
+static inline int x86_pmu_rdpmc_index(int index)
{
- return x86_pmu.perfctr + x86_pmu_addr_offset(index);
+ return x86_pmu.rdpmc_index ? x86_pmu.rdpmc_index(index) : index;
}
int x86_setup_perfctr(struct perf_event *event);
@@ -536,6 +533,9 @@ static inline void set_linear_ip(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
+ssize_t x86_event_sysfs_show(char *page, u64 config, u64 event);
+ssize_t intel_event_sysfs_show(char *page, u64 config);
+
#ifdef CONFIG_CPU_SUP_AMD
int amd_pmu_init(void);
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index 4528ae7b6ec..dfdab42aed2 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -132,21 +132,102 @@ static u64 amd_pmu_event_map(int hw_event)
return amd_perfmon_event_map[hw_event];
}
-static int amd_pmu_hw_config(struct perf_event *event)
+static struct event_constraint *amd_nb_event_constraint;
+
+/*
+ * Previously calculated offsets
+ */
+static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly;
+static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly;
+
+/*
+ * Legacy CPUs:
+ * 4 counters starting at 0xc0010000 each offset by 1
+ *
+ * CPUs with core performance counter extensions:
+ * 6 counters starting at 0xc0010200 each offset by 2
+ *
+ * CPUs with north bridge performance counter extensions:
+ * 4 additional counters starting at 0xc0010240 each offset by 2
+ * (indexed right above either one of the above core counters)
+ */
+static inline int amd_pmu_addr_offset(int index, bool eventsel)
{
- int ret;
+ int offset, first, base;
- /* pass precise event sampling to ibs: */
- if (event->attr.precise_ip && get_ibs_caps())
- return -ENOENT;
+ if (!index)
+ return index;
+
+ if (eventsel)
+ offset = event_offsets[index];
+ else
+ offset = count_offsets[index];
+
+ if (offset)
+ return offset;
+
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * calculate the offset of NB counters with respect to
+ * base eventsel or perfctr
+ */
+
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+
+ if (eventsel)
+ base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel;
+ else
+ base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr;
+
+ offset = base + ((index - first) << 1);
+ } else if (!cpu_has_perfctr_core)
+ offset = index;
+ else
+ offset = index << 1;
+
+ if (eventsel)
+ event_offsets[index] = offset;
+ else
+ count_offsets[index] = offset;
+
+ return offset;
+}
+
+static inline int amd_pmu_rdpmc_index(int index)
+{
+ int ret, first;
+
+ if (!index)
+ return index;
+
+ ret = rdpmc_indexes[index];
- ret = x86_pmu_hw_config(event);
if (ret)
return ret;
- if (has_branch_stack(event))
- return -EOPNOTSUPP;
+ if (amd_nb_event_constraint &&
+ test_bit(index, amd_nb_event_constraint->idxmsk)) {
+ /*
+ * according to the mnual, ECX value of the NB counters is
+ * the index of the NB counter (0, 1, 2 or 3) plus 6
+ */
+ first = find_first_bit(amd_nb_event_constraint->idxmsk,
+ X86_PMC_IDX_MAX);
+ ret = index - first + 6;
+ } else
+ ret = index;
+
+ rdpmc_indexes[index] = ret;
+
+ return ret;
+}
+
+static int amd_core_hw_config(struct perf_event *event)
+{
if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0
@@ -156,14 +237,37 @@ static int amd_pmu_hw_config(struct perf_event *event)
event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
ARCH_PERFMON_EVENTSEL_OS);
else if (event->attr.exclude_host)
- event->hw.config |= AMD_PERFMON_EVENTSEL_GUESTONLY;
+ event->hw.config |= AMD64_EVENTSEL_GUESTONLY;
else if (event->attr.exclude_guest)
- event->hw.config |= AMD_PERFMON_EVENTSEL_HOSTONLY;
+ event->hw.config |= AMD64_EVENTSEL_HOSTONLY;
- if (event->attr.type != PERF_TYPE_RAW)
- return 0;
+ return 0;
+}
- event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+/*
+ * NB counters do not support the following event select bits:
+ * Host/Guest only
+ * Counter mask
+ * Invert counter mask
+ * Edge detect
+ * OS/User mode
+ */
+static int amd_nb_hw_config(struct perf_event *event)
+{
+ /* for NB, we only allow system wide counting mode */
+ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ if (event->attr.exclude_user || event->attr.exclude_kernel ||
+ event->attr.exclude_host || event->attr.exclude_guest)
+ return -EINVAL;
+
+ event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR |
+ ARCH_PERFMON_EVENTSEL_OS);
+
+ if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB |
+ ARCH_PERFMON_EVENTSEL_INT))
+ return -EINVAL;
return 0;
}
@@ -181,6 +285,11 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}
+static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc)
+{
+ return amd_nb_event_constraint && amd_is_nb_event(hwc);
+}
+
static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;
@@ -188,20 +297,37 @@ static inline int amd_has_nb(struct cpu_hw_events *cpuc)
return nb && nb->nb_id != -1;
}
-static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
- struct perf_event *event)
+static int amd_pmu_hw_config(struct perf_event *event)
+{
+ int ret;
+
+ /* pass precise event sampling to ibs: */
+ if (event->attr.precise_ip && get_ibs_caps())
+ return -ENOENT;
+
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ ret = x86_pmu_hw_config(event);
+ if (ret)
+ return ret;
+
+ if (event->attr.type == PERF_TYPE_RAW)
+ event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;
+
+ if (amd_is_perfctr_nb_event(&event->hw))
+ return amd_nb_hw_config(event);
+
+ return amd_core_hw_config(event);
+}
+
+static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
{
- struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
int i;
/*
- * only care about NB events
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return;
-
- /*
* need to scan whole list because event may not have
* been assigned during scheduling
*
@@ -215,6 +341,19 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
}
}
+static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc)
+{
+ int core_id = cpu_data(smp_processor_id()).cpu_core_id;
+
+ /* deliver interrupts only to this core */
+ if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) {
+ hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE;
+ hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK;
+ hwc->config |= (u64)(core_id) <<
+ AMD64_EVENTSEL_INT_CORE_SEL_SHIFT;
+ }
+}
+
/*
* AMD64 NorthBridge events need special treatment because
* counter access needs to be synchronized across all cores
@@ -247,24 +386,24 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
*
* Given that resources are allocated (cmpxchg), they must be
* eventually freed for others to use. This is accomplished by
- * calling amd_put_event_constraints().
+ * calling __amd_put_nb_event_constraints()
*
* Non NB events are not impacted by this restriction.
*/
static struct event_constraint *
-amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+__amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
+ struct event_constraint *c)
{
struct hw_perf_event *hwc = &event->hw;
struct amd_nb *nb = cpuc->amd_nb;
- struct perf_event *old = NULL;
- int max = x86_pmu.num_counters;
- int i, j, k = -1;
+ struct perf_event *old;
+ int idx, new = -1;
- /*
- * if not NB event or no NB, then no constraints
- */
- if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
- return &unconstrained;
+ if (!c)
+ c = &unconstrained;
+
+ if (cpuc->is_fake)
+ return c;
/*
* detect if already present, if so reuse
@@ -276,48 +415,36 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
* because of successive calls to x86_schedule_events() from
* hw_perf_group_sched_in() without hw_perf_enable()
*/
- for (i = 0; i < max; i++) {
- /*
- * keep track of first free slot
- */
- if (k == -1 && !nb->owners[i])
- k = i;
+ for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) {
+ if (new == -1 || hwc->idx == idx)
+ /* assign free slot, prefer hwc->idx */
+ old = cmpxchg(nb->owners + idx, NULL, event);
+ else if (nb->owners[idx] == event)
+ /* event already present */
+ old = event;
+ else
+ continue;
+
+ if (old && old != event)
+ continue;
+
+ /* reassign to this slot */
+ if (new != -1)
+ cmpxchg(nb->owners + new, event, NULL);
+ new = idx;
/* already present, reuse */
- if (nb->owners[i] == event)
- goto done;
- }
- /*
- * not present, so grab a new slot
- * starting either at:
- */
- if (hwc->idx != -1) {
- /* previous assignment */
- i = hwc->idx;
- } else if (k != -1) {
- /* start from free slot found */
- i = k;
- } else {
- /*
- * event not found, no slot found in
- * first pass, try again from the
- * beginning
- */
- i = 0;
- }
- j = i;
- do {
- old = cmpxchg(nb->owners+i, NULL, event);
- if (!old)
+ if (old == event)
break;
- if (++i == max)
- i = 0;
- } while (i != j);
-done:
- if (!old)
- return &nb->event_constraints[i];
-
- return &emptyconstraint;
+ }
+
+ if (new == -1)
+ return &emptyconstraint;
+
+ if (amd_is_perfctr_nb_event(hwc))
+ amd_nb_interrupt_hw_config(hwc);
+
+ return &nb->event_constraints[new];
}
static struct amd_nb *amd_alloc_nb(int cpu)
@@ -364,7 +491,7 @@ static void amd_pmu_cpu_starting(int cpu)
struct amd_nb *nb;
int i, nb_id;
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
if (boot_cpu_data.x86_max_cores < 2)
return;
@@ -407,6 +534,26 @@ static void amd_pmu_cpu_dead(int cpu)
}
}
+static struct event_constraint *
+amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+ /*
+ * if not NB event or no NB, then no constraints
+ */
+ if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw)))
+ return &unconstrained;
+
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
+}
+
+static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
+ struct perf_event *event)
+{
+ if (amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))
+ __amd_put_nb_event_constraints(cpuc, event);
+}
+
PMU_FORMAT_ATTR(event, "config:0-7,32-35");
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -496,6 +643,9 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09,
static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
+static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0);
+static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0);
+
static struct event_constraint *
amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event)
{
@@ -561,13 +711,21 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev
return &amd_f15_PMC20;
}
case AMD_EVENT_NB:
- /* not yet implemented */
- return &emptyconstraint;
+ return __amd_get_nb_event_constraints(cpuc, event,
+ amd_nb_event_constraint);
default:
return &emptyconstraint;
}
}
+static ssize_t amd_event_sysfs_show(char *page, u64 config)
+{
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT) |
+ (config & AMD64_EVENTSEL_EVENT) >> 24;
+
+ return x86_event_sysfs_show(page, config, event);
+}
+
static __initconst const struct x86_pmu amd_pmu = {
.name = "AMD",
.handle_irq = x86_pmu_handle_irq,
@@ -579,6 +737,8 @@ static __initconst const struct x86_pmu amd_pmu = {
.schedule_events = x86_schedule_events,
.eventsel = MSR_K7_EVNTSEL0,
.perfctr = MSR_K7_PERFCTR0,
+ .addr_offset = amd_pmu_addr_offset,
+ .rdpmc_index = amd_pmu_rdpmc_index,
.event_map = amd_pmu_event_map,
.max_events = ARRAY_SIZE(amd_perfmon_event_map),
.num_counters = AMD64_NUM_COUNTERS,
@@ -591,6 +751,7 @@ static __initconst const struct x86_pmu amd_pmu = {
.put_event_constraints = amd_put_event_constraints,
.format_attrs = amd_format_attr,
+ .events_sysfs_show = amd_event_sysfs_show,
.cpu_prepare = amd_pmu_cpu_prepare,
.cpu_starting = amd_pmu_cpu_starting,
@@ -599,7 +760,7 @@ static __initconst const struct x86_pmu amd_pmu = {
static int setup_event_constraints(void)
{
- if (boot_cpu_data.x86 >= 0x15)
+ if (boot_cpu_data.x86 == 0x15)
x86_pmu.get_event_constraints = amd_get_event_constraints_f15h;
return 0;
}
@@ -629,6 +790,23 @@ static int setup_perfctr_core(void)
return 0;
}
+static int setup_perfctr_nb(void)
+{
+ if (!cpu_has_perfctr_nb)
+ return -ENODEV;
+
+ x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB;
+
+ if (cpu_has_perfctr_core)
+ amd_nb_event_constraint = &amd_NBPMC96;
+ else
+ amd_nb_event_constraint = &amd_NBPMC74;
+
+ printk(KERN_INFO "perf: AMD northbridge performance counters detected\n");
+
+ return 0;
+}
+
__init int amd_pmu_init(void)
{
/* Performance-monitoring supported from K7 and later: */
@@ -639,6 +817,7 @@ __init int amd_pmu_init(void)
setup_event_constraints();
setup_perfctr_core();
+ setup_perfctr_nb();
/* Events are common for all AMDs */
memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
@@ -669,7 +848,7 @@ void amd_pmu_disable_virt(void)
* SVM is disabled the Guest-only bits still gets set and the counter
* will not count anything.
*/
- cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY;
+ cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY;
/* Reload all events */
x86_pmu_disable_all();
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 324bb523d9d..4914e94ad6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1603,6 +1603,13 @@ static struct attribute *intel_arch_formats_attr[] = {
NULL,
};
+ssize_t intel_event_sysfs_show(char *page, u64 config)
+{
+ u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
+
+ return x86_event_sysfs_show(page, config, event);
+}
+
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
@@ -1628,6 +1635,7 @@ static __initconst const struct x86_pmu core_pmu = {
.event_constraints = intel_core_event_constraints,
.guest_get_msrs = core_guest_get_msrs,
.format_attrs = intel_arch_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
};
struct intel_shared_regs *allocate_shared_regs(int cpu)
@@ -1766,6 +1774,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.pebs_aliases = intel_pebs_aliases_core2,
.format_attrs = intel_arch3_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
.cpu_prepare = intel_pmu_cpu_prepare,
.cpu_starting = intel_pmu_cpu_starting,
@@ -2010,7 +2019,10 @@ __init int intel_pmu_init(void)
break;
case 28: /* Atom */
- case 54: /* Cedariew */
+ case 38: /* Lincroft */
+ case 39: /* Penwell */
+ case 53: /* Cloverview */
+ case 54: /* Cedarview */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
@@ -2075,6 +2087,7 @@ __init int intel_pmu_init(void)
pr_cont("SandyBridge events, ");
break;
case 58: /* IvyBridge */
+ case 62: /* IvyBridge EP */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 3cf3d97cce3..b43200dbfe7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2500,7 +2500,7 @@ static bool pcidrv_registered;
/*
* add a pci uncore device
*/
-static int __devinit uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
+static int uncore_pci_add(struct intel_uncore_type *type, struct pci_dev *pdev)
{
struct intel_uncore_pmu *pmu;
struct intel_uncore_box *box;
@@ -2571,8 +2571,8 @@ static void uncore_pci_remove(struct pci_dev *pdev)
kfree(box);
}
-static int __devinit uncore_pci_probe(struct pci_dev *pdev,
- const struct pci_device_id *id)
+static int uncore_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
{
struct intel_uncore_type *type;
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 7d0270bd793..4820c232a0b 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -19,7 +19,7 @@ static const u64 p6_perfmon_event_map[] =
};
-static __initconst u64 p6_hw_cache_event_ids
+static u64 p6_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] =
@@ -227,6 +227,8 @@ static __initconst const struct x86_pmu p6_pmu = {
.event_constraints = p6_event_constraints,
.format_attrs = intel_p6_formats_attr,
+ .events_sysfs_show = intel_event_sysfs_show,
+
};
__init int p6_pmu_init(void)
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index fbd89556229..3286a92e662 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -26,11 +26,6 @@ static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c,
#ifdef CONFIG_X86_32
static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
{
- /*
- * We use exception 16 if we have hardware math and we've either seen
- * it or the CPU claims it is internal
- */
- int fpu_exception = c->hard_math && (ignore_fpu_irq || cpu_has_fpu);
seq_printf(m,
"fdiv_bug\t: %s\n"
"hlt_bug\t\t: %s\n"
@@ -45,7 +40,7 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
c->f00f_bug ? "yes" : "no",
c->coma_bug ? "yes" : "no",
c->hard_math ? "yes" : "no",
- fpu_exception ? "yes" : "no",
+ c->hard_math ? "yes" : "no",
c->cpuid_level,
c->wp_works_ok ? "yes" : "no");
}
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index d22d0c4edcf..03a36321ec5 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -33,6 +33,9 @@
#define VMWARE_PORT_CMD_GETVERSION 10
#define VMWARE_PORT_CMD_GETHZ 45
+#define VMWARE_PORT_CMD_GETVCPU_INFO 68
+#define VMWARE_PORT_CMD_LEGACY_X2APIC 3
+#define VMWARE_PORT_CMD_VCPU_RESERVED 31
#define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \
__asm__("inl (%%dx)" : \
@@ -125,10 +128,20 @@ static void __cpuinit vmware_set_cpu_features(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE);
}
+/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
+static bool __init vmware_legacy_x2apic_available(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ VMWARE_PORT(GETVCPU_INFO, eax, ebx, ecx, edx);
+ return (eax & (1 << VMWARE_PORT_CMD_VCPU_RESERVED)) == 0 &&
+ (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
+}
+
const __refconst struct hypervisor_x86 x86_hyper_vmware = {
.name = "VMware",
.detect = vmware_platform,
.set_cpu_features = vmware_set_cpu_features,
.init_platform = vmware_platform_setup,
+ .x2apic_available = vmware_legacy_x2apic_available,
};
EXPORT_SYMBOL(x86_hyper_vmware);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 13ad89971d4..74467feb4dc 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -16,6 +16,7 @@
#include <linux/delay.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
+#include <linux/module.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -30,6 +31,27 @@
int in_crash_kexec;
+/*
+ * This is used to VMCLEAR all VMCSs loaded on the
+ * processor. And when loading kvm_intel module, the
+ * callback function pointer will be assigned.
+ *
+ * protected by rcu.
+ */
+crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
+EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+
+static inline void cpu_crash_vmclear_loaded_vmcss(void)
+{
+ crash_vmclear_fn *do_vmclear_operation = NULL;
+
+ rcu_read_lock();
+ do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
+ if (do_vmclear_operation)
+ do_vmclear_operation();
+ rcu_read_unlock();
+}
+
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -46,6 +68,11 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#endif
crash_save_cpu(regs, cpu);
+ /*
+ * VMCLEAR VMCSs loaded on all cpus if needed.
+ */
+ cpu_crash_vmclear_loaded_vmcss();
+
/* Disable VMX or SVM if needed.
*
* We need to disable virtualization on all CPUs.
@@ -88,6 +115,11 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
kdump_nmi_shootdown_cpus();
+ /*
+ * VMCLEAR VMCSs loaded on this cpu if needed.
+ */
+ cpu_crash_vmclear_loaded_vmcss();
+
/* Booting kdump kernel with VMX or SVM enabled won't work,
* because (among other limitations) we can't disable paging
* with the virt flags.
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 88b725aa1d5..6ed91d9980e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -739,30 +739,11 @@ ENTRY(ptregs_##name) ; \
ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
-PTREGSCALL0(fork)
-PTREGSCALL0(vfork)
-PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
PTREGSCALL2(vm86)
PTREGSCALL1(vm86old)
-/* Clone is an oddball. The 4th arg is in %edi */
-ENTRY(ptregs_clone)
- CFI_STARTPROC
- leal 4(%esp),%eax
- pushl_cfi %eax
- pushl_cfi PT_EDI(%eax)
- movl PT_EDX(%eax),%ecx
- movl PT_ECX(%eax),%edx
- movl PT_EBX(%eax),%eax
- call sys_clone
- addl $8,%esp
- CFI_ADJUST_CFA_OFFSET -8
- ret
- CFI_ENDPROC
-ENDPROC(ptregs_clone)
-
.macro FIXUP_ESPFIX_STACK
/*
* Switch back for ESPFIX stack to the normal zerobased stack
@@ -1084,7 +1065,6 @@ ENTRY(xen_failsafe_callback)
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
- addl $16,%esp
jmp iret_exc
5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1328fe49a3f..cb3c591339a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,7 +56,7 @@
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <asm/asm.h>
-#include <asm/rcu.h>
+#include <asm/context_tracking.h>
#include <asm/smap.h>
#include <linux/err.h>
@@ -845,10 +845,25 @@ ENTRY(\label)
END(\label)
.endm
- PTREGSCALL stub_clone, sys_clone, %r8
- PTREGSCALL stub_fork, sys_fork, %rdi
- PTREGSCALL stub_vfork, sys_vfork, %rdi
- PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
+ .macro FORK_LIKE func
+ENTRY(stub_\func)
+ CFI_STARTPROC
+ popq %r11 /* save return address */
+ PARTIAL_FRAME 0
+ SAVE_REST
+ pushq %r11 /* put it back on stack */
+ FIXUP_TOP_OF_STACK %r11, 8
+ DEFAULT_FRAME 0 8 /* offset 8: return address */
+ call sys_\func
+ RESTORE_TOP_OF_STACK %r11, 8
+ ret $REST_SKIP /* pop extended registers */
+ CFI_ENDPROC
+END(stub_\func)
+ .endm
+
+ FORK_LIKE clone
+ FORK_LIKE fork
+ FORK_LIKE vfork
PTREGSCALL stub_iopl, sys_iopl, %rsi
ENTRY(ptregscall_common)
@@ -897,8 +912,6 @@ ENTRY(stub_rt_sigreturn)
END(stub_rt_sigreturn)
#ifdef CONFIG_X86_X32_ABI
- PTREGSCALL stub_x32_sigaltstack, sys32_sigaltstack, %rdx
-
ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
@@ -1699,9 +1712,10 @@ nested_nmi:
1:
/* Set up the interrupted NMIs stack to jump to repeat_nmi */
- leaq -6*8(%rsp), %rdx
+ leaq -1*8(%rsp), %rdx
movq %rdx, %rsp
- CFI_ADJUST_CFA_OFFSET 6*8
+ CFI_ADJUST_CFA_OFFSET 1*8
+ leaq -10*8(%rsp), %rdx
pushq_cfi $__KERNEL_DS
pushq_cfi %rdx
pushfq_cfi
@@ -1709,8 +1723,8 @@ nested_nmi:
pushq_cfi $repeat_nmi
/* Put stack back */
- addq $(11*8), %rsp
- CFI_ADJUST_CFA_OFFSET -11*8
+ addq $(6*8), %rsp
+ CFI_ADJUST_CFA_OFFSET -6*8
nested_nmi_out:
popq_cfi %rdx
@@ -1736,18 +1750,18 @@ first_nmi:
* +-------------------------+
* | NMI executing variable |
* +-------------------------+
- * | Saved SS |
- * | Saved Return RSP |
- * | Saved RFLAGS |
- * | Saved CS |
- * | Saved RIP |
- * +-------------------------+
* | copied SS |
* | copied Return RSP |
* | copied RFLAGS |
* | copied CS |
* | copied RIP |
* +-------------------------+
+ * | Saved SS |
+ * | Saved Return RSP |
+ * | Saved RFLAGS |
+ * | Saved CS |
+ * | Saved RIP |
+ * +-------------------------+
* | pt_regs |
* +-------------------------+
*
@@ -1763,9 +1777,15 @@ first_nmi:
/* Set the NMI executing variable on the stack. */
pushq_cfi $1
+ /*
+ * Leave room for the "copied" frame
+ */
+ subq $(5*8), %rsp
+ CFI_ADJUST_CFA_OFFSET 5*8
+
/* Copy the stack frame to the Saved frame */
.rept 5
- pushq_cfi 6*8(%rsp)
+ pushq_cfi 11*8(%rsp)
.endr
CFI_DEF_CFA_OFFSET SS+8-RIP
@@ -1786,12 +1806,15 @@ repeat_nmi:
* is benign for the non-repeat case, where 1 was pushed just above
* to this very stack slot).
*/
- movq $1, 5*8(%rsp)
+ movq $1, 10*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */
+ addq $(10*8), %rsp
+ CFI_ADJUST_CFA_OFFSET -10*8
.rept 5
- pushq_cfi 4*8(%rsp)
+ pushq_cfi -6*8(%rsp)
.endr
+ subq $(5*8), %rsp
CFI_DEF_CFA_OFFSET SS+8-RIP
end_repeat_nmi:
@@ -1841,9 +1864,11 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
- RESTORE_ALL 8
+ /* Pop the extra iret frame at once */
+ RESTORE_ALL 6*8
+
/* Clear the NMI executing stack variable */
- movq $0, 10*8(%rsp)
+ movq $0, 5*8(%rsp)
jmp irq_return
CFI_ENDPROC
END(nmi)
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index c18f59d1010..6773c918b8c 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -18,6 +18,7 @@
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
#include <asm/tlbflush.h>
+#include <asm/bootparam_utils.h>
static void __init i386_default_early_setup(void)
{
@@ -30,6 +31,8 @@ static void __init i386_default_early_setup(void)
void __init i386_start_kernel(void)
{
+ sanitize_boot_params(&boot_params);
+
memblock_reserve(__pa_symbol(&_text),
__pa_symbol(&__bss_stop) - __pa_symbol(&_text));
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 037df57a99a..849fc9e63c2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -25,6 +25,7 @@
#include <asm/kdebug.h>
#include <asm/e820.h>
#include <asm/bios_ebda.h>
+#include <asm/bootparam_utils.h>
static void __init zap_identity_mappings(void)
{
@@ -46,6 +47,7 @@ static void __init copy_bootdata(char *real_mode_data)
char * command_line;
memcpy(&boot_params, real_mode_data, sizeof boot_params);
+ sanitize_boot_params(&boot_params);
if (boot_params.hdr.cmd_line_ptr) {
command_line = __va(boot_params.hdr.cmd_line_ptr);
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 4dac2f68ed4..3c3f58a0808 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -266,6 +266,19 @@ num_subarch_entries = (. - subarch_entries) / 4
jmp default_entry
#endif /* CONFIG_PARAVIRT */
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
+ * up already except stack. We just set up stack here. Then call
+ * start_secondary().
+ */
+ENTRY(start_cpu0)
+ movl stack_start, %ecx
+ movl %ecx, %esp
+ jmp *(initial_code)
+ENDPROC(start_cpu0)
+#endif
+
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
@@ -287,37 +300,52 @@ ENTRY(startup_32_smp)
leal -__PAGE_OFFSET(%ecx),%esp
default_entry:
+#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+ X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+ X86_CR0_PG)
+ movl $(CR0_STATE & ~X86_CR0_PG),%eax
+ movl %eax,%cr0
+
+/*
+ * We want to start out with EFLAGS unambiguously cleared. Some BIOSes leave
+ * bits like NT set. This would confuse the debugger if this code is traced. So
+ * initialize them properly now before switching to protected mode. That means
+ * DF in particular (even though we have cleared it earlier after copying the
+ * command line) because GCC expects it.
+ */
+ pushl $0
+ popfl
+
/*
- * New page tables may be in 4Mbyte page mode and may
- * be using the global pages.
+ * New page tables may be in 4Mbyte page mode and may be using the global pages.
*
- * NOTE! If we are on a 486 we may have no cr4 at all!
- * Specifically, cr4 exists if and only if CPUID exists
- * and has flags other than the FPU flag set.
+ * NOTE! If we are on a 486 we may have no cr4 at all! Specifically, cr4 exists
+ * if and only if CPUID exists and has flags other than the FPU flag set.
*/
+ movl $-1,pa(X86_CPUID) # preset CPUID level
movl $X86_EFLAGS_ID,%ecx
pushl %ecx
- popfl
- pushfl
- popl %eax
- pushl $0
- popfl
+ popfl # set EFLAGS=ID
pushfl
- popl %edx
- xorl %edx,%eax
- testl %ecx,%eax
- jz 6f # No ID flag = no CPUID = no CR4
+ popl %eax # get EFLAGS
+ testl $X86_EFLAGS_ID,%eax # did EFLAGS.ID remained set?
+ jz enable_paging # hw disallowed setting of ID bit
+ # which means no CPUID and no CR4
+
+ xorl %eax,%eax
+ cpuid
+ movl %eax,pa(X86_CPUID) # save largest std CPUID function
movl $1,%eax
cpuid
- andl $~1,%edx # Ignore CPUID.FPU
- jz 6f # No flags or only CPUID.FPU = no CR4
+ andl $~1,%edx # Ignore CPUID.FPU
+ jz enable_paging # No flags or only CPUID.FPU = no CR4
movl pa(mmu_cr4_features),%eax
movl %eax,%cr4
testb $X86_CR4_PAE, %al # check if PAE is enabled
- jz 6f
+ jz enable_paging
/* Check if extended functions are implemented */
movl $0x80000000, %eax
@@ -325,7 +353,7 @@ default_entry:
/* Value must be in the range 0x80000001 to 0x8000ffff */
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
- ja 6f
+ ja enable_paging
/* Clear bogus XD_DISABLE bits */
call verify_cpu
@@ -334,7 +362,7 @@ default_entry:
cpuid
/* Execute Disable bit supported? */
btl $(X86_FEATURE_NX & 31), %edx
- jnc 6f
+ jnc enable_paging
/* Setup EFER (Extended Feature Enable Register) */
movl $MSR_EFER, %ecx
@@ -344,15 +372,14 @@ default_entry:
/* Make changes effective */
wrmsr
-6:
+enable_paging:
/*
* Enable paging
*/
movl $pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
- movl %cr0,%eax
- orl $X86_CR0_PG,%eax
+ movl $CR0_STATE,%eax
movl %eax,%cr0 /* ..and set paging (PG) bit */
ljmp $__BOOT_CS,$1f /* Clear prefetch and normalize %eip */
1:
@@ -360,14 +387,6 @@ default_entry:
addl $__PAGE_OFFSET, %esp
/*
- * Initialize eflags. Some BIOS's leave bits like NT set. This would
- * confuse the debugger if this code is traced.
- * XXX - best to initialize before switching to protected mode.
- */
- pushl $0
- popfl
-
-/*
* start system 32-bit setup. We need to re-do some of the things done
* in 16-bit mode for the "real" operations.
*/
@@ -376,31 +395,11 @@ default_entry:
jz 1f # Did we do this already?
call *%eax
1:
-
-/* check if it is 486 or 386. */
+
/*
- * XXX - this does a lot of unnecessary setup. Alignment checks don't
- * apply at our cpl of 0 and the stack ought to be aligned already, and
- * we don't need to preserve eflags.
+ * Check if it is 486
*/
- movl $-1,X86_CPUID # -1 for no CPUID initially
- movb $3,X86 # at least 386
- pushfl # push EFLAGS
- popl %eax # get EFLAGS
- movl %eax,%ecx # save original EFLAGS
- xorl $0x240000,%eax # flip AC and ID bits in EFLAGS
- pushl %eax # copy to EFLAGS
- popfl # set EFLAGS
- pushfl # get new EFLAGS
- popl %eax # put it in eax
- xorl %ecx,%eax # change in flags
- pushl %ecx # restore original EFLAGS
- popfl
- testl $0x40000,%eax # check if AC bit changed
- je is386
-
- movb $4,X86 # at least 486
- testl $0x200000,%eax # check if ID bit changed
+ cmpl $-1,X86_CPUID
je is486
/* get vendor info */
@@ -426,11 +425,10 @@ default_entry:
movb %cl,X86_MASK
movl %edx,X86_CAPABILITY
-is486: movl $0x50022,%ecx # set AM, WP, NE and MP
- jmp 2f
-
-is386: movl $2,%ecx # set MP
-2: movl %cr0,%eax
+is486:
+ movb $4,X86
+ movl $0x50022,%ecx # set AM, WP, NE and MP
+ movl %cr0,%eax
andl $0x80000011,%eax # Save PG,PE,ET
orl %ecx,%eax
movl %eax,%cr0
@@ -455,7 +453,6 @@ is386: movl $2,%ecx # set MP
xorl %eax,%eax # Clear LDT
lldt %ax
- cld # gcc2 wants the direction flag cleared at all times
pushl $0 # fake return address for unwinder
jmp *(initial_code)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 94bf9cc2c7e..980053c4b9c 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -252,6 +252,22 @@ ENTRY(secondary_startup_64)
pushq %rax # target address in negative space
lretq
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
+ * up already except stack. We just set up stack here. Then call
+ * start_secondary().
+ */
+ENTRY(start_cpu0)
+ movq stack_start(%rip),%rsp
+ movq initial_code(%rip),%rax
+ pushq $0 # fake return address to stop unwinder
+ pushq $__KERNEL_CS # set correct cs
+ pushq %rax # target address in negative space
+ lretq
+ENDPROC(start_cpu0)
+#endif
+
/* SMP bootup changes these two */
__REFDATA
.align 8
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 1460a5df92f..da85a8e830a 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -434,7 +434,7 @@ void hpet_msi_unmask(struct irq_data *data)
/* unmask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
- cfg |= HPET_TN_FSB;
+ cfg |= HPET_TN_ENABLE | HPET_TN_FSB;
hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
}
@@ -445,7 +445,7 @@ void hpet_msi_mask(struct irq_data *data)
/* mask it */
cfg = hpet_readl(HPET_Tn_CFG(hdev->num));
- cfg &= ~HPET_TN_FSB;
+ cfg &= ~(HPET_TN_ENABLE | HPET_TN_FSB);
hpet_writel(cfg, HPET_Tn_CFG(hdev->num));
}
@@ -478,7 +478,7 @@ static int hpet_msi_next_event(unsigned long delta,
static int hpet_setup_msi_irq(unsigned int irq)
{
- if (arch_setup_hpet_msi(irq, hpet_blockid)) {
+ if (x86_msi.setup_hpet_msi(irq, hpet_blockid)) {
destroy_irq(irq);
return -EINVAL;
}
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 675a0501244..245a71db401 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -175,7 +175,11 @@ void __cpuinit fpu_init(void)
cr0 |= X86_CR0_EM;
write_cr0(cr0);
- if (!smp_processor_id())
+ /*
+ * init_thread_xstate is only called once to avoid overriding
+ * xstate_size during boot time or during CPU hotplug.
+ */
+ if (xstate_size == 0)
init_thread_xstate();
mxcsr_feature_mask_init();
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 6e03b0d6913..7dc4e459c2b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -42,39 +42,6 @@
* (these are usually mapped into the 0x30-0xff vector range)
*/
-#ifdef CONFIG_X86_32
-/*
- * Note that on a 486, we don't want to do a SIGFPE on an irq13
- * as the irq is unreliable, and exception 16 works correctly
- * (ie as explained in the intel literature). On a 386, you
- * can't use exception 16 due to bad IBM design, so we have to
- * rely on the less exact irq13.
- *
- * Careful.. Not only is IRQ13 unreliable, but it is also
- * leads to races. IBM designers who came up with it should
- * be shot.
- */
-
-static irqreturn_t math_error_irq(int cpl, void *dev_id)
-{
- outb(0, 0xF0);
- if (ignore_fpu_irq || !boot_cpu_data.hard_math)
- return IRQ_NONE;
- math_error(get_irq_regs(), 0, X86_TRAP_MF);
- return IRQ_HANDLED;
-}
-
-/*
- * New motherboards sometimes make IRQ 13 be a PCI interrupt,
- * so allow interrupt sharing.
- */
-static struct irqaction fpu_irq = {
- .handler = math_error_irq,
- .name = "fpu",
- .flags = IRQF_NO_THREAD,
-};
-#endif
-
/*
* IRQ2 is cascade interrupt to second interrupt controller
*/
@@ -242,13 +209,6 @@ void __init native_init_IRQ(void)
setup_irq(2, &irq2);
#ifdef CONFIG_X86_32
- /*
- * External FPU? Set up irq13 if so, for
- * original braindamaged IBM FERR coupling.
- */
- if (boot_cpu_data.hard_math && !cpu_has_fpu)
- setup_irq(FPU_IRQ, &fpu_irq);
-
irq_ctx_init(smp_processor_id());
#endif
}
diff --git a/arch/x86/kernel/kprobes/Makefile b/arch/x86/kernel/kprobes/Makefile
new file mode 100644
index 00000000000..0d33169cc1a
--- /dev/null
+++ b/arch/x86/kernel/kprobes/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for kernel probes
+#
+
+obj-$(CONFIG_KPROBES) += core.o
+obj-$(CONFIG_OPTPROBES) += opt.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
diff --git a/arch/x86/kernel/kprobes-common.h b/arch/x86/kernel/kprobes/common.h
index 3230b68ef29..2e9d4b5af03 100644
--- a/arch/x86/kernel/kprobes-common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -99,4 +99,15 @@ static inline unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsig
return addr;
}
#endif
+
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb);
+#else
+static inline int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ return 0;
+}
+#endif
#endif
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes/core.c
index 57916c0d3cf..e124554598e 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -58,7 +58,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
void jprobe_return_end(void);
@@ -78,7 +78,7 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
* Groups, and some special opcodes can not boost.
* This is non-const and volatile to keep gcc from statically
* optimizing it out, as variable_test_bit makes gcc think only
- * *(unsigned long*) is used.
+ * *(unsigned long*) is used.
*/
static volatile u32 twobyte_is_boostable[256 / 32] = {
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
@@ -117,7 +117,7 @@ static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op)
struct __arch_relative_insn {
u8 op;
s32 raddr;
- } __attribute__((packed)) *insn;
+ } __packed *insn;
insn = (struct __arch_relative_insn *)from;
insn->raddr = (s32)((long)(to) - ((long)(from) + 5));
@@ -541,23 +541,6 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
return 1;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
- struct kprobe_ctlblk *kcb)
-{
- /*
- * Emulate singlestep (and also recover regs->ip)
- * as if there is a 5byte nop
- */
- regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
- if (unlikely(p->post_handler)) {
- kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, regs, 0);
- }
- __this_cpu_write(current_kprobe, NULL);
-}
-#endif
-
/*
* Interrupts are disabled on entry as trap3 is an interrupt gate and they
* remain disabled throughout this function.
@@ -616,13 +599,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
} else if (kprobe_running()) {
p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
-#ifdef KPROBES_CAN_USE_FTRACE
- if (kprobe_ftrace(p)) {
- skip_singlestep(p, regs, kcb);
- return 1;
- }
-#endif
- setup_singlestep(p, regs, kcb, 0);
+ if (!skip_singlestep(p, regs, kcb))
+ setup_singlestep(p, regs, kcb, 0);
return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
@@ -1075,50 +1053,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
return 0;
}
-#ifdef KPROBES_CAN_USE_FTRACE
-/* Ftrace callback handler for kprobes */
-void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct pt_regs *regs)
-{
- struct kprobe *p;
- struct kprobe_ctlblk *kcb;
- unsigned long flags;
-
- /* Disable irq for emulating a breakpoint and avoiding preempt */
- local_irq_save(flags);
-
- p = get_kprobe((kprobe_opcode_t *)ip);
- if (unlikely(!p) || kprobe_disabled(p))
- goto end;
-
- kcb = get_kprobe_ctlblk();
- if (kprobe_running()) {
- kprobes_inc_nmissed_count(p);
- } else {
- /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
- regs->ip = ip + sizeof(kprobe_opcode_t);
-
- __this_cpu_write(current_kprobe, p);
- kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, regs))
- skip_singlestep(p, regs, kcb);
- /*
- * If pre_handler returns !0, it sets regs->ip and
- * resets current kprobe.
- */
- }
-end:
- local_irq_restore(flags);
-}
-
-int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
-{
- p->ainsn.insn = NULL;
- p->ainsn.boostable = -1;
- return 0;
-}
-#endif
-
int __init arch_init_kprobes(void)
{
return arch_init_optprobes();
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
new file mode 100644
index 00000000000..23ef5c556f0
--- /dev/null
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -0,0 +1,93 @@
+/*
+ * Dynamic Ftrace based Kprobes Optimization
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) Hitachi Ltd., 2012
+ */
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+#include <linux/hardirq.h>
+#include <linux/preempt.h>
+#include <linux/ftrace.h>
+
+#include "common.h"
+
+static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ /*
+ * Emulate singlestep (and also recover regs->ip)
+ * as if there is a 5byte nop
+ */
+ regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, regs, 0);
+ }
+ __this_cpu_write(current_kprobe, NULL);
+ return 1;
+}
+
+int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ if (kprobe_ftrace(p))
+ return __skip_singlestep(p, regs, kcb);
+ else
+ return 0;
+}
+
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+ unsigned long flags;
+
+ /* Disable irq for emulating a breakpoint and avoiding preempt */
+ local_irq_save(flags);
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ goto end;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+ regs->ip = ip + sizeof(kprobe_opcode_t);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ __skip_singlestep(p, regs, kcb);
+ /*
+ * If pre_handler returns !0, it sets regs->ip and
+ * resets current kprobe.
+ */
+ }
+end:
+ local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.insn = NULL;
+ p->ainsn.boostable = -1;
+ return 0;
+}
diff --git a/arch/x86/kernel/kprobes-opt.c b/arch/x86/kernel/kprobes/opt.c
index c5e410eed40..76dc6f09572 100644
--- a/arch/x86/kernel/kprobes-opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -37,7 +37,7 @@
#include <asm/insn.h>
#include <asm/debugreg.h>
-#include "kprobes-common.h"
+#include "common.h"
unsigned long __recover_optprobed_insn(kprobe_opcode_t *buf, unsigned long addr)
{
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 4180a874c76..2b44ea5f269 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -42,6 +42,8 @@
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
+#include <asm/kvm_guest.h>
+#include <asm/context_tracking.h>
static int kvmapf = 1;
@@ -62,6 +64,15 @@ static int parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
+static int kvmclock_vsyscall = 1;
+static int parse_no_kvmclock_vsyscall(char *arg)
+{
+ kvmclock_vsyscall = 0;
+ return 0;
+}
+
+early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
+
static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
static int has_steal_clock = 0;
@@ -110,11 +121,8 @@ void kvm_async_pf_task_wait(u32 token)
struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
struct kvm_task_sleep_node n, *e;
DEFINE_WAIT(wait);
- int cpu, idle;
- cpu = get_cpu();
- idle = idle_cpu(cpu);
- put_cpu();
+ rcu_irq_enter();
spin_lock(&b->lock);
e = _find_apf_task(b, token);
@@ -123,12 +131,14 @@ void kvm_async_pf_task_wait(u32 token)
hlist_del(&e->link);
kfree(e);
spin_unlock(&b->lock);
+
+ rcu_irq_exit();
return;
}
n.token = token;
n.cpu = smp_processor_id();
- n.halted = idle || preempt_count() > 1;
+ n.halted = is_idle_task(current) || preempt_count() > 1;
init_waitqueue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
spin_unlock(&b->lock);
@@ -147,13 +157,16 @@ void kvm_async_pf_task_wait(u32 token)
/*
* We cannot reschedule. So halt.
*/
+ rcu_irq_exit();
native_safe_halt();
+ rcu_irq_enter();
local_irq_disable();
}
}
if (!n.halted)
finish_wait(&n.wq, &wait);
+ rcu_irq_exit();
return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
@@ -247,10 +260,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
- rcu_irq_enter();
+ exception_enter(regs);
exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
- rcu_irq_exit();
+ exception_exit(regs);
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
@@ -471,6 +484,9 @@ void __init kvm_guest_init(void)
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
apic_set_eoi_write(kvm_guest_apic_eoi_write);
+ if (kvmclock_vsyscall)
+ kvm_setup_vsyscall_timeinfo();
+
#ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier);
@@ -489,6 +505,7 @@ static bool __init kvm_detect(void)
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
.name = "KVM",
.detect = kvm_detect,
+ .x2apic_available = kvm_para_available,
};
EXPORT_SYMBOL_GPL(x86_hyper_kvm);
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f1b42b3a186..220a360010f 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -23,6 +23,7 @@
#include <asm/apic.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/memblock.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
@@ -39,7 +40,7 @@ static int parse_no_kvmclock(char *arg)
early_param("no-kvmclock", parse_no_kvmclock);
/* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+static struct pvclock_vsyscall_time_info *hv_clock;
static struct pvclock_wall_clock wall_clock;
/*
@@ -52,15 +53,20 @@ static unsigned long kvm_get_wallclock(void)
struct pvclock_vcpu_time_info *vcpu_time;
struct timespec ts;
int low, high;
+ int cpu;
low = (int)__pa_symbol(&wall_clock);
high = ((u64)__pa_symbol(&wall_clock) >> 32);
native_write_msr(msr_kvm_wall_clock, low, high);
- vcpu_time = &get_cpu_var(hv_clock);
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ vcpu_time = &hv_clock[cpu].pvti;
pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
- put_cpu_var(hv_clock);
+
+ preempt_enable();
return ts.tv_sec;
}
@@ -74,9 +80,11 @@ static cycle_t kvm_clock_read(void)
{
struct pvclock_vcpu_time_info *src;
cycle_t ret;
+ int cpu;
preempt_disable_notrace();
- src = &__get_cpu_var(hv_clock);
+ cpu = smp_processor_id();
+ src = &hv_clock[cpu].pvti;
ret = pvclock_clocksource_read(src);
preempt_enable_notrace();
return ret;
@@ -99,8 +107,15 @@ static cycle_t kvm_clock_get_cycles(struct clocksource *cs)
static unsigned long kvm_get_tsc_khz(void)
{
struct pvclock_vcpu_time_info *src;
- src = &per_cpu(hv_clock, 0);
- return pvclock_tsc_khz(src);
+ int cpu;
+ unsigned long tsc_khz;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ src = &hv_clock[cpu].pvti;
+ tsc_khz = pvclock_tsc_khz(src);
+ preempt_enable();
+ return tsc_khz;
}
static void kvm_get_preset_lpj(void)
@@ -119,10 +134,14 @@ bool kvm_check_and_clear_guest_paused(void)
{
bool ret = false;
struct pvclock_vcpu_time_info *src;
+ int cpu = smp_processor_id();
- src = &__get_cpu_var(hv_clock);
+ if (!hv_clock)
+ return ret;
+
+ src = &hv_clock[cpu].pvti;
if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
- __this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+ src->flags &= ~PVCLOCK_GUEST_STOPPED;
ret = true;
}
@@ -141,9 +160,10 @@ int kvm_register_clock(char *txt)
{
int cpu = smp_processor_id();
int low, high, ret;
+ struct pvclock_vcpu_time_info *src = &hv_clock[cpu].pvti;
- low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
- high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+ low = (int)__pa(src) | 1;
+ high = ((u64)__pa(src) >> 32);
ret = native_write_msr_safe(msr_kvm_system_time, low, high);
printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
cpu, high, low, txt);
@@ -197,6 +217,8 @@ static void kvm_shutdown(void)
void __init kvmclock_init(void)
{
+ unsigned long mem;
+
if (!kvm_para_available())
return;
@@ -209,8 +231,18 @@ void __init kvmclock_init(void)
printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
msr_kvm_system_time, msr_kvm_wall_clock);
- if (kvm_register_clock("boot clock"))
+ mem = memblock_alloc(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS,
+ PAGE_SIZE);
+ if (!mem)
+ return;
+ hv_clock = __va(mem);
+
+ if (kvm_register_clock("boot clock")) {
+ hv_clock = NULL;
+ memblock_free(mem,
+ sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
return;
+ }
pv_time_ops.sched_clock = kvm_clock_read;
x86_platform.calibrate_tsc = kvm_get_tsc_khz;
x86_platform.get_wallclock = kvm_get_wallclock;
@@ -233,3 +265,37 @@ void __init kvmclock_init(void)
if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
}
+
+int __init kvm_setup_vsyscall_timeinfo(void)
+{
+#ifdef CONFIG_X86_64
+ int cpu;
+ int ret;
+ u8 flags;
+ struct pvclock_vcpu_time_info *vcpu_time;
+ unsigned int size;
+
+ size = sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+
+ vcpu_time = &hv_clock[cpu].pvti;
+ flags = pvclock_read_flags(vcpu_time);
+
+ if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
+ preempt_enable();
+ return 1;
+ }
+
+ if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
+ preempt_enable();
+ return ret;
+ }
+
+ preempt_enable();
+
+ kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
+#endif
+ return 0;
+}
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index a7c5661f849..4929502c137 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -174,6 +174,9 @@ static int msr_open(struct inode *inode, struct file *file)
unsigned int cpu;
struct cpuinfo_x86 *c;
+ if (!capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
cpu = iminor(file->f_path.dentry->d_inode);
if (cpu >= nr_cpu_ids || !cpu_online(cpu))
return -ENXIO; /* No such CPU */
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index de2b7ad7027..872079a67e4 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -56,7 +56,7 @@ struct device x86_dma_fallback_dev = {
EXPORT_SYMBOL(x86_dma_fallback_dev);
/* Number of entries preallocated for DMA-API debugging */
-#define PREALLOC_DMA_DEBUG_ENTRIES 32768
+#define PREALLOC_DMA_DEBUG_ENTRIES 65536
int dma_set_mask(struct device *dev, u64 mask)
{
@@ -265,7 +265,7 @@ rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
-static __devinit void via_no_dac(struct pci_dev *dev)
+static void via_no_dac(struct pci_dev *dev)
{
if (forbid_dac == 0) {
dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b644e1c765d..2ed787f15bf 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -262,36 +262,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
propagate_user_return_notify(prev_p, next_p);
}
-int sys_fork(struct pt_regs *regs)
-{
- return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-int sys_vfork(struct pt_regs *regs)
-{
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
- NULL, NULL);
-}
-
-long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
-}
-
/*
* Idle related variables and functions
*/
@@ -306,11 +276,6 @@ void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
#endif
-static inline int hlt_use_halt(void)
-{
- return 1;
-}
-
#ifndef CONFIG_SMP
static inline void play_dead(void)
{
@@ -410,28 +375,22 @@ void cpu_idle(void)
*/
void default_idle(void)
{
- if (hlt_use_halt()) {
- trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
- trace_cpu_idle_rcuidle(1, smp_processor_id());
- current_thread_info()->status &= ~TS_POLLING;
- /*
- * TS_POLLING-cleared state must be visible before we
- * test NEED_RESCHED:
- */
- smp_mb();
+ trace_power_start_rcuidle(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle_rcuidle(1, smp_processor_id());
+ current_thread_info()->status &= ~TS_POLLING;
+ /*
+ * TS_POLLING-cleared state must be visible before we
+ * test NEED_RESCHED:
+ */
+ smp_mb();
- if (!need_resched())
- safe_halt(); /* enables interrupts racelessly */
- else
- local_irq_enable();
- current_thread_info()->status |= TS_POLLING;
- trace_power_end_rcuidle(smp_processor_id());
- trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
- } else {
+ if (!need_resched())
+ safe_halt(); /* enables interrupts racelessly */
+ else
local_irq_enable();
- /* loop is done by the caller */
- cpu_relax();
- }
+ current_thread_info()->status |= TS_POLLING;
+ trace_power_end_rcuidle(smp_processor_id());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id());
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 44e0bff38e7..b5a8905785e 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -128,8 +128,7 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long arg,
- struct task_struct *p, struct pt_regs *regs)
+ unsigned long arg, struct task_struct *p)
{
struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
@@ -138,7 +137,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp = (unsigned long) childregs;
p->thread.sp0 = (unsigned long) (childregs+1);
- if (unlikely(!regs)) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.ip = (unsigned long) ret_from_kernel_thread;
@@ -156,12 +155,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
return 0;
}
- *childregs = *regs;
+ *childregs = *current_pt_regs();
childregs->ax = 0;
- childregs->sp = sp;
+ if (sp)
+ childregs->sp = sp;
p->thread.ip = (unsigned long) ret_from_fork;
- task_user_gs(p) = get_user_gs(regs);
+ task_user_gs(p) = get_user_gs(current_pt_regs());
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 16c6365e2b8..6e68a619496 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,8 +146,7 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long arg,
- struct task_struct *p, struct pt_regs *regs)
+ unsigned long arg, struct task_struct *p)
{
int err;
struct pt_regs *childregs;
@@ -169,7 +168,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
- if (unlikely(!regs)) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->sp = (unsigned long)childregs;
@@ -181,10 +180,11 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
return 0;
}
- *childregs = *regs;
+ *childregs = *current_pt_regs();
childregs->ax = 0;
- childregs->sp = sp;
+ if (sp)
+ childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 974b67e46dd..b629bbe0d9b 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -23,6 +23,7 @@
#include <linux/hw_breakpoint.h>
#include <linux/rcupdate.h>
#include <linux/module.h>
+#include <linux/context_tracking.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -1491,7 +1492,7 @@ long syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
- rcu_user_exit();
+ user_exit();
/*
* If we stepped into a sysenter/syscall insn, it trapped in
@@ -1546,7 +1547,7 @@ void syscall_trace_leave(struct pt_regs *regs)
* or do_notify_resume(), in which case we can be in RCU
* user mode.
*/
- rcu_user_exit();
+ user_exit();
audit_syscall_exit(regs);
@@ -1564,5 +1565,5 @@ void syscall_trace_leave(struct pt_regs *regs)
if (step || test_thread_flag(TIF_SYSCALL_TRACE))
tracehook_report_syscall_exit(regs, step);
- rcu_user_enter();
+ user_enter();
}
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 42eb3300dfc..85c39590c1a 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -17,23 +17,13 @@
#include <linux/kernel.h>
#include <linux/percpu.h>
+#include <linux/notifier.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <linux/bootmem.h>
+#include <asm/fixmap.h>
#include <asm/pvclock.h>
-/*
- * These are perodically updated
- * xen: magic shared_info page
- * kvm: gpa registered via msr
- * and then copied here.
- */
-struct pvclock_shadow_time {
- u64 tsc_timestamp; /* TSC at last update of time vals. */
- u64 system_timestamp; /* Time, in nanosecs, since boot. */
- u32 tsc_to_nsec_mul;
- int tsc_shift;
- u32 version;
- u8 flags;
-};
-
static u8 valid_flags __read_mostly = 0;
void pvclock_set_flags(u8 flags)
@@ -41,34 +31,6 @@ void pvclock_set_flags(u8 flags)
valid_flags = flags;
}
-static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
-{
- u64 delta = native_read_tsc() - shadow->tsc_timestamp;
- return pvclock_scale_delta(delta, shadow->tsc_to_nsec_mul,
- shadow->tsc_shift);
-}
-
-/*
- * Reads a consistent set of time-base values from hypervisor,
- * into a shadow data area.
- */
-static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
- struct pvclock_vcpu_time_info *src)
-{
- do {
- dst->version = src->version;
- rmb(); /* fetch version before data */
- dst->tsc_timestamp = src->tsc_timestamp;
- dst->system_timestamp = src->system_time;
- dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
- dst->tsc_shift = src->tsc_shift;
- dst->flags = src->flags;
- rmb(); /* test version after fetching data */
- } while ((src->version & 1) || (dst->version != src->version));
-
- return dst->version;
-}
-
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
u64 pv_tsc_khz = 1000000ULL << 32;
@@ -88,23 +50,32 @@ void pvclock_resume(void)
atomic64_set(&last_value, 0);
}
+u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
+{
+ unsigned version;
+ cycle_t ret;
+ u8 flags;
+
+ do {
+ version = __pvclock_read_cycles(src, &ret, &flags);
+ } while ((src->version & 1) || version != src->version);
+
+ return flags & valid_flags;
+}
+
cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
- struct pvclock_shadow_time shadow;
unsigned version;
- cycle_t ret, offset;
+ cycle_t ret;
u64 last;
+ u8 flags;
do {
- version = pvclock_get_time_values(&shadow, src);
- barrier();
- offset = pvclock_get_nsec_offset(&shadow);
- ret = shadow.system_timestamp + offset;
- barrier();
- } while (version != src->version);
+ version = __pvclock_read_cycles(src, &ret, &flags);
+ } while ((src->version & 1) || version != src->version);
if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
- (shadow.flags & PVCLOCK_TSC_STABLE_BIT))
+ (flags & PVCLOCK_TSC_STABLE_BIT))
return ret;
/*
@@ -156,3 +127,71 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
+
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+ if (!pvclock_vdso_info) {
+ BUG();
+ return NULL;
+ }
+
+ return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+ return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
+#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+ void *v)
+{
+ struct task_migration_notifier *mn = v;
+ struct pvclock_vsyscall_time_info *pvti;
+
+ pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+ /* this is NULL when pvclock vsyscall is not initialized */
+ if (unlikely(pvti == NULL))
+ return NOTIFY_DONE;
+
+ pvti->migrate_count++;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+ .notifier_call = pvclock_task_migrate,
+};
+
+/*
+ * Initialize the generic pvclock vsyscall state. This will allocate
+ * a/some page(s) for the per-vcpu pvclock information, set up a
+ * fixmap mapping for the page(s)
+ */
+
+int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
+ int size)
+{
+ int idx;
+
+ WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+
+ pvclock_vdso_info = i;
+
+ for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
+ __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
+ __pa_symbol(i) + (idx*PAGE_SIZE),
+ PAGE_KERNEL_VVAR);
+ }
+
+
+ register_task_migration_notifier(&pvclock_migrate);
+
+ return 0;
+}
+#endif
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b27de56356..26ee48a33dc 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,7 +8,7 @@
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
+static void quirk_intel_irqbalance(struct pci_dev *dev)
{
u8 config;
u16 word;
@@ -512,7 +512,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
/* Set correct numa_node information for AMD NB functions */
-static void __devinit quirk_amd_nb_node(struct pci_dev *dev)
+static void quirk_amd_nb_node(struct pci_dev *dev)
{
struct pci_dev *nb_ht;
unsigned int devfn;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 4e8ba39eaf0..76fa1e9a2b3 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -584,7 +584,7 @@ static void native_machine_emergency_restart(void)
break;
case BOOT_EFI:
- if (efi_enabled)
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
efi.reset_system(reboot_mode ?
EFI_RESET_WARM :
EFI_RESET_COLD,
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 4929c1be0ac..2e8f3d3b564 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -149,7 +149,6 @@ unsigned long mach_get_cmos_time(void)
if (century) {
century = bcd2bin(century);
year += century * 100;
- printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
} else
year += CMOS_YEARS_OFFS;
@@ -195,12 +194,6 @@ void read_persistent_clock(struct timespec *ts)
ts->tv_nsec = 0;
}
-unsigned long long native_read_tsc(void)
-{
- return __native_read_tsc();
-}
-EXPORT_SYMBOL(native_read_tsc);
-
static struct resource rtc_resources[] = {
[0] = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ca45696f30f..8b24289cc10 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -143,11 +143,7 @@ int default_check_phys_apicid_present(int phys_apicid)
}
#endif
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-struct boot_params __initdata boot_params;
-#else
struct boot_params boot_params;
-#endif
/*
* Machine setup..
@@ -614,6 +610,83 @@ static __init void reserve_ibft_region(void)
static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+static bool __init snb_gfx_workaround_needed(void)
+{
+#ifdef CONFIG_PCI
+ int i;
+ u16 vendor, devid;
+ static const __initconst u16 snb_ids[] = {
+ 0x0102,
+ 0x0112,
+ 0x0122,
+ 0x0106,
+ 0x0116,
+ 0x0126,
+ 0x010a,
+ };
+
+ /* Assume no if something weird is going on with PCI */
+ if (!early_pci_allowed())
+ return false;
+
+ vendor = read_pci_config_16(0, 2, 0, PCI_VENDOR_ID);
+ if (vendor != 0x8086)
+ return false;
+
+ devid = read_pci_config_16(0, 2, 0, PCI_DEVICE_ID);
+ for (i = 0; i < ARRAY_SIZE(snb_ids); i++)
+ if (devid == snb_ids[i])
+ return true;
+#endif
+
+ return false;
+}
+
+/*
+ * Sandy Bridge graphics has trouble with certain ranges, exclude
+ * them from allocation.
+ */
+static void __init trim_snb_memory(void)
+{
+ static const __initconst unsigned long bad_pages[] = {
+ 0x20050000,
+ 0x20110000,
+ 0x20130000,
+ 0x20138000,
+ 0x40004000,
+ };
+ int i;
+
+ if (!snb_gfx_workaround_needed())
+ return;
+
+ printk(KERN_DEBUG "reserving inaccessible SNB gfx pages\n");
+
+ /*
+ * Reserve all memory below the 1 MB mark that has not
+ * already been reserved.
+ */
+ memblock_reserve(0, 1<<20);
+
+ for (i = 0; i < ARRAY_SIZE(bad_pages); i++) {
+ if (memblock_reserve(bad_pages[i], PAGE_SIZE))
+ printk(KERN_WARNING "failed to reserve 0x%08lx\n",
+ bad_pages[i]);
+ }
+}
+
+/*
+ * Here we put platform-specific memory range workarounds, i.e.
+ * memory known to be corrupt or otherwise in need to be reserved on
+ * specific platforms.
+ *
+ * If this gets used more widely it could use a real dispatch mechanism.
+ */
+static void __init trim_platform_memory_ranges(void)
+{
+ trim_snb_memory();
+}
+
static void __init trim_bios_range(void)
{
/*
@@ -634,6 +707,7 @@ static void __init trim_bios_range(void)
* take them out.
*/
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
@@ -733,15 +807,15 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_EFI
if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL32", 4)) {
- efi_enabled = 1;
- efi_64bit = false;
+ set_bit(EFI_BOOT, &x86_efi_facility);
} else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
"EL64", 4)) {
- efi_enabled = 1;
- efi_64bit = true;
+ set_bit(EFI_BOOT, &x86_efi_facility);
+ set_bit(EFI_64BIT, &x86_efi_facility);
}
- if (efi_enabled && efi_memblock_x86_reserve_range())
- efi_enabled = 0;
+
+ if (efi_enabled(EFI_BOOT))
+ efi_memblock_x86_reserve_range();
#endif
x86_init.oem.arch_setup();
@@ -814,7 +888,7 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
- if (efi_enabled)
+ if (efi_enabled(EFI_BOOT))
efi_init();
dmi_scan_machine();
@@ -897,7 +971,7 @@ void __init setup_arch(char **cmdline_p)
* The EFI specification says that boot service code won't be called
* after ExitBootServices(). This is, in fact, a lie.
*/
- if (efi_enabled)
+ if (efi_enabled(EFI_MEMMAP))
efi_reserve_boot_services();
/* preallocate 4k for mptable mpc */
@@ -912,6 +986,8 @@ void __init setup_arch(char **cmdline_p)
setup_real_mode();
+ trim_platform_memory_ranges();
+
init_gbpages();
/* max_pfn_mapped is updated here */
@@ -956,6 +1032,10 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
+#if defined(CONFIG_ACPI) && defined(CONFIG_BLK_DEV_INITRD)
+ acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
+#endif
+
reserve_crashkernel();
vsmp_init();
@@ -1034,7 +1114,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
+ if (!efi_enabled(EFI_BOOT) || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
@@ -1051,14 +1131,14 @@ void __init setup_arch(char **cmdline_p)
register_refined_jiffies(CLOCK_TICK_RATE);
#ifdef CONFIG_EFI
- /* Once setup is done above, disable efi_enabled on mismatched
- * firmware/kernel archtectures since there is no support for
- * runtime services.
+ /* Once setup is done above, unmap the EFI memory map on
+ * mismatched firmware/kernel archtectures since there is no
+ * support for runtime services.
*/
- if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ if (efi_enabled(EFI_BOOT) &&
+ IS_ENABLED(CONFIG_X86_64) != efi_enabled(EFI_64BIT)) {
pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
efi_unmap_memmap();
- efi_enabled = 0;
}
#endif
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 70b27ee6118..d6bf1f34a6e 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -22,6 +22,7 @@
#include <linux/uaccess.h>
#include <linux/user-return-notifier.h>
#include <linux/uprobes.h>
+#include <linux/context_tracking.h>
#include <asm/processor.h>
#include <asm/ucontext.h>
@@ -363,10 +364,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- put_user_ex(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@@ -413,7 +411,6 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct rt_sigframe __user *frame;
void __user *fp = NULL;
int err = 0;
- struct task_struct *me = current;
frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp);
@@ -432,10 +429,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- put_user_ex(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->sp);
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
@@ -502,10 +496,7 @@ static int x32_setup_rt_frame(int sig, struct k_sigaction *ka,
else
put_user_ex(0, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
- put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- put_user_ex(sas_ss_flags(regs->sp),
- &frame->uc.uc_stack.ss_flags);
- put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
if (ka->sa.sa_flags & SA_RESTORER) {
@@ -602,13 +593,6 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
}
#endif /* CONFIG_X86_32 */
-long
-sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
- struct pt_regs *regs)
-{
- return do_sigaltstack(uss, uoss, regs->sp);
-}
-
/*
* Do a signal return; undo the signal stack.
*/
@@ -658,7 +642,7 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
- if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT)
+ if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
@@ -816,7 +800,7 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
- rcu_user_exit();
+ user_exit();
#ifdef CONFIG_X86_MCE
/* notify userspace of pending MCEs */
@@ -838,7 +822,7 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
- rcu_user_enter();
+ user_enter();
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
@@ -864,7 +848,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
struct rt_sigframe_x32 __user *frame;
sigset_t set;
unsigned long ax;
- struct pt_regs tregs;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -878,8 +861,7 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
goto badframe;
- tregs = *regs;
- if (sys32_sigaltstack(&frame->uc.uc_stack, NULL, &tregs) == -EFAULT)
+ if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
return ax;
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f3e2ec878b8..ed0fe385289 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -127,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
atomic_t init_deasserted;
/*
- * Report back to the Boot Processor.
- * Running on AP.
+ * Report back to the Boot Processor during boot time or to the caller processor
+ * during CPU online.
*/
static void __cpuinit smp_callin(void)
{
@@ -140,15 +140,17 @@ static void __cpuinit smp_callin(void)
* we may get here before an INIT-deassert IPI reaches
* our local APIC. We have to wait for the IPI or we'll
* lock up on an APIC access.
+ *
+ * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI.
*/
- if (apic->wait_for_init_deassert)
+ cpuid = smp_processor_id();
+ if (apic->wait_for_init_deassert && cpuid != 0)
apic->wait_for_init_deassert(&init_deasserted);
/*
* (This works even if the APIC is not enabled.)
*/
phys_id = read_apic_id();
- cpuid = smp_processor_id();
if (cpumask_test_cpu(cpuid, cpu_callin_mask)) {
panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
phys_id, cpuid);
@@ -230,6 +232,8 @@ static void __cpuinit smp_callin(void)
cpumask_set_cpu(cpuid, cpu_callin_mask);
}
+static int cpu0_logical_apicid;
+static int enable_start_cpu0;
/*
* Activate a secondary processor.
*/
@@ -245,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused)
preempt_disable();
smp_callin();
+ enable_start_cpu0 = 0;
+
#ifdef CONFIG_X86_32
/* switch away from the initial page table */
load_cr3(swapper_pg_dir);
@@ -281,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused)
cpu_idle();
}
+void __init smp_store_boot_cpu_info(void)
+{
+ int id = 0; /* CPU 0 */
+ struct cpuinfo_x86 *c = &cpu_data(id);
+
+ *c = boot_cpu_data;
+ c->cpu_index = id;
+}
+
/*
* The bootstrap kernel entry code has set these up. Save them for
* a given CPU
*/
-
void __cpuinit smp_store_cpu_info(int id)
{
struct cpuinfo_x86 *c = &cpu_data(id);
*c = boot_cpu_data;
c->cpu_index = id;
- if (id != 0)
- identify_secondary_cpu(c);
+ /*
+ * During boot time, CPU0 has this setup already. Save the info when
+ * bringing up AP or offlined CPU0.
+ */
+ identify_secondary_cpu(c);
}
static bool __cpuinit
@@ -315,7 +332,7 @@ do { \
static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
- if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ if (cpu_has_topoext) {
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
if (c->phys_proc_id == o->phys_proc_id &&
@@ -483,7 +500,7 @@ void __inquire_remote_apic(int apicid)
* won't ... remember to clear down the APIC, etc later.
*/
int __cpuinit
-wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
+wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip)
{
unsigned long send_status, accept_status = 0;
int maxlvt;
@@ -491,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
/* Target chip */
/* Boot on the stack */
/* Kick the second */
- apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
+ apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid);
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
@@ -651,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid)
node, cpu, apicid);
}
+static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+ if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0)
+ return NMI_HANDLED;
+
+ return NMI_DONE;
+}
+
+/*
+ * Wake up AP by INIT, INIT, STARTUP sequence.
+ *
+ * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS
+ * boot-strap code which is not a desired behavior for waking up BSP. To
+ * void the boot-strap code, wake up CPU0 by NMI instead.
+ *
+ * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined
+ * (i.e. physically hot removed and then hot added), NMI won't wake it up.
+ * We'll change this code in the future to wake up hard offlined CPU0 if
+ * real platform and request are available.
+ */
+static int __cpuinit
+wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid,
+ int *cpu0_nmi_registered)
+{
+ int id;
+ int boot_error;
+
+ /*
+ * Wake up AP by INIT, INIT, STARTUP sequence.
+ */
+ if (cpu)
+ return wakeup_secondary_cpu_via_init(apicid, start_ip);
+
+ /*
+ * Wake up BSP by nmi.
+ *
+ * Register a NMI handler to help wake up CPU0.
+ */
+ boot_error = register_nmi_handler(NMI_LOCAL,
+ wakeup_cpu0_nmi, 0, "wake_cpu0");
+
+ if (!boot_error) {
+ enable_start_cpu0 = 1;
+ *cpu0_nmi_registered = 1;
+ if (apic->dest_logical == APIC_DEST_LOGICAL)
+ id = cpu0_logical_apicid;
+ else
+ id = apicid;
+ boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip);
+ }
+
+ return boot_error;
+}
+
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -666,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long boot_error = 0;
int timeout;
+ int cpu0_nmi_registered = 0;
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
@@ -713,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
}
/*
- * Kick the secondary CPU. Use the method in the APIC driver
- * if it's defined - or use an INIT boot APIC message otherwise:
+ * Wake up a CPU in difference cases:
+ * - Use the method in the APIC driver if it's defined
+ * Otherwise,
+ * - Use an INIT boot APIC message for APs or NMI for BSP.
*/
if (apic->wakeup_secondary_cpu)
boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
else
- boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);
+ boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
+ &cpu0_nmi_registered);
if (!boot_error) {
/*
@@ -784,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
*/
smpboot_restore_warm_reset_vector();
}
+ /*
+ * Clean up the nmi handler. Do this after the callin and callout sync
+ * to avoid impact of possible long unregister time.
+ */
+ if (cpu0_nmi_registered)
+ unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
return boot_error;
}
@@ -797,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle)
pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu);
- if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid ||
+ if (apicid == BAD_APICID ||
!physid_isset(apicid, phys_cpu_present_map) ||
!apic->apic_id_valid(apicid)) {
pr_err("%s: bad cpu %d\n", __func__, cpu);
@@ -995,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
/*
* Setup boot CPU information
*/
- smp_store_cpu_info(0); /* Final full version of the data */
+ smp_store_boot_cpu_info(); /* Final full version of the data */
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
@@ -1031,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
*/
setup_local_APIC();
+ if (x2apic_mode)
+ cpu0_logical_apicid = apic_read(APIC_LDR);
+ else
+ cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+
/*
* Enable IO APIC before setting up error vector
*/
@@ -1219,19 +1309,6 @@ void cpu_disable_common(void)
int native_cpu_disable(void)
{
- int cpu = smp_processor_id();
-
- /*
- * Perhaps use cpufreq to drop frequency, but that could go
- * into generic code.
- *
- * We won't take down the boot processor on i386 due to some
- * interrupts only being able to be serviced by the BSP.
- * Especially so if we're not using an IOAPIC -zwane
- */
- if (cpu == 0)
- return -EBUSY;
-
clear_local_APIC();
cpu_disable_common();
@@ -1271,6 +1348,14 @@ void play_dead_common(void)
local_irq_disable();
}
+static bool wakeup_cpu0(void)
+{
+ if (smp_processor_id() == 0 && enable_start_cpu0)
+ return true;
+
+ return false;
+}
+
/*
* We need to flush the caches before going to sleep, lest we have
* dirty data in our caches when we come back up.
@@ -1334,6 +1419,11 @@ static inline void mwait_play_dead(void)
__monitor(mwait_ptr, 0, 0);
mb();
__mwait(eax, 0);
+ /*
+ * If NMI wants to wake up CPU0, start CPU0.
+ */
+ if (wakeup_cpu0())
+ start_cpu0();
}
}
@@ -1344,6 +1434,11 @@ static inline void hlt_play_dead(void)
while (1) {
native_halt();
+ /*
+ * If NMI wants to wake up CPU0, start CPU0.
+ */
+ if (wakeup_cpu0())
+ start_cpu0();
}
}
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index cd3b2438a98..9b4d51d0c0d 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -165,10 +165,11 @@ void set_task_blockstep(struct task_struct *task, bool on)
* Ensure irq/preemption can't change debugctl in between.
* Note also that both TIF_BLOCKSTEP and debugctl should
* be changed atomically wrt preemption.
- * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
- * wrong if task != current, SIGKILL can wakeup the stopped
- * tracee and set/clear can play with the running task, this
- * can confuse the next __switch_to_xtra().
+ *
+ * NOTE: this means that set/clear TIF_BLOCKSTEP is only safe if
+ * task is current or it can't be running, otherwise we can race
+ * with __switch_to_xtra(). We rely on ptrace_freeze_traced() but
+ * PTRACE_KILL is not safe.
*/
local_irq_disable();
debugctl = get_debugctlmsr();
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index b4d3c3927dd..dbded5aedb8 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -21,37 +21,23 @@
/*
* Align a virtual address to avoid aliasing in the I$ on AMD F15h.
- *
- * @flags denotes the allocation direction - bottomup or topdown -
- * or vDSO; see call sites below.
*/
-unsigned long align_addr(unsigned long addr, struct file *filp,
- enum align_flags flags)
+static unsigned long get_align_mask(void)
{
- unsigned long tmp_addr;
-
/* handle 32- and 64-bit case with a single conditional */
if (va_align.flags < 0 || !(va_align.flags & (2 - mmap_is_ia32())))
- return addr;
+ return 0;
if (!(current->flags & PF_RANDOMIZE))
- return addr;
-
- if (!((flags & ALIGN_VDSO) || filp))
- return addr;
-
- tmp_addr = addr;
-
- /*
- * We need an address which is <= than the original
- * one only when in topdown direction.
- */
- if (!(flags & ALIGN_TOPDOWN))
- tmp_addr += va_align.mask;
+ return 0;
- tmp_addr &= ~va_align.mask;
+ return va_align.mask;
+}
- return tmp_addr;
+unsigned long align_vdso_addr(unsigned long addr)
+{
+ unsigned long align_mask = get_align_mask();
+ return (addr + align_mask) & ~align_mask;
}
static int __init control_va_addr_alignment(char *str)
@@ -126,7 +112,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long start_addr;
+ struct vm_unmapped_area_info info;
unsigned long begin, end;
if (flags & MAP_FIXED)
@@ -144,50 +130,16 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
(!vma || addr + len <= vma->vm_start))
return addr;
}
- if (((flags & MAP_32BIT) || test_thread_flag(TIF_ADDR32))
- && len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
- mm->free_area_cache = begin;
- }
- addr = mm->free_area_cache;
- if (addr < begin)
- addr = begin;
- start_addr = addr;
-
-full_search:
-
- addr = align_addr(addr, filp, 0);
-
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (end - len < addr) {
- /*
- * Start a new search - just in case we missed
- * some holes.
- */
- if (start_addr != begin) {
- start_addr = addr = begin;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end;
- addr = align_addr(addr, filp, 0);
- }
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = begin;
+ info.high_limit = end;
+ info.align_mask = filp ? get_align_mask() : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ return vm_unmapped_area(&info);
}
-
unsigned long
arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
@@ -195,7 +147,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
{
struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
- unsigned long addr = addr0, start_addr;
+ unsigned long addr = addr0;
+ struct vm_unmapped_area_info info;
/* requested length too big for entire address space */
if (len > TASK_SIZE)
@@ -204,7 +157,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (flags & MAP_FIXED)
return addr;
- /* for MAP_32BIT mappings we force the legact mmap base */
+ /* for MAP_32BIT mappings we force the legacy mmap base */
if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
goto bottomup;
@@ -217,51 +170,16 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
- /* check if free_area_cache is useful for us */
- if (len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
- mm->free_area_cache = mm->mmap_base;
- }
-
-try_again:
- /* either no address requested or can't fit in requested address hole */
- start_addr = addr = mm->free_area_cache;
-
- if (addr < len)
- goto fail;
-
- addr -= len;
- do {
- addr = align_addr(addr, filp, ALIGN_TOPDOWN);
-
- /*
- * Lookup failure means no vma is above this address,
- * else if new region fits below vma->vm_start,
- * return with success:
- */
- vma = find_vma(mm, addr);
- if (!vma || addr+len <= vma->vm_start)
- /* remember the address as a hint for next time */
- return mm->free_area_cache = addr;
-
- /* remember the largest hole we saw so far */
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = vma->vm_start-len;
- } while (len < vma->vm_start);
-
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (start_addr != mm->mmap_base) {
- mm->free_area_cache = mm->mmap_base;
- mm->cached_hole_size = 0;
- goto try_again;
- }
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = PAGE_SIZE;
+ info.high_limit = mm->mmap_base;
+ info.align_mask = filp ? get_align_mask() : 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
+ addr = vm_unmapped_area(&info);
+ if (!(addr & ~PAGE_MASK))
+ return addr;
+ VM_BUG_ON(addr != -ENOMEM);
bottomup:
/*
@@ -270,14 +188,5 @@ bottomup:
* can happen with large stack limits and large mmap()
* allocations.
*/
- mm->cached_hole_size = ~0UL;
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
- /*
- * Restore the topdown base:
- */
- mm->free_area_cache = mm->mmap_base;
- mm->cached_hole_size = ~0UL;
-
- return addr;
+ return arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
}
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 76ee97709a0..6e60b5fe224 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -30,23 +30,110 @@
#include <linux/mmzone.h>
#include <linux/init.h>
#include <linux/smp.h>
+#include <linux/irq.h>
#include <asm/cpu.h>
static DEFINE_PER_CPU(struct x86_cpu, cpu_devices);
#ifdef CONFIG_HOTPLUG_CPU
+
+#ifdef CONFIG_BOOTPARAM_HOTPLUG_CPU0
+static int cpu0_hotpluggable = 1;
+#else
+static int cpu0_hotpluggable;
+static int __init enable_cpu0_hotplug(char *str)
+{
+ cpu0_hotpluggable = 1;
+ return 1;
+}
+
+__setup("cpu0_hotplug", enable_cpu0_hotplug);
+#endif
+
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
+/*
+ * This function offlines a CPU as early as possible and allows userspace to
+ * boot up without the CPU. The CPU can be onlined back by user after boot.
+ *
+ * This is only called for debugging CPU offline/online feature.
+ */
+int __ref _debug_hotplug_cpu(int cpu, int action)
+{
+ struct device *dev = get_cpu_device(cpu);
+ int ret;
+
+ if (!cpu_is_hotpluggable(cpu))
+ return -EINVAL;
+
+ cpu_hotplug_driver_lock();
+
+ switch (action) {
+ case 0:
+ ret = cpu_down(cpu);
+ if (!ret) {
+ pr_info("CPU %u is now offline\n", cpu);
+ kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
+ } else
+ pr_debug("Can't offline CPU%d.\n", cpu);
+ break;
+ case 1:
+ ret = cpu_up(cpu);
+ if (!ret)
+ kobject_uevent(&dev->kobj, KOBJ_ONLINE);
+ else
+ pr_debug("Can't online CPU%d.\n", cpu);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ cpu_hotplug_driver_unlock();
+
+ return ret;
+}
+
+static int __init debug_hotplug_cpu(void)
+{
+ _debug_hotplug_cpu(0, 0);
+ return 0;
+}
+
+late_initcall_sync(debug_hotplug_cpu);
+#endif /* CONFIG_DEBUG_HOTPLUG_CPU0 */
+
int __ref arch_register_cpu(int num)
{
+ struct cpuinfo_x86 *c = &cpu_data(num);
+
+ /*
+ * Currently CPU0 is only hotpluggable on Intel platforms. Other
+ * vendors can add hotplug support later.
+ */
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ cpu0_hotpluggable = 0;
+
/*
- * CPU0 cannot be offlined due to several
- * restrictions and assumptions in kernel. This basically
- * doesn't add a control file, one cannot attempt to offline
- * BSP.
+ * Two known BSP/CPU0 dependencies: Resume from suspend/hibernate
+ * depends on BSP. PIC interrupts depend on BSP.
*
- * Also certain PCI quirks require not to enable hotplug control
- * for all CPU's.
+ * If the BSP depencies are under control, one can tell kernel to
+ * enable BSP hotplug. This basically adds a control file and
+ * one can attempt to offline BSP.
*/
- if (num)
+ if (num == 0 && cpu0_hotpluggable) {
+ unsigned int irq;
+ /*
+ * We won't take down the boot processor on i386 if some
+ * interrupts only are able to be serviced by the BSP in PIC.
+ */
+ for_each_active_irq(irq) {
+ if (!IO_APIC_IRQ(irq) && irq_has_action(irq)) {
+ cpu0_hotpluggable = 0;
+ break;
+ }
+ }
+ }
+ if (num || cpu0_hotpluggable)
per_cpu(cpu_devices, num).cpu.hotpluggable = 1;
return register_cpu(&per_cpu(cpu_devices, num).cpu, num);
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
new file mode 100644
index 00000000000..25b993729f9
--- /dev/null
+++ b/arch/x86/kernel/trace_clock.c
@@ -0,0 +1,21 @@
+/*
+ * X86 trace clocks
+ */
+#include <asm/trace_clock.h>
+#include <asm/barrier.h>
+#include <asm/msr.h>
+
+/*
+ * trace_clock_x86_tsc(): A clock that is just the cycle counter.
+ *
+ * Unlike the other clocks, this is not in nanoseconds.
+ */
+u64 notrace trace_clock_x86_tsc(void)
+{
+ u64 ret;
+
+ rdtsc_barrier();
+ rdtscll(ret);
+
+ return ret;
+}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 8276dc6794c..ecffca11f4e 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,7 +55,7 @@
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/mce.h>
-#include <asm/rcu.h>
+#include <asm/context_tracking.h>
#include <asm/mach_traps.h>
@@ -69,9 +69,6 @@
asmlinkage int system_call(void);
-/* Do we ignore FPU interrupts ? */
-char ignore_fpu_irq;
-
/*
* The IDT has to be page-aligned to simplify the Pentium
* F0 0F bug workaround.
@@ -564,9 +561,6 @@ void math_error(struct pt_regs *regs, int error_code, int trapnr)
dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
{
-#ifdef CONFIG_X86_32
- ignore_fpu_irq = 1;
-#endif
exception_enter(regs);
math_error(regs, error_code, X86_TRAP_MF);
exception_exit(regs);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index cfa5d4f7ca5..4b9ea101fe3 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -77,6 +77,12 @@ unsigned long long
sched_clock(void) __attribute__((alias("native_sched_clock")));
#endif
+unsigned long long native_read_tsc(void)
+{
+ return __native_read_tsc();
+}
+EXPORT_SYMBOL(native_read_tsc);
+
int check_tsc_unstable(void)
{
return tsc_unstable;
@@ -617,7 +623,8 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
ns_now = __cycles_2_ns(tsc_now);
if (cpu_khz) {
- *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
+ *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) +
+ cpu_khz / 2) / cpu_khz;
*offset = ns_now - mult_frac(tsc_now, *scale,
(1UL << CYC2NS_SCALE_FACTOR));
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index aafa5557b39..0ba4cfb4f41 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -478,6 +478,11 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
regs->ip = current->utask->xol_vaddr;
pre_xol_rip_insn(auprobe, regs, autask);
+ autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
+ regs->flags |= X86_EFLAGS_TF;
+ if (test_tsk_thread_flag(current, TIF_BLOCKSTEP))
+ set_task_blockstep(current, false);
+
return 0;
}
@@ -603,6 +608,16 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (auprobe->fixups & UPROBE_FIX_CALL)
result = adjust_ret_addr(regs->sp, correction);
+ /*
+ * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
+ * so we can get an extra SIGTRAP if we do not clear TF. We need
+ * to examine the opcode to make it right.
+ */
+ if (utask->autask.saved_tf)
+ send_sig(SIGTRAP, current, 0);
+ else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+ regs->flags &= ~X86_EFLAGS_TF;
+
return result;
}
@@ -647,6 +662,10 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
current->thread.trap_nr = utask->autask.saved_trap_nr;
handle_riprel_post_xol(auprobe, regs, NULL);
instruction_pointer_set(regs, utask->vaddr);
+
+ /* clear TF if it was set by us in arch_uprobe_pre_xol() */
+ if (!utask->autask.saved_tf)
+ regs->flags &= ~X86_EFLAGS_TF;
}
/*
@@ -661,8 +680,10 @@ static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
if (auprobe->insn[i] == 0x66)
continue;
- if (auprobe->insn[i] == 0x90)
+ if (auprobe->insn[i] == 0x90) {
+ regs->ip += i + 1;
return true;
+ }
break;
}
@@ -676,38 +697,3 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
send_sig(SIGTRAP, current, 0);
return ret;
}
-
-void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
-{
- struct task_struct *task = current;
- struct arch_uprobe_task *autask = &task->utask->autask;
- struct pt_regs *regs = task_pt_regs(task);
-
- autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
-
- regs->flags |= X86_EFLAGS_TF;
- if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
- set_task_blockstep(task, false);
-}
-
-void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
-{
- struct task_struct *task = current;
- struct arch_uprobe_task *autask = &task->utask->autask;
- bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
- struct pt_regs *regs = task_pt_regs(task);
- /*
- * The state of TIF_BLOCKSTEP was not saved so we can get an extra
- * SIGTRAP if we do not clear TF. We need to examine the opcode to
- * make it right.
- */
- if (unlikely(trapped)) {
- if (!autask->saved_tf)
- regs->flags &= ~X86_EFLAGS_TF;
- } else {
- if (autask->saved_tf)
- send_sig(SIGTRAP, task, 0);
- else if (!(auprobe->fixups & UPROBE_FIX_SETF))
- regs->flags &= ~X86_EFLAGS_TF;
- }
-}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 5c9687b1bde..1dfe69cc78a 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -182,7 +182,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
- split_huge_page_pmd(mm, pmd);
+ split_huge_page_pmd_mm(mm, 0xA0000, pmd);
if (pmd_none_or_clear_bad(pmd))
goto out;
pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 3a3e8c9e280..9a907a67be8 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -145,19 +145,6 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
-#ifdef CONFIG_SECCOMP
-static int vsyscall_seccomp(struct task_struct *tsk, int syscall_nr)
-{
- if (!seccomp_mode(&tsk->seccomp))
- return 0;
- task_pt_regs(tsk)->orig_ax = syscall_nr;
- task_pt_regs(tsk)->ax = syscall_nr;
- return __secure_computing(syscall_nr);
-}
-#else
-#define vsyscall_seccomp(_tsk, _nr) 0
-#endif
-
static bool write_ok_or_segv(unsigned long ptr, size_t size)
{
/*
@@ -190,10 +177,9 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
- int vsyscall_nr;
+ int vsyscall_nr, syscall_nr, tmp;
int prev_sig_on_uaccess_error;
long ret;
- int skip;
/*
* No point in checking CS -- the only way to get here is a user mode
@@ -225,56 +211,84 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
}
tsk = current;
- /*
- * With a real vsyscall, page faults cause SIGSEGV. We want to
- * preserve that behavior to make writing exploits harder.
- */
- prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
- current_thread_info()->sig_on_uaccess_error = 1;
/*
+ * Check for access_ok violations and find the syscall nr.
+ *
* NULL is a valid user pointer (in the access_ok sense) on 32-bit and
* 64-bit, so we don't need to special-case it here. For all the
* vsyscalls, NULL means "don't write anything" not "write it at
* address 0".
*/
- ret = -EFAULT;
- skip = 0;
switch (vsyscall_nr) {
case 0:
- skip = vsyscall_seccomp(tsk, __NR_gettimeofday);
- if (skip)
- break;
-
if (!write_ok_or_segv(regs->di, sizeof(struct timeval)) ||
- !write_ok_or_segv(regs->si, sizeof(struct timezone)))
- break;
+ !write_ok_or_segv(regs->si, sizeof(struct timezone))) {
+ ret = -EFAULT;
+ goto check_fault;
+ }
+
+ syscall_nr = __NR_gettimeofday;
+ break;
+
+ case 1:
+ if (!write_ok_or_segv(regs->di, sizeof(time_t))) {
+ ret = -EFAULT;
+ goto check_fault;
+ }
+
+ syscall_nr = __NR_time;
+ break;
+
+ case 2:
+ if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
+ !write_ok_or_segv(regs->si, sizeof(unsigned))) {
+ ret = -EFAULT;
+ goto check_fault;
+ }
+
+ syscall_nr = __NR_getcpu;
+ break;
+ }
+
+ /*
+ * Handle seccomp. regs->ip must be the original value.
+ * See seccomp_send_sigsys and Documentation/prctl/seccomp_filter.txt.
+ *
+ * We could optimize the seccomp disabled case, but performance
+ * here doesn't matter.
+ */
+ regs->orig_ax = syscall_nr;
+ regs->ax = -ENOSYS;
+ tmp = secure_computing(syscall_nr);
+ if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) {
+ warn_bad_vsyscall(KERN_DEBUG, regs,
+ "seccomp tried to change syscall nr or ip");
+ do_exit(SIGSYS);
+ }
+ if (tmp)
+ goto do_ret; /* skip requested */
+ /*
+ * With a real vsyscall, page faults cause SIGSEGV. We want to
+ * preserve that behavior to make writing exploits harder.
+ */
+ prev_sig_on_uaccess_error = current_thread_info()->sig_on_uaccess_error;
+ current_thread_info()->sig_on_uaccess_error = 1;
+
+ ret = -EFAULT;
+ switch (vsyscall_nr) {
+ case 0:
ret = sys_gettimeofday(
(struct timeval __user *)regs->di,
(struct timezone __user *)regs->si);
break;
case 1:
- skip = vsyscall_seccomp(tsk, __NR_time);
- if (skip)
- break;
-
- if (!write_ok_or_segv(regs->di, sizeof(time_t)))
- break;
-
ret = sys_time((time_t __user *)regs->di);
break;
case 2:
- skip = vsyscall_seccomp(tsk, __NR_getcpu);
- if (skip)
- break;
-
- if (!write_ok_or_segv(regs->di, sizeof(unsigned)) ||
- !write_ok_or_segv(regs->si, sizeof(unsigned)))
- break;
-
ret = sys_getcpu((unsigned __user *)regs->di,
(unsigned __user *)regs->si,
NULL);
@@ -283,12 +297,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
current_thread_info()->sig_on_uaccess_error = prev_sig_on_uaccess_error;
- if (skip) {
- if ((long)regs->ax <= 0L) /* seccomp errno emulation */
- goto do_ret;
- goto done; /* seccomp trace/trap */
- }
-
+check_fault:
if (ret == -EFAULT) {
/* Bad news -- userspace fed a bad pointer to a vsyscall. */
warn_bad_vsyscall(KERN_INFO, regs,
@@ -311,7 +320,6 @@ do_ret:
/* Emulate a ret instruction. */
regs->ip = caller;
regs->sp += 8;
-done:
return true;
sigsegv:
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 7a3d075a814..d065d67c267 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -19,6 +19,7 @@
#include <asm/time.h>
#include <asm/irq.h>
#include <asm/io_apic.h>
+#include <asm/hpet.h>
#include <asm/pat.h>
#include <asm/tsc.h>
#include <asm/iommu.h>
@@ -111,15 +112,22 @@ struct x86_platform_ops x86_platform = {
EXPORT_SYMBOL_GPL(x86_platform);
struct x86_msi_ops x86_msi = {
- .setup_msi_irqs = native_setup_msi_irqs,
- .teardown_msi_irq = native_teardown_msi_irq,
- .teardown_msi_irqs = default_teardown_msi_irqs,
- .restore_msi_irqs = default_restore_msi_irqs,
+ .setup_msi_irqs = native_setup_msi_irqs,
+ .compose_msi_msg = native_compose_msi_msg,
+ .teardown_msi_irq = native_teardown_msi_irq,
+ .teardown_msi_irqs = default_teardown_msi_irqs,
+ .restore_msi_irqs = default_restore_msi_irqs,
+ .setup_hpet_msi = default_setup_hpet_msi,
};
struct x86_io_apic_ops x86_io_apic_ops = {
- .init = native_io_apic_init_mappings,
- .read = native_io_apic_read,
- .write = native_io_apic_write,
- .modify = native_io_apic_modify,
+ .init = native_io_apic_init_mappings,
+ .read = native_io_apic_read,
+ .write = native_io_apic_write,
+ .modify = native_io_apic_modify,
+ .disable = native_disable_io_apic,
+ .print_entries = native_io_apic_print_entries,
+ .set_affinity = native_ioapic_set_affinity,
+ .setup_entry = native_setup_ioapic_entry,
+ .eoi_ioapic_pin = native_eoi_ioapic_pin,
};
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ec79e773342..a20ecb5b6cb 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -320,6 +320,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (index == 0) {
entry->ebx &= kvm_supported_word9_x86_features;
cpuid_mask(&entry->ebx, 9);
+ // TSC_ADJUST is emulated
+ entry->ebx |= F(TSC_ADJUST);
} else
entry->ebx = 0;
entry->eax = 0;
@@ -659,6 +661,7 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
} else
*eax = *ebx = *ecx = *edx = 0;
}
+EXPORT_SYMBOL_GPL(kvm_cpuid);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 58fc5148882..b7fd0798488 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -31,6 +31,14 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
}
+static inline bool guest_cpuid_has_tsc_adjust(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_TSC_ADJUST));
+}
+
static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index bba39bfa1c4..a27e7637110 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -676,8 +676,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
addr.seg);
if (!usable)
goto bad;
- /* code segment or read-only data segment */
- if (((desc.type & 8) || !(desc.type & 2)) && write)
+ /* code segment in protected mode or read-only data segment */
+ if ((((ctxt->mode != X86EMUL_MODE_REAL) && (desc.type & 8))
+ || !(desc.type & 2)) && write)
goto bad;
/* unreadable code segment */
if (!fetch && (desc.type & 8) && !(desc.type & 2))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 43e9fadca5d..9392f527f10 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1011,7 +1011,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
local_irq_save(flags);
now = apic->lapic_timer.timer.base->get_time();
- guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+ guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu, native_read_tsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
do_div(ns, this_tsc_khz);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6f85fe0bf95..01d7c2ad05f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2382,12 +2382,20 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
|| (!vcpu->arch.mmu.direct_map && write_fault
&& !is_write_protection(vcpu) && !user_fault)) {
+ /*
+ * There are two cases:
+ * - the one is other vcpu creates new sp in the window
+ * between mapping_level() and acquiring mmu-lock.
+ * - the another case is the new sp is created by itself
+ * (page-fault path) when guest uses the target gfn as
+ * its page table.
+ * Both of these cases can be fixed by allowing guest to
+ * retry the access, it will refault, then we can establish
+ * the mapping by using small page.
+ */
if (level > PT_PAGE_TABLE_LEVEL &&
- has_wrprotected_page(vcpu->kvm, gfn, level)) {
- ret = 1;
- drop_spte(vcpu->kvm, sptep);
+ has_wrprotected_page(vcpu->kvm, gfn, level))
goto done;
- }
spte |= PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE;
@@ -2505,6 +2513,14 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
mmu_free_roots(vcpu);
}
+static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
+{
+ int bit7;
+
+ bit7 = (gpte >> 7) & 1;
+ return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
+}
+
static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
bool no_dirty_log)
{
@@ -2517,6 +2533,26 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
return gfn_to_pfn_memslot_atomic(slot, gfn);
}
+static bool prefetch_invalid_gpte(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *sp, u64 *spte,
+ u64 gpte)
+{
+ if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
+ goto no_present;
+
+ if (!is_present_gpte(gpte))
+ goto no_present;
+
+ if (!(gpte & PT_ACCESSED_MASK))
+ goto no_present;
+
+ return false;
+
+no_present:
+ drop_spte(vcpu->kvm, spte);
+ return true;
+}
+
static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp,
u64 *start, u64 *end)
@@ -2671,7 +2707,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
* PT_PAGE_TABLE_LEVEL and there would be no adjustment done
* here.
*/
- if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
+ if (!is_error_noslot_pfn(pfn) && !kvm_is_mmio_pfn(pfn) &&
level == PT_PAGE_TABLE_LEVEL &&
PageTransCompound(pfn_to_page(pfn)) &&
!has_wrprotected_page(vcpu->kvm, gfn, PT_DIRECTORY_LEVEL)) {
@@ -2699,18 +2735,13 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
}
}
-static bool mmu_invalid_pfn(pfn_t pfn)
-{
- return unlikely(is_invalid_pfn(pfn));
-}
-
static bool handle_abnormal_pfn(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
pfn_t pfn, unsigned access, int *ret_val)
{
bool ret = true;
/* The pfn is invalid, report the error! */
- if (unlikely(is_invalid_pfn(pfn))) {
+ if (unlikely(is_error_pfn(pfn))) {
*ret_val = kvm_handle_bad_page(vcpu, gfn, pfn);
goto exit;
}
@@ -2862,7 +2893,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu, mmu_seq))
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
@@ -3331,7 +3362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu, mmu_seq))
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_free_some_pages(vcpu);
if (likely(!force_pt_level))
@@ -3399,14 +3430,6 @@ static void paging_free(struct kvm_vcpu *vcpu)
nonpaging_free(vcpu);
}
-static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
-{
- int bit7;
-
- bit7 = (gpte >> 7) & 1;
- return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
-}
-
static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
{
unsigned mask;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 714e2c01a6f..891eb6d93b8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -305,51 +305,43 @@ static int FNAME(walk_addr_nested)(struct guest_walker *walker,
addr, access);
}
-static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp, u64 *spte,
- pt_element_t gpte)
+static bool
+FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ u64 *spte, pt_element_t gpte, bool no_dirty_log)
{
- if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL))
- goto no_present;
-
- if (!is_present_gpte(gpte))
- goto no_present;
-
- if (!(gpte & PT_ACCESSED_MASK))
- goto no_present;
-
- return false;
-
-no_present:
- drop_spte(vcpu->kvm, spte);
- return true;
-}
-
-static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- u64 *spte, const void *pte)
-{
- pt_element_t gpte;
unsigned pte_access;
+ gfn_t gfn;
pfn_t pfn;
- gpte = *(const pt_element_t *)pte;
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
- return;
+ if (prefetch_invalid_gpte(vcpu, sp, spte, gpte))
+ return false;
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
+
+ gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & gpte_access(vcpu, gpte);
protect_clean_gpte(&pte_access, gpte);
- pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
- if (mmu_invalid_pfn(pfn))
- return;
+ pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
+ no_dirty_log && (pte_access & ACC_WRITE_MASK));
+ if (is_error_pfn(pfn))
+ return false;
/*
- * we call mmu_set_spte() with host_writable = true because that
- * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
+ * we call mmu_set_spte() with host_writable = true because
+ * pte_prefetch_gfn_to_pfn always gets a writable pfn.
*/
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
- NULL, PT_PAGE_TABLE_LEVEL,
- gpte_to_gfn(gpte), pfn, true, true);
+ NULL, PT_PAGE_TABLE_LEVEL, gfn, pfn, true, true);
+
+ return true;
+}
+
+static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
+ u64 *spte, const void *pte)
+{
+ pt_element_t gpte = *(const pt_element_t *)pte;
+
+ FNAME(prefetch_gpte)(vcpu, sp, spte, gpte, false);
}
static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
@@ -395,53 +387,34 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
spte = sp->spt + i;
for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
- pt_element_t gpte;
- unsigned pte_access;
- gfn_t gfn;
- pfn_t pfn;
-
if (spte == sptep)
continue;
if (is_shadow_present_pte(*spte))
continue;
- gpte = gptep[i];
-
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
- continue;
-
- pte_access = sp->role.access & gpte_access(vcpu, gpte);
- protect_clean_gpte(&pte_access, gpte);
- gfn = gpte_to_gfn(gpte);
- pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
- pte_access & ACC_WRITE_MASK);
- if (mmu_invalid_pfn(pfn))
+ if (!FNAME(prefetch_gpte)(vcpu, sp, spte, gptep[i], true))
break;
-
- mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
- NULL, PT_PAGE_TABLE_LEVEL, gfn,
- pfn, true, true);
}
}
/*
* Fetch a shadow pte for a specific level in the paging hierarchy.
+ * If the guest tries to write a write-protected page, we need to
+ * emulate this operation, return 1 to indicate this case.
*/
-static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
+static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct guest_walker *gw,
int user_fault, int write_fault, int hlevel,
- int *emulate, pfn_t pfn, bool map_writable,
- bool prefault)
+ pfn_t pfn, bool map_writable, bool prefault)
{
- unsigned access = gw->pt_access;
struct kvm_mmu_page *sp = NULL;
- int top_level;
- unsigned direct_access;
struct kvm_shadow_walk_iterator it;
+ unsigned direct_access, access = gw->pt_access;
+ int top_level, emulate = 0;
if (!is_present_gpte(gw->ptes[gw->level - 1]))
- return NULL;
+ return 0;
direct_access = gw->pte_access;
@@ -505,17 +478,17 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
clear_sp_write_flooding_count(it.sptep);
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access,
- user_fault, write_fault, emulate, it.level,
+ user_fault, write_fault, &emulate, it.level,
gw->gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
- return it.sptep;
+ return emulate;
out_gpte_changed:
if (sp)
kvm_mmu_put_page(sp, it.sptep);
kvm_release_pfn_clean(pfn);
- return NULL;
+ return 0;
}
/*
@@ -538,8 +511,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
int write_fault = error_code & PFERR_WRITE_MASK;
int user_fault = error_code & PFERR_USER_MASK;
struct guest_walker walker;
- u64 *sptep;
- int emulate = 0;
int r;
pfn_t pfn;
int level = PT_PAGE_TABLE_LEVEL;
@@ -594,24 +565,20 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
return r;
spin_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu, mmu_seq))
+ if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
kvm_mmu_free_some_pages(vcpu);
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
- sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
- level, &emulate, pfn, map_writable, prefault);
- (void)sptep;
- pgprintk("%s: shadow pte %p %llx emulate %d\n", __func__,
- sptep, *sptep, emulate);
-
+ r = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault,
+ level, pfn, map_writable, prefault);
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
- return emulate;
+ return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
@@ -757,7 +724,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
sizeof(pt_element_t)))
return -EINVAL;
- if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) {
+ if (prefetch_invalid_gpte(vcpu, sp, &sp->spt[i], gpte)) {
vcpu->kvm->tlbs_dirty++;
continue;
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d017df3899e..d29d3cd1c15 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -20,6 +20,7 @@
#include "mmu.h"
#include "kvm_cache_regs.h"
#include "x86.h"
+#include "cpuid.h"
#include <linux/module.h>
#include <linux/mod_devicetable.h>
@@ -630,15 +631,12 @@ static int svm_hardware_enable(void *garbage)
return -EBUSY;
if (!has_svm()) {
- printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
- me);
+ pr_err("%s: err EOPNOTSUPP on %d\n", __func__, me);
return -EINVAL;
}
sd = per_cpu(svm_data, me);
-
if (!sd) {
- printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
- me);
+ pr_err("%s: svm_data is NULL on %d\n", __func__, me);
return -EINVAL;
}
@@ -1012,6 +1010,13 @@ static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
svm->tsc_ratio = ratio;
}
+static u64 svm_read_tsc_offset(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ return svm->vmcb->control.tsc_offset;
+}
+
static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1189,6 +1194,8 @@ static void init_vmcb(struct vcpu_svm *svm)
static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u32 dummy;
+ u32 eax = 1;
init_vmcb(svm);
@@ -1197,8 +1204,9 @@ static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
}
- vcpu->arch.regs_avail = ~0;
- vcpu->arch.regs_dirty = ~0;
+
+ kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
+ kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
return 0;
}
@@ -1254,11 +1262,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
svm->asid_generation = 0;
init_vmcb(svm);
- kvm_write_tsc(&svm->vcpu, 0);
-
- err = fx_init(&svm->vcpu);
- if (err)
- goto free_page4;
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&svm->vcpu))
@@ -1268,8 +1271,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
return &svm->vcpu;
-free_page4:
- __free_page(hsave_page);
free_page3:
__free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
free_page2:
@@ -3008,11 +3009,11 @@ static int cr8_write_interception(struct vcpu_svm *svm)
return 0;
}
-u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
+u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
return vmcb->control.tsc_offset +
- svm_scale_tsc(vcpu, native_read_tsc());
+ svm_scale_tsc(vcpu, host_tsc);
}
static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
@@ -3131,13 +3132,15 @@ static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
-static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
+static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ u32 ecx = msr->index;
+ u64 data = msr->data;
switch (ecx) {
case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, data);
+ kvm_write_tsc(vcpu, msr);
break;
case MSR_STAR:
svm->vmcb->save.star = data;
@@ -3192,20 +3195,24 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
default:
- return kvm_set_msr_common(vcpu, ecx, data);
+ return kvm_set_msr_common(vcpu, msr);
}
return 0;
}
static int wrmsr_interception(struct vcpu_svm *svm)
{
+ struct msr_data msr;
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+ msr.data = data;
+ msr.index = ecx;
+ msr.host_initiated = false;
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
- if (svm_set_msr(&svm->vcpu, ecx, data)) {
+ if (svm_set_msr(&svm->vcpu, &msr)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
} else {
@@ -4302,6 +4309,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.has_wbinvd_exit = svm_has_wbinvd_exit,
.set_tsc_khz = svm_set_tsc_khz,
+ .read_tsc_offset = svm_read_tsc_offset,
.write_tsc_offset = svm_write_tsc_offset,
.adjust_tsc_offset = svm_adjust_tsc_offset,
.compute_tsc_offset = svm_compute_tsc_offset,
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index bca63f04dcc..fe5e00ed703 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -4,6 +4,7 @@
#include <linux/tracepoint.h>
#include <asm/vmx.h>
#include <asm/svm.h>
+#include <asm/clocksource.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm
@@ -754,6 +755,68 @@ TRACE_EVENT(
__entry->write ? "Write" : "Read",
__entry->gpa_match ? "GPA" : "GVA")
);
+
+#ifdef CONFIG_X86_64
+
+#define host_clocks \
+ {VCLOCK_NONE, "none"}, \
+ {VCLOCK_TSC, "tsc"}, \
+ {VCLOCK_HPET, "hpet"} \
+
+TRACE_EVENT(kvm_update_master_clock,
+ TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched),
+ TP_ARGS(use_master_clock, host_clock, offset_matched),
+
+ TP_STRUCT__entry(
+ __field( bool, use_master_clock )
+ __field( unsigned int, host_clock )
+ __field( bool, offset_matched )
+ ),
+
+ TP_fast_assign(
+ __entry->use_master_clock = use_master_clock;
+ __entry->host_clock = host_clock;
+ __entry->offset_matched = offset_matched;
+ ),
+
+ TP_printk("masterclock %d hostclock %s offsetmatched %u",
+ __entry->use_master_clock,
+ __print_symbolic(__entry->host_clock, host_clocks),
+ __entry->offset_matched)
+);
+
+TRACE_EVENT(kvm_track_tsc,
+ TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched,
+ unsigned int online_vcpus, bool use_master_clock,
+ unsigned int host_clock),
+ TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock,
+ host_clock),
+
+ TP_STRUCT__entry(
+ __field( unsigned int, vcpu_id )
+ __field( unsigned int, nr_vcpus_matched_tsc )
+ __field( unsigned int, online_vcpus )
+ __field( bool, use_master_clock )
+ __field( unsigned int, host_clock )
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu_id;
+ __entry->nr_vcpus_matched_tsc = nr_matched;
+ __entry->online_vcpus = online_vcpus;
+ __entry->use_master_clock = use_master_clock;
+ __entry->host_clock = host_clock;
+ ),
+
+ TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u"
+ " hostclock %s",
+ __entry->vcpu_id, __entry->use_master_clock,
+ __entry->nr_vcpus_matched_tsc, __entry->online_vcpus,
+ __print_symbolic(__entry->host_clock, host_clocks))
+);
+
+#endif /* CONFIG_X86_64 */
+
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f85815945fc..9120ae1901e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -42,6 +42,7 @@
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/perf_event.h>
+#include <asm/kexec.h>
#include "trace.h"
@@ -802,11 +803,6 @@ static inline bool cpu_has_vmx_ept_ad_bits(void)
return vmx_capability.ept & VMX_EPT_AD_BIT;
}
-static inline bool cpu_has_vmx_invept_individual_addr(void)
-{
- return vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT;
-}
-
static inline bool cpu_has_vmx_invept_context(void)
{
return vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT;
@@ -992,6 +988,46 @@ static void vmcs_load(struct vmcs *vmcs)
vmcs, phys_addr);
}
+#ifdef CONFIG_KEXEC
+/*
+ * This bitmap is used to indicate whether the vmclear
+ * operation is enabled on all cpus. All disabled by
+ * default.
+ */
+static cpumask_t crash_vmclear_enabled_bitmap = CPU_MASK_NONE;
+
+static inline void crash_enable_local_vmclear(int cpu)
+{
+ cpumask_set_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline void crash_disable_local_vmclear(int cpu)
+{
+ cpumask_clear_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static inline int crash_local_vmclear_enabled(int cpu)
+{
+ return cpumask_test_cpu(cpu, &crash_vmclear_enabled_bitmap);
+}
+
+static void crash_vmclear_local_loaded_vmcss(void)
+{
+ int cpu = raw_smp_processor_id();
+ struct loaded_vmcs *v;
+
+ if (!crash_local_vmclear_enabled(cpu))
+ return;
+
+ list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
+ loaded_vmcss_on_cpu_link)
+ vmcs_clear(v->vmcs);
+}
+#else
+static inline void crash_enable_local_vmclear(int cpu) { }
+static inline void crash_disable_local_vmclear(int cpu) { }
+#endif /* CONFIG_KEXEC */
+
static void __loaded_vmcs_clear(void *arg)
{
struct loaded_vmcs *loaded_vmcs = arg;
@@ -1001,15 +1037,28 @@ static void __loaded_vmcs_clear(void *arg)
return; /* vcpu migration can race with cpu offline */
if (per_cpu(current_vmcs, cpu) == loaded_vmcs->vmcs)
per_cpu(current_vmcs, cpu) = NULL;
+ crash_disable_local_vmclear(cpu);
list_del(&loaded_vmcs->loaded_vmcss_on_cpu_link);
+
+ /*
+ * we should ensure updating loaded_vmcs->loaded_vmcss_on_cpu_link
+ * is before setting loaded_vmcs->vcpu to -1 which is done in
+ * loaded_vmcs_init. Otherwise, other cpu can see vcpu = -1 fist
+ * then adds the vmcs into percpu list before it is deleted.
+ */
+ smp_wmb();
+
loaded_vmcs_init(loaded_vmcs);
+ crash_enable_local_vmclear(cpu);
}
static void loaded_vmcs_clear(struct loaded_vmcs *loaded_vmcs)
{
- if (loaded_vmcs->cpu != -1)
- smp_call_function_single(
- loaded_vmcs->cpu, __loaded_vmcs_clear, loaded_vmcs, 1);
+ int cpu = loaded_vmcs->cpu;
+
+ if (cpu != -1)
+ smp_call_function_single(cpu,
+ __loaded_vmcs_clear, loaded_vmcs, 1);
}
static inline void vpid_sync_vcpu_single(struct vcpu_vmx *vmx)
@@ -1051,17 +1100,6 @@ static inline void ept_sync_context(u64 eptp)
}
}
-static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
-{
- if (enable_ept) {
- if (cpu_has_vmx_invept_individual_addr())
- __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
- eptp, gpa);
- else
- ept_sync_context(eptp);
- }
-}
-
static __always_inline unsigned long vmcs_readl(unsigned long field)
{
unsigned long value;
@@ -1535,8 +1573,18 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
local_irq_disable();
+ crash_disable_local_vmclear(cpu);
+
+ /*
+ * Read loaded_vmcs->cpu should be before fetching
+ * loaded_vmcs->loaded_vmcss_on_cpu_link.
+ * See the comments in __loaded_vmcs_clear().
+ */
+ smp_rmb();
+
list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
&per_cpu(loaded_vmcss_on_cpu, cpu));
+ crash_enable_local_vmclear(cpu);
local_irq_enable();
/*
@@ -1839,11 +1887,10 @@ static u64 guest_read_tsc(void)
* Like guest_read_tsc, but always returns L1's notion of the timestamp
* counter, even if a nested guest (L2) is currently running.
*/
-u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu)
+u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc)
{
- u64 host_tsc, tsc_offset;
+ u64 tsc_offset;
- rdtscll(host_tsc);
tsc_offset = is_guest_mode(vcpu) ?
to_vmx(vcpu)->nested.vmcs01_tsc_offset :
vmcs_read64(TSC_OFFSET);
@@ -1866,6 +1913,11 @@ static void vmx_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz, bool scale)
WARN(1, "user requested TSC rate below hardware speed\n");
}
+static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
+{
+ return vmcs_read64(TSC_OFFSET);
+}
+
/*
* writes 'offset' into guest's timestamp counter offset register
*/
@@ -2202,15 +2254,17 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr;
int ret = 0;
+ u32 msr_index = msr_info->index;
+ u64 data = msr_info->data;
switch (msr_index) {
case MSR_EFER:
- ret = kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_info);
break;
#ifdef CONFIG_X86_64
case MSR_FS_BASE:
@@ -2236,7 +2290,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_TSC:
- kvm_write_tsc(vcpu, data);
+ kvm_write_tsc(vcpu, msr_info);
break;
case MSR_IA32_CR_PAT:
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
@@ -2244,7 +2298,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vcpu->arch.pat = data;
break;
}
- ret = kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_info);
+ break;
+ case MSR_IA32_TSC_ADJUST:
+ ret = kvm_set_msr_common(vcpu, msr_info);
break;
case MSR_TSC_AUX:
if (!vmx->rdtscp_enabled)
@@ -2267,7 +2324,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
}
break;
}
- ret = kvm_set_msr_common(vcpu, msr_index, data);
+ ret = kvm_set_msr_common(vcpu, msr_info);
}
return ret;
@@ -2341,6 +2398,18 @@ static int hardware_enable(void *garbage)
return -EBUSY;
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
+
+ /*
+ * Now we can enable the vmclear operation in kdump
+ * since the loaded_vmcss_on_cpu list on this cpu
+ * has been initialized.
+ *
+ * Though the cpu is not in VMX operation now, there
+ * is no problem to enable the vmclear operation
+ * for the loaded_vmcss_on_cpu list is empty!
+ */
+ crash_enable_local_vmclear(cpu);
+
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
test_bits = FEATURE_CONTROL_LOCKED;
@@ -2697,6 +2766,7 @@ static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment
if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
tmp.base = vmcs_readl(sf->base);
tmp.selector = vmcs_read16(sf->selector);
+ tmp.dpl = tmp.selector & SELECTOR_RPL_MASK;
tmp.s = 1;
}
vmx_set_segment(vcpu, &tmp, seg);
@@ -3246,7 +3316,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
* unrestricted guest like Westmere to older host that don't have
* unrestricted guest like Nehelem.
*/
- if (!enable_unrestricted_guest && vmx->rmode.vm86_active) {
+ if (vmx->rmode.vm86_active) {
switch (seg) {
case VCPU_SREG_CS:
vmcs_write32(GUEST_CS_AR_BYTES, 0xf3);
@@ -3897,8 +3967,6 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
set_cr4_guest_host_mask(vmx);
- kvm_write_tsc(&vmx->vcpu, 0);
-
return 0;
}
@@ -3908,8 +3976,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
u64 msr;
int ret;
- vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
-
vmx->rmode.vm86_active = 0;
vmx->soft_vnmi_blocked = 0;
@@ -3921,10 +3987,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
msr |= MSR_IA32_APICBASE_BSP;
kvm_set_apic_base(&vmx->vcpu, msr);
- ret = fx_init(&vmx->vcpu);
- if (ret != 0)
- goto out;
-
vmx_segment_cache_clear(vmx);
seg_setup(VCPU_SREG_CS);
@@ -3965,7 +4027,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_rip_write(vcpu, 0xfff0);
else
kvm_rip_write(vcpu, 0);
- kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
vmcs_writel(GUEST_GDTR_BASE, 0);
vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
@@ -4015,7 +4076,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
/* HACK: Don't enable emulation on guest boot/reset */
vmx->emulation_required = 0;
-out:
return ret;
}
@@ -4287,16 +4347,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
- if ((vect_info & VECTORING_INFO_VALID_MASK) &&
- !is_page_fault(intr_info)) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
- vcpu->run->internal.ndata = 2;
- vcpu->run->internal.data[0] = vect_info;
- vcpu->run->internal.data[1] = intr_info;
- return 0;
- }
-
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
return 1; /* already handled by vmx_vcpu_run() */
@@ -4315,6 +4365,22 @@ static int handle_exception(struct kvm_vcpu *vcpu)
error_code = 0;
if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+
+ /*
+ * The #PF with PFEC.RSVD = 1 indicates the guest is accessing
+ * MMIO, it is better to report an internal error.
+ * See the comments in vmx_handle_exit.
+ */
+ if ((vect_info & VECTORING_INFO_VALID_MASK) &&
+ !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vect_info;
+ vcpu->run->internal.data[1] = intr_info;
+ return 0;
+ }
+
if (is_page_fault(intr_info)) {
/* EPT won't cause page fault directly */
BUG_ON(enable_ept);
@@ -4626,11 +4692,15 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
static int handle_wrmsr(struct kvm_vcpu *vcpu)
{
+ struct msr_data msr;
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
- if (vmx_set_msr(vcpu, ecx, data) != 0) {
+ msr.data = data;
+ msr.index = ecx;
+ msr.host_initiated = false;
+ if (vmx_set_msr(vcpu, &msr) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
@@ -4827,11 +4897,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- if (exit_qualification & (1 << 6)) {
- printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
- return -EINVAL;
- }
-
gla_validity = (exit_qualification >> 7) & 0x3;
if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
@@ -5979,13 +6044,24 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
+ /*
+ * Note:
+ * Do not try to fix EXIT_REASON_EPT_MISCONFIG if it caused by
+ * delivery event since it indicates guest is accessing MMIO.
+ * The vm-exit can be triggered again after return to guest that
+ * will cause infinite loop.
+ */
if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
exit_reason != EXIT_REASON_EPT_VIOLATION &&
- exit_reason != EXIT_REASON_TASK_SWITCH))
- printk(KERN_WARNING "%s: unexpected, valid vectoring info "
- "(0x%x) and exit reason is 0x%x\n",
- __func__, vectoring_info, exit_reason);
+ exit_reason != EXIT_REASON_TASK_SWITCH)) {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
+ vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.data[0] = vectoring_info;
+ vcpu->run->internal.data[1] = exit_reason;
+ return 0;
+ }
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
!(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
@@ -7309,6 +7385,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
.set_tsc_khz = vmx_set_tsc_khz,
+ .read_tsc_offset = vmx_read_tsc_offset,
.write_tsc_offset = vmx_write_tsc_offset,
.adjust_tsc_offset = vmx_adjust_tsc_offset,
.compute_tsc_offset = vmx_compute_tsc_offset,
@@ -7367,6 +7444,11 @@ static int __init vmx_init(void)
if (r)
goto out3;
+#ifdef CONFIG_KEXEC
+ rcu_assign_pointer(crash_vmclear_loaded_vmcss,
+ crash_vmclear_local_loaded_vmcss);
+#endif
+
vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
@@ -7404,6 +7486,11 @@ static void __exit vmx_exit(void)
free_page((unsigned long)vmx_io_bitmap_b);
free_page((unsigned long)vmx_io_bitmap_a);
+#ifdef CONFIG_KEXEC
+ rcu_assign_pointer(crash_vmclear_loaded_vmcss, NULL);
+ synchronize_rcu();
+#endif
+
kvm_exit();
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4f7641756be..c243b81e3c7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -46,6 +46,8 @@
#include <linux/uaccess.h>
#include <linux/hash.h>
#include <linux/pci.h>
+#include <linux/timekeeper_internal.h>
+#include <linux/pvclock_gtod.h>
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
@@ -118,7 +120,7 @@ struct kvm_shared_msrs {
};
static struct kvm_shared_msrs_global __read_mostly shared_msrs_global;
-static DEFINE_PER_CPU(struct kvm_shared_msrs, shared_msrs);
+static struct kvm_shared_msrs __percpu *shared_msrs;
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "pf_fixed", VCPU_STAT(pf_fixed) },
@@ -158,7 +160,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
u64 __read_mostly host_xcr0;
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt);
+
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu);
static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
{
@@ -187,10 +191,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
static void shared_msr_update(unsigned slot, u32 msr)
{
- struct kvm_shared_msrs *smsr;
u64 value;
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
- smsr = &__get_cpu_var(shared_msrs);
/* only read, and nobody should modify it at this time,
* so don't need lock */
if (slot >= shared_msrs_global.nr) {
@@ -222,7 +226,8 @@ static void kvm_shared_msr_cpu_online(void)
void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask)
{
- struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
if (((value ^ smsr->values[slot].curr) & mask) == 0)
return;
@@ -238,7 +243,8 @@ EXPORT_SYMBOL_GPL(kvm_set_shared_msr);
static void drop_user_return_notifiers(void *ignore)
{
- struct kvm_shared_msrs *smsr = &__get_cpu_var(shared_msrs);
+ unsigned int cpu = smp_processor_id();
+ struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu);
if (smsr->registered)
kvm_on_user_return(&smsr->urn);
@@ -633,7 +639,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
if (is_long_mode(vcpu)) {
- if (kvm_read_cr4(vcpu) & X86_CR4_PCIDE) {
+ if (kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE)) {
if (cr3 & CR3_PCID_ENABLED_RESERVED_BITS)
return 1;
} else
@@ -827,6 +833,7 @@ static u32 msrs_to_save[] = {
static unsigned num_msrs_to_save;
static const u32 emulated_msrs[] = {
+ MSR_IA32_TSC_ADJUST,
MSR_IA32_TSCDEADLINE,
MSR_IA32_MISC_ENABLE,
MSR_IA32_MCG_STATUS,
@@ -886,9 +893,9 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
* Returns 0 on success, non-0 otherwise.
* Assumes vcpu_load() was already called.
*/
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+int kvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
- return kvm_x86_ops->set_msr(vcpu, msr_index, data);
+ return kvm_x86_ops->set_msr(vcpu, msr);
}
/*
@@ -896,9 +903,63 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
*/
static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
{
- return kvm_set_msr(vcpu, index, *data);
+ struct msr_data msr;
+
+ msr.data = *data;
+ msr.index = index;
+ msr.host_initiated = true;
+ return kvm_set_msr(vcpu, &msr);
}
+#ifdef CONFIG_X86_64
+struct pvclock_gtod_data {
+ seqcount_t seq;
+
+ struct { /* extract of a clocksource struct */
+ int vclock_mode;
+ cycle_t cycle_last;
+ cycle_t mask;
+ u32 mult;
+ u32 shift;
+ } clock;
+
+ /* open coded 'struct timespec' */
+ u64 monotonic_time_snsec;
+ time_t monotonic_time_sec;
+};
+
+static struct pvclock_gtod_data pvclock_gtod_data;
+
+static void update_pvclock_gtod(struct timekeeper *tk)
+{
+ struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
+
+ write_seqcount_begin(&vdata->seq);
+
+ /* copy pvclock gtod data */
+ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->clock->cycle_last;
+ vdata->clock.mask = tk->clock->mask;
+ vdata->clock.mult = tk->mult;
+ vdata->clock.shift = tk->shift;
+
+ vdata->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_snsec = tk->xtime_nsec
+ + (tk->wall_to_monotonic.tv_nsec
+ << tk->shift);
+ while (vdata->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->shift)) {
+ vdata->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->shift;
+ vdata->monotonic_time_sec++;
+ }
+
+ write_seqcount_end(&vdata->seq);
+}
+#endif
+
+
static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
{
int version;
@@ -995,6 +1056,10 @@ static inline u64 get_kernel_ns(void)
return timespec_to_ns(&ts);
}
+#ifdef CONFIG_X86_64
+static atomic_t kvm_guest_has_master_clock = ATOMIC_INIT(0);
+#endif
+
static DEFINE_PER_CPU(unsigned long, cpu_tsc_khz);
unsigned long max_tsc_khz;
@@ -1046,12 +1111,47 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
return tsc;
}
-void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
+void kvm_track_tsc_matching(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_X86_64
+ bool vcpus_matched;
+ bool do_request = false;
+ struct kvm_arch *ka = &vcpu->kvm->arch;
+ struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+ vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+ atomic_read(&vcpu->kvm->online_vcpus));
+
+ if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
+ if (!ka->use_master_clock)
+ do_request = 1;
+
+ if (!vcpus_matched && ka->use_master_clock)
+ do_request = 1;
+
+ if (do_request)
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+
+ trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
+ atomic_read(&vcpu->kvm->online_vcpus),
+ ka->use_master_clock, gtod->clock.vclock_mode);
+#endif
+}
+
+static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset)
+{
+ u64 curr_offset = kvm_x86_ops->read_tsc_offset(vcpu);
+ vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset;
+}
+
+void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
{
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
s64 usdiff;
+ bool matched;
+ u64 data = msr->data;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
@@ -1094,6 +1194,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
offset = kvm_x86_ops->compute_tsc_offset(vcpu, data);
pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
}
+ matched = true;
} else {
/*
* We split periods of matched TSC writes into generations.
@@ -1108,6 +1209,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
kvm->arch.cur_tsc_nsec = ns;
kvm->arch.cur_tsc_write = data;
kvm->arch.cur_tsc_offset = offset;
+ matched = false;
pr_debug("kvm: new tsc generation %u, clock %llu\n",
kvm->arch.cur_tsc_generation, data);
}
@@ -1129,26 +1231,195 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
vcpu->arch.this_tsc_nsec = kvm->arch.cur_tsc_nsec;
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
+ if (guest_cpuid_has_tsc_adjust(vcpu) && !msr->host_initiated)
+ update_ia32_tsc_adjust_msr(vcpu, offset);
kvm_x86_ops->write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
+
+ spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+ if (matched)
+ kvm->arch.nr_vcpus_matched_tsc++;
+ else
+ kvm->arch.nr_vcpus_matched_tsc = 0;
+
+ kvm_track_tsc_matching(vcpu);
+ spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
}
EXPORT_SYMBOL_GPL(kvm_write_tsc);
+#ifdef CONFIG_X86_64
+
+static cycle_t read_tsc(void)
+{
+ cycle_t ret;
+ u64 last;
+
+ /*
+ * Empirically, a fence (of type that depends on the CPU)
+ * before rdtsc is enough to ensure that rdtsc is ordered
+ * with respect to loads. The various CPU manuals are unclear
+ * as to whether rdtsc can be reordered with later loads,
+ * but no one has ever seen it happen.
+ */
+ rdtsc_barrier();
+ ret = (cycle_t)vget_cycles();
+
+ last = pvclock_gtod_data.clock.cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ /*
+ * GCC likes to generate cmov here, but this branch is extremely
+ * predictable (it's just a funciton of time and the likely is
+ * very likely) and there's a data dependence, so force GCC
+ * to generate a branch instead. I don't barrier() because
+ * we don't actually need a barrier, and if this function
+ * ever gets inlined it will generate worse code.
+ */
+ asm volatile ("");
+ return last;
+}
+
+static inline u64 vgettsc(cycle_t *cycle_now)
+{
+ long v;
+ struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+ *cycle_now = read_tsc();
+
+ v = (*cycle_now - gtod->clock.cycle_last) & gtod->clock.mask;
+ return v * gtod->clock.mult;
+}
+
+static int do_monotonic(struct timespec *ts, cycle_t *cycle_now)
+{
+ unsigned long seq;
+ u64 ns;
+ int mode;
+ struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+
+ ts->tv_nsec = 0;
+ do {
+ seq = read_seqcount_begin(&gtod->seq);
+ mode = gtod->clock.vclock_mode;
+ ts->tv_sec = gtod->monotonic_time_sec;
+ ns = gtod->monotonic_time_snsec;
+ ns += vgettsc(cycle_now);
+ ns >>= gtod->clock.shift;
+ } while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
+ timespec_add_ns(ts, ns);
+
+ return mode;
+}
+
+/* returns true if host is using tsc clocksource */
+static bool kvm_get_time_and_clockread(s64 *kernel_ns, cycle_t *cycle_now)
+{
+ struct timespec ts;
+
+ /* checked again under seqlock below */
+ if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
+ return false;
+
+ if (do_monotonic(&ts, cycle_now) != VCLOCK_TSC)
+ return false;
+
+ monotonic_to_bootbased(&ts);
+ *kernel_ns = timespec_to_ns(&ts);
+
+ return true;
+}
+#endif
+
+/*
+ *
+ * Assuming a stable TSC across physical CPUS, and a stable TSC
+ * across virtual CPUs, the following condition is possible.
+ * Each numbered line represents an event visible to both
+ * CPUs at the next numbered event.
+ *
+ * "timespecX" represents host monotonic time. "tscX" represents
+ * RDTSC value.
+ *
+ * VCPU0 on CPU0 | VCPU1 on CPU1
+ *
+ * 1. read timespec0,tsc0
+ * 2. | timespec1 = timespec0 + N
+ * | tsc1 = tsc0 + M
+ * 3. transition to guest | transition to guest
+ * 4. ret0 = timespec0 + (rdtsc - tsc0) |
+ * 5. | ret1 = timespec1 + (rdtsc - tsc1)
+ * | ret1 = timespec0 + N + (rdtsc - (tsc0 + M))
+ *
+ * Since ret0 update is visible to VCPU1 at time 5, to obey monotonicity:
+ *
+ * - ret0 < ret1
+ * - timespec0 + (rdtsc - tsc0) < timespec0 + N + (rdtsc - (tsc0 + M))
+ * ...
+ * - 0 < N - M => M < N
+ *
+ * That is, when timespec0 != timespec1, M < N. Unfortunately that is not
+ * always the case (the difference between two distinct xtime instances
+ * might be smaller then the difference between corresponding TSC reads,
+ * when updating guest vcpus pvclock areas).
+ *
+ * To avoid that problem, do not allow visibility of distinct
+ * system_timestamp/tsc_timestamp values simultaneously: use a master
+ * copy of host monotonic time values. Update that master copy
+ * in lockstep.
+ *
+ * Rely on synchronization of host TSCs and guest TSCs for monotonicity.
+ *
+ */
+
+static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+ struct kvm_arch *ka = &kvm->arch;
+ int vclock_mode;
+ bool host_tsc_clocksource, vcpus_matched;
+
+ vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
+ atomic_read(&kvm->online_vcpus));
+
+ /*
+ * If the host uses TSC clock, then passthrough TSC as stable
+ * to the guest.
+ */
+ host_tsc_clocksource = kvm_get_time_and_clockread(
+ &ka->master_kernel_ns,
+ &ka->master_cycle_now);
+
+ ka->use_master_clock = host_tsc_clocksource & vcpus_matched;
+
+ if (ka->use_master_clock)
+ atomic_set(&kvm_guest_has_master_clock, 1);
+
+ vclock_mode = pvclock_gtod_data.clock.vclock_mode;
+ trace_kvm_update_master_clock(ka->use_master_clock, vclock_mode,
+ vcpus_matched);
+#endif
+}
+
static int kvm_guest_time_update(struct kvm_vcpu *v)
{
- unsigned long flags;
+ unsigned long flags, this_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch;
+ struct kvm_arch *ka = &v->kvm->arch;
void *shared_kaddr;
- unsigned long this_tsc_khz;
s64 kernel_ns, max_kernel_ns;
- u64 tsc_timestamp;
+ u64 tsc_timestamp, host_tsc;
+ struct pvclock_vcpu_time_info *guest_hv_clock;
u8 pvclock_flags;
+ bool use_master_clock;
+
+ kernel_ns = 0;
+ host_tsc = 0;
/* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags);
- tsc_timestamp = kvm_x86_ops->read_l1_tsc(v);
- kernel_ns = get_kernel_ns();
this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
if (unlikely(this_tsc_khz == 0)) {
local_irq_restore(flags);
@@ -1157,6 +1428,24 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
}
/*
+ * If the host uses TSC clock, then passthrough TSC as stable
+ * to the guest.
+ */
+ spin_lock(&ka->pvclock_gtod_sync_lock);
+ use_master_clock = ka->use_master_clock;
+ if (use_master_clock) {
+ host_tsc = ka->master_cycle_now;
+ kernel_ns = ka->master_kernel_ns;
+ }
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+ if (!use_master_clock) {
+ host_tsc = native_read_tsc();
+ kernel_ns = get_kernel_ns();
+ }
+
+ tsc_timestamp = kvm_x86_ops->read_l1_tsc(v, host_tsc);
+
+ /*
* We may have to catch up the TSC to match elapsed wall clock
* time for two reasons, even if kvmclock is used.
* 1) CPU could have been running below the maximum TSC rate
@@ -1217,23 +1506,20 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hw_tsc_khz = this_tsc_khz;
}
- if (max_kernel_ns > kernel_ns)
- kernel_ns = max_kernel_ns;
-
+ /* with a master <monotonic time, tsc value> tuple,
+ * pvclock clock reads always increase at the (scaled) rate
+ * of guest TSC - no need to deal with sampling errors.
+ */
+ if (!use_master_clock) {
+ if (max_kernel_ns > kernel_ns)
+ kernel_ns = max_kernel_ns;
+ }
/* With all the info we got, fill in the values */
vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
vcpu->last_kernel_ns = kernel_ns;
vcpu->last_guest_tsc = tsc_timestamp;
- pvclock_flags = 0;
- if (vcpu->pvclock_set_guest_stopped_request) {
- pvclock_flags |= PVCLOCK_GUEST_STOPPED;
- vcpu->pvclock_set_guest_stopped_request = false;
- }
-
- vcpu->hv_clock.flags = pvclock_flags;
-
/*
* The interface expects us to write an even number signaling that the
* update is finished. Since the guest won't see the intermediate
@@ -1243,6 +1529,22 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
shared_kaddr = kmap_atomic(vcpu->time_page);
+ guest_hv_clock = shared_kaddr + vcpu->time_offset;
+
+ /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
+ pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+
+ if (vcpu->pvclock_set_guest_stopped_request) {
+ pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+ vcpu->pvclock_set_guest_stopped_request = false;
+ }
+
+ /* If the host uses TSC clocksource, then it is stable */
+ if (use_master_clock)
+ pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
+
+ vcpu->hv_clock.flags = pvclock_flags;
+
memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
sizeof(vcpu->hv_clock));
@@ -1572,9 +1874,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
&vcpu->arch.st.steal, sizeof(struct kvm_steal_time));
}
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
+int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
bool pr = false;
+ u32 msr = msr_info->index;
+ u64 data = msr_info->data;
switch (msr) {
case MSR_EFER:
@@ -1625,6 +1929,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
case MSR_IA32_TSCDEADLINE:
kvm_set_lapic_tscdeadline_msr(vcpu, data);
break;
+ case MSR_IA32_TSC_ADJUST:
+ if (guest_cpuid_has_tsc_adjust(vcpu)) {
+ if (!msr_info->host_initiated) {
+ u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+ kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
+ }
+ vcpu->arch.ia32_tsc_adjust_msr = data;
+ }
+ break;
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
break;
@@ -1984,6 +2297,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
case MSR_IA32_TSCDEADLINE:
data = kvm_get_lapic_tscdeadline_msr(vcpu);
break;
+ case MSR_IA32_TSC_ADJUST:
+ data = (u64)vcpu->arch.ia32_tsc_adjust_msr;
+ break;
case MSR_IA32_MISC_ENABLE:
data = vcpu->arch.ia32_misc_enable_msr;
break;
@@ -2342,7 +2658,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_x86_ops->write_tsc_offset(vcpu, offset);
vcpu->arch.tsc_catchup = 1;
}
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
+ /*
+ * On a host with synchronized TSC, there is no need to update
+ * kvmclock on vcpu->cpu migration
+ */
+ if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != cpu)
kvm_migrate_timers(vcpu);
vcpu->cpu = cpu;
@@ -2691,15 +3012,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!vcpu->arch.apic)
goto out;
u.lapic = memdup_user(argp, sizeof(*u.lapic));
- if (IS_ERR(u.lapic)) {
- r = PTR_ERR(u.lapic);
- goto out;
- }
+ if (IS_ERR(u.lapic))
+ return PTR_ERR(u.lapic);
r = kvm_vcpu_ioctl_set_lapic(vcpu, u.lapic);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_INTERRUPT: {
@@ -2709,16 +3025,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&irq, argp, sizeof irq))
goto out;
r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_NMI: {
r = kvm_vcpu_ioctl_nmi(vcpu);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_SET_CPUID: {
@@ -2729,8 +3039,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (copy_from_user(&cpuid, cpuid_arg, sizeof cpuid))
goto out;
r = kvm_vcpu_ioctl_set_cpuid(vcpu, &cpuid, cpuid_arg->entries);
- if (r)
- goto out;
break;
}
case KVM_SET_CPUID2: {
@@ -2742,8 +3050,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
goto out;
r = kvm_vcpu_ioctl_set_cpuid2(vcpu, &cpuid,
cpuid_arg->entries);
- if (r)
- goto out;
break;
}
case KVM_GET_CPUID2: {
@@ -2875,10 +3181,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XSAVE: {
u.xsave = memdup_user(argp, sizeof(*u.xsave));
- if (IS_ERR(u.xsave)) {
- r = PTR_ERR(u.xsave);
- goto out;
- }
+ if (IS_ERR(u.xsave))
+ return PTR_ERR(u.xsave);
r = kvm_vcpu_ioctl_x86_set_xsave(vcpu, u.xsave);
break;
@@ -2900,10 +3204,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
case KVM_SET_XCRS: {
u.xcrs = memdup_user(argp, sizeof(*u.xcrs));
- if (IS_ERR(u.xcrs)) {
- r = PTR_ERR(u.xcrs);
- goto out;
- }
+ if (IS_ERR(u.xcrs))
+ return PTR_ERR(u.xcrs);
r = kvm_vcpu_ioctl_x86_set_xcrs(vcpu, u.xcrs);
break;
@@ -2951,7 +3253,7 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
int ret;
if (addr > (unsigned int)(-3 * PAGE_SIZE))
- return -1;
+ return -EINVAL;
ret = kvm_x86_ops->set_tss_addr(kvm, addr);
return ret;
}
@@ -3212,8 +3514,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_SET_TSS_ADDR:
r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
- if (r < 0)
- goto out;
break;
case KVM_SET_IDENTITY_MAP_ADDR: {
u64 ident_addr;
@@ -3222,14 +3522,10 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&ident_addr, argp, sizeof ident_addr))
goto out;
r = kvm_vm_ioctl_set_identity_map_addr(kvm, ident_addr);
- if (r < 0)
- goto out;
break;
}
case KVM_SET_NR_MMU_PAGES:
r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
- if (r)
- goto out;
break;
case KVM_GET_NR_MMU_PAGES:
r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
@@ -3320,8 +3616,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
get_irqchip_out:
kfree(chip);
- if (r)
- goto out;
break;
}
case KVM_SET_IRQCHIP: {
@@ -3343,8 +3637,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
set_irqchip_out:
kfree(chip);
- if (r)
- goto out;
break;
}
case KVM_GET_PIT: {
@@ -3371,9 +3663,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (!kvm->arch.vpit)
goto out;
r = kvm_vm_ioctl_set_pit(kvm, &u.ps);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_GET_PIT2: {
@@ -3397,9 +3686,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (!kvm->arch.vpit)
goto out;
r = kvm_vm_ioctl_set_pit2(kvm, &u.ps2);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_REINJECT_CONTROL: {
@@ -3408,9 +3694,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
if (copy_from_user(&control, argp, sizeof(control)))
goto out;
r = kvm_vm_ioctl_reinject(kvm, &control);
- if (r)
- goto out;
- r = 0;
break;
}
case KVM_XEN_HVM_CONFIG: {
@@ -4273,7 +4556,12 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
u32 msr_index, u64 data)
{
- return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
+ struct msr_data msr;
+
+ msr.data = data;
+ msr.index = msr_index;
+ msr.host_initiated = false;
+ return kvm_set_msr(emul_to_vcpu(ctxt), &msr);
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -4495,7 +4783,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
* instruction -> ...
*/
pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
- if (!is_error_pfn(pfn)) {
+ if (!is_error_noslot_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
return true;
}
@@ -4881,6 +5169,50 @@ static void kvm_set_mmio_spte_mask(void)
kvm_mmu_set_mmio_spte_mask(mask);
}
+#ifdef CONFIG_X86_64
+static void pvclock_gtod_update_fn(struct work_struct *work)
+{
+ struct kvm *kvm;
+
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ raw_spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list)
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ set_bit(KVM_REQ_MASTERCLOCK_UPDATE, &vcpu->requests);
+ atomic_set(&kvm_guest_has_master_clock, 0);
+ raw_spin_unlock(&kvm_lock);
+}
+
+static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
+
+/*
+ * Notification about pvclock gtod data update.
+ */
+static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
+ void *priv)
+{
+ struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
+ struct timekeeper *tk = priv;
+
+ update_pvclock_gtod(tk);
+
+ /* disable master clock if host does not trust, or does not
+ * use, TSC clocksource
+ */
+ if (gtod->clock.vclock_mode != VCLOCK_TSC &&
+ atomic_read(&kvm_guest_has_master_clock) != 0)
+ queue_work(system_long_wq, &pvclock_gtod_work);
+
+ return 0;
+}
+
+static struct notifier_block pvclock_gtod_notifier = {
+ .notifier_call = pvclock_gtod_notify,
+};
+#endif
+
int kvm_arch_init(void *opaque)
{
int r;
@@ -4903,9 +5235,16 @@ int kvm_arch_init(void *opaque)
goto out;
}
+ r = -ENOMEM;
+ shared_msrs = alloc_percpu(struct kvm_shared_msrs);
+ if (!shared_msrs) {
+ printk(KERN_ERR "kvm: failed to allocate percpu kvm_shared_msrs\n");
+ goto out;
+ }
+
r = kvm_mmu_module_init();
if (r)
- goto out;
+ goto out_free_percpu;
kvm_set_mmio_spte_mask();
kvm_init_msr_list();
@@ -4922,8 +5261,14 @@ int kvm_arch_init(void *opaque)
host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
kvm_lapic_init();
+#ifdef CONFIG_X86_64
+ pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
+#endif
+
return 0;
+out_free_percpu:
+ free_percpu(shared_msrs);
out:
return r;
}
@@ -4936,8 +5281,12 @@ void kvm_arch_exit(void)
cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
CPUFREQ_TRANSITION_NOTIFIER);
unregister_hotcpu_notifier(&kvmclock_cpu_notifier_block);
+#ifdef CONFIG_X86_64
+ pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+#endif
kvm_x86_ops = NULL;
kvm_mmu_module_exit();
+ free_percpu(shared_msrs);
}
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -5059,7 +5408,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
-int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
+static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
char instruction[3];
@@ -5235,6 +5584,29 @@ static void process_nmi(struct kvm_vcpu *vcpu)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
+static void kvm_gen_update_masterclock(struct kvm *kvm)
+{
+#ifdef CONFIG_X86_64
+ int i;
+ struct kvm_vcpu *vcpu;
+ struct kvm_arch *ka = &kvm->arch;
+
+ spin_lock(&ka->pvclock_gtod_sync_lock);
+ kvm_make_mclock_inprogress_request(kvm);
+ /* no guest entries from this point */
+ pvclock_update_vm_gtod_copy(kvm);
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ set_bit(KVM_REQ_CLOCK_UPDATE, &vcpu->requests);
+
+ /* guest entries allowed */
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+
+ spin_unlock(&ka->pvclock_gtod_sync_lock);
+#endif
+}
+
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
@@ -5247,6 +5619,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_mmu_unload(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
__kvm_migrate_timers(vcpu);
+ if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
+ kvm_gen_update_masterclock(vcpu->kvm);
if (kvm_check_request(KVM_REQ_CLOCK_UPDATE, vcpu)) {
r = kvm_guest_time_update(vcpu);
if (unlikely(r))
@@ -5362,7 +5736,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (hw_breakpoint_active())
hw_breakpoint_restore();
- vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu);
+ vcpu->arch.last_guest_tsc = kvm_x86_ops->read_l1_tsc(vcpu,
+ native_read_tsc());
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
@@ -5419,7 +5794,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
pr_debug("vcpu %d received sipi with vector # %x\n",
vcpu->vcpu_id, vcpu->arch.sipi_vector);
kvm_lapic_reset(vcpu);
- r = kvm_arch_vcpu_reset(vcpu);
+ r = kvm_vcpu_reset(vcpu);
if (r)
return r;
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -6047,7 +6422,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
if (r)
return r;
- r = kvm_arch_vcpu_reset(vcpu);
+ r = kvm_vcpu_reset(vcpu);
if (r == 0)
r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
@@ -6055,6 +6430,23 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return r;
}
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+ int r;
+ struct msr_data msr;
+
+ r = vcpu_load(vcpu);
+ if (r)
+ return r;
+ msr.data = 0x0;
+ msr.index = MSR_IA32_TSC;
+ msr.host_initiated = true;
+ kvm_write_tsc(vcpu, &msr);
+ vcpu_put(vcpu);
+
+ return r;
+}
+
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
int r;
@@ -6069,7 +6461,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_x86_ops->vcpu_free(vcpu);
}
-int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_reset(struct kvm_vcpu *vcpu)
{
atomic_set(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = 0;
@@ -6092,6 +6484,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_pmu_reset(vcpu);
+ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
+ vcpu->arch.regs_avail = ~0;
+ vcpu->arch.regs_dirty = ~0;
+
return kvm_x86_ops->vcpu_reset(vcpu);
}
@@ -6168,6 +6564,8 @@ int kvm_arch_hardware_enable(void *garbage)
kvm_for_each_vcpu(i, vcpu, kvm) {
vcpu->arch.tsc_offset_adjustment += delta_cyc;
vcpu->arch.last_host_tsc = local_tsc;
+ set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
+ &vcpu->requests);
}
/*
@@ -6258,10 +6656,17 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
goto fail_free_mce_banks;
+ r = fx_init(vcpu);
+ if (r)
+ goto fail_free_wbinvd_dirty_mask;
+
+ vcpu->arch.ia32_tsc_adjust_msr = 0x0;
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
return 0;
+fail_free_wbinvd_dirty_mask:
+ free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks:
kfree(vcpu->arch.mce_banks);
fail_free_lapic:
@@ -6305,6 +6710,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock);
+ spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
+
+ pvclock_update_vm_gtod_copy(kvm);
return 0;
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 2b5219c12ac..e224f7a671b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -112,7 +112,7 @@ void kvm_before_handle_nmi(struct kvm_vcpu *vcpu);
void kvm_after_handle_nmi(struct kvm_vcpu *vcpu);
int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
-void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data);
+void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
int kvm_read_guest_virt(struct x86_emulate_ctxt *ctxt,
gva_t addr, void *val, unsigned int bytes,
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 642d8805bc1..df4176cdbb3 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1412,7 +1412,7 @@ __init void lguest_init(void)
/* We don't have features. We have puppies! Puppies! */
#ifdef CONFIG_X86_MCE
- mce_disabled = 1;
+ mca_cfg.disabled = true;
#endif
#ifdef CONFIG_ACPI
acpi_disabled = 1;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index b00f6785da7..96b2c6697c9 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -32,7 +32,6 @@ ifeq ($(CONFIG_X86_32),y)
lib-y += checksum_32.o
lib-y += strstr_32.o
lib-y += string_32.o
- lib-y += cmpxchg.o
ifneq ($(CONFIG_X86_CMPXCHG64),y)
lib-y += cmpxchg8b_emu.o atomic64_386_32.o
endif
diff --git a/arch/x86/lib/cmpxchg.c b/arch/x86/lib/cmpxchg.c
deleted file mode 100644
index 5d619f6df3e..00000000000
--- a/arch/x86/lib/cmpxchg.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * cmpxchg*() fallbacks for CPU not supporting these instructions
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/module.h>
-
-#ifndef CONFIG_X86_CMPXCHG
-unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
-{
- u8 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u8 *)ptr;
- if (prev == old)
- *(u8 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u8);
-
-unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
-{
- u16 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u16 *)ptr;
- if (prev == old)
- *(u16 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u16);
-
-unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
-{
- u32 prev;
- unsigned long flags;
-
- /* Poor man's cmpxchg for 386. Unsuitable for SMP */
- local_irq_save(flags);
- prev = *(u32 *)ptr;
- if (prev == old)
- *(u32 *)ptr = new;
- local_irq_restore(flags);
- return prev;
-}
-EXPORT_SYMBOL(cmpxchg_386_u32);
-#endif
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 6b34d04d096..176cca67212 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -5,91 +5,89 @@
#include <asm/alternative-asm.h>
ALIGN
-copy_page_c:
+copy_page_rep:
CFI_STARTPROC
- movl $4096/8,%ecx
- rep movsq
+ movl $4096/8, %ecx
+ rep movsq
ret
CFI_ENDPROC
-ENDPROC(copy_page_c)
+ENDPROC(copy_page_rep)
-/* Don't use streaming store because it's better when the target
- ends up in cache. */
-
-/* Could vary the prefetch distance based on SMP/UP */
+/*
+ * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
+ * Could vary the prefetch distance based on SMP/UP.
+*/
ENTRY(copy_page)
CFI_STARTPROC
- subq $2*8,%rsp
+ subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
- movq %rbx,(%rsp)
+ movq %rbx, (%rsp)
CFI_REL_OFFSET rbx, 0
- movq %r12,1*8(%rsp)
+ movq %r12, 1*8(%rsp)
CFI_REL_OFFSET r12, 1*8
- movl $(4096/64)-5,%ecx
+ movl $(4096/64)-5, %ecx
.p2align 4
.Loop64:
- dec %rcx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
+ dec %rcx
+ movq 0x8*0(%rsi), %rax
+ movq 0x8*1(%rsi), %rbx
+ movq 0x8*2(%rsi), %rdx
+ movq 0x8*3(%rsi), %r8
+ movq 0x8*4(%rsi), %r9
+ movq 0x8*5(%rsi), %r10
+ movq 0x8*6(%rsi), %r11
+ movq 0x8*7(%rsi), %r12
prefetcht0 5*64(%rsi)
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
+ movq %rax, 0x8*0(%rdi)
+ movq %rbx, 0x8*1(%rdi)
+ movq %rdx, 0x8*2(%rdi)
+ movq %r8, 0x8*3(%rdi)
+ movq %r9, 0x8*4(%rdi)
+ movq %r10, 0x8*5(%rdi)
+ movq %r11, 0x8*6(%rdi)
+ movq %r12, 0x8*7(%rdi)
- leaq 64 (%rsi), %rsi
- leaq 64 (%rdi), %rdi
+ leaq 64 (%rsi), %rsi
+ leaq 64 (%rdi), %rdi
- jnz .Loop64
+ jnz .Loop64
- movl $5,%ecx
+ movl $5, %ecx
.p2align 4
.Loop2:
- decl %ecx
-
- movq (%rsi), %rax
- movq 8 (%rsi), %rbx
- movq 16 (%rsi), %rdx
- movq 24 (%rsi), %r8
- movq 32 (%rsi), %r9
- movq 40 (%rsi), %r10
- movq 48 (%rsi), %r11
- movq 56 (%rsi), %r12
-
- movq %rax, (%rdi)
- movq %rbx, 8 (%rdi)
- movq %rdx, 16 (%rdi)
- movq %r8, 24 (%rdi)
- movq %r9, 32 (%rdi)
- movq %r10, 40 (%rdi)
- movq %r11, 48 (%rdi)
- movq %r12, 56 (%rdi)
-
- leaq 64(%rdi),%rdi
- leaq 64(%rsi),%rsi
-
+ decl %ecx
+
+ movq 0x8*0(%rsi), %rax
+ movq 0x8*1(%rsi), %rbx
+ movq 0x8*2(%rsi), %rdx
+ movq 0x8*3(%rsi), %r8
+ movq 0x8*4(%rsi), %r9
+ movq 0x8*5(%rsi), %r10
+ movq 0x8*6(%rsi), %r11
+ movq 0x8*7(%rsi), %r12
+
+ movq %rax, 0x8*0(%rdi)
+ movq %rbx, 0x8*1(%rdi)
+ movq %rdx, 0x8*2(%rdi)
+ movq %r8, 0x8*3(%rdi)
+ movq %r9, 0x8*4(%rdi)
+ movq %r10, 0x8*5(%rdi)
+ movq %r11, 0x8*6(%rdi)
+ movq %r12, 0x8*7(%rdi)
+
+ leaq 64(%rdi), %rdi
+ leaq 64(%rsi), %rsi
jnz .Loop2
- movq (%rsp),%rbx
+ movq (%rsp), %rbx
CFI_RESTORE rbx
- movq 1*8(%rsp),%r12
+ movq 1*8(%rsp), %r12
CFI_RESTORE r12
- addq $2*8,%rsp
+ addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
.Lcopy_page_end:
@@ -103,7 +101,7 @@ ENDPROC(copy_page)
.section .altinstr_replacement,"ax"
1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_c - copy_page) - (2f - 1b) /* offset */
+ .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
2:
.previous
.section .altinstructions,"a"
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index e395693abdb..7c3bee636e2 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -98,7 +98,7 @@ void use_tsc_delay(void)
delay_fn = delay_tsc;
}
-int __devinit read_current_timer(unsigned long *timer_val)
+int read_current_timer(unsigned long *timer_val)
{
if (delay_fn == delay_tsc) {
rdtscll(*timer_val);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 98f6d6b68f5..f0312d74640 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -570,63 +570,6 @@ do { \
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
-#ifndef CONFIG_X86_WP_WORKS_OK
- if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
- ((unsigned long)to) < TASK_SIZE) {
- /*
- * When we are in an atomic section (see
- * mm/filemap.c:file_read_actor), return the full
- * length to take the slow path.
- */
- if (in_atomic())
- return n;
-
- /*
- * CPU does not honor the WP bit when writing
- * from supervisory mode, and due to preemption or SMP,
- * the page tables can change at any time.
- * Do it manually. Manfred <manfred@colorfullife.com>
- */
- while (n) {
- unsigned long offset = ((unsigned long)to)%PAGE_SIZE;
- unsigned long len = PAGE_SIZE - offset;
- int retval;
- struct page *pg;
- void *maddr;
-
- if (len > n)
- len = n;
-
-survive:
- down_read(&current->mm->mmap_sem);
- retval = get_user_pages(current, current->mm,
- (unsigned long)to, 1, 1, 0, &pg, NULL);
-
- if (retval == -ENOMEM && is_global_init(current)) {
- up_read(&current->mm->mmap_sem);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto survive;
- }
-
- if (retval != 1) {
- up_read(&current->mm->mmap_sem);
- break;
- }
-
- maddr = kmap_atomic(pg);
- memcpy(maddr + offset, from, len);
- kunmap_atomic(maddr);
- set_page_dirty_lock(pg);
- put_page(pg);
- up_read(&current->mm->mmap_sem);
-
- from += len;
- to += len;
- n -= len;
- }
- return n;
- }
-#endif
stac();
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 8e13ecb41be..fb674fd3fc2 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,7 +18,7 @@
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
#include <asm/fixmap.h> /* VSYSCALL_START */
-#include <asm/rcu.h> /* exception_enter(), ... */
+#include <asm/context_tracking.h> /* exception_enter(), ... */
/*
* Page fault error code bits:
@@ -748,13 +748,15 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
return;
}
#endif
+ /* Kernel addresses are always protection faults: */
+ if (address >= TASK_SIZE)
+ error_code |= PF_PROT;
- if (unlikely(show_unhandled_signals))
+ if (likely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
- /* Kernel addresses are always protection faults: */
tsk->thread.cr2 = address;
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
+ tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
@@ -803,20 +805,6 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
__bad_area(regs, error_code, address, SEGV_ACCERR);
}
-/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
-static void
-out_of_memory(struct pt_regs *regs, unsigned long error_code,
- unsigned long address)
-{
- /*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed):
- */
- up_read(&current->mm->mmap_sem);
-
- pagefault_out_of_memory();
-}
-
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
unsigned int fault)
@@ -879,7 +867,14 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
return 1;
}
- out_of_memory(regs, error_code, address);
+ up_read(&current->mm->mmap_sem);
+
+ /*
+ * We ran out of memory, call the OOM killer, and return the
+ * userspace (which will retry the fault, or kill us if we got
+ * oom-killed):
+ */
+ pagefault_out_of_memory();
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
VM_FAULT_HWPOISON_LARGE))
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 937bff5cdaa..ae1aa71d011 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -274,42 +274,15 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long start_addr;
-
- if (len > mm->cached_hole_size) {
- start_addr = mm->free_area_cache;
- } else {
- start_addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- }
-
-full_search:
- addr = ALIGN(start_addr, huge_page_size(h));
-
- for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
- /* At this point: (!vma || addr < vma->vm_end). */
- if (TASK_SIZE - len < addr) {
- /*
- * Start a new search - just in case we missed
- * some holes.
- */
- if (start_addr != TASK_UNMAPPED_BASE) {
- start_addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
- }
- return -ENOMEM;
- }
- if (!vma || addr + len <= vma->vm_start) {
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (addr + mm->cached_hole_size < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = ALIGN(vma->vm_end, huge_page_size(h));
- }
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ return vm_unmapped_area(&info);
}
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
@@ -317,83 +290,30 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
unsigned long pgoff, unsigned long flags)
{
struct hstate *h = hstate_file(file);
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long base = mm->mmap_base;
- unsigned long addr = addr0;
- unsigned long largest_hole = mm->cached_hole_size;
- unsigned long start_addr;
-
- /* don't allow allocations above current base */
- if (mm->free_area_cache > base)
- mm->free_area_cache = base;
-
- if (len <= largest_hole) {
- largest_hole = 0;
- mm->free_area_cache = base;
- }
-try_again:
- start_addr = mm->free_area_cache;
-
- /* make sure it can fit in the remaining address space */
- if (mm->free_area_cache < len)
- goto fail;
-
- /* either no address requested or can't fit in requested address hole */
- addr = (mm->free_area_cache - len) & huge_page_mask(h);
- do {
- /*
- * Lookup failure means no vma is above this address,
- * i.e. return with success:
- */
- vma = find_vma(mm, addr);
- if (!vma)
- return addr;
+ struct vm_unmapped_area_info info;
+ unsigned long addr;
- if (addr + len <= vma->vm_start) {
- /* remember the address as a hint for next time */
- mm->cached_hole_size = largest_hole;
- return (mm->free_area_cache = addr);
- } else if (mm->free_area_cache == vma->vm_end) {
- /* pull free_area_cache down to the first hole */
- mm->free_area_cache = vma->vm_start;
- mm->cached_hole_size = largest_hole;
- }
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.low_limit = PAGE_SIZE;
+ info.high_limit = current->mm->mmap_base;
+ info.align_mask = PAGE_MASK & ~huge_page_mask(h);
+ info.align_offset = 0;
+ addr = vm_unmapped_area(&info);
- /* remember the largest hole we saw so far */
- if (addr + largest_hole < vma->vm_start)
- largest_hole = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = (vma->vm_start - len) & huge_page_mask(h);
- } while (len <= vma->vm_start);
-
-fail:
- /*
- * if hint left us with no space for the requested
- * mapping then try again:
- */
- if (start_addr != base) {
- mm->free_area_cache = base;
- largest_hole = 0;
- goto try_again;
- }
/*
* A failed mmap() very likely causes application failure,
* so fall back to the bottom-up function here. This scenario
* can happen with large stack limits and large mmap()
* allocations.
*/
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
- addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
- len, pgoff, flags);
-
- /*
- * Restore the topdown base:
- */
- mm->free_area_cache = base;
- mm->cached_hole_size = ~0UL;
+ if (addr & ~PAGE_MASK) {
+ VM_BUG_ON(addr != -ENOMEM);
+ info.flags = 0;
+ info.low_limit = TASK_UNMAPPED_BASE;
+ info.high_limit = TASK_SIZE;
+ addr = vm_unmapped_area(&info);
+ }
return addr;
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 11a58001b4c..745d66b843c 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -715,10 +715,7 @@ static void __init test_wp_bit(void)
if (!boot_cpu_data.wp_works_ok) {
printk(KERN_CONT "No.\n");
-#ifdef CONFIG_X86_WP_WORKS_OK
- panic(
- "This kernel doesn't support CPU's with broken WP. Recompile it for a 386!");
-#endif
+ panic("Linux doesn't support CPUs with broken WP.");
} else {
printk(KERN_CONT "Ok.\n");
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3baff255ada..d6eeead4375 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -605,7 +605,7 @@ kernel_physical_mapping_init(unsigned long start,
}
if (pgd_changed)
- sync_global_pgds(addr, end);
+ sync_global_pgds(addr, end - 1);
__flush_tlb_all();
@@ -630,7 +630,9 @@ void __init paging_init(void)
* numa support is not compiled in, and later node_set_state
* will not set it back.
*/
- node_clear_state(0, N_NORMAL_MEMORY);
+ node_clear_state(0, N_MEMORY);
+ if (N_MEMORY != N_NORMAL_MEMORY)
+ node_clear_state(0, N_NORMAL_MEMORY);
zone_sizes_init();
}
@@ -829,6 +831,9 @@ int kern_addr_valid(unsigned long addr)
if (pud_none(*pud))
return 0;
+ if (pud_large(*pud))
+ return pfn_valid(pud_pfn(*pud));
+
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
return 0;
@@ -979,7 +984,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
}
}
- sync_global_pgds((unsigned long)start_page, end);
+ sync_global_pgds((unsigned long)start_page, end - 1);
return 0;
}
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8573b83a63d..e27fbf887f3 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -137,7 +137,7 @@ static void pgd_dtor(pgd_t *pgd)
* against pageattr.c; it is the unique case in which a valid change
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
* vmalloc faults work because attached pagetables are never freed.
- * -- wli
+ * -- nyc
*/
#ifdef CONFIG_X86_PAE
@@ -301,6 +301,13 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
free_page((unsigned long)pgd);
}
+/*
+ * Used to set accessed or dirty bits in the page table entries
+ * on other architectures. On x86, the accessed and dirty bits
+ * are tracked by hardware. However, do_wp_page calls this function
+ * to also make the pte writeable at the same time the dirty bit is
+ * set. In that case we do actually need to write the PTE.
+ */
int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
pte_t entry, int dirty)
@@ -310,7 +317,6 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
if (changed && dirty) {
*ptep = entry;
pte_update_defer(vma->vm_mm, address, ptep);
- flush_tlb_page(vma, address);
}
return changed;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 60f926cd8b0..13a6b29e2e5 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -104,7 +104,7 @@ static void flush_tlb_func(void *info)
return;
if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
- if (f->flush_end == TLB_FLUSH_ALL || !cpu_has_invlpg)
+ if (f->flush_end == TLB_FLUSH_ALL)
local_flush_tlb();
else if (!f->flush_end)
__flush_tlb_single(f->flush_start);
@@ -337,10 +337,8 @@ static const struct file_operations fops_tlbflush = {
static int __cpuinit create_tlb_flushall_shift(void)
{
- if (cpu_has_invlpg) {
- debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
- arch_debugfs_dir, NULL, &fops_tlbflush);
- }
+ debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
+ arch_debugfs_dir, NULL, &fops_tlbflush);
return 0;
}
late_initcall(create_tlb_flushall_shift);
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 520d2bd0b9c..d11a47099d3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -11,6 +11,7 @@
#include <asm/cacheflush.h>
#include <linux/netdevice.h>
#include <linux/filter.h>
+#include <linux/if_vlan.h>
/*
* Conventions :
@@ -212,6 +213,8 @@ void bpf_jit_compile(struct sk_filter *fp)
case BPF_S_ANC_MARK:
case BPF_S_ANC_RXHASH:
case BPF_S_ANC_CPU:
+ case BPF_S_ANC_VLAN_TAG:
+ case BPF_S_ANC_VLAN_TAG_PRESENT:
case BPF_S_ANC_QUEUE:
case BPF_S_LD_W_ABS:
case BPF_S_LD_H_ABS:
@@ -515,6 +518,24 @@ void bpf_jit_compile(struct sk_filter *fp)
CLEAR_A();
#endif
break;
+ case BPF_S_ANC_VLAN_TAG:
+ case BPF_S_ANC_VLAN_TAG_PRESENT:
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ if (is_imm8(offsetof(struct sk_buff, vlan_tci))) {
+ /* movzwl off8(%rdi),%eax */
+ EMIT4(0x0f, 0xb7, 0x47, offsetof(struct sk_buff, vlan_tci));
+ } else {
+ EMIT3(0x0f, 0xb7, 0x87); /* movzwl off32(%rdi),%eax */
+ EMIT(offsetof(struct sk_buff, vlan_tci), 4);
+ }
+ BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+ if (filter[i].code == BPF_S_ANC_VLAN_TAG) {
+ EMIT3(0x80, 0xe4, 0xef); /* and $0xef,%ah */
+ } else {
+ EMIT3(0xc1, 0xe8, 0x0c); /* shr $0xc,%eax */
+ EMIT3(0x83, 0xe0, 0x01); /* and $0x1,%eax */
+ }
+ break;
case BPF_S_LD_W_ABS:
func = CHOOSE_LOAD_FUNC(K, sk_load_word);
common_load: seen |= SEEN_DATAREF;
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile
index 3af5a1e79c9..ee0af58ca5b 100644
--- a/arch/x86/pci/Makefile
+++ b/arch/x86/pci/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_STA2X11) += sta2x11-fixup.o
obj-$(CONFIG_X86_VISWS) += visws.o
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
+obj-$(CONFIG_X86_NUMACHIP) += numachip.o
obj-$(CONFIG_X86_INTEL_MID) += mrst.o
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 192397c9860..53ea60458e0 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -12,6 +12,7 @@ struct pci_root_info {
char name[16];
unsigned int res_num;
struct resource *res;
+ resource_size_t *res_offset;
struct pci_sysdata sd;
#ifdef CONFIG_PCI_MMCONFIG
bool mcfg_added;
@@ -22,6 +23,7 @@ struct pci_root_info {
};
static bool pci_use_crs = true;
+static bool pci_ignore_seg = false;
static int __init set_use_crs(const struct dmi_system_id *id)
{
@@ -35,7 +37,14 @@ static int __init set_nouse_crs(const struct dmi_system_id *id)
return 0;
}
-static const struct dmi_system_id pci_use_crs_table[] __initconst = {
+static int __init set_ignore_seg(const struct dmi_system_id *id)
+{
+ printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident);
+ pci_ignore_seg = true;
+ return 0;
+}
+
+static const struct dmi_system_id pci_crs_quirks[] __initconst = {
/* http://bugzilla.kernel.org/show_bug.cgi?id=14183 */
{
.callback = set_use_crs,
@@ -98,6 +107,16 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = {
DMI_MATCH(DMI_BIOS_VERSION, "6JET85WW (1.43 )"),
},
},
+
+ /* https://bugzilla.kernel.org/show_bug.cgi?id=15362 */
+ {
+ .callback = set_ignore_seg,
+ .ident = "HP xw9300",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP xw9300 Workstation"),
+ },
+ },
{}
};
@@ -108,7 +127,7 @@ void __init pci_acpi_crs_quirks(void)
if (dmi_get_date(DMI_BIOS_DATE, &year, NULL, NULL) && year < 2008)
pci_use_crs = false;
- dmi_check_system(pci_use_crs_table);
+ dmi_check_system(pci_crs_quirks);
/*
* If the user specifies "pci=use_crs" or "pci=nocrs" explicitly, that
@@ -126,7 +145,7 @@ void __init pci_acpi_crs_quirks(void)
}
#ifdef CONFIG_PCI_MMCONFIG
-static int __devinit check_segment(u16 seg, struct device *dev, char *estr)
+static int check_segment(u16 seg, struct device *dev, char *estr)
{
if (seg) {
dev_err(dev,
@@ -149,9 +168,8 @@ static int __devinit check_segment(u16 seg, struct device *dev, char *estr)
return 0;
}
-static int __devinit setup_mcfg_map(struct pci_root_info *info,
- u16 seg, u8 start, u8 end,
- phys_addr_t addr)
+static int setup_mcfg_map(struct pci_root_info *info, u16 seg, u8 start,
+ u8 end, phys_addr_t addr)
{
int result;
struct device *dev = &info->bridge->dev;
@@ -189,7 +207,7 @@ static void teardown_mcfg_map(struct pci_root_info *info)
}
}
#else
-static int __devinit setup_mcfg_map(struct pci_root_info *info,
+static int setup_mcfg_map(struct pci_root_info *info,
u16 seg, u8 start, u8 end,
phys_addr_t addr)
{
@@ -305,6 +323,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
res->flags = flags;
res->start = start;
res->end = end;
+ info->res_offset[info->res_num] = addr.translation_offset;
if (!pci_use_crs) {
dev_printk(KERN_DEBUG, &info->bridge->dev,
@@ -374,7 +393,8 @@ static void add_resources(struct pci_root_info *info,
"ignoring host bridge window %pR (conflicts with %s %pR)\n",
res, conflict->name, conflict);
else
- pci_add_resource(resources, res);
+ pci_add_resource_offset(resources, res,
+ info->res_offset[i]);
}
}
@@ -382,6 +402,8 @@ static void free_pci_root_info_res(struct pci_root_info *info)
{
kfree(info->res);
info->res = NULL;
+ kfree(info->res_offset);
+ info->res_offset = NULL;
info->res_num = 0;
}
@@ -432,16 +454,26 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
return;
size = sizeof(*info->res) * info->res_num;
- info->res_num = 0;
info->res = kzalloc(size, GFP_KERNEL);
- if (!info->res)
+ if (!info->res) {
+ info->res_num = 0;
return;
+ }
+
+ size = sizeof(*info->res_offset) * info->res_num;
+ info->res_num = 0;
+ info->res_offset = kzalloc(size, GFP_KERNEL);
+ if (!info->res_offset) {
+ kfree(info->res);
+ info->res = NULL;
+ return;
+ }
acpi_walk_resources(device->handle, METHOD_NAME__CRS, setup_resource,
info);
}
-struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
+struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
{
struct acpi_device *device = root->device;
struct pci_root_info *info = NULL;
@@ -455,6 +487,9 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
int pxm;
#endif
+ if (pci_ignore_seg)
+ domain = 0;
+
if (domain && !pci_domains_supported) {
printk(KERN_WARNING "pci_bus %04x:%02x: "
"ignored (multiple domains not supported)\n",
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c
index d37e2fec97e..c2735feb250 100644
--- a/arch/x86/pci/bus_numa.c
+++ b/arch/x86/pci/bus_numa.c
@@ -93,8 +93,8 @@ struct pci_root_info __init *alloc_pci_root_info(int bus_min, int bus_max,
return info;
}
-void __devinit update_res(struct pci_root_info *info, resource_size_t start,
- resource_size_t end, unsigned long flags, int merge)
+void update_res(struct pci_root_info *info, resource_size_t start,
+ resource_size_t end, unsigned long flags, int merge)
{
struct resource *res;
struct pci_root_res *root_res;
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 720e973fc34..ccd0ab3ab89 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -17,6 +17,7 @@
#include <asm/io.h>
#include <asm/smp.h>
#include <asm/pci_x86.h>
+#include <asm/setup.h>
unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
PCI_PROBE_MMCONF;
@@ -80,14 +81,14 @@ struct pci_ops pci_root_ops = {
*/
DEFINE_RAW_SPINLOCK(pci_config_lock);
-static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
+static int can_skip_ioresource_align(const struct dmi_system_id *d)
{
pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
return 0;
}
-static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = {
+static const struct dmi_system_id can_skip_pciprobe_dmi_table[] = {
/*
* Systems where PCI IO resource ISA alignment can be skipped
* when the ISA enable bit in the bridge control is not set
@@ -124,7 +125,7 @@ void __init dmi_check_skip_isa_align(void)
dmi_check_system(can_skip_pciprobe_dmi_table);
}
-static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+static void pcibios_fixup_device_resources(struct pci_dev *dev)
{
struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
struct resource *bar_r;
@@ -161,7 +162,7 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
* are examined.
*/
-void __devinit pcibios_fixup_bus(struct pci_bus *b)
+void pcibios_fixup_bus(struct pci_bus *b)
{
struct pci_dev *dev;
@@ -175,7 +176,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b)
* on the kernel command line (which was parsed earlier).
*/
-static int __devinit set_bf_sort(const struct dmi_system_id *d)
+static int set_bf_sort(const struct dmi_system_id *d)
{
if (pci_bf_sort == pci_bf_sort_default) {
pci_bf_sort = pci_dmi_bf;
@@ -184,7 +185,7 @@ static int __devinit set_bf_sort(const struct dmi_system_id *d)
return 0;
}
-static void __devinit read_dmi_type_b1(const struct dmi_header *dm,
+static void read_dmi_type_b1(const struct dmi_header *dm,
void *private_data)
{
u8 *d = (u8 *)dm + 4;
@@ -206,7 +207,7 @@ static void __devinit read_dmi_type_b1(const struct dmi_header *dm,
}
}
-static int __devinit find_sort_method(const struct dmi_system_id *d)
+static int find_sort_method(const struct dmi_system_id *d)
{
dmi_walk(read_dmi_type_b1, NULL);
@@ -221,7 +222,7 @@ static int __devinit find_sort_method(const struct dmi_system_id *d)
* Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus)
*/
#ifdef __i386__
-static int __devinit assign_all_busses(const struct dmi_system_id *d)
+static int assign_all_busses(const struct dmi_system_id *d)
{
pci_probe |= PCI_ASSIGN_ALL_BUSSES;
printk(KERN_INFO "%s detected: enabling PCI bus# renumbering"
@@ -230,7 +231,7 @@ static int __devinit assign_all_busses(const struct dmi_system_id *d)
}
#endif
-static int __devinit set_scan_all(const struct dmi_system_id *d)
+static int set_scan_all(const struct dmi_system_id *d)
{
printk(KERN_INFO "PCI: %s detected, enabling pci=pcie_scan_all\n",
d->ident);
@@ -238,7 +239,7 @@ static int __devinit set_scan_all(const struct dmi_system_id *d)
return 0;
}
-static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
+static const struct dmi_system_id pciprobe_dmi_table[] = {
#ifdef __i386__
/*
* Laptops which need pci=assign-busses to see Cardbus cards
@@ -433,7 +434,8 @@ static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = {
.callback = set_scan_all,
.ident = "Stratus/NEC ftServer",
.matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "ftServer"),
+ DMI_MATCH(DMI_SYS_VENDOR, "Stratus"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "ftServer"),
},
},
{}
@@ -444,7 +446,7 @@ void __init dmi_check_pciprobe(void)
dmi_check_system(pciprobe_dmi_table);
}
-struct pci_bus * __devinit pcibios_scan_root(int busnum)
+struct pci_bus *pcibios_scan_root(int busnum)
{
struct pci_bus *bus = NULL;
@@ -608,6 +610,35 @@ unsigned int pcibios_assign_all_busses(void)
return (pci_probe & PCI_ASSIGN_ALL_BUSSES) ? 1 : 0;
}
+int pcibios_add_device(struct pci_dev *dev)
+{
+ struct setup_data *data;
+ struct pci_setup_rom *rom;
+ u64 pa_data;
+
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = phys_to_virt(pa_data);
+
+ if (data->type == SETUP_PCI) {
+ rom = (struct pci_setup_rom *)data;
+
+ if ((pci_domain_nr(dev->bus) == rom->segment) &&
+ (dev->bus->number == rom->bus) &&
+ (PCI_SLOT(dev->devfn) == rom->device) &&
+ (PCI_FUNC(dev->devfn) == rom->function) &&
+ (dev->vendor == rom->vendor) &&
+ (dev->device == rom->devid)) {
+ dev->rom = pa_data +
+ offsetof(struct pci_setup_rom, romdata);
+ dev->romlen = rom->pcilen;
+ }
+ }
+ pa_data = data->next;
+ }
+ return 0;
+}
+
int pcibios_enable_device(struct pci_dev *dev, int mask)
{
int err;
@@ -626,7 +657,7 @@ void pcibios_disable_device (struct pci_dev *dev)
pcibios_disable_irq(dev);
}
-int pci_ext_cfg_avail(struct pci_dev *dev)
+int pci_ext_cfg_avail(void)
{
if (raw_pci_ext_ops)
return 1;
@@ -634,7 +665,7 @@ int pci_ext_cfg_avail(struct pci_dev *dev)
return 0;
}
-struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
+struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
{
LIST_HEAD(resources);
struct pci_bus *bus = NULL;
@@ -662,7 +693,7 @@ struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops,
return bus;
}
-struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
+struct pci_bus *pci_scan_bus_with_sysdata(int busno)
{
return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
}
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index af8a224db21..f5809fa2753 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -9,7 +9,7 @@
#include <linux/vgaarb.h>
#include <asm/pci_x86.h>
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+static void pci_fixup_i450nx(struct pci_dev *d)
{
/*
* i450NX -- Find and scan all secondary buses on all PXB's.
@@ -34,7 +34,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx);
-static void __devinit pci_fixup_i450gx(struct pci_dev *d)
+static void pci_fixup_i450gx(struct pci_dev *d)
{
/*
* i450GX and i450KX -- Find and scan all secondary buses.
@@ -48,7 +48,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
-static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
+static void pci_fixup_umc_ide(struct pci_dev *d)
{
/*
* UM8886BF IDE controller sets region type bits incorrectly,
@@ -62,7 +62,7 @@ static void __devinit pci_fixup_umc_ide(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_UMC, PCI_DEVICE_ID_UMC_UM8886BF, pci_fixup_umc_ide);
-static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
+static void pci_fixup_ncr53c810(struct pci_dev *d)
{
/*
* NCR 53C810 returns class code 0 (at least on some systems).
@@ -75,7 +75,7 @@ static void __devinit pci_fixup_ncr53c810(struct pci_dev *d)
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, pci_fixup_ncr53c810);
-static void __devinit pci_fixup_latency(struct pci_dev *d)
+static void pci_fixup_latency(struct pci_dev *d)
{
/*
* SiS 5597 and 5598 chipsets require latency timer set to
@@ -87,7 +87,7 @@ static void __devinit pci_fixup_latency(struct pci_dev *d)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5597, pci_fixup_latency);
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SI, PCI_DEVICE_ID_SI_5598, pci_fixup_latency);
-static void __devinit pci_fixup_piix4_acpi(struct pci_dev *d)
+static void pci_fixup_piix4_acpi(struct pci_dev *d)
{
/*
* PIIX4 ACPI device: hardwired IRQ9
@@ -163,7 +163,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8367_0, pci_fixup_
* system to PCI bus no matter what are their window settings, so they are
* "transparent" (or subtractive decoding) from programmers point of view.
*/
-static void __devinit pci_fixup_transparent_bridge(struct pci_dev *dev)
+static void pci_fixup_transparent_bridge(struct pci_dev *dev)
{
if ((dev->device & 0xff00) == 0x2400)
dev->transparent = 1;
@@ -317,7 +317,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r
* video device at this point.
*/
-static void __devinit pci_fixup_video(struct pci_dev *pdev)
+static void pci_fixup_video(struct pci_dev *pdev)
{
struct pci_dev *bridge;
struct pci_bus *bus;
@@ -357,7 +357,7 @@ DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video);
-static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
+static const struct dmi_system_id msi_k8t_dmi_table[] = {
{
.ident = "MSI-K8T-Neo2Fir",
.matches = {
@@ -378,7 +378,7 @@ static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = {
* The soundcard is only enabled, if the mainborad is identified
* via DMI-tables and the soundcard is detected to be off.
*/
-static void __devinit pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
+static void pci_fixup_msi_k8t_onboard_sound(struct pci_dev *dev)
{
unsigned char val;
if (!dmi_check_system(msi_k8t_dmi_table))
@@ -414,7 +414,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
*/
static u16 toshiba_line_size;
-static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = {
+static const struct dmi_system_id toshiba_ohci1394_dmi_table[] = {
{
.ident = "Toshiba PS5 based laptop",
.matches = {
@@ -439,7 +439,7 @@ static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] =
{ }
};
-static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
+static void pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
{
if (!dmi_check_system(toshiba_ohci1394_dmi_table))
return; /* only applies to certain Toshibas (so far) */
@@ -450,7 +450,7 @@ static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032,
pci_pre_fixup_toshiba_ohci1394);
-static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev)
+static void pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev)
{
if (!dmi_check_system(toshiba_ohci1394_dmi_table))
return; /* only applies to certain Toshibas (so far) */
@@ -488,7 +488,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY,
* Siemens Nixdorf AG FSC Multiprocessor Interrupt Controller:
* prevent update of the BAR0, which doesn't look like a normal BAR.
*/
-static void __devinit pci_siemens_interrupt_controller(struct pci_dev *dev)
+static void pci_siemens_interrupt_controller(struct pci_dev *dev)
{
dev->resource[0].flags |= IORESOURCE_PCI_FIXED;
}
@@ -531,7 +531,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_ATI, 0x4385, sb600_disable_hpet_bar);
*
* Match off the LPC and svid/sdid (older kernels lose the bridge subvendor)
*/
-static void __devinit twinhead_reserve_killing_zone(struct pci_dev *dev)
+static void twinhead_reserve_killing_zone(struct pci_dev *dev)
{
if (dev->subsystem_vendor == 0x14FF && dev->subsystem_device == 0xA003) {
pr_info("Reserving memory on Twinhead H12Y\n");
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index a1df191129d..4a2ab9cb365 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -10,7 +10,7 @@
* Discover remaining PCI buses in case there are peer host bridges.
* We use the number of last PCI bus provided by the PCI BIOS.
*/
-static void __devinit pcibios_fixup_peer_bridges(void)
+static void pcibios_fixup_peer_bridges(void)
{
int n;
@@ -34,7 +34,7 @@ int __init pci_legacy_init(void)
return 0;
}
-void __devinit pcibios_scan_specific_bus(int busn)
+void pcibios_scan_specific_bus(int busn)
{
int devfn;
long node;
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 704b9ec043d..fb29968a7cd 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -49,7 +49,7 @@ static __init void free_all_mmcfg(void)
pci_mmconfig_remove(cfg);
}
-static __devinit void list_add_sorted(struct pci_mmcfg_region *new)
+static void list_add_sorted(struct pci_mmcfg_region *new)
{
struct pci_mmcfg_region *cfg;
@@ -65,9 +65,8 @@ static __devinit void list_add_sorted(struct pci_mmcfg_region *new)
list_add_tail_rcu(&new->list, &pci_mmcfg_list);
}
-static __devinit struct pci_mmcfg_region *pci_mmconfig_alloc(int segment,
- int start,
- int end, u64 addr)
+static struct pci_mmcfg_region *pci_mmconfig_alloc(int segment, int start,
+ int end, u64 addr)
{
struct pci_mmcfg_region *new;
struct resource *res;
@@ -371,8 +370,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
return !list_empty(&pci_mmcfg_list);
}
-static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res,
- void *data)
+static acpi_status check_mcfg_resource(struct acpi_resource *res, void *data)
{
struct resource *mcfg_res = data;
struct acpi_resource_address64 address;
@@ -408,8 +406,8 @@ static acpi_status __devinit check_mcfg_resource(struct acpi_resource *res,
return AE_OK;
}
-static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl,
- void *context, void **rv)
+static acpi_status find_mboard_resource(acpi_handle handle, u32 lvl,
+ void *context, void **rv)
{
struct resource *mcfg_res = context;
@@ -422,7 +420,7 @@ static acpi_status __devinit find_mboard_resource(acpi_handle handle, u32 lvl,
return AE_OK;
}
-static int __devinit is_acpi_reserved(u64 start, u64 end, unsigned not_used)
+static int is_acpi_reserved(u64 start, u64 end, unsigned not_used)
{
struct resource mcfg_res;
@@ -693,9 +691,8 @@ static int __init pci_mmcfg_late_insert_resources(void)
late_initcall(pci_mmcfg_late_insert_resources);
/* Add MMCFG information for host bridges */
-int __devinit pci_mmconfig_insert(struct device *dev,
- u16 seg, u8 start, u8 end,
- phys_addr_t addr)
+int pci_mmconfig_insert(struct device *dev, u16 seg, u8 start, u8 end,
+ phys_addr_t addr)
{
int rc;
struct resource *tmp = NULL;
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index db63ac23e3d..5c90975cdf0 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -142,7 +142,7 @@ void __init pci_mmcfg_arch_free(void)
{
}
-int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
+int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
{
return 0;
}
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index d4ebd07c306..bea52496aea 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -95,7 +95,7 @@ const struct pci_raw_ops pci_mmcfg = {
.write = pci_mmcfg_write,
};
-static void __iomem * __devinit mcfg_ioremap(struct pci_mmcfg_region *cfg)
+static void __iomem *mcfg_ioremap(struct pci_mmcfg_region *cfg)
{
void __iomem *addr;
u64 start, size;
@@ -133,7 +133,7 @@ void __init pci_mmcfg_arch_free(void)
pci_mmcfg_arch_unmap(cfg);
}
-int __devinit pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
+int pci_mmcfg_arch_map(struct pci_mmcfg_region *cfg)
{
cfg->virt = mcfg_ioremap(cfg);
if (!cfg->virt) {
diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index e14a2ff708b..6eb18c42a28 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -247,7 +247,7 @@ int __init pci_mrst_init(void)
/* Langwell devices are not true pci devices, they are not subject to 10 ms
* d3 to d0 delay required by pci spec.
*/
-static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
+static void pci_d3delay_fixup(struct pci_dev *dev)
{
/* PCI fixups are effectively decided compile time. If we have a dual
SoC/non-SoC kernel we don't want to mangle d3 on non SoC devices */
@@ -262,7 +262,7 @@ static void __devinit pci_d3delay_fixup(struct pci_dev *dev)
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_ANY_ID, pci_d3delay_fixup);
-static void __devinit mrst_power_off_unused_dev(struct pci_dev *dev)
+static void mrst_power_off_unused_dev(struct pci_dev *dev)
{
pci_set_power_state(dev, PCI_D3hot);
}
@@ -275,7 +275,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0815, mrst_power_off_unused_dev);
/*
* Langwell devices reside at fixed offsets, don't try to move them.
*/
-static void __devinit pci_fixed_bar_fixup(struct pci_dev *dev)
+static void pci_fixed_bar_fixup(struct pci_dev *dev)
{
unsigned long offset;
u32 size;
diff --git a/arch/x86/pci/numachip.c b/arch/x86/pci/numachip.c
new file mode 100644
index 00000000000..7307d9d12d1
--- /dev/null
+++ b/arch/x86/pci/numachip.c
@@ -0,0 +1,129 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-specific PCI code
+ *
+ * Copyright (C) 2012 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ * PCI accessor functions derived from mmconfig_64.c
+ *
+ */
+
+#include <linux/pci.h>
+#include <asm/pci_x86.h>
+
+static u8 limit __read_mostly;
+
+static inline char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn)
+{
+ struct pci_mmcfg_region *cfg = pci_mmconfig_lookup(seg, bus);
+
+ if (cfg && cfg->virt)
+ return cfg->virt + (PCI_MMCFG_BUS_OFFSET(bus) | (devfn << 12));
+ return NULL;
+}
+
+static int pci_mmcfg_read_numachip(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 *value)
+{
+ char __iomem *addr;
+
+ /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) {
+err: *value = -1;
+ return -EINVAL;
+ }
+
+ /* Ensure AMD Northbridges don't decode reads to other devices */
+ if (unlikely(bus == 0 && devfn >= limit)) {
+ *value = -1;
+ return 0;
+ }
+
+ rcu_read_lock();
+ addr = pci_dev_base(seg, bus, devfn);
+ if (!addr) {
+ rcu_read_unlock();
+ goto err;
+ }
+
+ switch (len) {
+ case 1:
+ *value = mmio_config_readb(addr + reg);
+ break;
+ case 2:
+ *value = mmio_config_readw(addr + reg);
+ break;
+ case 4:
+ *value = mmio_config_readl(addr + reg);
+ break;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static int pci_mmcfg_write_numachip(unsigned int seg, unsigned int bus,
+ unsigned int devfn, int reg, int len, u32 value)
+{
+ char __iomem *addr;
+
+ /* Why do we have this when nobody checks it. How about a BUG()!? -AK */
+ if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095)))
+ return -EINVAL;
+
+ /* Ensure AMD Northbridges don't decode writes to other devices */
+ if (unlikely(bus == 0 && devfn >= limit))
+ return 0;
+
+ rcu_read_lock();
+ addr = pci_dev_base(seg, bus, devfn);
+ if (!addr) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ switch (len) {
+ case 1:
+ mmio_config_writeb(addr + reg, value);
+ break;
+ case 2:
+ mmio_config_writew(addr + reg, value);
+ break;
+ case 4:
+ mmio_config_writel(addr + reg, value);
+ break;
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+
+const struct pci_raw_ops pci_mmcfg_numachip = {
+ .read = pci_mmcfg_read_numachip,
+ .write = pci_mmcfg_write_numachip,
+};
+
+int __init pci_numachip_init(void)
+{
+ int ret = 0;
+ u32 val;
+
+ /* For remote I/O, restrict bus 0 access to the actual number of AMD
+ Northbridges, which starts at device number 0x18 */
+ ret = raw_pci_read(0, 0, PCI_DEVFN(0x18, 0), 0x60, sizeof(val), &val);
+ if (ret)
+ goto out;
+
+ /* HyperTransport fabric size in bits 6:4 */
+ limit = PCI_DEVFN(0x18 + ((val >> 4) & 7) + 1, 0);
+
+ /* Use NumaChip PCI accessors for non-extended and extended access */
+ raw_pci_ops = raw_pci_ext_ops = &pci_mmcfg_numachip;
+out:
+ return ret;
+}
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 83e125b95ca..b96b14c250b 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -116,7 +116,7 @@ static const struct pci_raw_ops pci_direct_conf1_mq = {
};
-static void __devinit pci_fixup_i450nx(struct pci_dev *d)
+static void pci_fixup_i450nx(struct pci_dev *d)
{
/*
* i450NX -- Find and scan all secondary buses on all PXB's.
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index da8fe0535ff..c77b24a8b2d 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -124,7 +124,7 @@ static struct {
static int pci_bios_present;
-static int __devinit check_pcibios(void)
+static int check_pcibios(void)
{
u32 signature, eax, ebx, ecx;
u8 status, major_ver, minor_ver, hw_mech;
@@ -312,7 +312,7 @@ static const struct pci_raw_ops pci_bios_access = {
* Try to find PCI BIOS.
*/
-static const struct pci_raw_ops * __devinit pci_find_bios(void)
+static const struct pci_raw_ops *pci_find_bios(void)
{
union bios32 *check;
unsigned char sum;
diff --git a/arch/x86/platform/ce4100/ce4100.c b/arch/x86/platform/ce4100/ce4100.c
index 92525cb8e54..f8ab4945892 100644
--- a/arch/x86/platform/ce4100/ce4100.c
+++ b/arch/x86/platform/ce4100/ce4100.c
@@ -105,8 +105,11 @@ static void ce4100_serial_fixup(int port, struct uart_port *up,
up->membase =
(void __iomem *)__fix_to_virt(FIX_EARLYCON_MEM_BASE);
up->membase += up->mapbase & ~PAGE_MASK;
+ up->mapbase += port * 0x100;
+ up->membase += port * 0x100;
up->iotype = UPIO_MEM32;
up->regshift = 2;
+ up->irq = 4;
}
#endif
up->iobase = 0;
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
index f6a0c1b8e51..d9c1b95af17 100644
--- a/arch/x86/platform/efi/efi-bgrt.c
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -39,6 +39,8 @@ void efi_bgrt_init(void)
if (ACPI_FAILURE(status))
return;
+ if (bgrt_tab->header.length < sizeof(*bgrt_tab))
+ return;
if (bgrt_tab->version != 1)
return;
if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ad4439145f8..928bf837040 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -51,9 +51,6 @@
#define EFI_DEBUG 1
-int efi_enabled;
-EXPORT_SYMBOL(efi_enabled);
-
struct efi __read_mostly efi = {
.mps = EFI_INVALID_TABLE_ADDR,
.acpi = EFI_INVALID_TABLE_ADDR,
@@ -69,19 +66,28 @@ EXPORT_SYMBOL(efi);
struct efi_memory_map memmap;
-bool efi_64bit;
-
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
static inline bool efi_is_native(void)
{
- return IS_ENABLED(CONFIG_X86_64) == efi_64bit;
+ return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT);
+}
+
+unsigned long x86_efi_facility;
+
+/*
+ * Returns 1 if 'facility' is enabled, 0 otherwise.
+ */
+int efi_enabled(int facility)
+{
+ return test_bit(facility, &x86_efi_facility) != 0;
}
+EXPORT_SYMBOL(efi_enabled);
static int __init setup_noefi(char *arg)
{
- efi_enabled = 0;
+ clear_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
return 0;
}
early_param("noefi", setup_noefi);
@@ -426,6 +432,7 @@ void __init efi_reserve_boot_services(void)
void __init efi_unmap_memmap(void)
{
+ clear_bit(EFI_MEMMAP, &x86_efi_facility);
if (memmap.map) {
early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
memmap.map = NULL;
@@ -460,7 +467,7 @@ void __init efi_free_boot_services(void)
static int __init efi_systab_init(void *phys)
{
- if (efi_64bit) {
+ if (efi_enabled(EFI_64BIT)) {
efi_system_table_64_t *systab64;
u64 tmp = 0;
@@ -552,7 +559,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
void *config_tables, *tablep;
int i, sz;
- if (efi_64bit)
+ if (efi_enabled(EFI_64BIT))
sz = sizeof(efi_config_table_64_t);
else
sz = sizeof(efi_config_table_32_t);
@@ -572,7 +579,7 @@ static int __init efi_config_init(u64 tables, int nr_tables)
efi_guid_t guid;
unsigned long table;
- if (efi_64bit) {
+ if (efi_enabled(EFI_64BIT)) {
u64 table64;
guid = ((efi_config_table_64_t *)tablep)->guid;
table64 = ((efi_config_table_64_t *)tablep)->table;
@@ -684,7 +691,6 @@ void __init efi_init(void)
if (boot_params.efi_info.efi_systab_hi ||
boot_params.efi_info.efi_memmap_hi) {
pr_info("Table located above 4GB, disabling EFI.\n");
- efi_enabled = 0;
return;
}
efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
@@ -694,10 +700,10 @@ void __init efi_init(void)
((__u64)boot_params.efi_info.efi_systab_hi<<32));
#endif
- if (efi_systab_init(efi_phys.systab)) {
- efi_enabled = 0;
+ if (efi_systab_init(efi_phys.systab))
return;
- }
+
+ set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
/*
* Show what we know for posterity
@@ -715,10 +721,10 @@ void __init efi_init(void)
efi.systab->hdr.revision >> 16,
efi.systab->hdr.revision & 0xffff, vendor);
- if (efi_config_init(efi.systab->tables, efi.systab->nr_tables)) {
- efi_enabled = 0;
+ if (efi_config_init(efi.systab->tables, efi.systab->nr_tables))
return;
- }
+
+ set_bit(EFI_CONFIG_TABLES, &x86_efi_facility);
/*
* Note: We currently don't support runtime services on an EFI
@@ -727,15 +733,17 @@ void __init efi_init(void)
if (!efi_is_native())
pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n");
- else if (efi_runtime_init()) {
- efi_enabled = 0;
- return;
+ else {
+ if (efi_runtime_init())
+ return;
+ set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility);
}
- if (efi_memmap_init()) {
- efi_enabled = 0;
+ if (efi_memmap_init())
return;
- }
+
+ set_bit(EFI_MEMMAP, &x86_efi_facility);
+
#ifdef CONFIG_X86_32
if (efi_is_native()) {
x86_platform.get_wallclock = efi_get_time;
@@ -941,7 +949,7 @@ void __init efi_enter_virtual_mode(void)
*
* Call EFI services through wrapper functions.
*/
- efi.runtime_version = efi_systab.fw_revision;
+ efi.runtime_version = efi_systab.hdr.revision;
efi.get_time = virt_efi_get_time;
efi.set_time = virt_efi_set_time;
efi.get_wakeup_time = virt_efi_get_wakeup_time;
@@ -969,6 +977,9 @@ u32 efi_mem_type(unsigned long phys_addr)
efi_memory_desc_t *md;
void *p;
+ if (!efi_enabled(EFI_MEMMAP))
+ return 0;
+
for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
md = p;
if ((md->phys_addr <= phys_addr) &&
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 95fd505dfeb..2b200386061 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -38,7 +38,7 @@
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
-static pgd_t save_pgd __initdata;
+static pgd_t *save_pgd __initdata;
static unsigned long efi_flags __initdata;
static void __init early_code_mapping_set_exec(int executable)
@@ -61,12 +61,20 @@ static void __init early_code_mapping_set_exec(int executable)
void __init efi_call_phys_prelog(void)
{
unsigned long vaddress;
+ int pgd;
+ int n_pgds;
early_code_mapping_set_exec(1);
local_irq_save(efi_flags);
- vaddress = (unsigned long)__va(0x0UL);
- save_pgd = *pgd_offset_k(0x0UL);
- set_pgd(pgd_offset_k(0x0UL), *pgd_offset_k(vaddress));
+
+ n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
+ save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
+
+ for (pgd = 0; pgd < n_pgds; pgd++) {
+ save_pgd[pgd] = *pgd_offset_k(pgd * PGDIR_SIZE);
+ vaddress = (unsigned long)__va(pgd * PGDIR_SIZE);
+ set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
+ }
__flush_tlb_all();
}
@@ -75,7 +83,11 @@ void __init efi_call_phys_epilog(void)
/*
* After the lock is released, the original page table is restored.
*/
- set_pgd(pgd_offset_k(0x0UL), save_pgd);
+ int pgd;
+ int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+ for (pgd = 0; pgd < n_pgds; pgd++)
+ set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+ kfree(save_pgd);
__flush_tlb_all();
local_irq_restore(efi_flags);
early_code_mapping_set_exec(0);
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index 5917eb56b31..e6cb80f620a 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -23,6 +23,7 @@
#include <linux/moduleparam.h>
#include <linux/module.h>
+#include <linux/platform_device.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/delay.h>
@@ -62,29 +63,75 @@ static void iris_power_off(void)
* by reading its input port and seeing whether the read value is
* meaningful.
*/
-static int iris_init(void)
+static int iris_probe(struct platform_device *pdev)
{
- unsigned char status;
- if (force != 1) {
- printk(KERN_ERR "The force parameter has not been set to 1 so the Iris poweroff handler will not be installed.\n");
- return -ENODEV;
- }
- status = inb(IRIS_GIO_INPUT);
+ unsigned char status = inb(IRIS_GIO_INPUT);
if (status == IRIS_GIO_NODEV) {
- printk(KERN_ERR "This machine does not seem to be an Iris. Power_off handler not installed.\n");
+ printk(KERN_ERR "This machine does not seem to be an Iris. "
+ "Power off handler not installed.\n");
return -ENODEV;
}
old_pm_power_off = pm_power_off;
pm_power_off = &iris_power_off;
printk(KERN_INFO "Iris power_off handler installed.\n");
-
return 0;
}
-static void iris_exit(void)
+static int iris_remove(struct platform_device *pdev)
{
pm_power_off = old_pm_power_off;
printk(KERN_INFO "Iris power_off handler uninstalled.\n");
+ return 0;
+}
+
+static struct platform_driver iris_driver = {
+ .driver = {
+ .name = "iris",
+ .owner = THIS_MODULE,
+ },
+ .probe = iris_probe,
+ .remove = iris_remove,
+};
+
+static struct resource iris_resources[] = {
+ {
+ .start = IRIS_GIO_BASE,
+ .end = IRIS_GIO_OUTPUT,
+ .flags = IORESOURCE_IO,
+ .name = "address"
+ }
+};
+
+static struct platform_device *iris_device;
+
+static int iris_init(void)
+{
+ int ret;
+ if (force != 1) {
+ printk(KERN_ERR "The force parameter has not been set to 1."
+ " The Iris poweroff handler will not be installed.\n");
+ return -ENODEV;
+ }
+ ret = platform_driver_register(&iris_driver);
+ if (ret < 0) {
+ printk(KERN_ERR "Failed to register iris platform driver: %d\n",
+ ret);
+ return ret;
+ }
+ iris_device = platform_device_register_simple("iris", (-1),
+ iris_resources, ARRAY_SIZE(iris_resources));
+ if (IS_ERR(iris_device)) {
+ printk(KERN_ERR "Failed to register iris platform device\n");
+ platform_driver_unregister(&iris_driver);
+ return PTR_ERR(iris_device);
+ }
+ return 0;
+}
+
+static void iris_exit(void)
+{
+ platform_device_unregister(iris_device);
+ platform_driver_unregister(&iris_driver);
}
module_init(iris_init);
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index fd41a9262d6..e31bcd8f2ee 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -782,7 +782,7 @@ BLOCKING_NOTIFIER_HEAD(intel_scu_notifier);
EXPORT_SYMBOL_GPL(intel_scu_notifier);
/* Called by IPC driver */
-void __devinit intel_scu_devices_create(void)
+void intel_scu_devices_create(void)
{
int i;
diff --git a/arch/x86/platform/olpc/olpc-xo1-pm.c b/arch/x86/platform/olpc/olpc-xo1-pm.c
index d75582d1aa5..ff0174dda81 100644
--- a/arch/x86/platform/olpc/olpc-xo1-pm.c
+++ b/arch/x86/platform/olpc/olpc-xo1-pm.c
@@ -121,7 +121,7 @@ static const struct platform_suspend_ops xo1_suspend_ops = {
.enter = xo1_power_state_enter,
};
-static int __devinit xo1_pm_probe(struct platform_device *pdev)
+static int xo1_pm_probe(struct platform_device *pdev)
{
struct resource *res;
int err;
@@ -154,7 +154,7 @@ static int __devinit xo1_pm_probe(struct platform_device *pdev)
return 0;
}
-static int __devexit xo1_pm_remove(struct platform_device *pdev)
+static int xo1_pm_remove(struct platform_device *pdev)
{
mfd_cell_disable(pdev);
@@ -173,7 +173,7 @@ static struct platform_driver cs5535_pms_driver = {
.owner = THIS_MODULE,
},
.probe = xo1_pm_probe,
- .remove = __devexit_p(xo1_pm_remove),
+ .remove = xo1_pm_remove,
};
static struct platform_driver cs5535_acpi_driver = {
@@ -182,7 +182,7 @@ static struct platform_driver cs5535_acpi_driver = {
.owner = THIS_MODULE,
},
.probe = xo1_pm_probe,
- .remove = __devexit_p(xo1_pm_remove),
+ .remove = xo1_pm_remove,
};
static int __init xo1_pm_init(void)
diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
index 63d4aa40956..74704be7b1f 100644
--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
@@ -309,7 +309,7 @@ static int xo1_sci_resume(struct platform_device *pdev)
return 0;
}
-static int __devinit setup_sci_interrupt(struct platform_device *pdev)
+static int setup_sci_interrupt(struct platform_device *pdev)
{
u32 lo, hi;
u32 sts;
@@ -351,7 +351,7 @@ static int __devinit setup_sci_interrupt(struct platform_device *pdev)
return r;
}
-static int __devinit setup_ec_sci(void)
+static int setup_ec_sci(void)
{
int r;
@@ -395,7 +395,7 @@ static void free_ec_sci(void)
gpio_free(OLPC_GPIO_ECSCI);
}
-static int __devinit setup_lid_events(void)
+static int setup_lid_events(void)
{
int r;
@@ -432,7 +432,7 @@ static void free_lid_events(void)
gpio_free(OLPC_GPIO_LID);
}
-static int __devinit setup_power_button(struct platform_device *pdev)
+static int setup_power_button(struct platform_device *pdev)
{
int r;
@@ -463,7 +463,7 @@ static void free_power_button(void)
input_free_device(power_button_idev);
}
-static int __devinit setup_ebook_switch(struct platform_device *pdev)
+static int setup_ebook_switch(struct platform_device *pdev)
{
int r;
@@ -494,7 +494,7 @@ static void free_ebook_switch(void)
input_free_device(ebook_switch_idev);
}
-static int __devinit setup_lid_switch(struct platform_device *pdev)
+static int setup_lid_switch(struct platform_device *pdev)
{
int r;
@@ -538,7 +538,7 @@ static void free_lid_switch(void)
input_free_device(lid_switch_idev);
}
-static int __devinit xo1_sci_probe(struct platform_device *pdev)
+static int xo1_sci_probe(struct platform_device *pdev)
{
struct resource *res;
int r;
@@ -613,7 +613,7 @@ err_ebook:
return r;
}
-static int __devexit xo1_sci_remove(struct platform_device *pdev)
+static int xo1_sci_remove(struct platform_device *pdev)
{
mfd_cell_disable(pdev);
free_irq(sci_irq, pdev);
@@ -632,7 +632,7 @@ static struct platform_driver xo1_sci_driver = {
.name = "olpc-xo1-sci-acpi",
},
.probe = xo1_sci_probe,
- .remove = __devexit_p(xo1_sci_remove),
+ .remove = xo1_sci_remove,
.suspend = xo1_sci_suspend,
.resume = xo1_sci_resume,
};
diff --git a/arch/x86/platform/scx200/scx200_32.c b/arch/x86/platform/scx200/scx200_32.c
index 7a9ad30d6c9..3dc9aee41d9 100644
--- a/arch/x86/platform/scx200/scx200_32.c
+++ b/arch/x86/platform/scx200/scx200_32.c
@@ -35,7 +35,7 @@ static struct pci_device_id scx200_tbl[] = {
};
MODULE_DEVICE_TABLE(pci,scx200_tbl);
-static int __devinit scx200_probe(struct pci_dev *, const struct pci_device_id *);
+static int scx200_probe(struct pci_dev *, const struct pci_device_id *);
static struct pci_driver scx200_pci_driver = {
.name = "scx200",
@@ -45,7 +45,7 @@ static struct pci_driver scx200_pci_driver = {
static DEFINE_MUTEX(scx200_gpio_config_lock);
-static void __devinit scx200_init_shadow(void)
+static void scx200_init_shadow(void)
{
int bank;
@@ -54,7 +54,7 @@ static void __devinit scx200_init_shadow(void)
scx200_gpio_shadow[bank] = inl(scx200_gpio_base + 0x10 * bank);
}
-static int __devinit scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+static int scx200_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
unsigned base;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index b8b3a37c80c..dbbdca5f508 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1034,7 +1034,8 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
* globally purge translation cache of a virtual address or all TLB's
* @cpumask: mask of all cpu's in which the address is to be removed
* @mm: mm_struct containing virtual address range
- * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @start: start virtual address to be removed from TLB
+ * @end: end virtual address to be remove from TLB
* @cpu: the current cpu
*
* This is the entry point for initiating any UV global TLB shootdown.
@@ -1056,7 +1057,7 @@ static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp,
*/
const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
struct mm_struct *mm, unsigned long start,
- unsigned end, unsigned int cpu)
+ unsigned long end, unsigned int cpu)
{
int locals = 0;
int remotes = 0;
@@ -1113,7 +1114,10 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
record_send_statistics(stat, locals, hubs, remotes, bau_desc);
- bau_desc->payload.address = start;
+ if (!end || (end - start) <= PAGE_SIZE)
+ bau_desc->payload.address = start;
+ else
+ bau_desc->payload.address = TLB_FLUSH_ALL;
bau_desc->payload.sending_cpu = cpu;
/*
* uv_flush_send_and_wait returns 0 if all cpu's were messaged,
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 218cdb16163..120cee1c3f8 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -21,6 +21,7 @@
#include <asm/suspend.h>
#include <asm/debugreg.h>
#include <asm/fpu-internal.h> /* pcntxt_mask */
+#include <asm/cpu.h>
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
@@ -237,3 +238,84 @@ void restore_processor_state(void)
#ifdef CONFIG_X86_32
EXPORT_SYMBOL(restore_processor_state);
#endif
+
+/*
+ * When bsp_check() is called in hibernate and suspend, cpu hotplug
+ * is disabled already. So it's unnessary to handle race condition between
+ * cpumask query and cpu hotplug.
+ */
+static int bsp_check(void)
+{
+ if (cpumask_first(cpu_online_mask) != 0) {
+ pr_warn("CPU0 is offline.\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int bsp_pm_callback(struct notifier_block *nb, unsigned long action,
+ void *ptr)
+{
+ int ret = 0;
+
+ switch (action) {
+ case PM_SUSPEND_PREPARE:
+ case PM_HIBERNATION_PREPARE:
+ ret = bsp_check();
+ break;
+#ifdef CONFIG_DEBUG_HOTPLUG_CPU0
+ case PM_RESTORE_PREPARE:
+ /*
+ * When system resumes from hibernation, online CPU0 because
+ * 1. it's required for resume and
+ * 2. the CPU was online before hibernation
+ */
+ if (!cpu_online(0))
+ _debug_hotplug_cpu(0, 1);
+ break;
+ case PM_POST_RESTORE:
+ /*
+ * When a resume really happens, this code won't be called.
+ *
+ * This code is called only when user space hibernation software
+ * prepares for snapshot device during boot time. So we just
+ * call _debug_hotplug_cpu() to restore to CPU0's state prior to
+ * preparing the snapshot device.
+ *
+ * This works for normal boot case in our CPU0 hotplug debug
+ * mode, i.e. CPU0 is offline and user mode hibernation
+ * software initializes during boot time.
+ *
+ * If CPU0 is online and user application accesses snapshot
+ * device after boot time, this will offline CPU0 and user may
+ * see different CPU0 state before and after accessing
+ * the snapshot device. But hopefully this is not a case when
+ * user debugging CPU0 hotplug. Even if users hit this case,
+ * they can easily online CPU0 back.
+ *
+ * To simplify this debug code, we only consider normal boot
+ * case. Otherwise we need to remember CPU0's state and restore
+ * to that state and resolve racy conditions etc.
+ */
+ _debug_hotplug_cpu(0, 0);
+ break;
+#endif
+ default:
+ break;
+ }
+ return notifier_from_errno(ret);
+}
+
+static int __init bsp_pm_check_init(void)
+{
+ /*
+ * Set this bsp_pm_callback as lower priority than
+ * cpu_hotplug_pm_callback. So cpu_hotplug_pm_callback will be called
+ * earlier to disable cpu hotplug before bsp online check.
+ */
+ pm_notifier(bsp_pm_callback, -INT_MAX);
+ return 0;
+}
+
+core_initcall(bsp_pm_check_init);
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index a47103fbc69..28e3fa9056e 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -8,7 +8,7 @@
#
0 i386 restart_syscall sys_restart_syscall
1 i386 exit sys_exit
-2 i386 fork ptregs_fork stub32_fork
+2 i386 fork sys_fork stub32_fork
3 i386 read sys_read
4 i386 write sys_write
5 i386 open sys_open compat_sys_open
@@ -126,7 +126,7 @@
117 i386 ipc sys_ipc sys32_ipc
118 i386 fsync sys_fsync
119 i386 sigreturn ptregs_sigreturn stub32_sigreturn
-120 i386 clone ptregs_clone stub32_clone
+120 i386 clone sys_clone stub32_clone
121 i386 setdomainname sys_setdomainname
122 i386 uname sys_newuname
123 i386 modify_ldt sys_modify_ldt
@@ -192,11 +192,11 @@
183 i386 getcwd sys_getcwd
184 i386 capget sys_capget
185 i386 capset sys_capset
-186 i386 sigaltstack ptregs_sigaltstack stub32_sigaltstack
+186 i386 sigaltstack sys_sigaltstack compat_sys_sigaltstack
187 i386 sendfile sys_sendfile sys32_sendfile
188 i386 getpmsg
189 i386 putpmsg
-190 i386 vfork ptregs_vfork stub32_vfork
+190 i386 vfork sys_vfork stub32_vfork
191 i386 ugetrlimit sys_getrlimit compat_sys_getrlimit
192 i386 mmap2 sys_mmap_pgoff
193 i386 truncate64 sys_truncate64 sys32_truncate64
@@ -356,3 +356,4 @@
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
349 i386 kcmp sys_kcmp
+350 i386 finit_module sys_finit_module
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index a582bfed95b..dc97328bd90 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -137,7 +137,7 @@
128 64 rt_sigtimedwait sys_rt_sigtimedwait
129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
130 common rt_sigsuspend sys_rt_sigsuspend
-131 64 sigaltstack stub_sigaltstack
+131 64 sigaltstack sys_sigaltstack
132 common utime sys_utime
133 common mknod sys_mknod
134 64 uselib
@@ -319,6 +319,7 @@
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
+313 common finit_module sys_finit_module
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -337,7 +338,7 @@
522 x32 rt_sigpending sys32_rt_sigpending
523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait
524 x32 rt_sigqueueinfo sys32_rt_sigqueueinfo
-525 x32 sigaltstack stub_x32_sigaltstack
+525 x32 sigaltstack compat_sys_sigaltstack
526 x32 timer_create compat_sys_timer_create
527 x32 mq_notify compat_sys_mq_notify
528 x32 kexec_load compat_sys_kexec_load
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index ddcf39b1a18..e6773dc8ac4 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -356,7 +356,7 @@ END {
exit 1
# print escape opcode map's array
print "/* Escape opcode map array */"
- print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \
+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
"[INAT_LSTPFX_MAX + 1] = {"
for (i = 0; i < geid; i++)
for (j = 0; j < max_lprefix; j++)
@@ -365,7 +365,7 @@ END {
print "};\n"
# print group opcode map's array
print "/* Group opcode map array */"
- print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\
+ print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
"[INAT_LSTPFX_MAX + 1] = {"
for (i = 0; i < ggid; i++)
for (j = 0; j < max_lprefix; j++)
@@ -374,7 +374,7 @@ END {
print "};\n"
# print AVX opcode map's array
print "/* AVX opcode map array */"
- print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
"[INAT_LSTPFX_MAX + 1] = {"
for (i = 0; i < gaid; i++)
for (j = 0; j < max_lprefix; j++)
diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c
index cc2f8c13128..872eb60e780 100644
--- a/arch/x86/tools/insn_sanity.c
+++ b/arch/x86/tools/insn_sanity.c
@@ -55,7 +55,7 @@ static FILE *input_file; /* Input file name */
static void usage(const char *err)
{
if (err)
- fprintf(stderr, "Error: %s\n\n", err);
+ fprintf(stderr, "%s: Error: %s\n\n", prog, err);
fprintf(stderr, "Usage: %s [-y|-n|-v] [-s seed[,no]] [-m max] [-i input]\n", prog);
fprintf(stderr, "\t-y 64bit mode\n");
fprintf(stderr, "\t-n 32bit mode\n");
@@ -269,7 +269,13 @@ int main(int argc, char **argv)
insns++;
}
- fprintf(stdout, "%s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n", (errors) ? "Failure" : "Success", insns, (input_file) ? "given" : "random", errors, seed);
+ fprintf(stdout, "%s: %s: decoded and checked %d %s instructions with %d errors (seed:0x%x)\n",
+ prog,
+ (errors) ? "Failure" : "Success",
+ insns,
+ (input_file) ? "given" : "random",
+ errors,
+ seed);
return errors ? 1 : 0;
}
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 5a1847d6193..79d67bd507f 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -814,12 +814,14 @@ int main(int argc, char **argv)
read_relocs(fp);
if (show_absolute_syms) {
print_absolute_symbols();
- return 0;
+ goto out;
}
if (show_absolute_relocs) {
print_absolute_relocs();
- return 0;
+ goto out;
}
emit_relocs(as_text, use_real_mode);
+out:
+ fclose(fp);
return 0;
}
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 07611759ce3..53c90fd412d 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,8 +13,7 @@ endmenu
config UML_X86
def_bool y
select GENERIC_FIND_FIRST_BIT
- select GENERIC_KERNEL_THREAD
- select GENERIC_KERNEL_EXECVE
+ select GENERIC_SIGALTSTACK
config 64BIT
bool "64-bit kernel" if SUBARCH = "x86"
@@ -25,13 +24,14 @@ config X86_32
select HAVE_AOUT
select ARCH_WANT_IPC_PARSE_VERSION
select MODULES_USE_ELF_REL
+ select CLONE_BACKWARDS
config X86_64
def_bool 64BIT
select MODULES_USE_ELF_RELA
config RWSEM_XCHGADD_ALGORITHM
- def_bool X86_XADD && 64BIT
+ def_bool 64BIT
config RWSEM_GENERIC_SPINLOCK
def_bool !RWSEM_XCHGADD_ALGORITHM
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index 755133258c4..54f8102ccde 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -86,4 +86,5 @@ extern long arch_prctl(struct task_struct *task, int code,
unsigned long __user *addr);
#endif
+#define user_stack_pointer(regs) PT_REGS_SP(regs)
#endif /* __UM_X86_PTRACE_H */
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index ca255a805ed..bd9a89b67e4 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,5 +1,3 @@
-extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid);
#ifdef __i386__
#include "syscalls_32.h"
#else
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bdaa08cfbcf..71cef48ea5c 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -342,9 +342,7 @@ static int copy_ucontext_to_user(struct ucontext __user *uc,
{
int err = 0;
- err |= put_user(current->sas_ss_sp, &uc->uc_stack.ss_sp);
- err |= put_user(sas_ss_flags(sp), &uc->uc_stack.ss_flags);
- err |= put_user(current->sas_ss_size, &uc->uc_stack.ss_size);
+ err |= __save_altstack(&uc->uc_stack, sp);
err |= copy_sc_to_user(&uc->uc_mcontext, fp, &current->thread.regs, 0);
err |= copy_to_user(&uc->uc_sigmask, set, sizeof(*set));
return err;
@@ -529,10 +527,7 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
/* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(PT_REGS_SP(regs)),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
+ err |= __save_altstack(&frame->uc.uc_stack, PT_REGS_SP(regs));
err |= copy_sc_to_user(&frame->uc.uc_mcontext, &frame->fpstate, regs,
set->sig[0]);
err |= __put_user(&frame->fpstate, &frame->uc.uc_mcontext.fpstate);
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 232e60504b3..a0c3b0d1a12 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -24,13 +24,9 @@
#define old_mmap sys_old_mmap
-#define ptregs_fork sys_fork
#define ptregs_iopl sys_iopl
#define ptregs_vm86old sys_vm86old
-#define ptregs_clone i386_clone
#define ptregs_vm86 sys_vm86
-#define ptregs_sigaltstack sys_sigaltstack
-#define ptregs_vfork sys_vfork
#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
#include <asm/syscalls_32.h>
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 170bd926a69..f2f0723070c 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -31,7 +31,6 @@
#define stub_fork sys_fork
#define stub_vfork sys_vfork
#define stub_execve sys_execve
-#define stub_sigaltstack sys_sigaltstack
#define stub_rt_sigreturn sys_rt_sigreturn
#define __SYSCALL_COMMON(nr, sym, compat) __SYSCALL_64(nr, sym, compat)
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index db444c7218f..e8bcea99acd 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -6,21 +6,6 @@
#include <linux/syscalls.h>
#include <sysdep/syscalls.h>
-/*
- * The prototype on i386 is:
- *
- * int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
- *
- * and the "newtls" arg. on i386 is read by copy_thread directly from the
- * register saved on the stack.
- */
-long i386_clone(unsigned long clone_flags, unsigned long newsp,
- int __user *parent_tid, void *newtls, int __user *child_tid)
-{
- return sys_clone(clone_flags, newsp, parent_tid, child_tid);
-}
-
-
long sys_sigaction(int sig, const struct old_sigaction __user *act,
struct old_sigaction __user *oact)
{
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 4df6c373421..c74436e687b 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -22,6 +22,7 @@
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
+#include <asm/pvclock.h>
#define gtod (&VVAR(vsyscall_gtod_data))
@@ -59,9 +60,79 @@ notrace static cycle_t vread_tsc(void)
static notrace cycle_t vread_hpet(void)
{
- return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + 0xf0);
+ return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER);
}
+#ifdef CONFIG_PARAVIRT_CLOCK
+
+static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
+{
+ const struct pvclock_vsyscall_time_info *pvti_base;
+ int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
+ int offset = cpu % (PAGE_SIZE/PVTI_SIZE);
+
+ BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);
+
+ pvti_base = (struct pvclock_vsyscall_time_info *)
+ __fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);
+
+ return &pvti_base[offset];
+}
+
+static notrace cycle_t vread_pvclock(int *mode)
+{
+ const struct pvclock_vsyscall_time_info *pvti;
+ cycle_t ret;
+ u64 last;
+ u32 version;
+ u32 migrate_count;
+ u8 flags;
+ unsigned cpu, cpu1;
+
+
+ /*
+ * When looping to get a consistent (time-info, tsc) pair, we
+ * also need to deal with the possibility we can switch vcpus,
+ * so make sure we always re-fetch time-info for the current vcpu.
+ */
+ do {
+ cpu = __getcpu() & VGETCPU_CPU_MASK;
+ /* TODO: We can put vcpu id into higher bits of pvti.version.
+ * This will save a couple of cycles by getting rid of
+ * __getcpu() calls (Gleb).
+ */
+
+ pvti = get_pvti(cpu);
+
+ migrate_count = pvti->migrate_count;
+
+ version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
+
+ /*
+ * Test we're still on the cpu as well as the version.
+ * We could have been migrated just after the first
+ * vgetcpu but before fetching the version, so we
+ * wouldn't notice a version change.
+ */
+ cpu1 = __getcpu() & VGETCPU_CPU_MASK;
+ } while (unlikely(cpu != cpu1 ||
+ (pvti->pvti.version & 1) ||
+ pvti->pvti.version != version ||
+ pvti->migrate_count != migrate_count));
+
+ if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
+ *mode = VCLOCK_NONE;
+
+ /* refer to tsc.c read_tsc() comment for rationale */
+ last = VVAR(vsyscall_gtod_data).clock.cycle_last;
+
+ if (likely(ret >= last))
+ return ret;
+
+ return last;
+}
+#endif
+
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
long ret;
@@ -80,7 +151,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
}
-notrace static inline u64 vgetsns(void)
+notrace static inline u64 vgetsns(int *mode)
{
long v;
cycles_t cycles;
@@ -88,6 +159,10 @@ notrace static inline u64 vgetsns(void)
cycles = vread_tsc();
else if (gtod->clock.vclock_mode == VCLOCK_HPET)
cycles = vread_hpet();
+#ifdef CONFIG_PARAVIRT_CLOCK
+ else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK)
+ cycles = vread_pvclock(mode);
+#endif
else
return 0;
v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
@@ -107,7 +182,7 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->wall_time_sec;
ns = gtod->wall_time_snsec;
- ns += vgetsns();
+ ns += vgetsns(&mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
@@ -127,7 +202,7 @@ notrace static int do_monotonic(struct timespec *ts)
mode = gtod->clock.vclock_mode;
ts->tv_sec = gtod->monotonic_time_sec;
ns = gtod->monotonic_time_snsec;
- ns += vgetsns();
+ ns += vgetsns(&mode);
ns >>= gtod->clock.shift;
} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
timespec_add_ns(ts, ns);
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c
index 5463ad55857..2f94b039e55 100644
--- a/arch/x86/vdso/vgetcpu.c
+++ b/arch/x86/vdso/vgetcpu.c
@@ -17,15 +17,10 @@ __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused)
{
unsigned int p;
- if (VVAR(vgetcpu_mode) == VGETCPU_RDTSCP) {
- /* Load per CPU data from RDTSCP */
- native_read_tscp(&p);
- } else {
- /* Load per CPU data from GDT */
- asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
- }
+ p = __getcpu();
+
if (cpu)
- *cpu = p & 0xfff;
+ *cpu = p & VGETCPU_CPU_MASK;
if (node)
*node = p >> 12;
return 0;
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 00aaf047b39..431e8754441 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -141,7 +141,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
* unaligned here as a result of stack start randomization.
*/
addr = PAGE_ALIGN(addr);
- addr = align_addr(addr, NULL, ALIGN_VDSO);
+ addr = align_vdso_addr(addr);
return addr;
}
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index fdce49c7aff..131dacd2748 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,8 +6,9 @@ config XEN
bool "Xen guest support"
select PARAVIRT
select PARAVIRT_CLOCK
+ select XEN_HAVE_PVMMU
depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS)
- depends on X86_CMPXCHG && X86_TSC
+ depends on X86_TSC
help
This is the Linux Xen port. Enabling this will allow the
kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 586d83812b6..39928d16be3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -193,10 +193,11 @@ void xen_vcpu_restore(void)
{
int cpu;
- for_each_online_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
bool other_cpu = (cpu != smp_processor_id());
+ bool is_up = HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL);
- if (other_cpu &&
+ if (other_cpu && is_up &&
HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL))
BUG();
@@ -205,7 +206,7 @@ void xen_vcpu_restore(void)
if (have_vcpu_info_placement)
xen_vcpu_setup(cpu);
- if (other_cpu &&
+ if (other_cpu && is_up &&
HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL))
BUG();
}
@@ -223,6 +224,21 @@ static void __init xen_banner(void)
version >> 16, version & 0xffff, extra.extraversion,
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
+/* Check if running on Xen version (major, minor) or later */
+bool
+xen_running_on_version_or_later(unsigned int major, unsigned int minor)
+{
+ unsigned int version;
+
+ if (!xen_domain())
+ return false;
+
+ version = HYPERVISOR_xen_version(XENVER_version, NULL);
+ if ((((version >> 16) == major) && ((version & 0xffff) >= minor)) ||
+ ((version >> 16) > major))
+ return true;
+ return false;
+}
#define CPUID_THERM_POWER_LEAF 6
#define APERFMPERF_PRESENT 0
@@ -287,8 +303,7 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
static bool __init xen_check_mwait(void)
{
-#if defined(CONFIG_ACPI) && !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR) && \
- !defined(CONFIG_ACPI_PROCESSOR_AGGREGATOR_MODULE)
+#ifdef CONFIG_ACPI
struct xen_platform_op op = {
.cmd = XENPF_set_processor_pminfo,
.u.set_pminfo.id = -1,
@@ -309,6 +324,13 @@ static bool __init xen_check_mwait(void)
if (!xen_initial_domain())
return false;
+ /*
+ * When running under platform earlier than Xen4.2, do not expose
+ * mwait, to avoid the risk of loading native acpi pad driver
+ */
+ if (!xen_running_on_version_or_later(4, 2))
+ return false;
+
ax = 1;
cx = 0;
@@ -1615,6 +1637,7 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
.name = "Xen HVM",
.detect = xen_hvm_platform,
.init_platform = xen_hvm_guest_init,
+ .x2apic_available = xen_x2apic_para_available,
};
EXPORT_SYMBOL(x86_hyper_xen_hvm);
#endif
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dcf5f2dd91e..01de35c7722 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2497,8 +2497,10 @@ static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
unsigned long addr,
- unsigned long mfn, int nr,
- pgprot_t prot, unsigned domid)
+ xen_pfn_t mfn, int nr,
+ pgprot_t prot, unsigned domid,
+ struct page **pages)
+
{
struct remap_data rmd;
struct mmu_update mmu_update[REMAP_BATCH_SIZE];
@@ -2542,3 +2544,14 @@ out:
return err;
}
EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
+
+/* Returns: 0 success */
+int xen_unmap_domain_mfn_range(struct vm_area_struct *vma,
+ int numpgs, struct page **pages)
+{
+ if (!pages || !xen_feature(XENFEAT_auto_translated_physmap))
+ return 0;
+
+ return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(xen_unmap_domain_mfn_range);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 353c50f1870..34bc4cee888 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -254,7 +254,7 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
}
xen_init_lock_cpu(0);
- smp_store_cpu_info(0);
+ smp_store_boot_cpu_info();
cpu_data(0).x86_max_cores = 1;
for_each_possible_cpu(i) {
@@ -432,13 +432,6 @@ static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
- /*
- * Balance out the preempt calls - as we are running in cpu_idle
- * loop which has been called at bootup from cpu_bringup_and_idle.
- * The cpucpu_bringup_and_idle called cpu_bringup which made a
- * preempt_disable() So this preempt_enable will balance it out.
- */
- preempt_enable();
}
#else /* !CONFIG_HOTPLUG_CPU */
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index f9643fc50de..33ca6e42a4c 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -89,11 +89,11 @@ ENTRY(xen_iret)
*/
#ifdef CONFIG_SMP
GET_THREAD_INFO(%eax)
- movl TI_cpu(%eax), %eax
- movl __per_cpu_offset(,%eax,4), %eax
- mov xen_vcpu(%eax), %eax
+ movl %ss:TI_cpu(%eax), %eax
+ movl %ss:__per_cpu_offset(,%eax,4), %eax
+ mov %ss:xen_vcpu(%eax), %eax
#else
- movl xen_vcpu, %eax
+ movl %ss:xen_vcpu, %eax
#endif
/* check IF state we're restoring */
@@ -106,11 +106,11 @@ ENTRY(xen_iret)
* resuming the code, so we don't have to be worried about
* being preempted to another CPU.
*/
- setz XEN_vcpu_info_mask(%eax)
+ setz %ss:XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:
/* check for unmasked and pending */
- cmpw $0x0001, XEN_vcpu_info_pending(%eax)
+ cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)
/*
* If there's something pending, mask events again so we can
@@ -118,7 +118,7 @@ xen_iret_start_crit:
* touch XEN_vcpu_info_mask.
*/
jne 1f
- movb $1, XEN_vcpu_info_mask(%eax)
+ movb $1, %ss:XEN_vcpu_info_mask(%eax)
1: popl %eax