Diffstat (limited to 'arch')
78 files changed, 571 insertions, 279 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 6801123932a5..a336548487e6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -71,6 +71,7 @@ config KPROBES
 config JUMP_LABEL
 	bool "Optimize very unlikely/likely branches"
 	depends on HAVE_ARCH_JUMP_LABEL
+	depends on CC_HAS_ASM_GOTO
 	help
 	  This option enables a transparent branch optimization that makes
 	  certain almost-always-true or almost-always-false branch
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 07e27f212dc7..d2453e2d3f1f 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -68,6 +68,8 @@
 #define BPIALL				__ACCESS_CP15(c7, 0, c5, 6)
 #define ICIALLU				__ACCESS_CP15(c7, 0, c5, 0)
 
+#define CNTVCT				__ACCESS_CP15_64(1, c14)
+
 extern unsigned long cr_alignment;	/* defined in entry-armv.S */
 
 static inline unsigned long get_cr(void)
diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c
index 90bce3d9928e..303b3ab87f7e 100644
--- a/arch/arm/kernel/jump_label.c
+++ b/arch/arm/kernel/jump_label.c
@@ -4,8 +4,6 @@
 #include <asm/patch.h>
 #include <asm/insn.h>
 
-#ifdef HAVE_JUMP_LABEL
-
 static void __arch_jump_label_transform(struct jump_entry *entry,
 					enum jump_label_type type,
 					bool is_static)
@@ -35,5 +33,3 @@ void arch_jump_label_transform_static(struct jump_entry *entry,
 {
 	__arch_jump_label_transform(entry, type, true);
 }
-
-#endif
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index a9dd619c6c29..7bdbf5d5c47d 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -18,9 +18,9 @@
 #include <linux/compiler.h>
 #include <linux/hrtimer.h>
 #include <linux/time.h>
-#include <asm/arch_timer.h>
 #include <asm/barrier.h>
 #include <asm/bug.h>
+#include <asm/cp15.h>
 #include <asm/page.h>
 #include <asm/unistd.h>
 #include <asm/vdso_datapage.h>
@@ -123,7 +123,8 @@ static notrace u64 get_ns(struct vdso_data *vdata)
 	u64 cycle_now;
 	u64 nsec;
 
-	cycle_now = arch_counter_get_cntvct();
+	isb();
+	cycle_now = read_sysreg(CNTVCT);
 
 	cycle_delta = (cycle_now - vdata->cs_cycle_last) &
 		      vdata->cs_mask;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1b1a0e95c751..8790a29d0af4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -479,6 +479,24 @@ config ARM64_ERRATUM_1024718
 
 	  If unsure, say Y.
 
+config ARM64_ERRATUM_1463225
+	bool "Cortex-A76: Software Step might prevent interrupt recognition"
+	default y
+	help
+	  This option adds a workaround for Arm Cortex-A76 erratum 1463225.
+
+	  On the affected Cortex-A76 cores (r0p0 to r3p1), software stepping
+	  of a system call instruction (SVC) can prevent recognition of
+	  subsequent interrupts when software stepping is disabled in the
+	  exception handler of the system call and either kernel debugging
+	  is enabled or VHE is in use.
+
+	  Work around the erratum by triggering a dummy step exception
+	  when handling a system call from a task that is being stepped
+	  in a VHE configuration of the kernel.
+
+	  If unsure, say Y.
+
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ae1f70450fb2..25ce9056cf64 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -51,7 +51,8 @@
 #define ARM64_SSBD				30
 #define ARM64_MISMATCHED_CACHE_TYPE		31
 #define ARM64_HAS_STAGE2_FWB			32
+#define ARM64_WORKAROUND_1463225		33
 
-#define ARM64_NCAPS				33
+#define ARM64_NCAPS				34
 
 #endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index ea690b3562af..b4a48419769f 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -86,6 +86,7 @@
 #define ARM_CPU_PART_CORTEX_A75		0xD0A
 #define ARM_CPU_PART_CORTEX_A35		0xD04
 #define ARM_CPU_PART_CORTEX_A55		0xD05
+#define ARM_CPU_PART_CORTEX_A76		0xD0B
 
 #define APM_CPU_PART_POTENZA		0x000
 
@@ -110,6 +111,7 @@
 #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75)
 #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35)
 #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
+#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
 #define MIDR_THUNDERX	MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
 #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
 #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 1bdeca8918a6..ea423db39364 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -444,6 +444,8 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 	return __pmd_to_phys(pmd);
 }
 
+static inline void pte_unmap(pte_t *pte) { }
+
 /* Find an entry in the third-level page table. */
 #define pte_index(addr)		(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
 
@@ -452,7 +454,6 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
 #define pte_offset_map_nested(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte)			do { } while (0)
 #define pte_unmap_nested(pte)		do { } while (0)
 
 #define pte_set_fixmap(addr)		((pte_t *)set_fixmap_offset(FIX_PTE, addr))
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
index 2b9a63771eda..f89263c8e11a 100644
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ b/arch/arm64/include/asm/vdso_datapage.h
@@ -38,6 +38,7 @@ struct vdso_data {
 	__u32 tz_minuteswest;	/* Whacky timezone stuff */
 	__u32 tz_dsttime;
 	__u32 use_syscall;
+	__u32 hrtimer_res;
 };
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 323aeb5f2fe6..92fba851ce53 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -99,7 +99,7 @@ int main(void)
   DEFINE(CLOCK_REALTIME,	CLOCK_REALTIME);
   DEFINE(CLOCK_MONOTONIC,	CLOCK_MONOTONIC);
   DEFINE(CLOCK_MONOTONIC_RAW,	CLOCK_MONOTONIC_RAW);
-  DEFINE(CLOCK_REALTIME_RES,	MONOTONIC_RES_NSEC);
+  DEFINE(CLOCK_REALTIME_RES,	offsetof(struct vdso_data, hrtimer_res));
   DEFINE(CLOCK_REALTIME_COARSE,	CLOCK_REALTIME_COARSE);
   DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
   DEFINE(CLOCK_COARSE_RES,	LOW_RES_NSEC);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index dec10898d688..dc6c535cbd13 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -411,6 +411,22 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
 }
 #endif	/* CONFIG_ARM64_SSBD */
 
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+DEFINE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
+
+static bool
+has_cortex_a76_erratum_1463225(const struct arm64_cpu_capabilities *entry,
+			       int scope)
+{
+	u32 midr = read_cpuid_id();
+	/* Cortex-A76 r0p0 - r3p1 */
+	struct midr_range range = MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 1);
+
+	WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+	return is_midr_in_range(midr, &range) && is_kernel_in_hyp_mode();
+}
+#endif
+
 #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max)	\
 	.matches = is_affected_midr_range,			\
 	.midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max)
@@ -680,6 +696,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		.matches = has_ssbd_mitigation,
 	},
 #endif
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+	{
+		.desc = "ARM erratum 1463225",
+		.capability = ARM64_WORKAROUND_1463225,
+		.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+		.matches = has_cortex_a76_erratum_1463225,
+	},
+#endif
 	{
 	}
 };
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index ea001241bdd4..00f8b8612b69 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -85,6 +85,7 @@ static const char *__init cpu_read_enable_method(int cpu)
 			pr_err("%pOF: missing enable-method property\n",
 				dn);
 		}
+		of_node_put(dn);
 	} else {
 		enable_method = acpi_get_enable_method(cpu);
 		if (!enable_method) {
diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c
index e0756416e567..b90754aebd12 100644
--- a/arch/arm64/kernel/jump_label.c
+++ b/arch/arm64/kernel/jump_label.c
@@ -20,8 +20,6 @@
 #include <linux/jump_label.h>
 #include <asm/insn.h>
 
-#ifdef HAVE_JUMP_LABEL
-
 void arch_jump_label_transform(struct jump_entry *entry,
 			       enum jump_label_type type)
 {
@@ -49,5 +47,3 @@ void arch_jump_label_transform_static(struct jump_entry *entry,
 	 * NOP needs to be replaced by a branch.
 	 */
 }
-
-#endif	/* HAVE_JUMP_LABEL */
diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c
index b09b6f75f759..06941c1fe418 100644
--- a/arch/arm64/kernel/kaslr.c
+++ b/arch/arm64/kernel/kaslr.c
@@ -145,15 +145,15 @@ u64 __init kaslr_early_init(u64 dt_phys)
 
 	if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
 		/*
-		 * Randomize the module region over a 4 GB window covering the
+		 * Randomize the module region over a 2 GB window covering the
 		 * kernel. This reduces the risk of modules leaking information
 		 * about the address of the kernel itself, but results in
 		 * branches between modules and the core kernel that are
 		 * resolved via PLTs. (Branches between modules will be
 		 * resolved normally.)
 		 */
-		module_range = SZ_4G - (u64)(_end - _stext);
-		module_alloc_base = max((u64)_end + offset - SZ_4G,
+		module_range = SZ_2G - (u64)(_end - _stext);
+		module_alloc_base = max((u64)_end + offset - SZ_2G,
 					(u64)MODULES_VADDR);
 	} else {
 		/*
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f0f27aeefb73..0b368ceccee4 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -56,7 +56,7 @@ void *module_alloc(unsigned long size)
 	 * can simply omit this fallback in that case.
 	 */
 	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
-				module_alloc_base + SZ_4G, GFP_KERNEL,
+				module_alloc_base + SZ_2G, GFP_KERNEL,
 				PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index 6f91e8116514..162a95ed0881 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -50,7 +50,7 @@ SYSCALL_DEFINE1(arm64_personality, unsigned int, personality)
 /*
  * Wrappers to pass the pt_regs argument.
  */
-#define sys_personality		sys_arm64_personality
+#define __arm64_sys_personality	__arm64_sys_arm64_personality
 
 asmlinkage long sys_ni_syscall(const struct pt_regs *);
 #define __arm64_sys_ni_syscall	sys_ni_syscall
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 5610ac01c1ec..871c739f060a 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -8,6 +8,7 @@
 #include <linux/syscalls.h>
 
 #include <asm/daifflags.h>
+#include <asm/debug-monitors.h>
 #include <asm/fpsimd.h>
 #include <asm/syscall.h>
 #include <asm/thread_info.h>
@@ -60,6 +61,35 @@ static inline bool has_syscall_work(unsigned long flags)
 int syscall_trace_enter(struct pt_regs *regs);
 void syscall_trace_exit(struct pt_regs *regs);
 
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
+
+static void cortex_a76_erratum_1463225_svc_handler(void)
+{
+	u32 reg, val;
+
+	if (!unlikely(test_thread_flag(TIF_SINGLESTEP)))
+		return;
+
+	if (!unlikely(this_cpu_has_cap(ARM64_WORKAROUND_1463225)))
+		return;
+
+	__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 1);
+	reg = read_sysreg(mdscr_el1);
+	val = reg | DBG_MDSCR_SS | DBG_MDSCR_KDE;
+	write_sysreg(val, mdscr_el1);
+	asm volatile("msr daifclr, #8");
+	isb();
+
+	/* We will have taken a single-step exception by this point */
+
+	write_sysreg(reg, mdscr_el1);
+	__this_cpu_write(__in_cortex_a76_erratum_1463225_wa, 0);
+}
+#else
+static void cortex_a76_erratum_1463225_svc_handler(void) { }
+#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+
 static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 			   const syscall_fn_t syscall_table[])
 {
@@ -68,6 +98,7 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 	regs->orig_x0 = regs->regs[0];
 	regs->syscallno = scno;
 
+	cortex_a76_erratum_1463225_svc_handler();
 	local_daif_restore(DAIF_PROCCTX);
 	user_exit();
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 2d419006ad43..ec0bb588d755 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -232,6 +232,9 @@ void update_vsyscall(struct timekeeper *tk)
 	vdso_data->wtm_clock_sec		= tk->wall_to_monotonic.tv_sec;
 	vdso_data->wtm_clock_nsec		= tk->wall_to_monotonic.tv_nsec;
 
+	/* Read without the seqlock held by clock_getres() */
+	WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
+
 	if (!use_syscall) {
 		/* tkr_mono.cycle_last == tkr_raw.cycle_last */
 		vdso_data->cs_cycle_last	= tk->tkr_mono.cycle_last;
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index e8f60112818f..856fee6d3512 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -308,13 +308,14 @@ ENTRY(__kernel_clock_getres)
 	ccmp	w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
 	b.ne	1f
 
-	ldr	x2, 5f
+	adr	vdso_data, _vdso_data
+	ldr	w2, [vdso_data, #CLOCK_REALTIME_RES]
 	b	2f
 1:
 	cmp	w0, #CLOCK_REALTIME_COARSE
 	ccmp	w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
 	b.ne	4f
-	ldr	x2, 6f
+	ldr	x2, 5f
 2:
 	cbz	x1, 3f
 	stp	xzr, x2, [x1]
@@ -328,8 +329,6 @@ ENTRY(__kernel_clock_getres)
 	svc	#0
 	ret
 5:
-	.quad	CLOCK_REALTIME_RES
-6:
 	.quad	CLOCK_COARSE_RES
 	.cfi_endproc
 ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index c389f2bef938..d3a5bb16f0b2 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -664,6 +664,11 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
 		return ret;
 
+	if (!is_vmalloc_addr(cpu_addr)) {
+		unsigned long pfn = page_to_pfn(virt_to_page(cpu_addr));
+		return __swiotlb_mmap_pfn(vma, pfn, size);
+	}
+
 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
 		/*
 		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
@@ -687,6 +692,11 @@ static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	struct vm_struct *area = find_vm_area(cpu_addr);
 
+	if (!is_vmalloc_addr(cpu_addr)) {
+		struct page *page = virt_to_page(cpu_addr);
+		return __swiotlb_get_sgtable_page(sgt, page, size);
+	}
+
 	if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
 		/*
 		 * DMA_ATTR_FORCE_CONTIGUOUS allocations are always remapped,
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a4c134677285..88cf0a0cb616 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -827,14 +827,47 @@ void __init hook_debug_fault_code(int nr,
 	debug_fault_info[nr].name	= name;
 }
 
+#ifdef CONFIG_ARM64_ERRATUM_1463225
+DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa);
+
+static int __exception
+cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+	if (user_mode(regs))
+		return 0;
+
+	if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa))
+		return 0;
+
+	/*
+	 * We've taken a dummy step exception from the kernel to ensure
+	 * that interrupts are re-enabled on the syscall path. Return back
+	 * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions
+	 * masked so that we can safely restore the mdscr and get on with
+	 * handling the syscall.
+	 */
+	regs->pstate |= PSR_D_BIT;
+	return 1;
+}
+#else
+static int __exception
+cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
+{
+	return 0;
+}
+#endif /* CONFIG_ARM64_ERRATUM_1463225 */
+
 asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
-					unsigned int esr,
-					struct pt_regs *regs)
+					      unsigned int esr,
+					      struct pt_regs *regs)
 {
 	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
 	unsigned long pc = instruction_pointer(regs);
 	int rv;
 
+	if (cortex_a76_erratum_1463225_debug_handler(regs))
+		return 0;
+
 	/*
 	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
 	 * already disabled to preserve the last enabled/disabled addresses.
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c index 32e3168316cd..ab943927f97a 100644 --- a/arch/mips/kernel/jump_label.c +++ b/arch/mips/kernel/jump_label.c @@ -16,8 +16,6 @@ #include <asm/cacheflush.h> #include <asm/inst.h> -#ifdef HAVE_JUMP_LABEL - /* * Define parameters for the standard MIPS and the microMIPS jump * instruction encoding respectively: @@ -70,5 +68,3 @@ void arch_jump_label_transform(struct jump_entry *e, mutex_unlock(&text_mutex); } - -#endif /* HAVE_JUMP_LABEL */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index f7ea8e21656b..e7f5ef6bed0f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1099,6 +1099,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; case KVM_CAP_MIPS_FPU: /* We don't handle systems with inconsistent cpu_has_fpu */ r = !!raw_cpu_has_fpu; diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c index 9d9f6f334d3c..3da3e2b1b51b 100644 --- a/arch/powerpc/boot/addnote.c +++ b/arch/powerpc/boot/addnote.c @@ -223,7 +223,11 @@ main(int ac, char **av) PUT_16(E_PHNUM, np + 2); /* write back */ - lseek(fd, (long) 0, SEEK_SET); + i = lseek(fd, (long) 0, SEEK_SET); + if (i < 0) { + perror("lseek"); + exit(1); + } i = write(fd, buf, n); if (i < 0) { perror("write"); diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 1f4691ce4126..e398173ae67d 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -38,7 +38,7 @@ extern struct static_key hcall_tracepoint_key; void __trace_hcall_entry(unsigned long opcode, unsigned long *args); void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf); /* OPAL tracing */ -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL extern struct static_key opal_tracepoint_key; #endif diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4898e9491a1c..9168a247e24f 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -970,7 +970,9 @@ start_here_multiplatform: /* Restore parameters passed from prom_init/kexec */ mr r3,r31 - bl early_setup /* also sets r13 and SPRG_PACA */ + LOAD_REG_ADDR(r12, DOTSYM(early_setup)) + mtctr r12 + bctrl /* also sets r13 and SPRG_PACA */ LOAD_REG_ADDR(r3, start_here_common) ld r4,PACAKMSR(r13) diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 6472472093d0..0080c5fbd225 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -11,7 +11,6 @@ #include <linux/jump_label.h> #include <asm/code-patching.h> -#ifdef HAVE_JUMP_LABEL void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { @@ -22,4 +21,3 @@ void arch_jump_label_transform(struct jump_entry *entry, else patch_instruction(addr, PPC_INST_NOP); } -#endif diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3c6ab22a0c4e..af3c15a1d41e 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ -static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer); static DEFINE_PER_CPU(u64, wd_timer_tb); /* SMP checker bits */ @@ -293,21 +293,21 @@ out: nmi_exit(); 
} -static void wd_timer_reset(unsigned int cpu, struct timer_list *t) -{ - t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); - if (wd_timer_period_ms > 1000) - t->expires = __round_jiffies_up(t->expires, cpu); - add_timer_on(t, cpu); -} - -static void wd_timer_fn(struct timer_list *t) +static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) { int cpu = smp_processor_id(); + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return HRTIMER_NORESTART; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return HRTIMER_NORESTART; + watchdog_timer_interrupt(cpu); - wd_timer_reset(cpu, t); + hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms)); + + return HRTIMER_RESTART; } void arch_touch_nmi_watchdog(void) @@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); -static void start_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - per_cpu(wd_timer_tb, cpu) = get_tb(); - - timer_setup(t, wd_timer_fn, TIMER_PINNED); - wd_timer_reset(cpu, t); -} - -static void stop_watchdog_timer_on(unsigned int cpu) -{ - struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); - - del_timer_sync(t); -} - -static int start_wd_on_cpu(unsigned int cpu) +static void start_watchdog(void *arg) { + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { WARN_ON(1); - return 0; + return; } if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) - return 0; + return; if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) - return 0; + return; wd_smp_lock(&flags); cpumask_set_cpu(cpu, &wd_cpus_enabled); @@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu) } wd_smp_unlock(&flags); - start_watchdog_timer_on(cpu); + *this_cpu_ptr(&wd_timer_tb) = get_tb(); - return 0; + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = watchdog_timer_fn; + hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), + HRTIMER_MODE_REL_PINNED); } -static int stop_wd_on_cpu(unsigned int cpu) +static int start_watchdog_on_cpu(unsigned int cpu) { + return smp_call_function_single(cpu, start_watchdog, NULL, true); +} + +static void stop_watchdog(void *arg) +{ + struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer); + int cpu = smp_processor_id(); unsigned long flags; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return 0; /* Can happen in CPU unplug case */ + return; /* Can happen in CPU unplug case */ - stop_watchdog_timer_on(cpu); + hrtimer_cancel(hrtimer); wd_smp_lock(&flags); cpumask_clear_cpu(cpu, &wd_cpus_enabled); wd_smp_unlock(&flags); wd_smp_clear_cpu_pending(cpu, get_tb()); +} - return 0; +static int stop_watchdog_on_cpu(unsigned int cpu) +{ + return smp_call_function_single(cpu, stop_watchdog, NULL, true); } static void watchdog_calc_timeouts(void) @@ -402,7 +400,7 @@ void watchdog_nmi_stop(void) int cpu; for_each_cpu(cpu, &wd_cpus_enabled) - stop_wd_on_cpu(cpu); + stop_watchdog_on_cpu(cpu); } void watchdog_nmi_start(void) @@ -411,7 +409,7 @@ void watchdog_nmi_start(void) watchdog_calc_timeouts(); for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) - start_wd_on_cpu(cpu); + start_watchdog_on_cpu(cpu); } /* @@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void) err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", - start_wd_on_cpu, stop_wd_on_cpu); + start_watchdog_on_cpu, + stop_watchdog_on_cpu); if (err < 0) { pr_warn("could not be initialized"); return err; diff --git 
a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index 30c2eb766954..aae34f218ab4 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1723,7 +1723,6 @@ static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) { xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); xive_native_configure_irq(hw_num, 0, MASKED, 0); - xive_cleanup_irq_data(xd); } static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) @@ -1737,9 +1736,10 @@ static void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) continue; kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data); + xive_cleanup_irq_data(&state->ipi_data); xive_native_free_irq(state->ipi_number); - /* Pass-through, cleanup too */ + /* Pass-through, cleanup too but keep IRQ hw data */ if (state->pt_number) kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 79b79408d92e..578174a33d22 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -632,6 +632,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; + case KVM_CAP_MAX_VCPU_ID: + r = KVM_MAX_VCPU_ID; + break; #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_PPC_GET_SMMU_INFO: r = 1; diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 10fb43efef50..f473c05e9649 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1495,6 +1495,9 @@ int start_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (firmware_has_feature(FW_FEATURE_PRRN)) { if (!prrn_enabled) { prrn_enabled = 1; @@ -1524,6 +1527,9 @@ int stop_topology_update(void) { int rc = 0; + if (!topology_updates_enabled) + return 0; + if (prrn_enabled) { prrn_enabled = 0; #ifdef CONFIG_SMP @@ -1579,11 +1585,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf, kbuf[read_len] = '\0'; - if (!strncmp(kbuf, "on", 2)) + if (!strncmp(kbuf, "on", 2)) { + topology_updates_enabled = true; start_topology_update(); - else if (!strncmp(kbuf, "off", 3)) + } else if (!strncmp(kbuf, "off", 3)) { stop_topology_update(); - else + topology_updates_enabled = false; + } else return -EINVAL; return count; @@ -1598,9 +1606,7 @@ static const struct file_operations topology_ops = { static int topology_update_init(void) { - /* Do not poll for changes if disabled at boot */ - if (topology_updates_enabled) - start_topology_update(); + start_topology_update(); if (vphn_enabled) topology_schedule_update(); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 81f8a0c838ae..4004dbdab9c7 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1827,6 +1827,7 @@ static int power_pmu_event_init(struct perf_event *event) int n; int err; struct cpu_hw_events *cpuhw; + u64 bhrb_filter; if (!ppmu) return -ENOENT; @@ -1932,13 +1933,14 @@ static int power_pmu_event_init(struct perf_event *event) err = power_check_constraints(cpuhw, events, cflags, n + 1); if (has_branch_stack(event)) { - cpuhw->bhrb_filter = ppmu->bhrb_filter_map( + bhrb_filter = ppmu->bhrb_filter_map( event->attr.branch_sample_type); - if (cpuhw->bhrb_filter == -1) { + if (bhrb_filter == -1) { put_cpu_var(cpu_hw_events); return -EOPNOTSUPP; } + cpuhw->bhrb_filter = bhrb_filter; } put_cpu_var(cpu_hw_events); diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 1fafc32b12a0..555322677074 100644 --- 
a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -496,6 +496,11 @@ static int nest_imc_event_init(struct perf_event *event) * Get the base memory addresss for this cpu. */ chip_id = cpu_to_chip_id(event->cpu); + + /* Return, if chip_id is not valid */ + if (chip_id < 0) + return -ENODEV; + pcni = pmu->mem_info; do { if (pcni->id == chip_id) { @@ -503,7 +508,7 @@ static int nest_imc_event_init(struct perf_event *event) break; } pcni++; - } while (pcni); + } while (pcni->vbase != 0); if (!flag) return -ENODEV; diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index d12a2db26353..d10feef93b6b 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -29,6 +29,7 @@ enum { #define POWER8_MMCRA_IFM1 0x0000000040000000UL #define POWER8_MMCRA_IFM2 0x0000000080000000UL #define POWER8_MMCRA_IFM3 0x00000000C0000000UL +#define POWER8_MMCRA_BHRB_MASK 0x00000000C0000000UL /* * Raw event encoding for PowerISA v2.07 (Power8): @@ -243,6 +244,8 @@ static u64 power8_bhrb_filter_map(u64 branch_sample_type) static void power8_config_bhrb(u64 pmu_bhrb_filter) { + pmu_bhrb_filter &= POWER8_MMCRA_BHRB_MASK; + /* Enable BHRB filter in PMU */ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); } diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index e012b1030a5b..c07b1615ee39 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -100,6 +100,7 @@ enum { #define POWER9_MMCRA_IFM1 0x0000000040000000UL #define POWER9_MMCRA_IFM2 0x0000000080000000UL #define POWER9_MMCRA_IFM3 0x00000000C0000000UL +#define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL /* Nasty Power9 specific hack */ #define PVR_POWER9_CUMULUS 0x00002000 @@ -308,6 +309,8 @@ static u64 power9_bhrb_filter_map(u64 branch_sample_type) static void power9_config_bhrb(u64 pmu_bhrb_filter) { + pmu_bhrb_filter &= POWER9_MMCRA_BHRB_MASK; + /* Enable BHRB filter in PMU */ mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter)); } diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 58a07948c76e..3d27f02695e4 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node, nr_chips)) goto error; - pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info), + pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info), GFP_KERNEL); if (!pmu_ptr->mem_info) goto error; diff --git a/arch/powerpc/platforms/powernv/opal-tracepoints.c b/arch/powerpc/platforms/powernv/opal-tracepoints.c index 1ab7d26c0a2c..f16a43540e30 100644 --- a/arch/powerpc/platforms/powernv/opal-tracepoints.c +++ b/arch/powerpc/platforms/powernv/opal-tracepoints.c @@ -4,7 +4,7 @@ #include <asm/trace.h> #include <asm/asm-prototypes.h> -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL struct static_key opal_tracepoint_key = STATIC_KEY_INIT; int opal_tracepoint_regfunc(void) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 251528231a9e..f4875fe3f8ff 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -20,7 +20,7 @@ .section ".text" #ifdef CONFIG_TRACEPOINTS -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL #define OPAL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, opal_tracepoint_key) #else diff --git a/arch/powerpc/platforms/pseries/hvCall.S 
b/arch/powerpc/platforms/pseries/hvCall.S index d91412c591ef..50dc9426d0be 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -19,7 +19,7 @@ #ifdef CONFIG_TRACEPOINTS -#ifndef HAVE_JUMP_LABEL +#ifndef CONFIG_JUMP_LABEL .section ".toc","aw" .globl hcall_tracepoint_refcount @@ -79,7 +79,7 @@ hcall_tracepoint_refcount: mr r5,BUFREG; \ __HCALL_INST_POSTCALL -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL #define HCALL_BRANCH(LABEL) \ ARCH_STATIC_BRANCH(LABEL, hcall_tracepoint_key) #else diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index d3992ced0782..9e52b686a8fa 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -828,7 +828,7 @@ EXPORT_SYMBOL(arch_free_page); #endif /* CONFIG_PPC_BOOK3S_64 */ #ifdef CONFIG_TRACEPOINTS -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL struct static_key hcall_tracepoint_key = STATIC_KEY_INIT; int hcall_tracepoint_regfunc(void) diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index c54cb26eb7f5..8ff7cb3da1cb 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -27,14 +27,14 @@ #include <linux/module.h> #include <linux/cpufeature.h> #include <linux/init.h> -#include <linux/spinlock.h> +#include <linux/mutex.h> #include <linux/fips.h> #include <linux/string.h> #include <crypto/xts.h> #include <asm/cpacf.h> static u8 *ctrblk; -static DEFINE_SPINLOCK(ctrblk_lock); +static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions, kma_functions; @@ -698,7 +698,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, unsigned int n, nbytes; int ret, locked; - locked = spin_trylock(&ctrblk_lock); + locked = mutex_trylock(&ctrblk_lock); ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) { @@ -716,7 +716,7 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (locked) - spin_unlock(&ctrblk_lock); + mutex_unlock(&ctrblk_lock); /* * final block may be < AES_BLOCK_SIZE, copy only nbytes */ @@ -826,19 +826,45 @@ static int gcm_aes_setauthsize(struct crypto_aead *tfm, unsigned int authsize) return 0; } -static void gcm_sg_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, - unsigned int len) +static void gcm_walk_start(struct gcm_sg_walk *gw, struct scatterlist *sg, + unsigned int len) { memset(gw, 0, sizeof(*gw)); gw->walk_bytes_remain = len; scatterwalk_start(&gw->walk, sg); } -static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) +static inline unsigned int _gcm_sg_clamp_and_map(struct gcm_sg_walk *gw) +{ + struct scatterlist *nextsg; + + gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); + while (!gw->walk_bytes) { + nextsg = sg_next(gw->walk.sg); + if (!nextsg) + return 0; + scatterwalk_start(&gw->walk, nextsg); + gw->walk_bytes = scatterwalk_clamp(&gw->walk, + gw->walk_bytes_remain); + } + gw->walk_ptr = scatterwalk_map(&gw->walk); + return gw->walk_bytes; +} + +static inline void _gcm_sg_unmap_and_advance(struct gcm_sg_walk *gw, + unsigned int nbytes) +{ + gw->walk_bytes_remain -= nbytes; + scatterwalk_unmap(&gw->walk); + scatterwalk_advance(&gw->walk, nbytes); + scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); + gw->walk_ptr = NULL; +} + +static int gcm_in_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) { int 
n; - /* minbytesneeded <= AES_BLOCK_SIZE */ if (gw->buf_bytes && gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; @@ -851,13 +877,11 @@ static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) goto out; } - gw->walk_bytes = scatterwalk_clamp(&gw->walk, gw->walk_bytes_remain); - if (!gw->walk_bytes) { - scatterwalk_start(&gw->walk, sg_next(gw->walk.sg)); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); + if (!_gcm_sg_clamp_and_map(gw)) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; } - gw->walk_ptr = scatterwalk_map(&gw->walk); if (!gw->buf_bytes && gw->walk_bytes >= minbytesneeded) { gw->ptr = gw->walk_ptr; @@ -869,51 +893,90 @@ static int gcm_sg_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) n = min(gw->walk_bytes, AES_BLOCK_SIZE - gw->buf_bytes); memcpy(gw->buf + gw->buf_bytes, gw->walk_ptr, n); gw->buf_bytes += n; - gw->walk_bytes_remain -= n; - scatterwalk_unmap(&gw->walk); - scatterwalk_advance(&gw->walk, n); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - + _gcm_sg_unmap_and_advance(gw, n); if (gw->buf_bytes >= minbytesneeded) { gw->ptr = gw->buf; gw->nbytes = gw->buf_bytes; goto out; } - - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); - if (!gw->walk_bytes) { - scatterwalk_start(&gw->walk, sg_next(gw->walk.sg)); - gw->walk_bytes = scatterwalk_clamp(&gw->walk, - gw->walk_bytes_remain); + if (!_gcm_sg_clamp_and_map(gw)) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; } - gw->walk_ptr = scatterwalk_map(&gw->walk); } out: return gw->nbytes; } -static void gcm_sg_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +static int gcm_out_walk_go(struct gcm_sg_walk *gw, unsigned int minbytesneeded) { - int n; + if (gw->walk_bytes_remain == 0) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; + } + if (!_gcm_sg_clamp_and_map(gw)) { + gw->ptr = NULL; + gw->nbytes = 0; + goto out; + } + + if (gw->walk_bytes >= minbytesneeded) { + gw->ptr = gw->walk_ptr; + gw->nbytes = gw->walk_bytes; + goto out; + } + + scatterwalk_unmap(&gw->walk); + gw->walk_ptr = NULL; + + gw->ptr = gw->buf; + gw->nbytes = sizeof(gw->buf); + +out: + return gw->nbytes; +} + +static int gcm_in_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +{ if (gw->ptr == NULL) - return; + return 0; if (gw->ptr == gw->buf) { - n = gw->buf_bytes - bytesdone; + int n = gw->buf_bytes - bytesdone; if (n > 0) { memmove(gw->buf, gw->buf + bytesdone, n); - gw->buf_bytes -= n; + gw->buf_bytes = n; } else gw->buf_bytes = 0; - } else { - gw->walk_bytes_remain -= bytesdone; - scatterwalk_unmap(&gw->walk); - scatterwalk_advance(&gw->walk, bytesdone); - scatterwalk_done(&gw->walk, 0, gw->walk_bytes_remain); - } + } else + _gcm_sg_unmap_and_advance(gw, bytesdone); + + return bytesdone; +} + +static int gcm_out_walk_done(struct gcm_sg_walk *gw, unsigned int bytesdone) +{ + int i, n; + + if (gw->ptr == NULL) + return 0; + + if (gw->ptr == gw->buf) { + for (i = 0; i < bytesdone; i += n) { + if (!_gcm_sg_clamp_and_map(gw)) + return i; + n = min(gw->walk_bytes, bytesdone - i); + memcpy(gw->walk_ptr, gw->buf + i, n); + _gcm_sg_unmap_and_advance(gw, n); + } + } else + _gcm_sg_unmap_and_advance(gw, bytesdone); + + return bytesdone; } static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) @@ -926,7 +989,7 @@ static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) unsigned int pclen = req->cryptlen; int ret = 0; - unsigned int len, in_bytes, out_bytes, + unsigned int n, len, in_bytes, 
out_bytes, min_bytes, bytes, aad_bytes, pc_bytes; struct gcm_sg_walk gw_in, gw_out; u8 tag[GHASH_DIGEST_SIZE]; @@ -963,14 +1026,14 @@ static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) *(u32 *)(param.j0 + ivsize) = 1; memcpy(param.k, ctx->key, ctx->key_len); - gcm_sg_walk_start(&gw_in, req->src, len); - gcm_sg_walk_start(&gw_out, req->dst, len); + gcm_walk_start(&gw_in, req->src, len); + gcm_walk_start(&gw_out, req->dst, len); do { min_bytes = min_t(unsigned int, aadlen > 0 ? aadlen : pclen, AES_BLOCK_SIZE); - in_bytes = gcm_sg_walk_go(&gw_in, min_bytes); - out_bytes = gcm_sg_walk_go(&gw_out, min_bytes); + in_bytes = gcm_in_walk_go(&gw_in, min_bytes); + out_bytes = gcm_out_walk_go(&gw_out, min_bytes); bytes = min(in_bytes, out_bytes); if (aadlen + pclen <= bytes) { @@ -997,8 +1060,11 @@ static int gcm_aes_crypt(struct aead_request *req, unsigned int flags) gw_in.ptr + aad_bytes, pc_bytes, gw_in.ptr, aad_bytes); - gcm_sg_walk_done(&gw_in, aad_bytes + pc_bytes); - gcm_sg_walk_done(&gw_out, aad_bytes + pc_bytes); + n = aad_bytes + pc_bytes; + if (gcm_in_walk_done(&gw_in, n) != n) + return -ENOMEM; + if (gcm_out_walk_done(&gw_out, n) != n) + return -ENOMEM; aadlen -= aad_bytes; pclen -= pc_bytes; } while (aadlen + pclen > 0); diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 5346b5a80bb6..65bda1178963 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -14,6 +14,7 @@ #include <linux/cpufeature.h> #include <linux/crypto.h> #include <linux/fips.h> +#include <linux/mutex.h> #include <crypto/algapi.h> #include <crypto/des.h> #include <asm/cpacf.h> @@ -21,7 +22,7 @@ #define DES3_KEY_SIZE (3 * DES_KEY_SIZE) static u8 *ctrblk; -static DEFINE_SPINLOCK(ctrblk_lock); +static DEFINE_MUTEX(ctrblk_lock); static cpacf_mask_t km_functions, kmc_functions, kmctr_functions; @@ -387,7 +388,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, unsigned int n, nbytes; int ret, locked; - locked = spin_trylock(&ctrblk_lock); + locked = mutex_trylock(&ctrblk_lock); ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE); while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) { @@ -404,7 +405,7 @@ static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc, ret = blkcipher_walk_done(desc, walk, nbytes - n); } if (locked) - spin_unlock(&ctrblk_lock); + mutex_unlock(&ctrblk_lock); /* final block may be < DES_BLOCK_SIZE, copy only nbytes */ if (nbytes) { cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr, diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index dbfd1730e631..b205c0ff0b22 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -44,7 +44,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o -obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o +obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o obj-y += nospec-branch.o @@ -68,6 +68,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KEXEC_FILE) += 
machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index 43f8430fb67d..68f415e334a5 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -10,8 +10,6 @@ #include <linux/jump_label.h> #include <asm/ipl.h> -#ifdef HAVE_JUMP_LABEL - struct insn { u16 opcode; s32 offset; @@ -102,5 +100,3 @@ void arch_jump_label_transform_static(struct jump_entry *entry, { __jump_label_transform(entry, type, 1); } - -#endif diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c index 5a286b012043..602e7cc26d11 100644 --- a/arch/s390/kernel/kexec_elf.c +++ b/arch/s390/kernel/kexec_elf.c @@ -19,10 +19,15 @@ static int kexec_file_add_elf_kernel(struct kimage *image, struct kexec_buf buf; const Elf_Ehdr *ehdr; const Elf_Phdr *phdr; + Elf_Addr entry; int i, ret; ehdr = (Elf_Ehdr *)kernel; buf.image = image; + if (image->type == KEXEC_TYPE_CRASH) + entry = STARTUP_KDUMP_OFFSET; + else + entry = ehdr->e_entry; phdr = (void *)ehdr + ehdr->e_phoff; for (i = 0; i < ehdr->e_phnum; i++, phdr++) { @@ -35,7 +40,7 @@ static int kexec_file_add_elf_kernel(struct kimage *image, buf.mem = ALIGN(phdr->p_paddr, phdr->p_align); buf.memsz = phdr->p_memsz; - if (phdr->p_paddr == 0) { + if (entry - phdr->p_paddr < phdr->p_memsz) { data->kernel_buf = buf.buffer; data->memsz += STARTUP_NORMAL_OFFSET; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ac5da6b0b862..f538e3fac7ad 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -489,6 +489,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: r = KVM_S390_BSCA_CPU_SLOTS; if (!kvm_s390_use_sca_entries()) r = KVM_MAX_VCPUS; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index f2cc7da473e4..ae894ac83fd6 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -410,6 +410,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, return old; } +#ifdef CONFIG_PGSTE static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; @@ -427,6 +428,7 @@ static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) pmd = pmd_alloc(mm, pud, addr); return pmd; } +#endif pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t new) diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h index 96b8cb1f754a..029bbadaf7ab 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7786.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h @@ -135,7 +135,7 @@ enum { static inline u32 sh7786_mm_sel(void) { - return __raw_readl(0xFC400020) & 0x7; + return __raw_readl((const volatile void __iomem *)0xFC400020) & 0x7; } #endif /* __CPU_SH7786_H__ */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index cf8640841b7a..97c0e19263d1 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -118,4 +118,4 @@ pc--$(CONFIG_PERF_EVENTS) := perf_event.o obj-$(CONFIG_SPARC64) += $(pc--y) obj-$(CONFIG_UPROBES) += uprobes.o -obj-$(CONFIG_SPARC64) += jump_label.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 7f8eac51df33..a4cfaeecaf5e 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -9,8 +9,6 @@ #include <asm/cacheflush.h> -#ifdef HAVE_JUMP_LABEL - void arch_jump_label_transform(struct jump_entry *entry, enum 
jump_label_type type) { @@ -47,5 +45,3 @@ void arch_jump_label_transform(struct jump_entry *entry, flushi(insn); mutex_unlock(&text_mutex); } - -#endif diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index d245f89d1395..d220b6848746 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -587,7 +587,7 @@ xcall_flush_tlb_kernel_range: /* 44 insns */ sub %g7, %g1, %g3 srlx %g3, 18, %g2 brnz,pn %g2, 2f - add %g2, 1, %g2 + sethi %hi(PAGE_SIZE), %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -751,7 +751,7 @@ __cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ sub %g7, %g1, %g3 srlx %g3, 18, %g2 brnz,pn %g2, 2f - add %g2, 1, %g2 + sethi %hi(PAGE_SIZE), %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP diff --git a/arch/x86/Makefile b/arch/x86/Makefile index ffc823a8312f..ce0d0424a53d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -47,7 +47,7 @@ export REALMODE_CFLAGS export BITS ifdef CONFIG_X86_NEED_RELOCS - LDFLAGS_vmlinux := --emit-relocs + LDFLAGS_vmlinux := --emit-relocs --discard-none endif # @@ -305,7 +305,7 @@ vdso_install: archprepare: checkbin checkbin: -ifndef CC_HAVE_ASM_GOTO +ifndef CONFIG_CC_HAS_ASM_GOTO @echo Compiler lacks asm-goto support. @exit 1 endif diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 352e70cd33e8..e699b2041665 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -337,7 +337,7 @@ For 32-bit we have the following conventions - kernel is built with */ .macro CALL_enter_from_user_mode #ifdef CONFIG_CONTEXT_TRACKING -#ifdef HAVE_JUMP_LABEL +#ifdef CONFIG_JUMP_LABEL STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0 #endif call enter_from_user_mode diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 56194c571299..4a650eb3d94a 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -584,6 +584,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates), X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 91039ffed633..2413169ce362 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -780,6 +780,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init), + + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init), {}, }; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 1b9f85abf9bc..ace6c1e752fb 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -89,6 +89,7 @@ static bool test_intel(int idx) case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_KABYLAKE_MOBILE: case INTEL_FAM6_KABYLAKE_DESKTOP: + case INTEL_FAM6_ICELAKE_MOBILE: if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF) return true; break; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 86b1341cba9a..513ba49c204f 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -61,9 +61,8 @@ } while (0) #define RELOAD_SEG(seg) { \ - unsigned int pre = GET_SEG(seg); \ + unsigned int pre = (seg) | 3; \ unsigned int cur = get_user_seg(seg); \ - pre |= 3; \ if (pre != 
cur) \ set_user_seg(seg, pre); \ } @@ -72,6 +71,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, struct sigcontext_32 __user *sc) { unsigned int tmpflags, err = 0; + u16 gs, fs, es, ds; void __user *buf; u32 tmp; @@ -79,16 +79,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, current->restart_block.fn = do_no_restart_syscall; get_user_try { - /* - * Reload fs and gs if they have changed in the signal - * handler. This does not handle long fs/gs base changes in - * the handler, but does not clobber them at least in the - * normal case. - */ - RELOAD_SEG(gs); - RELOAD_SEG(fs); - RELOAD_SEG(ds); - RELOAD_SEG(es); + gs = GET_SEG(gs); + fs = GET_SEG(fs); + ds = GET_SEG(ds); + es = GET_SEG(es); COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); COPY(dx); COPY(cx); COPY(ip); COPY(ax); @@ -106,6 +100,17 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, buf = compat_ptr(tmp); } get_user_catch(err); + /* + * Reload fs and gs if they have changed in the signal + * handler. This does not handle long fs/gs base changes in + * the handler, but does not clobber them at least in the + * normal case. + */ + RELOAD_SEG(gs); + RELOAD_SEG(fs); + RELOAD_SEG(ds); + RELOAD_SEG(es); + err |= fpu__restore_sig(buf, 1); force_iret(); diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index aced6c9290d6..ce95b8cbd229 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,7 +140,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) -#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO) +#if defined(__clang__) && !defined(CONFIG_CC_HAS_ASM_GOTO) /* * Workaround for the sake of BPF compilation which utilizes kernel diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 8c0de4282659..7010e1c594c4 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -2,19 +2,6 @@ #ifndef _ASM_X86_JUMP_LABEL_H #define _ASM_X86_JUMP_LABEL_H -#ifndef HAVE_JUMP_LABEL -/* - * For better or for worse, if jump labels (the gcc extension) are missing, - * then the entire static branch patching infrastructure is compiled out. - * If that happens, the code in here will malfunction. Raise a compiler - * error instead. - * - * In theory, jump labels and the static branch patching infrastructure - * could be decoupled to fix this. - */ -#error asm/jump_label.h included on a non-jump-label kernel -#endif - #define JUMP_LABEL_NOP_SIZE 5 #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 4914a3e7c803..033dc7ca49e9 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -4,7 +4,7 @@ #define __CLOBBERS_MEM(clb...) 
"memory", ## clb -#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) +#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CONFIG_CC_HAS_ASM_GOTO) /* Use asm goto */ @@ -21,7 +21,7 @@ cc_label: \ #define __BINARY_RMWcc_ARG " %1, " -#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ /* Use flags output or a set instruction */ @@ -36,7 +36,7 @@ do { \ #define __BINARY_RMWcc_ARG " %2, " -#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CONFIG_CC_HAS_ASM_GOTO) */ #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ __GEN_RMWcc(op " " arg0, var, cc, __CLOBBERS_MEM()) diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 05861cc08787..0bbb07eaed6b 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -39,6 +39,7 @@ extern int poke_int3_handler(struct pt_regs *regs); extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); extern int after_bootmem; +#ifndef CONFIG_UML_X86 static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip) { regs->ip = ip; @@ -65,6 +66,7 @@ static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func) int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE); int3_emulate_jmp(regs, func); } -#endif +#endif /* CONFIG_X86_64 */ +#endif /* !CONFIG_UML_X86 */ #endif /* _ASM_X86_TEXT_PATCHING_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 8824d01c0c35..da0b6bc090f3 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -49,7 +49,8 @@ obj-$(CONFIG_COMPAT) += signal_compat.o obj-y += traps.o idt.o irq.o irq_$(BITS).o dumpstack_$(BITS).o obj-y += time.o ioport.o dumpstack.o nmi.o obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o -obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o +obj-y += setup.o x86_init.o i8259.o irqinit.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-y += probe_roms.o obj-$(CONFIG_X86_64) += sys_x86_64.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index b9d5e7c9ef43..918a23704c0c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -662,15 +662,29 @@ void __init alternative_instructions(void) * handlers seeing an inconsistent instruction while you patch. */ void *__init_or_module text_poke_early(void *addr, const void *opcode, - size_t len) + size_t len) { unsigned long flags; - local_irq_save(flags); - memcpy(addr, opcode, len); - local_irq_restore(flags); - sync_core(); - /* Could also do a CLFLUSH here to speed up CPU recovery; but - that causes hangs on some VIA CPUs. */ + + if (boot_cpu_has(X86_FEATURE_NX) && + is_module_text_address((unsigned long)addr)) { + /* + * Modules text is marked initially as non-executable, so the + * code cannot be running and speculative code-fetches are + * prevented. Just change the code. + */ + memcpy(addr, opcode, len); + } else { + local_irq_save(flags); + memcpy(addr, opcode, len); + local_irq_restore(flags); + sync_core(); + + /* + * Could also do a CLFLUSH here to speed up CPU recovery; but + * that causes hangs on some VIA CPUs. 
+ */ + } return addr; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index c805a06e14c3..ff1c00b695ae 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -46,8 +46,6 @@ static struct mce i_mce; static struct dentry *dfs_inj; -static u8 n_banks; - #define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 @@ -567,9 +565,15 @@ err: static int inj_bank_set(void *data, u64 val) { struct mce *m = (struct mce *)data; + u8 n_banks; + u64 cap; + + /* Get bank count on target CPU so we can handle non-uniform values. */ + rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap); + n_banks = cap & MCG_BANKCNT_MASK; if (val >= n_banks) { - pr_err("Non-existent MCE bank: %llu\n", val); + pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu); return -EINVAL; } @@ -659,10 +663,6 @@ static struct dfs_node { static int __init debugfs_init(void) { unsigned int i; - u64 cap; - - rdmsrl(MSR_IA32_MCG_CAP, cap); - n_banks = cap & MCG_BANKCNT_MASK; dfs_inj = debugfs_create_dir("mce-inject", NULL); if (!dfs_inj) diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index f9e7096b1804..fee118b3b69f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -711,19 +711,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) barrier(); m.status = mce_rdmsrl(msr_ops.status(i)); + + /* If this entry is not valid, ignore it */ if (!(m.status & MCI_STATUS_VAL)) continue; /* - * Uncorrected or signalled events are handled by the exception - * handler when it is enabled, so don't process those here. - * - * TBD do the same check for MCI_STATUS_EN here? + * If we are logging everything (at CPU online) or this + * is a corrected error, then we must log it. */ - if (!(flags & MCP_UC) && - (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC))) - continue; + if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC)) + goto log_it; + + /* + * Newer Intel systems that support software error + * recovery need to make additional checks. Other + * CPUs should skip over uncorrected errors, but log + * everything else. + */ + if (!mca_cfg.ser) { + if (m.status & MCI_STATUS_UC) + continue; + goto log_it; + } + + /* Log "not enabled" (speculative) errors */ + if (!(m.status & MCI_STATUS_EN)) + goto log_it; + + /* + * Log UCNA (SDM: 15.6.3 "UCR Error Classification") + * UC == 1 && PCC == 0 && S == 0 + */ + if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S)) + goto log_it; + + /* + * Skip anything else. Presumption is that our read of this + * bank is racing with a machine check. Leave the log alone + * for do_machine_check() to deal with it. 
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index b9bc8a1a584e..b43ddefd77f4 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -418,8 +418,9 @@ static int do_microcode_update(const void __user *buf, size_t size)
 		if (ustate == UCODE_ERROR) {
 			error = -1;
 			break;
-		} else if (ustate == UCODE_OK)
+		} else if (ustate == UCODE_NEW) {
 			apply_microcode_on_target(cpu);
+		}
 	}

 	return error;
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4d2a401c178b..9f033dfd2766 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -752,18 +752,21 @@ union ftrace_op_code_union {
 	} __attribute__((packed));
 };

+#define RET_SIZE		1
+
 static unsigned long
 create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 {
-	unsigned const char *jmp;
 	unsigned long start_offset;
 	unsigned long end_offset;
 	unsigned long op_offset;
 	unsigned long offset;
+	unsigned long npages;
 	unsigned long size;
-	unsigned long ip;
+	unsigned long retq;
 	unsigned long *ptr;
 	void *trampoline;
+	void *ip;
 	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
 	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
 	union ftrace_op_code_union op_ptr;
@@ -783,27 +786,28 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)

 	/*
 	 * Allocate enough size to store the ftrace_caller code,
-	 * the jmp to ftrace_epilogue, as well as the address of
-	 * the ftrace_ops this trampoline is used for.
+	 * the retq, as well as the address of the ftrace_ops this
+	 * trampoline is used for.
 	 */
-	trampoline = alloc_tramp(size + MCOUNT_INSN_SIZE + sizeof(void *));
+	trampoline = alloc_tramp(size + RET_SIZE + sizeof(void *));
 	if (!trampoline)
 		return 0;

-	*tramp_size = size + MCOUNT_INSN_SIZE + sizeof(void *);
+	*tramp_size = size + RET_SIZE + sizeof(void *);
+	npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);

 	/* Copy ftrace_caller onto the trampoline memory */
 	ret = probe_kernel_read(trampoline, (void *)start_offset, size);
-	if (WARN_ON(ret < 0)) {
-		tramp_free(trampoline, *tramp_size);
-		return 0;
-	}
+	if (WARN_ON(ret < 0))
+		goto fail;

-	ip = (unsigned long)trampoline + size;
+	ip = trampoline + size;

-	/* The trampoline ends with a jmp to ftrace_epilogue */
-	jmp = ftrace_jmp_replace(ip, (unsigned long)ftrace_epilogue);
-	memcpy(trampoline + size, jmp, MCOUNT_INSN_SIZE);
+	/* The trampoline ends with ret(q) */
+	retq = (unsigned long)ftrace_stub;
+	ret = probe_kernel_read(ip, (void *)retq, RET_SIZE);
+	if (WARN_ON(ret < 0))
+		goto fail;

 	/*
 	 * The address of the ftrace_ops that is used for this trampoline
@@ -813,17 +817,15 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	 * the global function_trace_op variable.
 	 */

-	ptr = (unsigned long *)(trampoline + size + MCOUNT_INSN_SIZE);
+	ptr = (unsigned long *)(trampoline + size + RET_SIZE);
 	*ptr = (unsigned long)ops;

 	op_offset -= start_offset;
 	memcpy(&op_ptr, trampoline + op_offset, OP_REF_SIZE);

 	/* Are we pointing to the reference? */
-	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0)) {
-		tramp_free(trampoline, *tramp_size);
-		return 0;
-	}
+	if (WARN_ON(memcmp(op_ptr.op, op_ref, 3) != 0))
+		goto fail;

 	/* Load the contents of ptr into the callback parameter */
 	offset = (unsigned long)ptr;
@@ -837,7 +839,16 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* ALLOC_TRAMP flags lets us know we created it */
 	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;

+	/*
+	 * Module allocation needs to be completed by making the page
+	 * executable. The page is still writable, which is a security hazard,
+	 * but anyhow ftrace breaks W^X completely.
+	 */
+	set_memory_x((unsigned long)trampoline, npages);
 	return (unsigned long)trampoline;
+fail:
+	tramp_free(trampoline, *tramp_size);
+	return 0;
 }

 static unsigned long calc_trampoline_call_offset(bool save_regs)
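Editor's aside: the trampoline path above now allocates writable-but-not-executable memory, fills it in, and only then flips it executable with set_memory_x(). The userspace sketch below models that ordering with mmap()/mprotect() stand-ins; it is POSIX-only, illustrative, and, unlike the trampoline, it also drops write permission when it enables execute.

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	/* x86-64 machine code: mov eax, 42; ret */
	static const unsigned char code[] = { 0xb8, 0x2a, 0x00, 0x00, 0x00, 0xc3 };
	size_t len = 4096;

	/* "alloc_tramp": RW mapping, never RWX */
	unsigned char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;

	memcpy(buf, code, sizeof(code));	/* patch while not executable */

	/* "set_memory_x" equivalent, done only after the code is in place */
	if (mprotect(buf, len, PROT_READ | PROT_EXEC))
		return 1;

	int (*fn)(void) = (int (*)(void))buf;	/* object->function cast: POSIX practice */
	printf("%d\n", fn());			/* prints 42 */

	munmap(buf, len);
	return 0;
}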
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 91b2cff4b79a..75f2b36b41a6 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -171,9 +171,6 @@ GLOBAL(ftrace_call)
 	restore_mcount_regs

 	/*
-	 * The copied trampoline must call ftrace_epilogue as it
-	 * still may need to call the function graph tracer.
-	 *
 	 * The code up to this label is copied into trampolines so
 	 * think twice before adding any new code or changing the
 	 * layout here.
@@ -185,7 +182,10 @@ GLOBAL(ftrace_graph_call)
 	jmp ftrace_stub
 #endif

-/* This is weak to keep gas from relaxing the jumps */
+/*
+ * This is weak to keep gas from relaxing the jumps.
+ * It is also used to copy the retq for trampolines.
+ */
 WEAK(ftrace_stub)
 	retq
 ENDPROC(ftrace_caller)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0469cd078db1..b50ac9c7397b 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -26,9 +26,18 @@ int sysctl_panic_on_stackoverflow;
 /*
  * Probabilistic stack overflow check:
  *
- * Only check the stack in process context, because everything else
- * runs on the big interrupt stacks. Checking reliably is too expensive,
- * so we just check from interrupts.
+ * Regular device interrupts can enter on the following stacks:
+ *
+ * - User stack
+ *
+ * - Kernel task stack
+ *
+ * - Interrupt stack if a device driver reenables interrupts
+ *   which should only happen in really old drivers.
+ *
+ * - Debug IST stack
+ *
+ * All other contexts are invalid.
  */
 static inline void stack_overflow_check(struct pt_regs *regs)
 {
@@ -53,8 +62,8 @@ static inline void stack_overflow_check(struct pt_regs *regs)
 		return;

 	oist = this_cpu_ptr(&orig_ist);
-	estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
-	estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
+	estack_bottom = (u64)oist->ist[DEBUG_STACK];
+	estack_top = estack_bottom - DEBUG_STKSZ + STACK_TOP_MARGIN;
 	if (regs->sp >= estack_top && regs->sp <= estack_bottom)
 		return;
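Editor's aside: the stack_overflow_check() hunk above keeps the same shape of test for each candidate stack: the saved SP must lie between the stack's lowest usable address plus a safety margin ("top") and its highest address ("bottom"). A trivial standalone model of that bounds test, with illustrative constants and names:

#include <stdbool.h>
#include <stdio.h>

#define STACK_TOP_MARGIN 128	/* bytes that must remain free near the top */

/* The stack grows down from 'bottom' (highest address) to 'top' (lowest). */
static bool sp_is_safe(unsigned long sp, unsigned long top, unsigned long bottom)
{
	return sp >= top + STACK_TOP_MARGIN && sp <= bottom;
}

int main(void)
{
	unsigned long top = 0x1000, bottom = 0x2000;

	printf("%d\n", sp_is_safe(0x1f00, top, bottom));	/* 1: plenty of room */
	printf("%d\n", sp_is_safe(0x1040, top, bottom));	/* 0: inside the margin */
	return 0;
}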
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index eeea935e9bb5..4c3d9a3d45b2 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,8 +16,6 @@
 #include <asm/alternative.h>
 #include <asm/text-patching.h>

-#ifdef HAVE_JUMP_LABEL
-
 union jump_code_union {
 	char code[JUMP_LABEL_NOP_SIZE];
 	struct {
@@ -142,5 +140,3 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
 	if (jlstate == JL_STATE_UPDATE)
 		__jump_label_transform(entry, type, text_poke_early, 1);
 }
-
-#endif
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 544bc2dfe408..e83a057564d1 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -431,8 +431,20 @@ void *alloc_insn_page(void)
 	void *page;

 	page = module_alloc(PAGE_SIZE);
-	if (page)
-		set_memory_ro((unsigned long)page & PAGE_MASK, 1);
+	if (!page)
+		return NULL;
+
+	/*
+	 * First make the page read-only, and only then make it executable to
+	 * prevent it from being W+X in between.
+	 */
+	set_memory_ro((unsigned long)page, 1);
+
+	/*
+	 * TODO: Once additional kernel code protection mechanisms are set, ensure
+	 * that the page was not maliciously altered and it is still zeroed.
+	 */
+	set_memory_x((unsigned long)page, 1);

 	return page;
 }
@@ -440,8 +452,12 @@ void *alloc_insn_page(void)
 /* Recover page to RW mode before releasing it */
 void free_insn_page(void *page)
 {
-	set_memory_nx((unsigned long)page & PAGE_MASK, 1);
-	set_memory_rw((unsigned long)page & PAGE_MASK, 1);
+	/*
+	 * First make the page non-executable, and only then make it writable to
+	 * prevent it from being W+X in between.
+	 */
+	set_memory_nx((unsigned long)page, 1);
+	set_memory_rw((unsigned long)page, 1);
 	module_memfree(page);
 }
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f58336af095c..6645f123419c 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -87,7 +87,7 @@ void *module_alloc(unsigned long size)
 	p = __vmalloc_node_range(size, MODULE_ALIGN,
 				    MODULES_VADDR + get_module_load_offset(),
 				    MODULES_END, GFP_KERNEL,
-				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				    PAGE_KERNEL, 0, NUMA_NO_NODE,
 				    __builtin_return_address(0));
 	if (p && (kasan_module_alloc(p, size) < 0)) {
 		vfree(p);
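Editor's aside: the rmwcc.h, x86 Makefile and jump_label.c hunks above all assume that asm-goto support is now expressed as CONFIG_CC_HAS_ASM_GOTO rather than a compiler-probed CC_HAVE_ASM_GOTO define, so the HAVE_JUMP_LABEL guards can go away. The following standalone C (needs GCC or Clang with asm-goto support on x86-64; illustrative only, not the kernel macros) shows the pattern the rmwcc/jump-label code generates: the conditional jump targets a C label directly instead of materialising a boolean.

#include <stdio.h>

/* Decrement *v and report whether it reached zero, in the style of
 * GEN_UNARY_RMWcc("decq", var, "%0", e). */
static int dec_and_test(long *v)
{
	asm goto("decq %0\n\t"
		 "je %l[is_zero]"
		 : /* asm goto allows no outputs */
		 : "m" (*v)
		 : "memory", "cc"
		 : is_zero);
	return 0;
is_zero:
	return 1;
}

int main(void)
{
	long counter = 2;

	printf("%d\n", dec_and_test(&counter));	/* 0 (2 -> 1) */
	printf("%d\n", dec_and_test(&counter));	/* 1 (1 -> 0) */
	return 0;
}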
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 92a3b312a53c..44e647a65de8 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -132,16 +132,6 @@ static int restore_sigcontext(struct pt_regs *regs,
 		COPY_SEG_CPL3(cs);
 		COPY_SEG_CPL3(ss);

-#ifdef CONFIG_X86_64
-		/*
-		 * Fix up SS if needed for the benefit of old DOSEMU and
-		 * CRIU.
-		 */
-		if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
-			     user_64bit_mode(regs)))
-			force_valid_ss(regs);
-#endif
-
 		get_user_ex(tmpflags, &sc->flags);
 		regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
 		regs->orig_ax = -1;		/* disable syscall checks */
@@ -150,6 +140,15 @@ static int restore_sigcontext(struct pt_regs *regs,
 		buf = (void __user *)buf_val;
 	} get_user_catch(err);

+#ifdef CONFIG_X86_64
+	/*
+	 * Fix up SS if needed for the benefit of old DOSEMU and
+	 * CRIU.
+	 */
+	if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
+		force_valid_ss(regs);
+#endif
+
 	err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));

 	force_iret();
@@ -461,6 +460,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 {
 	struct rt_sigframe __user *frame;
 	void __user *fp = NULL;
+	unsigned long uc_flags;
 	int err = 0;

 	frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
@@ -473,9 +473,11 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 		return -EFAULT;
 	}

+	uc_flags = frame_uc_flags(regs);
+
 	put_user_try {

 		/* Create the ucontext.  */
-		put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+		put_user_ex(uc_flags, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		save_altstack_ex(&frame->uc.uc_stack, regs->sp);

@@ -541,6 +543,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 {
 #ifdef CONFIG_X86_X32_ABI
 	struct rt_sigframe_x32 __user *frame;
+	unsigned long uc_flags;
 	void __user *restorer;
 	int err = 0;
 	void __user *fpstate = NULL;
@@ -555,9 +558,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 		return -EFAULT;
 	}

+	uc_flags = frame_uc_flags(regs);
+
 	put_user_try {

 		/* Create the ucontext.  */
-		put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+		put_user_ex(uc_flags, &frame->uc.uc_flags);
 		put_user_ex(0, &frame->uc.uc_link);
 		compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 		put_user_ex(0, &frame->uc.uc__pad0);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 860bd271619d..4a688ef9e448 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -456,7 +456,7 @@ FOP_END;

 /*
  * XXX: inoutclob user must know where the argument is being expanded.
- *      Relying on CC_HAVE_ASM_GOTO would allow us to remove _fault.
+ *      Relying on CONFIG_CC_HAS_ASM_GOTO would allow us to remove _fault.
  */
 #define asm_safe(insn, inoutclob...) \
 ({ \
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8dd9208ae4de..ea454d3f7763 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2022,7 +2022,11 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	if (!kvm_vcpu_apicv_active(vcpu))
 		return;

-	if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
+	/*
+	 * Since the host physical APIC id is 8 bits,
+	 * we can support host APIC ID up to 255.
+	 */
+	if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK))
 		return;

 	entry = READ_ONCE(*(svm->avic_physical_id_cache));
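Editor's aside: the avic_vcpu_load() hunk above switches the bound from AVIC_MAX_PHYSICAL_ID_COUNT to the 8-bit host-physical-ID field mask, since an 8-bit field can hold IDs up to 255. The standalone comparison below assumes the usual header values (255 and 0xFF); it only illustrates the off-by-one the new check avoids.

#include <stdio.h>

#define AVIC_MAX_PHYSICAL_ID_COUNT			255
#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK	0xFFULL

int main(void)
{
	unsigned int id = 255;	/* a valid 8-bit host APIC ID */

	printf("old check rejects %u: %d\n", id,
	       id >= AVIC_MAX_PHYSICAL_ID_COUNT);			/* 1 */
	printf("new check rejects %u: %d\n", id,
	       id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);	/* 0 */
	return 0;
}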
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d0eb37c069b8..7fed1d6dd1a1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1188,7 +1188,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u64 efer = msr_info->data;

 	if (efer & efer_reserved_bits)
-		return false;
+		return 1;

 	if (!msr_info->host_initiated) {
 		if (!__kvm_valid_efer(vcpu, efer))
@@ -2987,6 +2987,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
 		break;
+	case KVM_CAP_MAX_VCPU_ID:
+		r = KVM_MAX_VCPU_ID;
+		break;
 	case KVM_CAP_NR_MEMSLOTS:
 		r = KVM_USER_MEM_SLOTS;
 		break;
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 3b24dc05251c..9d05572370ed 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -257,6 +257,7 @@ ENTRY(__memcpy_mcsafe)
 	/* Copy successful. Return zero */
 .L_done_memcpy_trap:
 	xorl %eax, %eax
+.L_done:
 	ret
 ENDPROC(__memcpy_mcsafe)
 EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
@@ -273,7 +274,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 	addl	%edx, %ecx
 .E_trailing_bytes:
 	mov	%ecx, %eax
-	ret
+	jmp .L_done

 /*
  * For write fault handling, given the destination is unaligned,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 47bebfe6efa7..9d9765e4d1ef 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -427,8 +427,6 @@ static noinline int vmalloc_fault(unsigned long address)
 	if (!(address >= VMALLOC_START && address < VMALLOC_END))
 		return -1;

-	WARN_ON_ONCE(in_nmi());
-
 	/*
 	 * Copy kernel mappings over when needed. This can also
 	 * happen within a race in page table update. In the later