diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2014-08-05 10:27:13 +0100 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2014-08-05 10:27:13 +0100 |
commit | 7109561524dc57b95fd3f9b61547268b9b6db8ed (patch) | |
tree | df08f565b2c616b7e68fe35cd428a8c5ba3bfcb1 /arch | |
parent | 6bf755db4d5e7ccea61fb17727a183b9bd8945b1 (diff) | |
parent | 3bb70de692f70861f5c5729cd2b870d0104a7cc9 (diff) | |
download | linux-exynos-7109561524dc57b95fd3f9b61547268b9b6db8ed.tar.gz linux-exynos-7109561524dc57b95fd3f9b61547268b9b6db8ed.tar.bz2 linux-exynos-7109561524dc57b95fd3f9b61547268b9b6db8ed.zip |
Merge branches 'fixes' and 'misc' into for-next
Conflicts:
arch/arm/kernel/iwmmxt.S
arch/arm/mm/cache-l2x0.c
arch/arm/mm/mmu.c
Diffstat (limited to 'arch')
262 files changed, 3582 insertions, 2164 deletions
diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index c1d3d2da1191..b3c750979aa1 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -60,7 +60,7 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_IC_IVIC 0x10 #define ARC_REG_IC_CTRL 0x11 #define ARC_REG_IC_IVIL 0x19 -#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4) +#if defined(CONFIG_ARC_MMU_V3) #define ARC_REG_IC_PTAG 0x1E #endif @@ -74,7 +74,7 @@ extern void read_decode_cache_bcr(void); #define ARC_REG_DC_IVDL 0x4A #define ARC_REG_DC_FLSH 0x4B #define ARC_REG_DC_FLDL 0x4C -#if defined(CONFIG_ARC_MMU_V3) || defined (CONFIG_ARC_MMU_V4) +#if defined(CONFIG_ARC_MMU_V3) #define ARC_REG_DC_PTAG 0x5C #endif diff --git a/arch/arc/include/uapi/asm/ptrace.h b/arch/arc/include/uapi/asm/ptrace.h index 2618cc13ba75..76a7739aab1c 100644 --- a/arch/arc/include/uapi/asm/ptrace.h +++ b/arch/arc/include/uapi/asm/ptrace.h @@ -11,6 +11,7 @@ #ifndef _UAPI__ASM_ARC_PTRACE_H #define _UAPI__ASM_ARC_PTRACE_H +#define PTRACE_GET_THREAD_AREA 25 #ifndef __ASSEMBLY__ /* diff --git a/arch/arc/kernel/ctx_sw_asm.S b/arch/arc/kernel/ctx_sw_asm.S index 2ff0347a2fd7..e248594097e7 100644 --- a/arch/arc/kernel/ctx_sw_asm.S +++ b/arch/arc/kernel/ctx_sw_asm.S @@ -10,9 +10,9 @@ * -This is the more "natural" hand written assembler */ +#include <linux/linkage.h> #include <asm/entry.h> /* For the SAVE_* macros */ #include <asm/asm-offsets.h> -#include <asm/linkage.h> #define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4) diff --git a/arch/arc/kernel/devtree.c b/arch/arc/kernel/devtree.c index 0b3ef4025d89..fffdb5e41b20 100644 --- a/arch/arc/kernel/devtree.c +++ b/arch/arc/kernel/devtree.c @@ -41,7 +41,7 @@ const struct machine_desc * __init setup_machine_fdt(void *dt) { const struct machine_desc *mdesc; unsigned long dt_root; - void *clk; + const void *clk; int len; if (!early_init_dt_scan(dt)) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 07a58f2d3077..4d2481bd8b98 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -77,10 +77,11 @@ stext: ; Clear BSS before updating any globals ; XXX: use ZOL here mov r5, __bss_start - mov r6, __bss_stop + sub r6, __bss_stop, r5 + lsr.f lp_count, r6, 2 + lpnz 1f + st.ab 0, [r5, 4] 1: - st.ab 0, [r5,4] - brlt r5, r6, 1b ; Uboot - kernel ABI ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 5d76706139dd..13b3ffb27a38 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -146,6 +146,10 @@ long arch_ptrace(struct task_struct *child, long request, pr_debug("REQ=%ld: ADDR =0x%lx, DATA=0x%lx)\n", request, addr, data); switch (request) { + case PTRACE_GET_THREAD_AREA: + ret = put_user(task_thread_info(child)->thr_ptr, + (unsigned long __user *)data); + break; default: ret = ptrace_request(child, request, addr, data); break; diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index cf90b6f4d3e0..c802bb500602 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -337,8 +337,19 @@ irqreturn_t do_IPI(int irq, void *dev_id) * API called by platform code to hookup arch-common ISR to their IPI IRQ */ static DEFINE_PER_CPU(int, ipi_dev); + +static struct irqaction arc_ipi_irq = { + .name = "IPI Interrupt", + .flags = IRQF_PERCPU, + .handler = do_IPI, +}; + int smp_ipi_irq_setup(int cpu, int irq) { - int *dev_id = &per_cpu(ipi_dev, smp_processor_id()); - return request_percpu_irq(irq, do_IPI, "IPI Interrupt", dev_id); + if (!cpu) + return setup_irq(irq, &arc_ipi_irq); + else + arch_unmask_irq(irq); + + return 0; } diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S index 2555f5886af6..dd35bde39f69 100644 --- a/arch/arc/kernel/vmlinux.lds.S +++ b/arch/arc/kernel/vmlinux.lds.S @@ -116,7 +116,7 @@ SECTIONS _edata = .; - BSS_SECTION(0, 0, 0) + BSS_SECTION(4, 4, 4) #ifdef CONFIG_ARC_DW2_UNWIND . = ALIGN(PAGE_SIZE); diff --git a/arch/arc/mm/cache_arc700.c b/arch/arc/mm/cache_arc700.c index 1f676c4794e0..353b202c37c9 100644 --- a/arch/arc/mm/cache_arc700.c +++ b/arch/arc/mm/cache_arc700.c @@ -389,7 +389,7 @@ static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, /*********************************************************** * Machine specific helper for per line I-Cache invalidate. */ -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, +static void __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; @@ -405,6 +405,23 @@ static inline void __ic_entire_inv(void) read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ } +struct ic_line_inv_vaddr_ipi { + unsigned long paddr, vaddr; + int sz; +}; + +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_line_inv_vaddr_ipi *ic_inv = (struct ic_line_inv_vaddr_ipi*) info; + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); +} + +static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_line_inv_vaddr_ipi ic_inv = { paddr, vaddr , sz}; + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} #else #define __ic_entire_inv() @@ -553,12 +570,8 @@ void flush_icache_range(unsigned long kstart, unsigned long kend) */ void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) { - unsigned long flags; - - local_irq_save(flags); - __ic_line_inv_vaddr(paddr, vaddr, len); __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); - local_irq_restore(flags); + __ic_line_inv_vaddr(paddr, vaddr, len); } /* wrapper to compile time eliminate alignment checks in flush loop */ diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 7a14af6e3d2c..75b3b721347c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -262,8 +262,22 @@ config NEED_MACH_MEMORY_H config PHYS_OFFSET hex "Physical address of main memory" if MMU - depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H + depends on !ARM_PATCH_PHYS_VIRT default DRAM_BASE if !MMU + default 0x00000000 if ARCH_EBSA110 || \ + EP93XX_SDCE3_SYNC_PHYS_OFFSET || \ + ARCH_FOOTBRIDGE || \ + ARCH_INTEGRATOR || \ + ARCH_IOP13XX || \ + ARCH_KS8695 || \ + (ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET) + default 0x10000000 if ARCH_OMAP1 || ARCH_RPC + default 0x20000000 if ARCH_S5PV210 + default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET + default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100 + default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET + default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET + default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET help Please provide the physical address corresponding to the location of main memory in your system. @@ -435,7 +449,6 @@ config ARCH_EP93XX select ARM_VIC select CLKDEV_LOOKUP select CPU_ARM920T - select NEED_MACH_MEMORY_H help This enables support for the Cirrus EP93xx series of CPUs. diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 6721fab13734..718913dfe815 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -127,6 +127,9 @@ CHECKFLAGS += -D__arm__ #Default value head-y := arch/arm/kernel/head$(MMUEXT).o + +# Text offset. This list is sorted numerically by address in order to +# provide a means to avoid/resolve conflicts in multi-arch kernels. textofs-y := 0x00008000 textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000 # We don't want the htc bootloader to corrupt kernel during resume diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 68c918362b79..76a50ecae1c3 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -81,7 +81,7 @@ ZTEXTADDR := 0 ZBSSADDR := ALIGN(8) endif -SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/ +CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)" suffix_$(CONFIG_KERNEL_GZIP) = gzip suffix_$(CONFIG_KERNEL_LZO) = lzo @@ -199,8 +199,5 @@ CFLAGS_font.o := -Dstatic= $(obj)/font.c: $(FONTC) $(call cmd,shipped) -$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG) - @sed "$(SEDFLAGS)" < $< > $@ - $(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S $(call cmd,shipped) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 3a8b32df6b31..413fd94b5301 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -125,9 +125,11 @@ start: THUMB( adr r12, BSYM(1f) ) THUMB( bx r12 ) - .word 0x016f2818 @ Magic numbers to help the loader - .word start @ absolute load/run zImage address - .word _edata @ zImage end address + .word _magic_sig @ Magic numbers to help the loader + .word _magic_start @ absolute load/run zImage address + .word _magic_end @ zImage end address + .word 0x04030201 @ endianness flag + THUMB( .thumb ) 1: ARM_BE8( setend be ) @ go BE8 if compiled for BE8 diff --git a/arch/arm/boot/compressed/vmlinux.lds.in b/arch/arm/boot/compressed/vmlinux.lds.S index 4919f2ac8b89..2b60b843ac5e 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.in +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -1,12 +1,20 @@ /* - * linux/arch/arm/boot/compressed/vmlinux.lds.in - * * Copyright (C) 2000 Russell King * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + +#ifdef CONFIG_CPU_ENDIAN_BE8 +#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \ + (((x) >> 8) & 0x0000ff00) | \ + (((x) << 8) & 0x00ff0000) | \ + (((x) << 24) & 0xff000000) ) +#else +#define ZIMAGE_MAGIC(x) (x) +#endif + OUTPUT_ARCH(arm) ENTRY(_start) SECTIONS @@ -57,6 +65,10 @@ SECTIONS .pad : { BYTE(0); . = ALIGN(8); } _edata = .; + _magic_sig = ZIMAGE_MAGIC(0x016f2818); + _magic_start = ZIMAGE_MAGIC(_start); + _magic_end = ZIMAGE_MAGIC(_edata); + . = BSS_START; __bss_start = .; .bss : { *(.bss) } @@ -73,4 +85,3 @@ SECTIONS .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } } - diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 5986ff63b901..adb5ed9e269e 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -357,7 +357,7 @@ dtb-$(CONFIG_ARCH_STI)+= stih407-b2120.dtb \ stih415-b2020.dtb \ stih416-b2000.dtb \ stih416-b2020.dtb \ - stih416-b2020-revE.dtb + stih416-b2020e.dtb dtb-$(CONFIG_MACH_SUN4I) += \ sun4i-a10-a1000.dtb \ sun4i-a10-cubieboard.dtb \ diff --git a/arch/arm/boot/dts/armada-380.dtsi b/arch/arm/boot/dts/armada-380.dtsi index e69bc6759c39..4173a8ab34e7 100644 --- a/arch/arm/boot/dts/armada-380.dtsi +++ b/arch/arm/boot/dts/armada-380.dtsi @@ -16,7 +16,7 @@ / { model = "Marvell Armada 380 family SoC"; - compatible = "marvell,armada380", "marvell,armada38x"; + compatible = "marvell,armada380"; cpus { #address-cells = <1>; diff --git a/arch/arm/boot/dts/armada-385-db.dts b/arch/arm/boot/dts/armada-385-db.dts index 5bae4731828b..1af886f1e486 100644 --- a/arch/arm/boot/dts/armada-385-db.dts +++ b/arch/arm/boot/dts/armada-385-db.dts @@ -16,7 +16,7 @@ / { model = "Marvell Armada 385 Development Board"; - compatible = "marvell,a385-db", "marvell,armada385", "marvell,armada38x"; + compatible = "marvell,a385-db", "marvell,armada385", "marvell,armada380"; chosen { bootargs = "console=ttyS0,115200 earlyprintk"; diff --git a/arch/arm/boot/dts/armada-385-rd.dts b/arch/arm/boot/dts/armada-385-rd.dts index 40893255a3f0..aaca2861dc87 100644 --- a/arch/arm/boot/dts/armada-385-rd.dts +++ b/arch/arm/boot/dts/armada-385-rd.dts @@ -17,7 +17,7 @@ / { model = "Marvell Armada 385 Reference Design"; - compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada38x"; + compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada380"; chosen { bootargs = "console=ttyS0,115200 earlyprintk"; diff --git a/arch/arm/boot/dts/armada-385.dtsi b/arch/arm/boot/dts/armada-385.dtsi index f011009bf4cf..6283d7912f71 100644 --- a/arch/arm/boot/dts/armada-385.dtsi +++ b/arch/arm/boot/dts/armada-385.dtsi @@ -16,7 +16,7 @@ / { model = "Marvell Armada 385 family SoC"; - compatible = "marvell,armada385", "marvell,armada38x"; + compatible = "marvell,armada385", "marvell,armada380"; cpus { #address-cells = <1>; diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 3de364e81b52..689fa1a46728 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -20,7 +20,7 @@ / { model = "Marvell Armada 38x family SoC"; - compatible = "marvell,armada38x"; + compatible = "marvell,armada380"; aliases { gpio0 = &gpio0; diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi index b309c1c6e848..04927db1d6bf 100644 --- a/arch/arm/boot/dts/at91sam9261.dtsi +++ b/arch/arm/boot/dts/at91sam9261.dtsi @@ -568,24 +568,17 @@ #size-cells = <0>; #interrupt-cells = <1>; - slow_rc_osc: slow_rc_osc { - compatible = "fixed-clock"; + main_osc: main_osc { + compatible = "atmel,at91rm9200-clk-main-osc"; #clock-cells = <0>; - clock-frequency = <32768>; - clock-accuracy = <50000000>; - }; - - clk32k: slck { - compatible = "atmel,at91sam9260-clk-slow"; - #clock-cells = <0>; - clocks = <&slow_rc_osc &slow_xtal>; + interrupts-extended = <&pmc AT91_PMC_MOSCS>; + clocks = <&main_xtal>; }; main: mainck { compatible = "atmel,at91rm9200-clk-main"; #clock-cells = <0>; - interrupts-extended = <&pmc AT91_PMC_MOSCS>; - clocks = <&main_xtal>; + clocks = <&main_osc>; }; plla: pllack { @@ -615,7 +608,7 @@ compatible = "atmel,at91rm9200-clk-master"; #clock-cells = <0>; interrupts-extended = <&pmc AT91_PMC_MCKRDY>; - clocks = <&clk32k>, <&main>, <&plla>, <&pllb>; + clocks = <&slow_xtal>, <&main>, <&plla>, <&pllb>; atmel,clk-output-range = <0 94000000>; atmel,clk-divisors = <1 2 4 0>; }; @@ -632,7 +625,7 @@ #address-cells = <1>; #size-cells = <0>; interrupt-parent = <&pmc>; - clocks = <&clk32k>, <&main>, <&plla>, <&pllb>; + clocks = <&slow_xtal>, <&main>, <&plla>, <&pllb>; prog0: prog0 { #clock-cells = <0>; diff --git a/arch/arm/boot/dts/at91sam9261ek.dts b/arch/arm/boot/dts/at91sam9261ek.dts index c6683ea8b743..aa35a7aec9a8 100644 --- a/arch/arm/boot/dts/at91sam9261ek.dts +++ b/arch/arm/boot/dts/at91sam9261ek.dts @@ -20,6 +20,10 @@ reg = <0x20000000 0x4000000>; }; + slow_xtal { + clock-frequency = <32768>; + }; + main_xtal { clock-frequency = <18432000>; }; diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi index d1b82e6635d5..287795985e32 100644 --- a/arch/arm/boot/dts/at91sam9n12.dtsi +++ b/arch/arm/boot/dts/at91sam9n12.dtsi @@ -132,8 +132,8 @@ <595000000 650000000 3 0>, <545000000 600000000 0 1>, <495000000 555000000 1 1>, - <445000000 500000000 1 2>, - <400000000 450000000 1 3>; + <445000000 500000000 2 1>, + <400000000 450000000 3 1>; }; plladiv: plladivck { diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi index 1a57298636a5..d6133f497207 100644 --- a/arch/arm/boot/dts/at91sam9x5.dtsi +++ b/arch/arm/boot/dts/at91sam9x5.dtsi @@ -140,8 +140,8 @@ 595000000 650000000 3 0 545000000 600000000 0 1 495000000 555000000 1 1 - 445000000 500000000 1 2 - 400000000 450000000 1 3>; + 445000000 500000000 2 1 + 400000000 450000000 3 1>; }; plladiv: plladivck { diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi index b8ece4be41ca..fbaf426d2daa 100644 --- a/arch/arm/boot/dts/exynos4.dtsi +++ b/arch/arm/boot/dts/exynos4.dtsi @@ -113,7 +113,7 @@ compatible = "arm,cortex-a9-gic"; #interrupt-cells = <3>; interrupt-controller; - reg = <0x10490000 0x1000>, <0x10480000 0x100>; + reg = <0x10490000 0x10000>, <0x10480000 0x10000>; }; combiner: interrupt-controller@10440000 { diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts index 6bc3243a80d3..181d77fa2fa6 100644 --- a/arch/arm/boot/dts/imx51-babbage.dts +++ b/arch/arm/boot/dts/imx51-babbage.dts @@ -315,15 +315,15 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - fsl,cd-controller; - fsl,wp-controller; + cd-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>; status = "okay"; }; &esdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; - cd-gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio1 6 GPIO_ACTIVE_LOW>; wp-gpios = <&gpio1 5 GPIO_ACTIVE_HIGH>; status = "okay"; }; @@ -468,8 +468,8 @@ MX51_PAD_SD1_DATA1__SD1_DATA1 0x20d5 MX51_PAD_SD1_DATA2__SD1_DATA2 0x20d5 MX51_PAD_SD1_DATA3__SD1_DATA3 0x20d5 - MX51_PAD_GPIO1_0__SD1_CD 0x20d5 - MX51_PAD_GPIO1_1__SD1_WP 0x20d5 + MX51_PAD_GPIO1_0__GPIO1_0 0x100 + MX51_PAD_GPIO1_1__GPIO1_1 0x100 >; }; diff --git a/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts b/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts index 75e66c9c6144..31cfb7f2b02e 100644 --- a/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts +++ b/arch/arm/boot/dts/imx51-eukrea-mbimxsd51-baseboard.dts @@ -107,7 +107,7 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1 &pinctrl_esdhc1_cd>; - fsl,cd-controller; + cd-gpios = <&gpio1 0 GPIO_ACTIVE_LOW>; status = "okay"; }; @@ -206,7 +206,7 @@ pinctrl_esdhc1_cd: esdhc1_cd { fsl,pins = < - MX51_PAD_GPIO1_0__SD1_CD 0x20d5 + MX51_PAD_GPIO1_0__GPIO1_0 0xd5 >; }; diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts index d5d146a8b149..c4956b0ffb35 100644 --- a/arch/arm/boot/dts/imx53-m53evk.dts +++ b/arch/arm/boot/dts/imx53-m53evk.dts @@ -21,27 +21,25 @@ <0xb0000000 0x20000000>; }; - soc { - display1: display@di1 { - compatible = "fsl,imx-parallel-display"; - interface-pix-fmt = "bgr666"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ipu_disp1>; - - display-timings { - 800x480p60 { - native-mode; - clock-frequency = <31500000>; - hactive = <800>; - vactive = <480>; - hfront-porch = <40>; - hback-porch = <88>; - hsync-len = <128>; - vback-porch = <33>; - vfront-porch = <9>; - vsync-len = <3>; - vsync-active = <1>; - }; + display1: display@di1 { + compatible = "fsl,imx-parallel-display"; + interface-pix-fmt = "bgr666"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ipu_disp1>; + + display-timings { + 800x480p60 { + native-mode; + clock-frequency = <31500000>; + hactive = <800>; + vactive = <480>; + hfront-porch = <40>; + hback-porch = <88>; + hsync-len = <128>; + vback-porch = <33>; + vfront-porch = <9>; + vsync-len = <3>; + vsync-active = <1>; }; }; diff --git a/arch/arm/boot/dts/imx6dl-hummingboard.dts b/arch/arm/boot/dts/imx6dl-hummingboard.dts index 5373a5f2782b..c8e51dd41b8f 100644 --- a/arch/arm/boot/dts/imx6dl-hummingboard.dts +++ b/arch/arm/boot/dts/imx6dl-hummingboard.dts @@ -143,6 +143,14 @@ fsl,pins = <MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x1b0b0>; }; + pinctrl_hummingboard_usbotg_id: hummingboard-usbotg-id { + /* + * Similar to pinctrl_usbotg_2, but we want it + * pulled down for a fixed host connection. + */ + fsl,pins = <MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x13059>; + }; + pinctrl_hummingboard_usbotg_vbus: hummingboard-usbotg-vbus { fsl,pins = <MX6QDL_PAD_EIM_D22__GPIO3_IO22 0x1b0b0>; }; @@ -178,6 +186,8 @@ }; &usbotg { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hummingboard_usbotg_id>; vbus-supply = <®_usbotg_vbus>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-gw51xx.dts b/arch/arm/boot/dts/imx6q-gw51xx.dts index af4929aee075..0e1406e58eff 100644 --- a/arch/arm/boot/dts/imx6q-gw51xx.dts +++ b/arch/arm/boot/dts/imx6q-gw51xx.dts @@ -11,7 +11,7 @@ /dts-v1/; #include "imx6q.dtsi" -#include "imx6qdl-gw54xx.dtsi" +#include "imx6qdl-gw51xx.dtsi" / { model = "Gateworks Ventana i.MX6 Quad GW51XX"; diff --git a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi index 25da82a03110..e8e781656b3f 100644 --- a/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi +++ b/arch/arm/boot/dts/imx6qdl-cubox-i.dtsi @@ -12,6 +12,19 @@ pinctrl-0 = <&pinctrl_cubox_i_ir>; }; + pwmleds { + compatible = "pwm-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_cubox_i_pwm1>; + + front { + active-low; + label = "imx6:red:front"; + max-brightness = <248>; + pwms = <&pwm1 0 50000>; + }; + }; + regulators { compatible = "simple-bus"; @@ -109,6 +122,10 @@ >; }; + pinctrl_cubox_i_pwm1: cubox-i-pwm1-front-led { + fsl,pins = <MX6QDL_PAD_DISP0_DAT8__PWM1_OUT 0x1b0b0>; + }; + pinctrl_cubox_i_spdif: cubox-i-spdif { fsl,pins = <MX6QDL_PAD_GPIO_17__SPDIF_OUT 0x13091>; }; @@ -117,6 +134,14 @@ fsl,pins = <MX6QDL_PAD_GPIO_0__GPIO1_IO00 0x4001b0b0>; }; + pinctrl_cubox_i_usbotg_id: cubox-i-usbotg-id { + /* + * The Cubox-i pulls this low, but as it's pointless + * leaving it as a pull-up, even if it is just 10uA. + */ + fsl,pins = <MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x13059>; + }; + pinctrl_cubox_i_usbotg_vbus: cubox-i-usbotg-vbus { fsl,pins = <MX6QDL_PAD_EIM_D22__GPIO3_IO22 0x4001b0b0>; }; @@ -153,6 +178,8 @@ }; &usbotg { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_cubox_i_usbotg_id>; vbus-supply = <®_usbotg_vbus>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi index 31665adcbf39..0db15af41cb1 100644 --- a/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw51xx.dtsi @@ -161,7 +161,7 @@ status = "okay"; pmic: ltc3676@3c { - compatible = "ltc,ltc3676"; + compatible = "lltc,ltc3676"; reg = <0x3c>; regulators { diff --git a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi index 367af3ec9435..744c8a2d81f6 100644 --- a/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw52xx.dtsi @@ -220,7 +220,7 @@ }; pmic: ltc3676@3c { - compatible = "ltc,ltc3676"; + compatible = "lltc,ltc3676"; reg = <0x3c>; regulators { @@ -288,7 +288,7 @@ codec: sgtl5000@0a { compatible = "fsl,sgtl5000"; reg = <0x0a>; - clocks = <&clks 169>; + clocks = <&clks 201>; VDDA-supply = <®_1p8v>; VDDIO-supply = <®_3p3v>; }; diff --git a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi index c91b5a6c769b..adf150c1be90 100644 --- a/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi +++ b/arch/arm/boot/dts/imx6qdl-gw53xx.dtsi @@ -234,7 +234,7 @@ }; pmic: ltc3676@3c { - compatible = "ltc,ltc3676"; + compatible = "lltc,ltc3676"; reg = <0x3c>; regulators { diff --git a/arch/arm/boot/dts/imx6qdl-microsom.dtsi b/arch/arm/boot/dts/imx6qdl-microsom.dtsi index d729d0b15f25..79eac6849d4c 100644 --- a/arch/arm/boot/dts/imx6qdl-microsom.dtsi +++ b/arch/arm/boot/dts/imx6qdl-microsom.dtsi @@ -10,14 +10,6 @@ MX6QDL_PAD_CSI0_DAT11__UART1_RX_DATA 0x1b0b1 >; }; - - pinctrl_microsom_usbotg: microsom-usbotg { - /* - * Similar to pinctrl_usbotg_2, but we want it - * pulled down for a fixed host connection. - */ - fsl,pins = <MX6QDL_PAD_GPIO_1__USB_OTG_ID 0x13059>; - }; }; }; @@ -26,8 +18,3 @@ pinctrl-0 = <&pinctrl_microsom_uart1>; status = "okay"; }; - -&usbotg { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_microsom_usbotg>; -}; diff --git a/arch/arm/boot/dts/imx6sl.dtsi b/arch/arm/boot/dts/imx6sl.dtsi index 2d4e5285f3f3..57d4abe03a94 100644 --- a/arch/arm/boot/dts/imx6sl.dtsi +++ b/arch/arm/boot/dts/imx6sl.dtsi @@ -686,7 +686,7 @@ compatible = "fsl,imx6sl-fec", "fsl,imx25-fec"; reg = <0x02188000 0x4000>; interrupts = <0 114 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&clks IMX6SL_CLK_ENET_REF>, + clocks = <&clks IMX6SL_CLK_ENET>, <&clks IMX6SL_CLK_ENET_REF>; clock-names = "ipg", "ahb"; status = "disabled"; diff --git a/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts index c5a1fc75c7a3..b2d9834bf458 100644 --- a/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts +++ b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts @@ -105,7 +105,6 @@ compatible = "ethernet-phy-id0141.0cb0", "ethernet-phy-ieee802.3-c22"; reg = <0>; - phy-connection-type = "rgmii-id"; }; ethphy1: ethernet-phy@1 { @@ -113,7 +112,6 @@ compatible = "ethernet-phy-id0141.0cb0", "ethernet-phy-ieee802.3-c22"; reg = <1>; - phy-connection-type = "rgmii-id"; }; }; @@ -121,6 +119,7 @@ status = "okay"; ethernet0-port@0 { phy-handle = <ðphy0>; + phy-connection-type = "rgmii-id"; }; }; @@ -128,5 +127,6 @@ status = "okay"; ethernet1-port@0 { phy-handle = <ðphy1>; + phy-connection-type = "rgmii-id"; }; }; diff --git a/arch/arm/boot/dts/stih415.dtsi b/arch/arm/boot/dts/stih415.dtsi index d6f254f302fe..a0f6f75fe3b5 100644 --- a/arch/arm/boot/dts/stih415.dtsi +++ b/arch/arm/boot/dts/stih415.dtsi @@ -169,8 +169,8 @@ pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mii0>; - clock-names = "stmmaceth"; - clocks = <&clk_s_a1_ls CLK_GMAC0_PHY>; + clock-names = "stmmaceth", "sti-ethclk"; + clocks = <&clk_s_a1_ls CLK_ICN_IF_2>, <&clk_s_a1_ls CLK_GMAC0_PHY>; }; ethernet1: dwmac@fef08000 { @@ -192,8 +192,8 @@ reset-names = "stmmaceth"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mii1>; - clock-names = "stmmaceth"; - clocks = <&clk_s_a0_ls CLK_ETH1_PHY>; + clock-names = "stmmaceth", "sti-ethclk"; + clocks = <&clk_s_a0_ls CLK_ICN_REG>, <&clk_s_a0_ls CLK_ETH1_PHY>; }; rc: rc@fe518000 { diff --git a/arch/arm/boot/dts/stih416-b2020-revE.dts b/arch/arm/boot/dts/stih416-b2020e.dts index ba0fa2caaf18..ba0fa2caaf18 100644 --- a/arch/arm/boot/dts/stih416-b2020-revE.dts +++ b/arch/arm/boot/dts/stih416-b2020e.dts diff --git a/arch/arm/boot/dts/stih416.dtsi b/arch/arm/boot/dts/stih416.dtsi index 06473c5d9ea9..84758d76d064 100644 --- a/arch/arm/boot/dts/stih416.dtsi +++ b/arch/arm/boot/dts/stih416.dtsi @@ -175,8 +175,8 @@ reset-names = "stmmaceth"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mii0>; - clock-names = "stmmaceth"; - clocks = <&clk_s_a1_ls CLK_GMAC0_PHY>; + clock-names = "stmmaceth", "sti-ethclk"; + clocks = <&clk_s_a1_ls CLK_ICN_IF_2>, <&clk_s_a1_ls CLK_GMAC0_PHY>; }; ethernet1: dwmac@fef08000 { @@ -197,8 +197,8 @@ reset-names = "stmmaceth"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_mii1>; - clock-names = "stmmaceth"; - clocks = <&clk_s_a0_ls CLK_ETH1_PHY>; + clock-names = "stmmaceth", "sti-ethclk"; + clocks = <&clk_s_a0_ls CLK_ICN_REG>, <&clk_s_a0_ls CLK_ETH1_PHY>; }; rc: rc@fe518000 { diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index f91136ab447e..3c165fc2dce2 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -12,11 +12,13 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/irqflags.h> +#include <linux/cpu_pm.h> #include <asm/mcpm.h> #include <asm/cacheflush.h> #include <asm/idmap.h> #include <asm/cputype.h> +#include <asm/suspend.h> extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER]; @@ -146,6 +148,56 @@ int mcpm_cpu_powered_up(void) return 0; } +#ifdef CONFIG_ARM_CPU_SUSPEND + +static int __init nocache_trampoline(unsigned long _arg) +{ + void (*cache_disable)(void) = (void *)_arg; + unsigned int mpidr = read_cpuid_mpidr(); + unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1); + phys_reset_t phys_reset; + + mcpm_set_entry_vector(cpu, cluster, cpu_resume); + setup_mm_for_reboot(); + + __mcpm_cpu_going_down(cpu, cluster); + BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster)); + cache_disable(); + __mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN); + __mcpm_cpu_down(cpu, cluster); + + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset(virt_to_phys(mcpm_entry_point)); + BUG(); +} + +int __init mcpm_loopback(void (*cache_disable)(void)) +{ + int ret; + + /* + * We're going to soft-restart the current CPU through the + * low-level MCPM code by leveraging the suspend/resume + * infrastructure. Let's play it safe by using cpu_pm_enter() + * in case the CPU init code path resets the VFP or similar. + */ + local_irq_disable(); + local_fiq_disable(); + ret = cpu_pm_enter(); + if (!ret) { + ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline); + cpu_pm_exit(); + } + local_fiq_enable(); + local_irq_enable(); + if (ret) + pr_err("%s returned %d\n", __func__, ret); + return ret; +} + +#endif + struct sync_struct mcpm_sync; /* diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index ef8815327e5b..59b7e45142d8 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -186,6 +186,7 @@ CONFIG_VIDEO_MX3=y CONFIG_V4L_MEM2MEM_DRIVERS=y CONFIG_VIDEO_CODA=y CONFIG_SOC_CAMERA_OV2640=y +CONFIG_IMX_IPUV3_CORE=y CONFIG_DRM=y CONFIG_DRM_PANEL_SIMPLE=y CONFIG_BACKLIGHT_LCD_SUPPORT=y diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 17d9462b9fb9..be1a3455a9fe 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -353,6 +353,7 @@ CONFIG_MFD_NVEC=y CONFIG_KEYBOARD_NVEC=y CONFIG_SERIO_NVEC_PS2=y CONFIG_NVEC_POWER=y +CONFIG_QCOM_GSBI=y CONFIG_COMMON_CLK_QCOM=y CONFIG_MSM_GCC_8660=y CONFIG_MSM_MMCC_8960=y diff --git a/arch/arm/configs/mvebu_v7_defconfig b/arch/arm/configs/mvebu_v7_defconfig index e11170e37442..b0bfefa23902 100644 --- a/arch/arm/configs/mvebu_v7_defconfig +++ b/arch/arm/configs/mvebu_v7_defconfig @@ -14,6 +14,7 @@ CONFIG_MACH_ARMADA_370=y CONFIG_MACH_ARMADA_375=y CONFIG_MACH_ARMADA_38X=y CONFIG_MACH_ARMADA_XP=y +CONFIG_MACH_DOVE=y CONFIG_NEON=y # CONFIG_CACHE_L2X0 is not set # CONFIG_SWP_EMULATE is not set @@ -52,6 +53,7 @@ CONFIG_INPUT_EVDEV=y CONFIG_KEYBOARD_GPIO=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_I2C=y CONFIG_SPI=y CONFIG_SPI_ORION=y diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 81cda39860c5..b48fa341648d 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -5,10 +5,14 @@ obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o +obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o +obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o aes-arm-y := aes-armv4.o aes_glue.o aes-arm-bs-y := aesbs-core.o aesbs-glue.o sha1-arm-y := sha1-armv4-large.o sha1_glue.o +sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o +sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $(<) > $(@) diff --git a/arch/arm/crypto/aes-armv4.S b/arch/arm/crypto/aes-armv4.S index 3a14ea8fe97e..ebb9761fb572 100644 --- a/arch/arm/crypto/aes-armv4.S +++ b/arch/arm/crypto/aes-armv4.S @@ -35,6 +35,7 @@ @ that is being targetted. #include <linux/linkage.h> +#include <asm/assembler.h> .text @@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key: .Ldone: mov r0,#0 ldmia sp!,{r4-r12,lr} -.Labrt: mov pc,lr +.Labrt: ret lr ENDPROC(private_AES_set_encrypt_key) .align 5 diff --git a/arch/arm/crypto/sha1-armv7-neon.S b/arch/arm/crypto/sha1-armv7-neon.S new file mode 100644 index 000000000000..50013c0e2864 --- /dev/null +++ b/arch/arm/crypto/sha1-armv7-neon.S @@ -0,0 +1,634 @@ +/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function + * + * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/linkage.h> + + +.syntax unified +.code 32 +.fpu neon + +.text + + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 + + +/* Constants */ + +#define K1 0x5A827999 +#define K2 0x6ED9EBA1 +#define K3 0x8F1BBCDC +#define K4 0xCA62C1D6 +.align 4 +.LK_VEC: +.LK1: .long K1, K1, K1, K1 +.LK2: .long K2, K2, K2, K2 +.LK3: .long K3, K3, K3, K3 +.LK4: .long K4, K4, K4, K4 + + +/* Register macros */ + +#define RSTATE r0 +#define RDATA r1 +#define RNBLKS r2 +#define ROLDSTACK r3 +#define RWK lr + +#define _a r4 +#define _b r5 +#define _c r6 +#define _d r7 +#define _e r8 + +#define RT0 r9 +#define RT1 r10 +#define RT2 r11 +#define RT3 r12 + +#define W0 q0 +#define W1 q1 +#define W2 q2 +#define W3 q3 +#define W4 q4 +#define W5 q5 +#define W6 q6 +#define W7 q7 + +#define tmp0 q8 +#define tmp1 q9 +#define tmp2 q10 +#define tmp3 q11 + +#define qK1 q12 +#define qK2 q13 +#define qK3 q14 +#define qK4 q15 + + +/* Round function macros. */ + +#define WK_offs(i) (((i) & 15) * 4) + +#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + bic RT0, d, b; \ + add e, e, a, ror #(32 - 5); \ + and RT1, c, b; \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add RT0, RT0, RT3; \ + add e, e, RT1; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT0; + +#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + eor RT0, d, b; \ + add e, e, a, ror #(32 - 5); \ + eor RT0, RT0, c; \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT3; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT0; \ + +#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + ldr RT3, [sp, WK_offs(i)]; \ + pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + eor RT0, b, c; \ + and RT1, b, c; \ + add e, e, a, ror #(32 - 5); \ + pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + and RT0, RT0, d; \ + add RT1, RT1, RT3; \ + add e, e, RT0; \ + ror b, #(32 - 30); \ + pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \ + add e, e, RT1; + +#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + _R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define R(a,b,c,d,e,f,i) \ + _R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\ + W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) + +#define dummy(...) + + +/* Input expansion macros. */ + +/********* Precalc macros for rounds 0-15 *************************************/ + +#define W_PRECALC_00_15() \ + add RWK, sp, #(WK_offs(0)); \ + \ + vld1.32 {tmp0, tmp1}, [RDATA]!; \ + vrev32.8 W0, tmp0; /* big => little */ \ + vld1.32 {tmp2, tmp3}, [RDATA]!; \ + vadd.u32 tmp0, W0, curK; \ + vrev32.8 W7, tmp1; /* big => little */ \ + vrev32.8 W6, tmp2; /* big => little */ \ + vadd.u32 tmp1, W7, curK; \ + vrev32.8 W5, tmp3; /* big => little */ \ + vadd.u32 tmp2, W6, curK; \ + vst1.32 {tmp0, tmp1}, [RWK]!; \ + vadd.u32 tmp3, W5, curK; \ + vst1.32 {tmp2, tmp3}, [RWK]; \ + +#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vld1.32 {tmp0, tmp1}, [RDATA]!; \ + +#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + add RWK, sp, #(WK_offs(0)); \ + +#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W0, tmp0; /* big => little */ \ + +#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vld1.32 {tmp2, tmp3}, [RDATA]!; \ + +#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp0, W0, curK; \ + +#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W7, tmp1; /* big => little */ \ + +#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W6, tmp2; /* big => little */ \ + +#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp1, W7, curK; \ + +#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vrev32.8 W5, tmp3; /* big => little */ \ + +#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp2, W6, curK; \ + +#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp0, tmp1}, [RWK]!; \ + +#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp3, W5, curK; \ + +#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp2, tmp3}, [RWK]; \ + + +/********* Precalc macros for rounds 16-31 ************************************/ + +#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp0, tmp0; \ + vext.8 W, W_m16, W_m12, #8; \ + +#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + add RWK, sp, #(WK_offs(i)); \ + vext.8 tmp0, W_m04, tmp0, #4; \ + +#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp0, tmp0, W_m16; \ + veor.32 W, W, W_m08; \ + +#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp1, tmp1; \ + veor W, W, tmp0; \ + +#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshl.u32 tmp0, W, #1; \ + +#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vext.8 tmp1, tmp1, W, #(16-12); \ + vshr.u32 W, W, #31; \ + +#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vorr tmp0, tmp0, W; \ + vshr.u32 W, tmp1, #30; \ + +#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshl.u32 tmp1, tmp1, #2; \ + +#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor tmp0, tmp0, W; \ + +#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, tmp0, tmp1; \ + +#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp0, W, curK; \ + +#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp0}, [RWK]; + + +/********* Precalc macros for rounds 32-79 ************************************/ + +#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, W_m28; \ + +#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vext.8 tmp0, W_m08, W_m04, #8; \ + +#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, W_m16; \ + +#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + veor W, tmp0; \ + +#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + add RWK, sp, #(WK_offs(i&~3)); \ + +#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshl.u32 tmp1, W, #2; \ + +#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vshr.u32 tmp0, W, #30; \ + +#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vorr W, tmp0, tmp1; \ + +#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vadd.u32 tmp0, W, curK; \ + +#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \ + vst1.32 {tmp0}, [RWK]; + + +/* + * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA. + * + * unsigned int + * sha1_transform_neon (void *ctx, const unsigned char *data, + * unsigned int nblks) + */ +.align 3 +ENTRY(sha1_transform_neon) + /* input: + * r0: ctx, CTX + * r1: data (64*nblks bytes) + * r2: nblks + */ + + cmp RNBLKS, #0; + beq .Ldo_nothing; + + push {r4-r12, lr}; + /*vpush {q4-q7};*/ + + adr RT3, .LK_VEC; + + mov ROLDSTACK, sp; + + /* Align stack. */ + sub RT0, sp, #(16*4); + and RT0, #(~(16-1)); + mov sp, RT0; + + vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */ + + /* Get the values of the chaining variables. */ + ldm RSTATE, {_a-_e}; + + vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */ + +#undef curK +#define curK qK1 + /* Precalc 0-15. */ + W_PRECALC_00_15(); + +.Loop: + /* Transform 0-15 + Precalc 16-31. */ + _R( _a, _b, _c, _d, _e, F1, 0, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16, + W4, W5, W6, W7, W0, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 1, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16, + W4, W5, W6, W7, W0, _, _, _ ); + _R( _d, _e, _a, _b, _c, F1, 2, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16, + W4, W5, W6, W7, W0, _, _, _ ); + _R( _c, _d, _e, _a, _b, F1, 3, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16, + W4, W5, W6, W7, W0, _, _, _ ); + +#undef curK +#define curK qK2 + _R( _b, _c, _d, _e, _a, F1, 4, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20, + W3, W4, W5, W6, W7, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 5, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20, + W3, W4, W5, W6, W7, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 6, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20, + W3, W4, W5, W6, W7, _, _, _ ); + _R( _d, _e, _a, _b, _c, F1, 7, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20, + W3, W4, W5, W6, W7, _, _, _ ); + + _R( _c, _d, _e, _a, _b, F1, 8, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24, + W2, W3, W4, W5, W6, _, _, _ ); + _R( _b, _c, _d, _e, _a, F1, 9, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24, + W2, W3, W4, W5, W6, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 10, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24, + W2, W3, W4, W5, W6, _, _, _ ); + _R( _e, _a, _b, _c, _d, F1, 11, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24, + W2, W3, W4, W5, W6, _, _, _ ); + + _R( _d, _e, _a, _b, _c, F1, 12, + WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28, + W1, W2, W3, W4, W5, _, _, _ ); + _R( _c, _d, _e, _a, _b, F1, 13, + WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28, + W1, W2, W3, W4, W5, _, _, _ ); + _R( _b, _c, _d, _e, _a, F1, 14, + WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28, + W1, W2, W3, W4, W5, _, _, _ ); + _R( _a, _b, _c, _d, _e, F1, 15, + WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28, + W1, W2, W3, W4, W5, _, _, _ ); + + /* Transform 16-63 + Precalc 32-79. */ + _R( _e, _a, _b, _c, _d, F1, 16, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _d, _e, _a, _b, _c, F1, 17, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _c, _d, _e, _a, _b, F1, 18, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _b, _c, _d, _e, _a, F1, 19, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32, + W0, W1, W2, W3, W4, W5, W6, W7); + + _R( _a, _b, _c, _d, _e, F2, 20, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _e, _a, _b, _c, _d, F2, 21, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _d, _e, _a, _b, _c, F2, 22, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _c, _d, _e, _a, _b, F2, 23, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36, + W7, W0, W1, W2, W3, W4, W5, W6); + +#undef curK +#define curK qK3 + _R( _b, _c, _d, _e, _a, F2, 24, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _a, _b, _c, _d, _e, F2, 25, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _e, _a, _b, _c, _d, F2, 26, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _d, _e, _a, _b, _c, F2, 27, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40, + W6, W7, W0, W1, W2, W3, W4, W5); + + _R( _c, _d, _e, _a, _b, F2, 28, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _b, _c, _d, _e, _a, F2, 29, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _a, _b, _c, _d, _e, F2, 30, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _e, _a, _b, _c, _d, F2, 31, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44, + W5, W6, W7, W0, W1, W2, W3, W4); + + _R( _d, _e, _a, _b, _c, F2, 32, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + _R( _c, _d, _e, _a, _b, F2, 33, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + _R( _b, _c, _d, _e, _a, F2, 34, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + _R( _a, _b, _c, _d, _e, F2, 35, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48, + W4, W5, W6, W7, W0, W1, W2, W3); + + _R( _e, _a, _b, _c, _d, F2, 36, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + _R( _d, _e, _a, _b, _c, F2, 37, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + _R( _c, _d, _e, _a, _b, F2, 38, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + _R( _b, _c, _d, _e, _a, F2, 39, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52, + W3, W4, W5, W6, W7, W0, W1, W2); + + _R( _a, _b, _c, _d, _e, F3, 40, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + _R( _e, _a, _b, _c, _d, F3, 41, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + _R( _d, _e, _a, _b, _c, F3, 42, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + _R( _c, _d, _e, _a, _b, F3, 43, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56, + W2, W3, W4, W5, W6, W7, W0, W1); + +#undef curK +#define curK qK4 + _R( _b, _c, _d, _e, _a, F3, 44, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + _R( _a, _b, _c, _d, _e, F3, 45, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + _R( _e, _a, _b, _c, _d, F3, 46, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + _R( _d, _e, _a, _b, _c, F3, 47, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60, + W1, W2, W3, W4, W5, W6, W7, W0); + + _R( _c, _d, _e, _a, _b, F3, 48, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _b, _c, _d, _e, _a, F3, 49, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _a, _b, _c, _d, _e, F3, 50, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + _R( _e, _a, _b, _c, _d, F3, 51, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64, + W0, W1, W2, W3, W4, W5, W6, W7); + + _R( _d, _e, _a, _b, _c, F3, 52, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _c, _d, _e, _a, _b, F3, 53, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _b, _c, _d, _e, _a, F3, 54, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + _R( _a, _b, _c, _d, _e, F3, 55, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68, + W7, W0, W1, W2, W3, W4, W5, W6); + + _R( _e, _a, _b, _c, _d, F3, 56, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _d, _e, _a, _b, _c, F3, 57, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _c, _d, _e, _a, _b, F3, 58, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + _R( _b, _c, _d, _e, _a, F3, 59, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72, + W6, W7, W0, W1, W2, W3, W4, W5); + + subs RNBLKS, #1; + + _R( _a, _b, _c, _d, _e, F4, 60, + WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _e, _a, _b, _c, _d, F4, 61, + WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _d, _e, _a, _b, _c, F4, 62, + WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + _R( _c, _d, _e, _a, _b, F4, 63, + WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76, + W5, W6, W7, W0, W1, W2, W3, W4); + + beq .Lend; + + /* Transform 64-79 + Precalc 0-15 of next block. */ +#undef curK +#define curK qK1 + _R( _b, _c, _d, _e, _a, F4, 64, + WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 65, + WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _e, _a, _b, _c, _d, F4, 66, + WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _d, _e, _a, _b, _c, F4, 67, + WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _c, _d, _e, _a, _b, F4, 68, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 69, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 70, + WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _e, _a, _b, _c, _d, F4, 71, + WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _d, _e, _a, _b, _c, F4, 72, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _c, _d, _e, _a, _b, F4, 73, + dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 74, + WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _a, _b, _c, _d, _e, F4, 75, + WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + + _R( _e, _a, _b, _c, _d, F4, 76, + WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _d, _e, _a, _b, _c, F4, 77, + WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _c, _d, _e, _a, _b, F4, 78, + WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ ); + _R( _b, _c, _d, _e, _a, F4, 79, + WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ ); + + /* Update the chaining variables. */ + ldm RSTATE, {RT0-RT3}; + add _a, RT0; + ldr RT0, [RSTATE, #state_h4]; + add _b, RT1; + add _c, RT2; + add _d, RT3; + add _e, RT0; + stm RSTATE, {_a-_e}; + + b .Loop; + +.Lend: + /* Transform 64-79 */ + R( _b, _c, _d, _e, _a, F4, 64 ); + R( _a, _b, _c, _d, _e, F4, 65 ); + R( _e, _a, _b, _c, _d, F4, 66 ); + R( _d, _e, _a, _b, _c, F4, 67 ); + R( _c, _d, _e, _a, _b, F4, 68 ); + R( _b, _c, _d, _e, _a, F4, 69 ); + R( _a, _b, _c, _d, _e, F4, 70 ); + R( _e, _a, _b, _c, _d, F4, 71 ); + R( _d, _e, _a, _b, _c, F4, 72 ); + R( _c, _d, _e, _a, _b, F4, 73 ); + R( _b, _c, _d, _e, _a, F4, 74 ); + R( _a, _b, _c, _d, _e, F4, 75 ); + R( _e, _a, _b, _c, _d, F4, 76 ); + R( _d, _e, _a, _b, _c, F4, 77 ); + R( _c, _d, _e, _a, _b, F4, 78 ); + R( _b, _c, _d, _e, _a, F4, 79 ); + + mov sp, ROLDSTACK; + + /* Update the chaining variables. */ + ldm RSTATE, {RT0-RT3}; + add _a, RT0; + ldr RT0, [RSTATE, #state_h4]; + add _b, RT1; + add _c, RT2; + add _d, RT3; + /*vpop {q4-q7};*/ + add _e, RT0; + stm RSTATE, {_a-_e}; + + pop {r4-r12, pc}; + +.Ldo_nothing: + bx lr +ENDPROC(sha1_transform_neon) diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index 76cd976230bc..84f2a756588b 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -23,32 +23,27 @@ #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> +#include <asm/crypto/sha1.h> -struct SHA1_CTX { - uint32_t h0,h1,h2,h3,h4; - u64 count; - u8 data[SHA1_BLOCK_SIZE]; -}; -asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest, +asmlinkage void sha1_block_data_order(u32 *digest, const unsigned char *data, unsigned int rounds); static int sha1_init(struct shash_desc *desc) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); - memset(sctx, 0, sizeof(*sctx)); - sctx->h0 = SHA1_H0; - sctx->h1 = SHA1_H1; - sctx->h2 = SHA1_H2; - sctx->h3 = SHA1_H3; - sctx->h4 = SHA1_H4; + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + return 0; } -static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, - unsigned int len, unsigned int partial) +static int __sha1_update(struct sha1_state *sctx, const u8 *data, + unsigned int len, unsigned int partial) { unsigned int done = 0; @@ -56,43 +51,44 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data, if (partial) { done = SHA1_BLOCK_SIZE - partial; - memcpy(sctx->data + partial, data, done); - sha1_block_data_order(sctx, sctx->data, 1); + memcpy(sctx->buffer + partial, data, done); + sha1_block_data_order(sctx->state, sctx->buffer, 1); } if (len - done >= SHA1_BLOCK_SIZE) { const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; - sha1_block_data_order(sctx, data + done, rounds); + sha1_block_data_order(sctx->state, data + done, rounds); done += rounds * SHA1_BLOCK_SIZE; } - memcpy(sctx->data, data + done, len - done); + memcpy(sctx->buffer, data + done, len - done); return 0; } -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +int sha1_update_arm(struct shash_desc *desc, const u8 *data, + unsigned int len) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; int res; /* Handle the fast case right here */ if (partial + len < SHA1_BLOCK_SIZE) { sctx->count += len; - memcpy(sctx->data + partial, data, len); + memcpy(sctx->buffer + partial, data, len); return 0; } res = __sha1_update(sctx, data, len, partial); return res; } +EXPORT_SYMBOL_GPL(sha1_update_arm); /* Add padding and return the message digest. */ static int sha1_final(struct shash_desc *desc, u8 *out) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int i, index, padlen; __be32 *dst = (__be32 *)out; __be64 bits; @@ -106,7 +102,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) /* We need to fill a whole block for __sha1_update() */ if (padlen <= 56) { sctx->count += padlen; - memcpy(sctx->data + index, padding, padlen); + memcpy(sctx->buffer + index, padding, padlen); } else { __sha1_update(sctx, padding, padlen, index); } @@ -114,7 +110,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) /* Store state in digest */ for (i = 0; i < 5; i++) - dst[i] = cpu_to_be32(((u32 *)sctx)[i]); + dst[i] = cpu_to_be32(sctx->state[i]); /* Wipe context */ memset(sctx, 0, sizeof(*sctx)); @@ -124,7 +120,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) static int sha1_export(struct shash_desc *desc, void *out) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); memcpy(out, sctx, sizeof(*sctx)); return 0; } @@ -132,7 +128,7 @@ static int sha1_export(struct shash_desc *desc, void *out) static int sha1_import(struct shash_desc *desc, const void *in) { - struct SHA1_CTX *sctx = shash_desc_ctx(desc); + struct sha1_state *sctx = shash_desc_ctx(desc); memcpy(sctx, in, sizeof(*sctx)); return 0; } @@ -141,12 +137,12 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_init, - .update = sha1_update, + .update = sha1_update_arm, .final = sha1_final, .export = sha1_export, .import = sha1_import, - .descsize = sizeof(struct SHA1_CTX), - .statesize = sizeof(struct SHA1_CTX), + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", .cra_driver_name= "sha1-asm", diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c new file mode 100644 index 000000000000..6f1b411b1d55 --- /dev/null +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -0,0 +1,197 @@ +/* + * Glue code for the SHA1 Secure Hash Algorithm assembler implementation using + * ARM NEON instructions. + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is based on sha1_generic.c and sha1_ssse3_glue.c: + * Copyright (c) Alan Smithee. + * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> + * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> + * Copyright (c) Mathias Krause <minipli@googlemail.com> + * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/neon.h> +#include <asm/simd.h> +#include <asm/crypto/sha1.h> + + +asmlinkage void sha1_transform_neon(void *state_h, const char *data, + unsigned int rounds); + + +static int sha1_neon_init(struct shash_desc *desc) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + *sctx = (struct sha1_state){ + .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, + }; + + return 0; +} + +static int __sha1_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count += len; + + if (partial) { + done = SHA1_BLOCK_SIZE - partial; + memcpy(sctx->buffer + partial, data, done); + sha1_transform_neon(sctx->state, sctx->buffer, 1); + } + + if (len - done >= SHA1_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE; + + sha1_transform_neon(sctx->state, data + done, rounds); + done += rounds * SHA1_BLOCK_SIZE; + } + + memcpy(sctx->buffer, data + done, len - done); + + return 0; +} + +static int sha1_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA1_BLOCK_SIZE) { + sctx->count += len; + memcpy(sctx->buffer + partial, data, len); + + return 0; + } + + if (!may_use_simd()) { + res = sha1_update_arm(desc, data, len); + } else { + kernel_neon_begin(); + res = __sha1_neon_update(desc, data, len, partial); + kernel_neon_end(); + } + + return res; +} + + +/* Add padding and return the message digest. */ +static int sha1_neon_final(struct shash_desc *desc, u8 *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be32 *dst = (__be32 *)out; + __be64 bits; + static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, }; + + bits = cpu_to_be64(sctx->count << 3); + + /* Pad out to 56 mod 64 and append length */ + index = sctx->count % SHA1_BLOCK_SIZE; + padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index); + if (!may_use_simd()) { + sha1_update_arm(desc, padding, padlen); + sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_neon_begin(); + /* We need to fill a whole block for __sha1_neon_update() */ + if (padlen <= 56) { + sctx->count += padlen; + memcpy(sctx->buffer + index, padding, padlen); + } else { + __sha1_neon_update(desc, padding, padlen, index); + } + __sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56); + kernel_neon_end(); + } + + /* Store state in digest */ + for (i = 0; i < 5; i++) + dst[i] = cpu_to_be32(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha1_neon_export(struct shash_desc *desc, void *out) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha1_neon_import(struct shash_desc *desc, const void *in) +{ + struct sha1_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static struct shash_alg alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_neon_init, + .update = sha1_neon_update, + .final = sha1_neon_final, + .export = sha1_neon_export, + .import = sha1_neon_import, + .descsize = sizeof(struct sha1_state), + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sha1_neon_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_shash(&alg); +} + +static void __exit sha1_neon_mod_fini(void) +{ + crypto_unregister_shash(&alg); +} + +module_init(sha1_neon_mod_init); +module_exit(sha1_neon_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated"); +MODULE_ALIAS("sha1"); diff --git a/arch/arm/crypto/sha512-armv7-neon.S b/arch/arm/crypto/sha512-armv7-neon.S new file mode 100644 index 000000000000..fe99472e507c --- /dev/null +++ b/arch/arm/crypto/sha512-armv7-neon.S @@ -0,0 +1,455 @@ +/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform + * + * Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/linkage.h> + + +.syntax unified +.code 32 +.fpu neon + +.text + +/* structure of SHA512_CONTEXT */ +#define hd_a 0 +#define hd_b ((hd_a) + 8) +#define hd_c ((hd_b) + 8) +#define hd_d ((hd_c) + 8) +#define hd_e ((hd_d) + 8) +#define hd_f ((hd_e) + 8) +#define hd_g ((hd_f) + 8) + +/* register macros */ +#define RK %r2 + +#define RA d0 +#define RB d1 +#define RC d2 +#define RD d3 +#define RE d4 +#define RF d5 +#define RG d6 +#define RH d7 + +#define RT0 d8 +#define RT1 d9 +#define RT2 d10 +#define RT3 d11 +#define RT4 d12 +#define RT5 d13 +#define RT6 d14 +#define RT7 d15 + +#define RT01q q4 +#define RT23q q5 +#define RT45q q6 +#define RT67q q7 + +#define RW0 d16 +#define RW1 d17 +#define RW2 d18 +#define RW3 d19 +#define RW4 d20 +#define RW5 d21 +#define RW6 d22 +#define RW7 d23 +#define RW8 d24 +#define RW9 d25 +#define RW10 d26 +#define RW11 d27 +#define RW12 d28 +#define RW13 d29 +#define RW14 d30 +#define RW15 d31 + +#define RW01q q8 +#define RW23q q9 +#define RW45q q10 +#define RW67q q11 +#define RW89q q12 +#define RW1011q q13 +#define RW1213q q14 +#define RW1415q q15 + +/*********************************************************************** + * ARM assembly implementation of sha512 transform + ***********************************************************************/ +#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \ + rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14; \ + interleave_op(arg1); \ + vshr.u64 RT4, re, #18; \ + vshl.u64 RT5, re, #64 - 18; \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, re, #41; \ + vshl.u64 RT5, re, #64 - 41; \ + vadd.u64 RT0, RT0, rw0; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, re; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, rf, rg; \ + \ + vadd.u64 RT1, RT1, rh; \ + vshr.u64 RT2, ra, #28; \ + vshl.u64 RT3, ra, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, ra, #34; \ + vshl.u64 RT5, ra, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* h = Sum0 (a) + Maj (a, b, c); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, ra, #39; \ + vshl.u64 RT5, ra, #64 - 39; \ + veor.64 RT0, ra, rb; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rc, rb; \ + vadd.u64 rd, rd, RT1; /* d+=t1; */ \ + veor.64 rh, RT2, RT3; \ + \ + /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ + vshr.u64 RT2, rd, #14; \ + vshl.u64 RT3, rd, #64 - 14; \ + vadd.u64 rh, rh, RT0; \ + vshr.u64 RT4, rd, #18; \ + vshl.u64 RT5, rd, #64 - 18; \ + vadd.u64 rh, rh, RT1; /* h+=t1; */ \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rd, #41; \ + vshl.u64 RT5, rd, #64 - 41; \ + vadd.u64 RT0, RT0, rw1; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, rd; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, re, rf; \ + \ + vadd.u64 RT1, RT1, rg; \ + vshr.u64 RT2, rh, #28; \ + vshl.u64 RT3, rh, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, rh, #34; \ + vshl.u64 RT5, rh, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* g = Sum0 (h) + Maj (h, a, b); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rh, #39; \ + vshl.u64 RT5, rh, #64 - 39; \ + veor.64 RT0, rh, ra; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rb, ra; \ + vadd.u64 rc, rc, RT1; /* c+=t1; */ \ + veor.64 rg, RT2, RT3; \ + \ + /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \ + /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \ + \ + /**** S0(w[1:2]) */ \ + \ + /* w[0:1] += w[9:10] */ \ + /* RT23q = rw1:rw2 */ \ + vext.u64 RT23q, rw01q, rw23q, #1; \ + vadd.u64 rw0, rw9; \ + vadd.u64 rg, rg, RT0; \ + vadd.u64 rw1, rw10;\ + vadd.u64 rg, rg, RT1; /* g+=t1; */ \ + \ + vshr.u64 RT45q, RT23q, #1; \ + vshl.u64 RT67q, RT23q, #64 - 1; \ + vshr.u64 RT01q, RT23q, #8; \ + veor.u64 RT45q, RT45q, RT67q; \ + vshl.u64 RT67q, RT23q, #64 - 8; \ + veor.u64 RT45q, RT45q, RT01q; \ + vshr.u64 RT01q, RT23q, #7; \ + veor.u64 RT45q, RT45q, RT67q; \ + \ + /**** S1(w[14:15]) */ \ + vshr.u64 RT23q, rw1415q, #6; \ + veor.u64 RT01q, RT01q, RT45q; \ + vshr.u64 RT45q, rw1415q, #19; \ + vshl.u64 RT67q, rw1415q, #64 - 19; \ + veor.u64 RT23q, RT23q, RT45q; \ + vshr.u64 RT45q, rw1415q, #61; \ + veor.u64 RT23q, RT23q, RT67q; \ + vshl.u64 RT67q, rw1415q, #64 - 61; \ + veor.u64 RT23q, RT23q, RT45q; \ + vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \ + veor.u64 RT01q, RT23q, RT67q; +#define vadd_RT01q(rw01q) \ + /* w[0:1] += S(w[14:15]) */ \ + vadd.u64 rw01q, RT01q; + +#define dummy(_) /*_*/ + +#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \ + interleave_op1, arg1, interleave_op2, arg2) \ + /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \ + vshr.u64 RT2, re, #14; \ + vshl.u64 RT3, re, #64 - 14; \ + interleave_op1(arg1); \ + vshr.u64 RT4, re, #18; \ + vshl.u64 RT5, re, #64 - 18; \ + interleave_op2(arg2); \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, re, #41; \ + vshl.u64 RT5, re, #64 - 41; \ + vadd.u64 RT0, RT0, rw0; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, re; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, rf, rg; \ + \ + vadd.u64 RT1, RT1, rh; \ + vshr.u64 RT2, ra, #28; \ + vshl.u64 RT3, ra, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, ra, #34; \ + vshl.u64 RT5, ra, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* h = Sum0 (a) + Maj (a, b, c); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, ra, #39; \ + vshl.u64 RT5, ra, #64 - 39; \ + veor.64 RT0, ra, rb; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rc, rb; \ + vadd.u64 rd, rd, RT1; /* d+=t1; */ \ + veor.64 rh, RT2, RT3; \ + \ + /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \ + vshr.u64 RT2, rd, #14; \ + vshl.u64 RT3, rd, #64 - 14; \ + vadd.u64 rh, rh, RT0; \ + vshr.u64 RT4, rd, #18; \ + vshl.u64 RT5, rd, #64 - 18; \ + vadd.u64 rh, rh, RT1; /* h+=t1; */ \ + vld1.64 {RT0}, [RK]!; \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rd, #41; \ + vshl.u64 RT5, rd, #64 - 41; \ + vadd.u64 RT0, RT0, rw1; \ + veor.64 RT23q, RT23q, RT45q; \ + vmov.64 RT7, rd; \ + veor.64 RT1, RT2, RT3; \ + vbsl.64 RT7, re, rf; \ + \ + vadd.u64 RT1, RT1, rg; \ + vshr.u64 RT2, rh, #28; \ + vshl.u64 RT3, rh, #64 - 28; \ + vadd.u64 RT1, RT1, RT0; \ + vshr.u64 RT4, rh, #34; \ + vshl.u64 RT5, rh, #64 - 34; \ + vadd.u64 RT1, RT1, RT7; \ + \ + /* g = Sum0 (h) + Maj (h, a, b); */ \ + veor.64 RT23q, RT23q, RT45q; \ + vshr.u64 RT4, rh, #39; \ + vshl.u64 RT5, rh, #64 - 39; \ + veor.64 RT0, rh, ra; \ + veor.64 RT23q, RT23q, RT45q; \ + vbsl.64 RT0, rb, ra; \ + vadd.u64 rc, rc, RT1; /* c+=t1; */ \ + veor.64 rg, RT2, RT3; +#define vadd_rg_RT0(rg) \ + vadd.u64 rg, rg, RT0; +#define vadd_rg_RT1(rg) \ + vadd.u64 rg, rg, RT1; /* g+=t1; */ + +.align 3 +ENTRY(sha512_transform_neon) + /* Input: + * %r0: SHA512_CONTEXT + * %r1: data + * %r2: u64 k[] constants + * %r3: nblks + */ + push {%lr}; + + mov %lr, #0; + + /* Load context to d0-d7 */ + vld1.64 {RA-RD}, [%r0]!; + vld1.64 {RE-RH}, [%r0]; + sub %r0, #(4*8); + + /* Load input to w[16], d16-d31 */ + /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */ + vld1.64 {RW0-RW3}, [%r1]!; + vld1.64 {RW4-RW7}, [%r1]!; + vld1.64 {RW8-RW11}, [%r1]!; + vld1.64 {RW12-RW15}, [%r1]!; +#ifdef __ARMEL__ + /* byteswap */ + vrev64.8 RW01q, RW01q; + vrev64.8 RW23q, RW23q; + vrev64.8 RW45q, RW45q; + vrev64.8 RW67q, RW67q; + vrev64.8 RW89q, RW89q; + vrev64.8 RW1011q, RW1011q; + vrev64.8 RW1213q, RW1213q; + vrev64.8 RW1415q, RW1415q; +#endif + + /* EABI says that d8-d15 must be preserved by callee. */ + /*vpush {RT0-RT7};*/ + +.Loop: + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, + RW23q, RW1415q, RW9, RW10, dummy, _); + b .Lenter_rounds; + +.Loop_rounds: + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, + RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q); +.Lenter_rounds: + rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, + RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q); + rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, + RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q); + rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, + RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q); + rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, + RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q); + rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, + RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q); + add %lr, #16; + rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, + RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q); + cmp %lr, #64; + rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, + RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q); + bne .Loop_rounds; + + subs %r3, #1; + + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, + vadd_RT01q, RW1415q, dummy, _); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, + vadd_rg_RT0, RG, vadd_rg_RT1, RG); + beq .Lhandle_tail; + vld1.64 {RW0-RW3}, [%r1]!; + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); +#ifdef __ARMEL__ + vrev64.8 RW01q, RW01q; + vrev64.8 RW23q, RW23q; +#endif + vld1.64 {RW4-RW7}, [%r1]!; + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, + vadd_rg_RT0, RA, vadd_rg_RT1, RA); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, + vadd_rg_RT0, RG, vadd_rg_RT1, RG); +#ifdef __ARMEL__ + vrev64.8 RW45q, RW45q; + vrev64.8 RW67q, RW67q; +#endif + vld1.64 {RW8-RW11}, [%r1]!; + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); +#ifdef __ARMEL__ + vrev64.8 RW89q, RW89q; + vrev64.8 RW1011q, RW1011q; +#endif + vld1.64 {RW12-RW15}, [%r1]!; + vadd_rg_RT0(RA); + vadd_rg_RT1(RA); + + /* Load context */ + vld1.64 {RT0-RT3}, [%r0]!; + vld1.64 {RT4-RT7}, [%r0]; + sub %r0, #(4*8); + +#ifdef __ARMEL__ + vrev64.8 RW1213q, RW1213q; + vrev64.8 RW1415q, RW1415q; +#endif + + vadd.u64 RA, RT0; + vadd.u64 RB, RT1; + vadd.u64 RC, RT2; + vadd.u64 RD, RT3; + vadd.u64 RE, RT4; + vadd.u64 RF, RT5; + vadd.u64 RG, RT6; + vadd.u64 RH, RT7; + + /* Store the first half of context */ + vst1.64 {RA-RD}, [%r0]!; + sub RK, $(8*80); + vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + mov %lr, #0; + sub %r0, #(4*8); + + b .Loop; + +.Lhandle_tail: + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); + rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, + vadd_rg_RT0, RA, vadd_rg_RT1, RA); + rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, + vadd_rg_RT0, RG, vadd_rg_RT1, RG); + rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, + vadd_rg_RT0, RE, vadd_rg_RT1, RE); + rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, + vadd_rg_RT0, RC, vadd_rg_RT1, RC); + + /* Load context to d16-d23 */ + vld1.64 {RW0-RW3}, [%r0]!; + vadd_rg_RT0(RA); + vld1.64 {RW4-RW7}, [%r0]; + vadd_rg_RT1(RA); + sub %r0, #(4*8); + + vadd.u64 RA, RW0; + vadd.u64 RB, RW1; + vadd.u64 RC, RW2; + vadd.u64 RD, RW3; + vadd.u64 RE, RW4; + vadd.u64 RF, RW5; + vadd.u64 RG, RW6; + vadd.u64 RH, RW7; + + /* Store the first half of context */ + vst1.64 {RA-RD}, [%r0]!; + + /* Clear used registers */ + /* d16-d31 */ + veor.u64 RW01q, RW01q; + veor.u64 RW23q, RW23q; + veor.u64 RW45q, RW45q; + veor.u64 RW67q, RW67q; + vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */ + veor.u64 RW89q, RW89q; + veor.u64 RW1011q, RW1011q; + veor.u64 RW1213q, RW1213q; + veor.u64 RW1415q, RW1415q; + /* d8-d15 */ + /*vpop {RT0-RT7};*/ + /* d0-d7 (q0-q3) */ + veor.u64 %q0, %q0; + veor.u64 %q1, %q1; + veor.u64 %q2, %q2; + veor.u64 %q3, %q3; + + pop {%pc}; +ENDPROC(sha512_transform_neon) diff --git a/arch/arm/crypto/sha512_neon_glue.c b/arch/arm/crypto/sha512_neon_glue.c new file mode 100644 index 000000000000..0d2758ff5e12 --- /dev/null +++ b/arch/arm/crypto/sha512_neon_glue.c @@ -0,0 +1,305 @@ +/* + * Glue code for the SHA512 Secure Hash Algorithm assembly implementation + * using NEON instructions. + * + * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is based on sha512_ssse3_glue.c: + * Copyright (C) 2013 Intel Corporation + * Author: Tim Chen <tim.c.chen@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include <crypto/internal/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/cryptohash.h> +#include <linux/types.h> +#include <linux/string.h> +#include <crypto/sha.h> +#include <asm/byteorder.h> +#include <asm/simd.h> +#include <asm/neon.h> + + +static const u64 sha512_k[] = { + 0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, + 0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL, + 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL, + 0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, + 0xd807aa98a3030242ULL, 0x12835b0145706fbeULL, + 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL, + 0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, + 0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL, + 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL, + 0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, + 0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL, + 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL, + 0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, + 0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL, + 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL, + 0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, + 0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL, + 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL, + 0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, + 0x81c2c92e47edaee6ULL, 0x92722c851482353bULL, + 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL, + 0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, + 0xd192e819d6ef5218ULL, 0xd69906245565a910ULL, + 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL, + 0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, + 0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL, + 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL, + 0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, + 0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL, + 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL, + 0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, + 0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL, + 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, + 0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, + 0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL, + 0x113f9804bef90daeULL, 0x1b710b35131c471bULL, + 0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, + 0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL, + 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, + 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL +}; + + +asmlinkage void sha512_transform_neon(u64 *digest, const void *data, + const u64 k[], unsigned int num_blks); + + +static int sha512_neon_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA512_H0; + sctx->state[1] = SHA512_H1; + sctx->state[2] = SHA512_H2; + sctx->state[3] = SHA512_H3; + sctx->state[4] = SHA512_H4; + sctx->state[5] = SHA512_H5; + sctx->state[6] = SHA512_H6; + sctx->state[7] = SHA512_H7; + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int __sha512_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len, unsigned int partial) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int done = 0; + + sctx->count[0] += len; + if (sctx->count[0] < len) + sctx->count[1]++; + + if (partial) { + done = SHA512_BLOCK_SIZE - partial; + memcpy(sctx->buf + partial, data, done); + sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1); + } + + if (len - done >= SHA512_BLOCK_SIZE) { + const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE; + + sha512_transform_neon(sctx->state, data + done, sha512_k, + rounds); + + done += rounds * SHA512_BLOCK_SIZE; + } + + memcpy(sctx->buf, data + done, len - done); + + return 0; +} + +static int sha512_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE; + int res; + + /* Handle the fast case right here */ + if (partial + len < SHA512_BLOCK_SIZE) { + sctx->count[0] += len; + if (sctx->count[0] < len) + sctx->count[1]++; + memcpy(sctx->buf + partial, data, len); + + return 0; + } + + if (!may_use_simd()) { + res = crypto_sha512_update(desc, data, len); + } else { + kernel_neon_begin(); + res = __sha512_neon_update(desc, data, len, partial); + kernel_neon_end(); + } + + return res; +} + + +/* Add padding and return the message digest. */ +static int sha512_neon_final(struct shash_desc *desc, u8 *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + unsigned int i, index, padlen; + __be64 *dst = (__be64 *)out; + __be64 bits[2]; + static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, }; + + /* save number of bits */ + bits[1] = cpu_to_be64(sctx->count[0] << 3); + bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61); + + /* Pad out to 112 mod 128 and append length */ + index = sctx->count[0] & 0x7f; + padlen = (index < 112) ? (112 - index) : ((128+112) - index); + + if (!may_use_simd()) { + crypto_sha512_update(desc, padding, padlen); + crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits)); + } else { + kernel_neon_begin(); + /* We need to fill a whole block for __sha512_neon_update() */ + if (padlen <= 112) { + sctx->count[0] += padlen; + if (sctx->count[0] < padlen) + sctx->count[1]++; + memcpy(sctx->buf + index, padding, padlen); + } else { + __sha512_neon_update(desc, padding, padlen, index); + } + __sha512_neon_update(desc, (const u8 *)&bits, + sizeof(bits), 112); + kernel_neon_end(); + } + + /* Store state in digest */ + for (i = 0; i < 8; i++) + dst[i] = cpu_to_be64(sctx->state[i]); + + /* Wipe context */ + memset(sctx, 0, sizeof(*sctx)); + + return 0; +} + +static int sha512_neon_export(struct shash_desc *desc, void *out) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(out, sctx, sizeof(*sctx)); + + return 0; +} + +static int sha512_neon_import(struct shash_desc *desc, const void *in) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + memcpy(sctx, in, sizeof(*sctx)); + + return 0; +} + +static int sha384_neon_init(struct shash_desc *desc) +{ + struct sha512_state *sctx = shash_desc_ctx(desc); + + sctx->state[0] = SHA384_H0; + sctx->state[1] = SHA384_H1; + sctx->state[2] = SHA384_H2; + sctx->state[3] = SHA384_H3; + sctx->state[4] = SHA384_H4; + sctx->state[5] = SHA384_H5; + sctx->state[6] = SHA384_H6; + sctx->state[7] = SHA384_H7; + + sctx->count[0] = sctx->count[1] = 0; + + return 0; +} + +static int sha384_neon_final(struct shash_desc *desc, u8 *hash) +{ + u8 D[SHA512_DIGEST_SIZE]; + + sha512_neon_final(desc, D); + + memcpy(hash, D, SHA384_DIGEST_SIZE); + memset(D, 0, SHA512_DIGEST_SIZE); + + return 0; +} + +static struct shash_alg algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_neon_init, + .update = sha512_neon_update, + .final = sha512_neon_final, + .export = sha512_neon_export, + .import = sha512_neon_import, + .descsize = sizeof(struct sha512_state), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_neon_init, + .update = sha512_neon_update, + .final = sha384_neon_final, + .export = sha512_neon_export, + .import = sha512_neon_import, + .descsize = sizeof(struct sha512_state), + .statesize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-neon", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static int __init sha512_neon_mod_init(void) +{ + if (!cpu_has_neon()) + return -ENODEV; + + return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +} + +static void __exit sha512_neon_mod_fini(void) +{ + crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); +} + +module_init(sha512_neon_mod_init); +module_exit(sha512_neon_mod_fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated"); + +MODULE_ALIAS("sha512"); +MODULE_ALIAS("sha384"); diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 57f0584e8d97..f67fd3afebdf 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -24,6 +24,8 @@ #include <asm/domain.h> #include <asm/opcodes-virt.h> #include <asm/asm-offsets.h> +#include <asm/page.h> +#include <asm/thread_info.h> #define IOMEM(x) (x) @@ -179,10 +181,10 @@ * Get current thread_info. */ .macro get_thread_info, rd - ARM( mov \rd, sp, lsr #13 ) + ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT ) THUMB( mov \rd, sp ) - THUMB( lsr \rd, \rd, #13 ) - mov \rd, \rd, lsl #13 + THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT ) + mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT .endm /* @@ -425,4 +427,25 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo + .macro ret\c, reg +#if __LINUX_ARM_ARCH__ < 6 + mov\c pc, \reg +#else + .ifeqs "\reg", "lr" + bx\c \reg + .else + mov\c pc, \reg + .endif +#endif + .endm + .endr + + .macro ret.w, reg + ret \reg +#ifdef CONFIG_THUMB2_KERNEL + nop +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 8c2b7321a478..963a2515906d 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -62,17 +62,18 @@ #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_INTEL 0x69 -#define ARM_CPU_PART_ARM1136 0xB360 -#define ARM_CPU_PART_ARM1156 0xB560 -#define ARM_CPU_PART_ARM1176 0xB760 -#define ARM_CPU_PART_ARM11MPCORE 0xB020 -#define ARM_CPU_PART_CORTEX_A8 0xC080 -#define ARM_CPU_PART_CORTEX_A9 0xC090 -#define ARM_CPU_PART_CORTEX_A5 0xC050 -#define ARM_CPU_PART_CORTEX_A15 0xC0F0 -#define ARM_CPU_PART_CORTEX_A7 0xC070 -#define ARM_CPU_PART_CORTEX_A12 0xC0D0 -#define ARM_CPU_PART_CORTEX_A17 0xC0E0 +/* ARM implemented processors */ +#define ARM_CPU_PART_ARM1136 0x4100b360 +#define ARM_CPU_PART_ARM1156 0x4100b560 +#define ARM_CPU_PART_ARM1176 0x4100b760 +#define ARM_CPU_PART_ARM11MPCORE 0x4100b020 +#define ARM_CPU_PART_CORTEX_A8 0x4100c080 +#define ARM_CPU_PART_CORTEX_A9 0x4100c090 +#define ARM_CPU_PART_CORTEX_A5 0x4100c050 +#define ARM_CPU_PART_CORTEX_A7 0x4100c070 +#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0 +#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0 +#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0 #define ARM_CPU_XSCALE_ARCH_MASK 0xe000 #define ARM_CPU_XSCALE_ARCH_V1 0x2000 @@ -171,14 +172,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void) return (read_cpuid_id() & 0xFF000000) >> 24; } -static inline unsigned int __attribute_const__ read_cpuid_part_number(void) +/* + * The CPU part number is meaningless without referring to the CPU + * implementer: implementers are free to define their own part numbers + * which are permitted to clash with other implementer part numbers. + */ +static inline unsigned int __attribute_const__ read_cpuid_part(void) +{ + return read_cpuid_id() & 0xff00fff0; +} + +static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void) { return read_cpuid_id() & 0xFFF0; } static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void) { - return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK; + return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK; } static inline unsigned int __attribute_const__ read_cpuid_cachetype(void) diff --git a/arch/arm/include/asm/crypto/sha1.h b/arch/arm/include/asm/crypto/sha1.h new file mode 100644 index 000000000000..75e6a417416b --- /dev/null +++ b/arch/arm/include/asm/crypto/sha1.h @@ -0,0 +1,10 @@ +#ifndef ASM_ARM_CRYPTO_SHA1_H +#define ASM_ARM_CRYPTO_SHA1_H + +#include <linux/crypto.h> +#include <crypto/sha.h> + +extern int sha1_update_arm(struct shash_desc *desc, const u8 *data, + unsigned int len); + +#endif diff --git a/arch/arm/include/asm/entry-macro-multi.S b/arch/arm/include/asm/entry-macro-multi.S index 88d61815f0c0..469a2b30fa27 100644 --- a/arch/arm/include/asm/entry-macro-multi.S +++ b/arch/arm/include/asm/entry-macro-multi.S @@ -35,5 +35,5 @@ \symbol_name: mov r8, lr arch_irq_handler_default - mov pc, r8 + ret r8 .endm diff --git a/arch/arm/include/asm/glue-proc.h b/arch/arm/include/asm/glue-proc.h index 74a8b84f3cb1..74be7c22035a 100644 --- a/arch/arm/include/asm/glue-proc.h +++ b/arch/arm/include/asm/glue-proc.h @@ -221,15 +221,6 @@ # endif #endif -#ifdef CONFIG_CPU_V7 -# ifdef CPU_NAME -# undef MULTI_CPU -# define MULTI_CPU -# else -# define CPU_NAME cpu_v7 -# endif -#endif - #ifdef CONFIG_CPU_V7M # ifdef CPU_NAME # undef MULTI_CPU @@ -248,6 +239,15 @@ # endif #endif +#ifdef CONFIG_CPU_V7 +/* + * Cortex-A9 needs a different suspend/resume function, so we need + * multiple CPU support for ARMv7 anyway. + */ +# undef MULTI_CPU +# define MULTI_CPU +#endif + #ifndef MULTI_CPU #define cpu_proc_init __glue(CPU_NAME,_proc_init) #define cpu_proc_fin __glue(CPU_NAME,_proc_fin) diff --git a/arch/arm/include/asm/mcpm.h b/arch/arm/include/asm/mcpm.h index 94060adba174..57ff7f2a3084 100644 --- a/arch/arm/include/asm/mcpm.h +++ b/arch/arm/include/asm/mcpm.h @@ -217,6 +217,22 @@ int __mcpm_cluster_state(unsigned int cluster); int __init mcpm_sync_init( void (*power_up_setup)(unsigned int affinity_level)); +/** + * mcpm_loopback - make a run through the MCPM low-level code + * + * @cache_disable: pointer to function performing cache disabling + * + * This exercises the MCPM machinery by soft resetting the CPU and branching + * to the MCPM low-level entry code before returning to the caller. + * The @cache_disable function must do the necessary cache disabling to + * let the regular kernel init code turn it back on as if the CPU was + * hotplugged in. The MCPM state machine is set as if the cluster was + * initialized meaning the power_up_setup callback passed to mcpm_sync_init() + * will be invoked for all affinity levels. This may be useful to initialize + * some resources such as enabling the CCI that requires the cache to be off, or simply for testing purposes. + */ +int __init mcpm_loopback(void (*cache_disable)(void)); + void __init mcpm_smp_set_ops(void); #else diff --git a/arch/arm/include/asm/mcs_spinlock.h b/arch/arm/include/asm/mcs_spinlock.h new file mode 100644 index 000000000000..f652ad65840a --- /dev/null +++ b/arch/arm/include/asm/mcs_spinlock.h @@ -0,0 +1,23 @@ +#ifndef __ASM_MCS_LOCK_H +#define __ASM_MCS_LOCK_H + +#ifdef CONFIG_SMP +#include <asm/spinlock.h> + +/* MCS spin-locking. */ +#define arch_mcs_spin_lock_contended(lock) \ +do { \ + /* Ensure prior stores are observed before we enter wfe. */ \ + smp_mb(); \ + while (!(smp_load_acquire(lock))) \ + wfe(); \ +} while (0) \ + +#define arch_mcs_spin_unlock_contended(lock) \ +do { \ + smp_store_release(lock, 1); \ + dsb_sev(); \ +} while (0) + +#endif /* CONFIG_SMP */ +#endif /* __ASM_MCS_LOCK_H */ diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 2b751464d6ff..04ccf1c0a1af 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -150,13 +150,11 @@ /* * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical - * memory. This is used for XIP and NoMMU kernels, or by kernels which - * have their own mach/memory.h. Assembly code must always use + * memory. This is used for XIP and NoMMU kernels, and on platforms that don't + * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use * PLAT_PHYS_OFFSET and not PHYS_OFFSET. */ -#ifndef PLAT_PHYS_OFFSET #define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET) -#endif #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-3level-hwdef.h b/arch/arm/include/asm/pgtable-3level-hwdef.h index 626989fec4d3..9fd61c72a33a 100644 --- a/arch/arm/include/asm/pgtable-3level-hwdef.h +++ b/arch/arm/include/asm/pgtable-3level-hwdef.h @@ -43,7 +43,7 @@ #define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2) #define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ -#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ +#define PMD_SECT_AP2 (_AT(pmdval_t, 1) << 7) /* read only */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_nG (_AT(pmdval_t, 1) << 11) @@ -72,6 +72,7 @@ #define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1) #define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */ #define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */ +#define PTE_AP2 (_AT(pteval_t, 1) << 7) /* AP[2] */ #define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 85c60adc8b60..06e0bc0f8b00 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -79,18 +79,19 @@ #define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */ #define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */ #define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */ -#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */ #define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */ #define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */ -#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */ -#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */ +#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) +#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) #define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */ +#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */ -#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) -#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) -#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) +#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) +#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55) +#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56) +#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57) +#define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58) /* * To be used in assembly code with the upper page attributes. @@ -207,27 +208,32 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) #define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT)) #define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT)) -#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF) +#define pmd_isset(pmd, val) ((u32)(val) == (val) ? pmd_val(pmd) & (val) \ + : !!(pmd_val(pmd) & (val))) +#define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val))) + +#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF)) #define __HAVE_ARCH_PMD_WRITE -#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY)) +#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) +#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd)) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT)) -#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING) +#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd)) +#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING)) #endif #define PMD_BIT_FUNC(fn,op) \ static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; } -PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY); +PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY); PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF); -PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING); -PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY); -PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY); +PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING); +PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY); +PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY); PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) @@ -241,8 +247,8 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF); static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) { - const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY | - PMD_SECT_VALID | PMD_SECT_NONE; + const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY | + L_PMD_SECT_VALID | L_PMD_SECT_NONE; pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask); return pmd; } @@ -253,8 +259,13 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, BUG_ON(addr >= TASK_SIZE); /* create a faulting entry if PROT_NONE protected */ - if (pmd_val(pmd) & PMD_SECT_NONE) - pmd_val(pmd) &= ~PMD_SECT_VALID; + if (pmd_val(pmd) & L_PMD_SECT_NONE) + pmd_val(pmd) &= ~L_PMD_SECT_VALID; + + if (pmd_write(pmd) && pmd_dirty(pmd)) + pmd_val(pmd) &= ~PMD_SECT_AP2; + else + pmd_val(pmd) |= PMD_SECT_AP2; *pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG); flush_pmd_entry(pmdp); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 5478e5d6ad89..01baef07cd0c 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -214,18 +214,22 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0) +#define pte_isset(pte, val) ((u32)(val) == (val) ? pte_val(pte) & (val) \ + : !!(pte_val(pte) & (val))) +#define pte_isclear(pte, val) (!(pte_val(pte) & (val))) + #define pte_none(pte) (!pte_val(pte)) -#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT) -#define pte_valid(pte) (pte_val(pte) & L_PTE_VALID) +#define pte_present(pte) (pte_isset((pte), L_PTE_PRESENT)) +#define pte_valid(pte) (pte_isset((pte), L_PTE_VALID)) #define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte)) -#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY)) -#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY) -#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG) -#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN)) +#define pte_write(pte) (pte_isclear((pte), L_PTE_RDONLY)) +#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY)) +#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG)) +#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN)) #define pte_special(pte) (0) #define pte_valid_user(pte) \ - (pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte)) + (pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte)) #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index c877654fe3bf..601264d983fa 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -84,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs) #define instruction_pointer(regs) (regs)->ARM_pc +#ifdef CONFIG_THUMB2_KERNEL +#define frame_pointer(regs) (regs)->ARM_r7 +#else +#define frame_pointer(regs) (regs)->ARM_fp +#endif + static inline void instruction_pointer_set(struct pt_regs *regs, unsigned long val) { diff --git a/arch/arm/include/asm/smp_scu.h b/arch/arm/include/asm/smp_scu.h index 0393fbab8dd5..bfe163c40024 100644 --- a/arch/arm/include/asm/smp_scu.h +++ b/arch/arm/include/asm/smp_scu.h @@ -11,7 +11,7 @@ static inline bool scu_a9_has_base(void) { - return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; } static inline unsigned long scu_a9_get_base(void) diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 4d0a16441b29..7722201ead19 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -1,13 +1,28 @@ #ifndef __ASM_STACKTRACE_H #define __ASM_STACKTRACE_H +#include <asm/ptrace.h> + struct stackframe { + /* + * FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled + * and R11 otherwise. + */ unsigned long fp; unsigned long sp; unsigned long lr; unsigned long pc; }; +static __always_inline +void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) +{ + frame->fp = frame_pointer(regs); + frame->sp = regs->ARM_sp; + frame->lr = regs->ARM_lr; + frame->pc = regs->ARM_pc; +} + extern int unwind_frame(struct stackframe *frame); extern void walk_stackframe(struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index e4e4208a9130..fc44d3761f9e 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -14,9 +14,10 @@ #include <linux/compiler.h> #include <asm/fpstate.h> +#include <asm/page.h> #define THREAD_SIZE_ORDER 1 -#define THREAD_SIZE 8192 +#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_START_SP (THREAD_SIZE - 8) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 75d95799b6e6..7057cf8b87d0 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs) extern int __get_user_1(void *); extern int __get_user_2(void *); extern int __get_user_4(void *); +extern int __get_user_lo8(void *); +extern int __get_user_8(void *); #define __GUP_CLOBBER_1 "lr", "cc" #ifdef CONFIG_CPU_USE_DOMAINS @@ -115,6 +117,8 @@ extern int __get_user_4(void *); #define __GUP_CLOBBER_2 "lr", "cc" #endif #define __GUP_CLOBBER_4 "lr", "cc" +#define __GUP_CLOBBER_lo8 "lr", "cc" +#define __GUP_CLOBBER_8 "lr", "cc" #define __get_user_x(__r2,__p,__e,__l,__s) \ __asm__ __volatile__ ( \ @@ -125,11 +129,19 @@ extern int __get_user_4(void *); : "0" (__p), "r" (__l) \ : __GUP_CLOBBER_##__s) +/* narrowing a double-word get into a single 32bit word register: */ +#ifdef __ARMEB__ +#define __get_user_xb(__r2, __p, __e, __l, __s) \ + __get_user_x(__r2, __p, __e, __l, lo8) +#else +#define __get_user_xb __get_user_x +#endif + #define __get_user_check(x,p) \ ({ \ unsigned long __limit = current_thread_info()->addr_limit - 1; \ register const typeof(*(p)) __user *__p asm("r0") = (p);\ - register unsigned long __r2 asm("r2"); \ + register typeof(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ @@ -142,6 +154,12 @@ extern int __get_user_4(void *); case 4: \ __get_user_x(__r2, __p, __e, __l, 4); \ break; \ + case 8: \ + if (sizeof((x)) < 8) \ + __get_user_xb(__r2, __p, __e, __l, 4); \ + else \ + __get_user_x(__r2, __p, __e, __l, 8); \ + break; \ default: __e = __get_user_bad(); break; \ } \ x = (typeof(*(p))) __r2; \ diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 43876245fc57..21ca0cebcab0 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -15,7 +15,17 @@ #include <uapi/asm/unistd.h> +/* + * This may need to be greater than __NR_last_syscall+1 in order to + * account for the padding in the syscall table + */ #define __NR_syscalls (384) + +/* + * *NOTE*: This is a ghost syscall private to the kernel. Only the + * __kuser_cmpxchg code in entry-armv.S should be aware of its + * existence. Don't ever use this from user code. + */ #define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0) #define __ARCH_WANT_STAT64 diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index ba94446c72d9..acd5b66ea3aa 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -411,11 +411,6 @@ #define __NR_renameat2 (__NR_SYSCALL_BASE+382) /* - * This may need to be greater than __NR_last_syscall+1 in order to - * account for the padding in the syscall table - */ - -/* * The following SWIs are ARM private. */ #define __ARM_NR_BASE (__NR_SYSCALL_BASE+0x0f0000) @@ -426,12 +421,6 @@ #define __ARM_NR_set_tls (__ARM_NR_BASE+5) /* - * *NOTE*: This is a ghost syscall private to the kernel. Only the - * __kuser_cmpxchg code in entry-armv.S should be aware of its - * existence. Don't ever use this from user code. - */ - -/* * The following syscalls are obsolete and no longer available for EABI. */ #if !defined(__KERNEL__) diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index 14f7c3b14632..78c91b5f97d4 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -90,7 +90,7 @@ ENTRY(printascii) ldrneb r1, [r0], #1 teqne r1, #0 bne 1b - mov pc, lr + ret lr ENDPROC(printascii) ENTRY(printch) @@ -105,7 +105,7 @@ ENTRY(debug_ll_addr) addruart r2, r3, ip str r2, [r0] str r3, [r1] - mov pc, lr + ret lr ENDPROC(debug_ll_addr) #endif @@ -116,7 +116,7 @@ ENTRY(printascii) mov r0, #0x04 @ SYS_WRITE0 ARM( svc #0x123456 ) THUMB( svc #0xab ) - mov pc, lr + ret lr ENDPROC(printascii) ENTRY(printch) @@ -125,14 +125,14 @@ ENTRY(printch) mov r0, #0x03 @ SYS_WRITEC ARM( svc #0x123456 ) THUMB( svc #0xab ) - mov pc, lr + ret lr ENDPROC(printch) ENTRY(debug_ll_addr) mov r2, #0 str r2, [r0] str r2, [r1] - mov pc, lr + ret lr ENDPROC(debug_ll_addr) #endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 52a949a8077d..36276cdccfbc 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -224,7 +224,7 @@ svc_preempt: 1: bl preempt_schedule_irq @ irq en/disable is done inside ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS tst r0, #_TIF_NEED_RESCHED - moveq pc, r8 @ go again + reteq r8 @ go again b 1b #endif @@ -490,7 +490,7 @@ ENDPROC(__und_usr) .pushsection .fixup, "ax" .align 2 4: str r4, [sp, #S_PC] @ retry current instruction - mov pc, r9 + ret r9 .popsection .pushsection __ex_table,"a" .long 1b, 4b @@ -552,7 +552,7 @@ call_fpe: #endif tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 - moveq pc, lr + reteq lr and r8, r0, #0x00000f00 @ mask out CP number THUMB( lsr r8, r8, #8 ) mov r7, #1 @@ -571,33 +571,33 @@ call_fpe: THUMB( add pc, r8 ) nop - movw_pc lr @ CP#0 + ret.w lr @ CP#0 W(b) do_fpe @ CP#1 (FPE) W(b) do_fpe @ CP#2 (FPE) - movw_pc lr @ CP#3 + ret.w lr @ CP#3 #ifdef CONFIG_CRUNCH b crunch_task_enable @ CP#4 (MaverickCrunch) b crunch_task_enable @ CP#5 (MaverickCrunch) b crunch_task_enable @ CP#6 (MaverickCrunch) #else - movw_pc lr @ CP#4 - movw_pc lr @ CP#5 - movw_pc lr @ CP#6 + ret.w lr @ CP#4 + ret.w lr @ CP#5 + ret.w lr @ CP#6 #endif - movw_pc lr @ CP#7 - movw_pc lr @ CP#8 - movw_pc lr @ CP#9 + ret.w lr @ CP#7 + ret.w lr @ CP#8 + ret.w lr @ CP#9 #ifdef CONFIG_VFP W(b) do_vfp @ CP#10 (VFP) W(b) do_vfp @ CP#11 (VFP) #else - movw_pc lr @ CP#10 (VFP) - movw_pc lr @ CP#11 (VFP) + ret.w lr @ CP#10 (VFP) + ret.w lr @ CP#11 (VFP) #endif - movw_pc lr @ CP#12 - movw_pc lr @ CP#13 - movw_pc lr @ CP#14 (Debug) - movw_pc lr @ CP#15 (Control) + ret.w lr @ CP#12 + ret.w lr @ CP#13 + ret.w lr @ CP#14 (Debug) + ret.w lr @ CP#15 (Control) #ifdef NEED_CPU_ARCHITECTURE .align 2 @@ -649,7 +649,7 @@ ENTRY(fp_enter) .popsection ENTRY(no_fp) - mov pc, lr + ret lr ENDPROC(no_fp) __und_usr_fault_32: @@ -745,7 +745,7 @@ ENDPROC(__switch_to) #ifdef CONFIG_ARM_THUMB bx \reg #else - mov pc, \reg + ret \reg #endif .endm @@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup: #if __LINUX_ARM_ARCH__ < 6 bcc kuser_cmpxchg32_fixup #endif - mov pc, lr + ret lr .previous #else @@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup: subs r8, r4, r7 rsbcss r8, r8, #(2b - 1b) strcs r7, [sp, #S_PC] - mov pc, lr + ret lr .previous #else diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 7139d4a7dea7..e52fe5a2d843 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ +#include <asm/assembler.h> #include <asm/unistd.h> #include <asm/ftrace.h> #include <asm/unwind.h> @@ -88,7 +89,7 @@ ENTRY(ret_from_fork) cmp r5, #0 movne r0, r4 adrne lr, BSYM(1f) - movne pc, r5 + retne r5 1: get_thread_info tsk b ret_slow_syscall ENDPROC(ret_from_fork) @@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old) .macro mcount_exit ldmia sp!, {r0-r3, ip, lr} - mov pc, ip + ret ip .endm ENTRY(__gnu_mcount_nc) @@ -298,7 +299,7 @@ UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE mov ip, lr ldmia sp!, {lr} - mov pc, ip + ret ip #else __mcount #endif @@ -333,12 +334,12 @@ return_to_handler: bl ftrace_return_to_handler mov lr, r0 @ r0 has real ret addr ldmia sp!, {r0-r3} - mov pc, lr + ret lr #endif ENTRY(ftrace_stub) .Lftrace_stub: - mov pc, lr + ret lr ENDPROC(ftrace_stub) #endif /* CONFIG_FUNCTION_TRACER */ @@ -561,7 +562,7 @@ sys_mmap2: streq r5, [sp, #4] beq sys_mmap_pgoff mov r0, #-EINVAL - mov pc, lr + ret lr #else str r5, [sp, #4] b sys_mmap_pgoff diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 5d702f8900b1..8db307d0954b 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -240,12 +240,6 @@ movs pc, lr @ return & move spsr_svc into cpsr .endm - @ - @ 32-bit wide "mov pc, reg" - @ - .macro movw_pc, reg - mov pc, \reg - .endm #else /* CONFIG_THUMB2_KERNEL */ .macro svc_exit, rpsr, irq = 0 .if \irq != 0 @@ -304,14 +298,6 @@ movs pc, lr @ return & move spsr_svc into cpsr .endm #endif /* ifdef CONFIG_CPU_V7M / else */ - - @ - @ 32-bit wide "mov pc, reg" - @ - .macro movw_pc, reg - mov pc, \reg - nop - .endm #endif /* !CONFIG_THUMB2_KERNEL */ /* diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S index 207f9d652010..8dd26e1a9bd6 100644 --- a/arch/arm/kernel/fiqasm.S +++ b/arch/arm/kernel/fiqasm.S @@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs) ldr lr, [r0] msr cpsr_c, r1 @ return to SVC mode mov r0, r0 @ avoid hazard prior to ARMv4 - mov pc, lr + ret lr ENDPROC(__set_fiq_regs) ENTRY(__get_fiq_regs) @@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs) str lr, [r0] msr cpsr_c, r1 @ return to SVC mode mov r0, r0 @ avoid hazard prior to ARMv4 - mov pc, lr + ret lr ENDPROC(__get_fiq_regs) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 572a38335c96..8733012d231f 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -10,6 +10,7 @@ * published by the Free Software Foundation. * */ +#include <asm/assembler.h> #define ATAG_CORE 0x54410001 #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2) @@ -61,10 +62,10 @@ __vet_atags: cmp r5, r6 bne 1f -2: mov pc, lr @ atag/dtb pointer is ok +2: ret lr @ atag/dtb pointer is ok 1: mov r2, #0 - mov pc, lr + ret lr ENDPROC(__vet_atags) /* @@ -162,7 +163,7 @@ __lookup_processor_type: cmp r5, r6 blo 1b mov r5, #0 @ unknown processor -2: mov pc, lr +2: ret lr ENDPROC(__lookup_processor_type) /* diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 716249cc2ee1..cc176b67c134 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -82,7 +82,7 @@ ENTRY(stext) adr lr, BSYM(1f) @ return (PIC) address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) 1: b __after_proc_init ENDPROC(stext) @@ -119,7 +119,7 @@ ENTRY(secondary_startup) mov r13, r12 @ __secondary_switched address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) ENDPROC(secondary_startup) ENTRY(__secondary_switched) @@ -164,7 +164,7 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - mov pc, r13 + ret r13 ENDPROC(__after_proc_init) .ltorg @@ -254,7 +254,7 @@ ENTRY(__setup_mpu) orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) mcr p15, 0, r0, c1, c0, 0 @ Enable MPU isb - mov pc,lr + ret lr ENDPROC(__setup_mpu) #endif #include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 2c35f0ff2fdc..664eee8c4a26 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -140,7 +140,7 @@ ENTRY(stext) mov r8, r4 @ set TTBR1 to swapper_pg_dir ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) 1: b __enable_mmu ENDPROC(stext) .ltorg @@ -335,7 +335,7 @@ __create_page_tables: sub r4, r4, #0x1000 @ point to the PGD table mov r4, r4, lsr #ARCH_PGD_SHIFT #endif - mov pc, lr + ret lr ENDPROC(__create_page_tables) .ltorg .align @@ -383,7 +383,7 @@ ENTRY(secondary_startup) ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor @ (return control reg) THUMB( add r12, r10, #PROCINFO_INITFUNC ) - THUMB( mov pc, r12 ) + THUMB( ret r12 ) ENDPROC(secondary_startup) /* @@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on) instr_sync mov r3, r3 mov r3, r13 - mov pc, r3 + ret r3 __turn_mmu_on_end: ENDPROC(__turn_mmu_on) .popsection @@ -487,7 +487,7 @@ __fixup_smp: orr r4, r4, #0x0000b000 orr r4, r4, #0x00000020 @ val 0x4100b020 teq r3, r4 @ ARM 11MPCore? - moveq pc, lr @ yes, assume SMP + reteq lr @ yes, assume SMP mrc p15, 0, r0, c0, c0, 5 @ read MPIDR and r0, r0, #0xc0000000 @ multiprocessing extensions and @@ -500,7 +500,7 @@ __fixup_smp: orr r4, r4, #0x0000c000 orr r4, r4, #0x00000090 teq r3, r4 @ Check for ARM Cortex-A9 - movne pc, lr @ Not ARM Cortex-A9, + retne lr @ Not ARM Cortex-A9, @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the @ below address check will need to be #ifdef'd or equivalent @@ -512,7 +512,7 @@ __fixup_smp: ARM_BE8(rev r0, r0) @ byteswap if big endian and r0, r0, #0x3 @ number of CPUs teq r0, #0x0 @ is 1? - movne pc, lr + retne lr __fixup_smp_on_up: adr r0, 1f @@ -539,7 +539,7 @@ smp_on_up: .text __do_fixup_smp_on_up: cmp r4, r5 - movhs pc, lr + reths lr ldmia r4!, {r0, r6} ARM( str r6, [r0, r3] ) THUMB( add r0, r0, r3 ) @@ -672,7 +672,7 @@ ARM_BE8(rev16 ip, ip) 2: cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b - mov pc, lr + ret lr #endif ENDPROC(__fixup_a_pv_table) diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 797b1a6a4906..56ce6290c831 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary) * immediately. */ compare_cpu_mode_with_primary r4, r5, r6, r7 - movne pc, lr + retne lr /* * Once we have given up on one CPU, we do not try to install the @@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary) */ cmp r4, #HYP_MODE - movne pc, lr @ give up if the CPU is not in HYP mode + retne lr @ give up if the CPU is not in HYP mode /* * Configure HSCTLR to set correct exception endianness/instruction set @@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors) @ fall through ENTRY(__hyp_set_vectors) __HVC(0) - mov pc, lr + ret lr ENDPROC(__hyp_set_vectors) #ifndef ZIMAGE diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 2b32978ae905..ad58e565fe98 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -100,7 +100,7 @@ ENTRY(iwmmxt_task_enable) get_thread_info r10 #endif 4: dec_preempt_count r10, r3 - mov pc, r9 @ normal exit from exception + ret r9 @ normal exit from exception concan_save: @@ -144,7 +144,7 @@ concan_dump: wstrd wR15, [r1, #MMX_WR15] 2: teq r0, #0 @ anything to load? - moveq pc, lr @ if not, return + reteq lr @ if not, return concan_load: @@ -177,10 +177,10 @@ concan_load: @ clear CUP/MUP (only if r1 != 0) teq r1, #0 mov r2, #0 - moveq pc, lr + reteq lr tmcr wCon, r2 - mov pc, lr + ret lr /* * Back up Concan regs to save area and disable access to them @@ -266,7 +266,7 @@ ENTRY(iwmmxt_task_copy) mov r3, lr @ preserve return address bl concan_dump msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 /* * Restore Concan state from given memory address @@ -302,7 +302,7 @@ ENTRY(iwmmxt_task_restore) mov r3, lr @ preserve return address bl concan_load msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 /* * Concan handling on task switch @@ -324,7 +324,7 @@ ENTRY(iwmmxt_task_switch) add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area ldr r2, [r2] @ get current Concan owner teq r2, r3 @ next task owns it? - movne pc, lr @ no: leave Concan disabled + retne lr @ no: leave Concan disabled 1: @ flip Concan access XSC(eor r1, r1, #0x3) @@ -351,7 +351,7 @@ ENTRY(iwmmxt_task_release) eors r0, r0, r1 @ if equal... streq r0, [r3] @ then clear ownership msr cpsr_c, r2 @ restore interrupts - mov pc, lr + ret lr .data concan_owner: diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 4238bcba9d60..ae3e216a52c2 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -621,10 +621,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) return; } - fr.fp = regs->ARM_fp; - fr.sp = regs->ARM_sp; - fr.lr = regs->ARM_lr; - fr.pc = regs->ARM_pc; + arm_get_current_stackframe(regs, &fr); walk_stackframe(&fr, callchain_trace, entry); } diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c index af9e35e8836f..c02c2e8c877d 100644 --- a/arch/arm/kernel/perf_event_cpu.c +++ b/arch/arm/kernel/perf_event_cpu.c @@ -250,40 +250,39 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = { static int probe_current_pmu(struct arm_pmu *pmu) { int cpu = get_cpu(); - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); int ret = -ENODEV; pr_info("probing PMU on CPU %d\n", cpu); + switch (read_cpuid_part()) { /* ARM Ltd CPUs. */ - if (implementor == ARM_CPU_IMP_ARM) { - switch (part_number) { - case ARM_CPU_PART_ARM1136: - case ARM_CPU_PART_ARM1156: - case ARM_CPU_PART_ARM1176: - ret = armv6pmu_init(pmu); - break; - case ARM_CPU_PART_ARM11MPCORE: - ret = armv6mpcore_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A8: - ret = armv7_a8_pmu_init(pmu); - break; - case ARM_CPU_PART_CORTEX_A9: - ret = armv7_a9_pmu_init(pmu); - break; - } - /* Intel CPUs [xscale]. */ - } else if (implementor == ARM_CPU_IMP_INTEL) { - switch (xscale_cpu_arch_version()) { - case ARM_CPU_XSCALE_ARCH_V1: - ret = xscale1pmu_init(pmu); - break; - case ARM_CPU_XSCALE_ARCH_V2: - ret = xscale2pmu_init(pmu); - break; + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: + ret = armv6pmu_init(pmu); + break; + case ARM_CPU_PART_ARM11MPCORE: + ret = armv6mpcore_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A8: + ret = armv7_a8_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A9: + ret = armv7_a9_pmu_init(pmu); + break; + + default: + if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: + ret = xscale1pmu_init(pmu); + break; + case ARM_CPU_XSCALE_ARCH_V2: + ret = xscale2pmu_init(pmu); + break; + } } + break; } put_cpu(); diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index 95858966d84e..35e72585ec1d 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -3,6 +3,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/kexec.h> .align 3 /* not needed for this code, but keeps fncpy() happy */ @@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel) mov r0,#0 ldr r1,kexec_mach_type ldr r2,kexec_boot_atags - ARM( mov pc, lr ) + ARM( ret lr ) THUMB( bx lr ) .align diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 1b880db2a033..e1e60e5a7a27 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu) instr_sync mov r0, r0 mov r0, r0 - mov pc, r3 @ jump to virtual address + ret r3 @ jump to virtual address ENDPROC(cpu_resume_mmu) .popsection cpu_resume_after_mmu: diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 1aafa0d785eb..72f9241ad5db 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -17,6 +17,8 @@ #include <asm/cputype.h> #define SCU_CTRL 0x00 +#define SCU_ENABLE (1 << 0) +#define SCU_STANDBY_ENABLE (1 << 5) #define SCU_CONFIG 0x04 #define SCU_CPU_STATUS 0x08 #define SCU_INVALIDATE 0x0c @@ -50,10 +52,16 @@ void scu_enable(void __iomem *scu_base) scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); /* already enabled? */ - if (scu_ctrl & 1) + if (scu_ctrl & SCU_ENABLE) return; - scu_ctrl |= 1; + scu_ctrl |= SCU_ENABLE; + + /* Cortex-A9 earlier than r2p0 has no standby bit in SCU */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 && + (read_cpuid_id() & 0x00f0000f) >= 0x00200000) + scu_ctrl |= SCU_STANDBY_ENABLE; + writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); /* diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index 95d063620b76..2e72be4f623e 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -92,15 +92,19 @@ void erratum_a15_798181_init(void) unsigned int midr = read_cpuid_id(); unsigned int revidr = read_cpuid(CPUID_REVIDR); - /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ - if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 || - (revidr & 0x210) == 0x210) { - return; - } - if (revidr & 0x10) - erratum_a15_798181_handler = erratum_a15_798181_partial; - else + /* Brahma-B15 r0p0..r0p2 affected + * Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ + if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2) erratum_a15_798181_handler = erratum_a15_798181_broadcast; + else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 && + (revidr & 0x210) != 0x210) { + if (revidr & 0x10) + erratum_a15_798181_handler = + erratum_a15_798181_partial; + else + erratum_a15_798181_handler = + erratum_a15_798181_broadcast; + } } #endif diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 829a96d4a179..0cc7e58c47cc 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -50,10 +50,7 @@ unsigned long profile_pc(struct pt_regs *regs) if (!in_lock_functions(regs->ARM_pc)) return regs->ARM_pc; - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; - frame.pc = regs->ARM_pc; + arm_get_current_stackframe(regs, &frame); do { int ret = unwind_frame(&frame); if (ret < 0) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index abd2fc067736..c8e4bb714944 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -31,11 +31,13 @@ #include <asm/exception.h> #include <asm/unistd.h> #include <asm/traps.h> +#include <asm/ptrace.h> #include <asm/unwind.h> #include <asm/tls.h> #include <asm/system_misc.h> #include <asm/opcodes.h> + static const char *handler[]= { "prefetch abort", "data abort", @@ -184,7 +186,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) tsk = current; if (regs) { - fp = regs->ARM_fp; + fp = frame_pointer(regs); mode = processor_mode(regs); } else if (tsk != current) { fp = thread_saved_fp(tsk); @@ -719,7 +721,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) dump_instr("", regs); if (user_mode(regs)) { __show_regs(regs); - c_backtrace(regs->ARM_fp, processor_mode(regs)); + c_backtrace(frame_pointer(regs), processor_mode(regs)); } } #endif diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index e67682f02cb2..a61a1dfbb0db 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -479,12 +479,10 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk) tsk = current; if (regs) { - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; + arm_get_current_stackframe(regs, &frame); /* PC might be corrupted, use LR in that case. */ - frame.pc = kernel_text_address(regs->ARM_pc) - ? regs->ARM_pc : regs->ARM_lr; + if (!kernel_text_address(regs->ARM_pc)) + frame.pc = regs->ARM_lr; } else if (tsk == current) { frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 7bcee5c9b604..6f57cb94367f 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -318,7 +318,6 @@ SECTIONS _end = .; STABS_DEBUG - .comment 0 : { *(.comment) } } /* diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index b23a59c1c522..70bf49b8b244 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -274,13 +274,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, int __attribute_const__ kvm_target_cpu(void) { - unsigned long implementor = read_cpuid_implementor(); - unsigned long part_number = read_cpuid_part_number(); - - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; - - switch (part_number) { + switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A7: return KVM_ARM_TARGET_CORTEX_A7; case ARM_CPU_PART_CORTEX_A15: diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1b9844d369cc..b2d229f09c07 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -17,6 +17,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/unified.h> #include <asm/asm-offsets.h> #include <asm/kvm_asm.h> @@ -134,7 +135,7 @@ phase2: ldr r0, =TRAMPOLINE_VA adr r1, target bfi r0, r1, #0, #PAGE_SHIFT - mov pc, r0 + ret r0 target: @ We're now in the trampoline code, switch page tables mcrr p15, 4, r2, r3, c2 diff --git a/arch/arm/lib/ashldi3.S b/arch/arm/lib/ashldi3.S index 638deb13da1c..b05e95840651 100644 --- a/arch/arm/lib/ashldi3.S +++ b/arch/arm/lib/ashldi3.S @@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl) THUMB( lsrmi r3, al, ip ) THUMB( orrmi ah, ah, r3 ) mov al, al, lsl r2 - mov pc, lr + ret lr ENDPROC(__ashldi3) ENDPROC(__aeabi_llsl) diff --git a/arch/arm/lib/ashrdi3.S b/arch/arm/lib/ashrdi3.S index 015e8aa5a1d1..275d7d2341a4 100644 --- a/arch/arm/lib/ashrdi3.S +++ b/arch/arm/lib/ashrdi3.S @@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr) THUMB( lslmi r3, ah, ip ) THUMB( orrmi al, al, r3 ) mov ah, ah, asr r2 - mov pc, lr + ret lr ENDPROC(__ashrdi3) ENDPROC(__aeabi_lasr) diff --git a/arch/arm/lib/backtrace.S b/arch/arm/lib/backtrace.S index 4102be617fce..fab5a50503ae 100644 --- a/arch/arm/lib/backtrace.S +++ b/arch/arm/lib/backtrace.S @@ -25,7 +25,7 @@ ENTRY(c_backtrace) #if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK) - mov pc, lr + ret lr ENDPROC(c_backtrace) #else stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location... diff --git a/arch/arm/lib/bitops.h b/arch/arm/lib/bitops.h index 9f12ed1eea86..7d807cfd8ef5 100644 --- a/arch/arm/lib/bitops.h +++ b/arch/arm/lib/bitops.h @@ -1,3 +1,4 @@ +#include <asm/assembler.h> #include <asm/unwind.h> #if __LINUX_ARM_ARCH__ >= 6 @@ -70,7 +71,7 @@ UNWIND( .fnstart ) \instr r2, r2, r3 str r2, [r1, r0, lsl #2] restore_irqs ip - mov pc, lr + ret lr UNWIND( .fnend ) ENDPROC(\name ) .endm @@ -98,7 +99,7 @@ UNWIND( .fnstart ) \store r2, [r1] moveq r0, #0 restore_irqs ip - mov pc, lr + ret lr UNWIND( .fnend ) ENDPROC(\name ) .endm diff --git a/arch/arm/lib/bswapsdi2.S b/arch/arm/lib/bswapsdi2.S index 9fcdd154eff9..07cda737bb11 100644 --- a/arch/arm/lib/bswapsdi2.S +++ b/arch/arm/lib/bswapsdi2.S @@ -1,4 +1,5 @@ #include <linux/linkage.h> +#include <asm/assembler.h> #if __LINUX_ARM_ARCH__ >= 6 ENTRY(__bswapsi2) @@ -18,7 +19,7 @@ ENTRY(__bswapsi2) mov r3, r3, lsr #8 bic r3, r3, #0xff00 eor r0, r3, r0, ror #8 - mov pc, lr + ret lr ENDPROC(__bswapsi2) ENTRY(__bswapdi2) @@ -31,6 +32,6 @@ ENTRY(__bswapdi2) bic r1, r1, #0xff00 eor r1, r1, r0, ror #8 eor r0, r3, ip, ror #8 - mov pc, lr + ret lr ENDPROC(__bswapdi2) #endif diff --git a/arch/arm/lib/call_with_stack.S b/arch/arm/lib/call_with_stack.S index 916c80f13ae7..ed1a421813cb 100644 --- a/arch/arm/lib/call_with_stack.S +++ b/arch/arm/lib/call_with_stack.S @@ -36,9 +36,9 @@ ENTRY(call_with_stack) mov r0, r1 adr lr, BSYM(1f) - mov pc, r2 + ret r2 1: ldr lr, [sp] ldr sp, [sp, #4] - mov pc, lr + ret lr ENDPROC(call_with_stack) diff --git a/arch/arm/lib/csumpartial.S b/arch/arm/lib/csumpartial.S index 31d3cb34740d..984e0f29d548 100644 --- a/arch/arm/lib/csumpartial.S +++ b/arch/arm/lib/csumpartial.S @@ -97,7 +97,7 @@ td3 .req lr #endif #endif adcnes sum, sum, td0 @ update checksum - mov pc, lr + ret lr ENTRY(csum_partial) stmfd sp!, {buf, lr} diff --git a/arch/arm/lib/csumpartialcopygeneric.S b/arch/arm/lib/csumpartialcopygeneric.S index d6e742d24007..10b45909610c 100644 --- a/arch/arm/lib/csumpartialcopygeneric.S +++ b/arch/arm/lib/csumpartialcopygeneric.S @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <asm/assembler.h> /* * unsigned int @@ -40,7 +41,7 @@ sum .req r3 adcs sum, sum, ip, put_byte_1 @ update checksum strb ip, [dst], #1 tst dst, #2 - moveq pc, lr @ dst is now 32bit aligned + reteq lr @ dst is now 32bit aligned .Ldst_16bit: load2b r8, ip sub len, len, #2 @@ -48,7 +49,7 @@ sum .req r3 strb r8, [dst], #1 adcs sum, sum, ip, put_byte_1 strb ip, [dst], #1 - mov pc, lr @ dst is now 32bit aligned + ret lr @ dst is now 32bit aligned /* * Handle 0 to 7 bytes, with any alignment of source and diff --git a/arch/arm/lib/delay-loop.S b/arch/arm/lib/delay-loop.S index bc1033b897b4..518bf6e93f78 100644 --- a/arch/arm/lib/delay-loop.S +++ b/arch/arm/lib/delay-loop.S @@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 mul r0, r2, r0 @ max = 2^32-1 add r0, r0, r1, lsr #32-6 movs r0, r0, lsr #6 - moveq pc, lr + reteq lr /* * loops = r0 * HZ * loops_per_jiffy / 1000000 @@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06 ENTRY(__loop_delay) subs r0, r0, #1 #if 0 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 - movls pc, lr + retls lr subs r0, r0, #1 #endif bhi __loop_delay - mov pc, lr + ret lr ENDPROC(__loop_udelay) ENDPROC(__loop_const_udelay) ENDPROC(__loop_delay) diff --git a/arch/arm/lib/div64.S b/arch/arm/lib/div64.S index e55c4842c290..a9eafe4981eb 100644 --- a/arch/arm/lib/div64.S +++ b/arch/arm/lib/div64.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/unwind.h> #ifdef __ARMEB__ @@ -97,7 +98,7 @@ UNWIND(.fnstart) mov yl, #0 cmpeq xl, r4 movlo xh, xl - movlo pc, lr + retlo lr @ The division loop for lower bit positions. @ Here we shift remainer bits leftwards rather than moving the @@ -111,14 +112,14 @@ UNWIND(.fnstart) subcs xh, xh, r4 movs ip, ip, lsr #1 bne 4b - mov pc, lr + ret lr @ The top part of remainder became zero. If carry is set @ (the 33th bit) this is a false positive so resume the loop. @ Otherwise, if lower part is also null then we are done. 6: bcs 5b cmp xl, #0 - moveq pc, lr + reteq lr @ We still have remainer bits in the low part. Bring them up. @@ -144,7 +145,7 @@ UNWIND(.fnstart) movs ip, ip, lsr #1 mov xh, #1 bne 4b - mov pc, lr + ret lr 8: @ Division by a power of 2: determine what that divisor order is @ then simply shift values around @@ -184,13 +185,13 @@ UNWIND(.fnstart) THUMB( orr yl, yl, xh ) mov xh, xl, lsl ip mov xh, xh, lsr ip - mov pc, lr + ret lr @ eq -> division by 1: obvious enough... 9: moveq yl, xl moveq yh, xh moveq xh, #0 - moveq pc, lr + reteq lr UNWIND(.fnend) UNWIND(.fnstart) diff --git a/arch/arm/lib/findbit.S b/arch/arm/lib/findbit.S index 64f6bc1a9132..7848780e8834 100644 --- a/arch/arm/lib/findbit.S +++ b/arch/arm/lib/findbit.S @@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_zero_bit_le) /* @@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_bit_le) /* @@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_zero_bit_be) ENTRY(_find_next_zero_bit_be) @@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be) 2: cmp r2, r1 @ any more? blo 1b 3: mov r0, r1 @ no free bits - mov pc, lr + ret lr ENDPROC(_find_first_bit_be) ENTRY(_find_next_bit_be) @@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be) #endif cmp r1, r0 @ Clamp to maxbit movlo r0, r1 - mov pc, lr + ret lr diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 9b06bb41fca6..938600098b88 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -18,7 +18,7 @@ * Inputs: r0 contains the address * r1 contains the address limit, which must be preserved * Outputs: r0 is the error code - * r2 contains the zero-extended value + * r2, r3 contains the zero-extended value * lr corrupted * * No other registers must be altered. (see <asm/uaccess.h> @@ -36,7 +36,7 @@ ENTRY(__get_user_1) check_uaccess r0, 1, r1, r2, __get_user_bad 1: TUSER(ldrb) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__get_user_1) ENTRY(__get_user_2) @@ -56,25 +56,60 @@ rb .req r0 orr r2, rb, r2, lsl #8 #endif mov r0, #0 - mov pc, lr + ret lr ENDPROC(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad 4: TUSER(ldr) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__get_user_4) +ENTRY(__get_user_8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_THUMB2_KERNEL +5: TUSER(ldr) r2, [r0] +6: TUSER(ldr) r3, [r0, #4] +#else +5: TUSER(ldr) r2, [r0], #4 +6: TUSER(ldr) r3, [r0] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_8) + +#ifdef __ARMEB__ +ENTRY(__get_user_lo8) + check_uaccess r0, 8, r1, r2, __get_user_bad +#ifdef CONFIG_CPU_USE_DOMAINS + add r0, r0, #4 +7: ldrt r2, [r0] +#else +7: ldr r2, [r0, #4] +#endif + mov r0, #0 + ret lr +ENDPROC(__get_user_lo8) +#endif + +__get_user_bad8: + mov r3, #0 __get_user_bad: mov r2, #0 mov r0, #-EFAULT - mov pc, lr + ret lr ENDPROC(__get_user_bad) +ENDPROC(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad .long 2b, __get_user_bad .long 3b, __get_user_bad .long 4b, __get_user_bad + .long 5b, __get_user_bad8 + .long 6b, __get_user_bad8 +#ifdef __ARMEB__ + .long 7b, __get_user_bad +#endif .popsection diff --git a/arch/arm/lib/io-readsb.S b/arch/arm/lib/io-readsb.S index 9f4238987fe9..c31b2f3153f1 100644 --- a/arch/arm/lib/io-readsb.S +++ b/arch/arm/lib/io-readsb.S @@ -25,7 +25,7 @@ ENTRY(__raw_readsb) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne .Linsb_align diff --git a/arch/arm/lib/io-readsl.S b/arch/arm/lib/io-readsl.S index 7a7430950c79..2ed86fa5465f 100644 --- a/arch/arm/lib/io-readsl.S +++ b/arch/arm/lib/io-readsl.S @@ -12,7 +12,7 @@ ENTRY(__raw_readsl) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne 3f @@ -33,7 +33,7 @@ ENTRY(__raw_readsl) stmcsia r1!, {r3, ip} ldrne r3, [r0, #0] strne r3, [r1, #0] - mov pc, lr + ret lr 3: ldr r3, [r0] cmp ip, #2 @@ -75,5 +75,5 @@ ENTRY(__raw_readsl) strb r3, [r1, #1] 8: mov r3, ip, get_byte_0 strb r3, [r1, #0] - mov pc, lr + ret lr ENDPROC(__raw_readsl) diff --git a/arch/arm/lib/io-readsw-armv3.S b/arch/arm/lib/io-readsw-armv3.S index 88487c8c4f23..413da9914529 100644 --- a/arch/arm/lib/io-readsw-armv3.S +++ b/arch/arm/lib/io-readsw-armv3.S @@ -27,11 +27,11 @@ strb r3, [r1], #1 subs r2, r2, #1 - moveq pc, lr + reteq lr ENTRY(__raw_readsw) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr tst r1, #3 bne .Linsw_align diff --git a/arch/arm/lib/io-readsw-armv4.S b/arch/arm/lib/io-readsw-armv4.S index 1f393d42593d..d9a45e9692ae 100644 --- a/arch/arm/lib/io-readsw-armv4.S +++ b/arch/arm/lib/io-readsw-armv4.S @@ -26,7 +26,7 @@ ENTRY(__raw_readsw) teq r2, #0 - moveq pc, lr + reteq lr tst r1, #3 bne .Linsw_align diff --git a/arch/arm/lib/io-writesb.S b/arch/arm/lib/io-writesb.S index 68b92f4acaeb..a46bbc9b168b 100644 --- a/arch/arm/lib/io-writesb.S +++ b/arch/arm/lib/io-writesb.S @@ -45,7 +45,7 @@ ENTRY(__raw_writesb) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne .Loutsb_align diff --git a/arch/arm/lib/io-writesl.S b/arch/arm/lib/io-writesl.S index d0d104a0dd11..4ea2435988c1 100644 --- a/arch/arm/lib/io-writesl.S +++ b/arch/arm/lib/io-writesl.S @@ -12,7 +12,7 @@ ENTRY(__raw_writesl) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr ands ip, r1, #3 bne 3f @@ -33,7 +33,7 @@ ENTRY(__raw_writesl) ldrne r3, [r1, #0] strcs ip, [r0, #0] strne r3, [r0, #0] - mov pc, lr + ret lr 3: bic r1, r1, #3 ldr r3, [r1], #4 @@ -47,7 +47,7 @@ ENTRY(__raw_writesl) orr ip, ip, r3, lspush #16 str ip, [r0] bne 4b - mov pc, lr + ret lr 5: mov ip, r3, lspull #8 ldr r3, [r1], #4 @@ -55,7 +55,7 @@ ENTRY(__raw_writesl) orr ip, ip, r3, lspush #24 str ip, [r0] bne 5b - mov pc, lr + ret lr 6: mov ip, r3, lspull #24 ldr r3, [r1], #4 @@ -63,5 +63,5 @@ ENTRY(__raw_writesl) orr ip, ip, r3, lspush #8 str ip, [r0] bne 6b - mov pc, lr + ret lr ENDPROC(__raw_writesl) diff --git a/arch/arm/lib/io-writesw-armv3.S b/arch/arm/lib/io-writesw-armv3.S index 49b800419e32..121789eb6802 100644 --- a/arch/arm/lib/io-writesw-armv3.S +++ b/arch/arm/lib/io-writesw-armv3.S @@ -28,11 +28,11 @@ orr r3, r3, r3, lsl #16 str r3, [r0] subs r2, r2, #1 - moveq pc, lr + reteq lr ENTRY(__raw_writesw) teq r2, #0 @ do we have to check for the zero len? - moveq pc, lr + reteq lr tst r1, #3 bne .Loutsw_align diff --git a/arch/arm/lib/io-writesw-armv4.S b/arch/arm/lib/io-writesw-armv4.S index ff4f71b579ee..269f90c51ad2 100644 --- a/arch/arm/lib/io-writesw-armv4.S +++ b/arch/arm/lib/io-writesw-armv4.S @@ -31,7 +31,7 @@ ENTRY(__raw_writesw) teq r2, #0 - moveq pc, lr + reteq lr ands r3, r1, #3 bne .Loutsw_align @@ -96,5 +96,5 @@ ENTRY(__raw_writesw) tst r2, #1 3: movne ip, r3, lsr #8 strneh ip, [r0] - mov pc, lr + ret lr ENDPROC(__raw_writesw) diff --git a/arch/arm/lib/lib1funcs.S b/arch/arm/lib/lib1funcs.S index c562f649734c..947567ff67f9 100644 --- a/arch/arm/lib/lib1funcs.S +++ b/arch/arm/lib/lib1funcs.S @@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv) UNWIND(.fnstart) subs r2, r1, #1 - moveq pc, lr + reteq lr bcc Ldiv0 cmp r0, r1 bls 11f @@ -220,16 +220,16 @@ UNWIND(.fnstart) ARM_DIV_BODY r0, r1, r2, r3 mov r0, r2 - mov pc, lr + ret lr 11: moveq r0, #1 movne r0, #0 - mov pc, lr + ret lr 12: ARM_DIV2_ORDER r1, r2 mov r0, r0, lsr r2 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__udivsi3) @@ -244,11 +244,11 @@ UNWIND(.fnstart) moveq r0, #0 tsthi r1, r2 @ see if divisor is power of 2 andeq r0, r0, r2 - movls pc, lr + retls lr ARM_MOD_BODY r0, r1, r2, r3 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__umodsi3) @@ -274,23 +274,23 @@ UNWIND(.fnstart) cmp ip, #0 rsbmi r0, r0, #0 - mov pc, lr + ret lr 10: teq ip, r0 @ same sign ? rsbmi r0, r0, #0 - mov pc, lr + ret lr 11: movlo r0, #0 moveq r0, ip, asr #31 orreq r0, r0, #1 - mov pc, lr + ret lr 12: ARM_DIV2_ORDER r1, r2 cmp ip, #0 mov r0, r3, lsr r2 rsbmi r0, r0, #0 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__divsi3) @@ -315,7 +315,7 @@ UNWIND(.fnstart) 10: cmp ip, #0 rsbmi r0, r0, #0 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__modsi3) @@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr} ) ldmfd sp!, {r1, r2, ip, lr} mul r3, r0, r2 sub r1, r1, r3 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__aeabi_uidivmod) @@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr} ) ldmfd sp!, {r1, r2, ip, lr} mul r3, r0, r2 sub r1, r1, r3 - mov pc, lr + ret lr UNWIND(.fnend) ENDPROC(__aeabi_idivmod) diff --git a/arch/arm/lib/lshrdi3.S b/arch/arm/lib/lshrdi3.S index f83d449141f7..922dcd88b02b 100644 --- a/arch/arm/lib/lshrdi3.S +++ b/arch/arm/lib/lshrdi3.S @@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define al r1 @@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr) THUMB( lslmi r3, ah, ip ) THUMB( orrmi al, al, r3 ) mov ah, ah, lsr r2 - mov pc, lr + ret lr ENDPROC(__lshrdi3) ENDPROC(__aeabi_llsr) diff --git a/arch/arm/lib/memchr.S b/arch/arm/lib/memchr.S index 1da86991d700..74a5bed6d999 100644 --- a/arch/arm/lib/memchr.S +++ b/arch/arm/lib/memchr.S @@ -22,5 +22,5 @@ ENTRY(memchr) bne 1b sub r0, r0, #1 2: movne r0, #0 - mov pc, lr + ret lr ENDPROC(memchr) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 94b0650ea98f..671455c854fa 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -110,7 +110,7 @@ ENTRY(memset) strneb r1, [ip], #1 tst r2, #1 strneb r1, [ip], #1 - mov pc, lr + ret lr 6: subs r2, r2, #4 @ 1 do we have enough blt 5b @ 1 bytes to align with? diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S index 3fbdef5f802a..385ccb306fa2 100644 --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -121,5 +121,5 @@ ENTRY(__memzero) strneb r2, [r0], #1 @ 1 tst r1, #1 @ 1 a byte left over strneb r2, [r0], #1 @ 1 - mov pc, lr @ 1 + ret lr @ 1 ENDPROC(__memzero) diff --git a/arch/arm/lib/muldi3.S b/arch/arm/lib/muldi3.S index 36c91b4957e2..204305956925 100644 --- a/arch/arm/lib/muldi3.S +++ b/arch/arm/lib/muldi3.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define xh r0 @@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul) adc xh, xh, yh, lsr #16 adds xl, xl, ip, lsl #16 adc xh, xh, ip, lsr #16 - mov pc, lr + ret lr ENDPROC(__muldi3) ENDPROC(__aeabi_lmul) diff --git a/arch/arm/lib/putuser.S b/arch/arm/lib/putuser.S index 3d73dcb959b0..38d660d3705f 100644 --- a/arch/arm/lib/putuser.S +++ b/arch/arm/lib/putuser.S @@ -36,7 +36,7 @@ ENTRY(__put_user_1) check_uaccess r0, 1, r1, ip, __put_user_bad 1: TUSER(strb) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_1) ENTRY(__put_user_2) @@ -60,14 +60,14 @@ ENTRY(__put_user_2) #endif #endif /* CONFIG_THUMB2_KERNEL */ mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_2) ENTRY(__put_user_4) check_uaccess r0, 4, r1, ip, __put_user_bad 4: TUSER(str) r2, [r0] mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_4) ENTRY(__put_user_8) @@ -80,12 +80,12 @@ ENTRY(__put_user_8) 6: TUSER(str) r3, [r0] #endif mov r0, #0 - mov pc, lr + ret lr ENDPROC(__put_user_8) __put_user_bad: mov r0, #-EFAULT - mov pc, lr + ret lr ENDPROC(__put_user_bad) .pushsection __ex_table, "a" diff --git a/arch/arm/lib/strchr.S b/arch/arm/lib/strchr.S index d8f2a1c1aea4..013d64c71e8d 100644 --- a/arch/arm/lib/strchr.S +++ b/arch/arm/lib/strchr.S @@ -23,5 +23,5 @@ ENTRY(strchr) teq r2, r1 movne r0, #0 subeq r0, r0, #1 - mov pc, lr + ret lr ENDPROC(strchr) diff --git a/arch/arm/lib/strrchr.S b/arch/arm/lib/strrchr.S index 302f20cd2423..3cec1c7482c4 100644 --- a/arch/arm/lib/strrchr.S +++ b/arch/arm/lib/strrchr.S @@ -22,5 +22,5 @@ ENTRY(strrchr) teq r2, #0 bne 1b mov r0, r3 - mov pc, lr + ret lr ENDPROC(strrchr) diff --git a/arch/arm/lib/ucmpdi2.S b/arch/arm/lib/ucmpdi2.S index f0df6a91db04..ad4a6309141a 100644 --- a/arch/arm/lib/ucmpdi2.S +++ b/arch/arm/lib/ucmpdi2.S @@ -11,6 +11,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #ifdef __ARMEB__ #define xh r0 @@ -31,7 +32,7 @@ ENTRY(__ucmpdi2) movlo r0, #0 moveq r0, #1 movhi r0, #2 - mov pc, lr + ret lr ENDPROC(__ucmpdi2) @@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp) movlo r0, #-1 moveq r0, #0 movhi r0, #1 - mov pc, lr + ret lr ENDPROC(__aeabi_ulcmp) diff --git a/arch/arm/mach-davinci/sleep.S b/arch/arm/mach-davinci/sleep.S index d4e9316ecacb..a5336a5e2739 100644 --- a/arch/arm/mach-davinci/sleep.S +++ b/arch/arm/mach-davinci/sleep.S @@ -213,7 +213,7 @@ ddr2clk_stop_done: cmp ip, r0 bne ddr2clk_stop_done - mov pc, lr + ret lr ENDPROC(davinci_ddr_psc_config) CACHE_FLUSH: diff --git a/arch/arm/mach-ebsa110/include/mach/memory.h b/arch/arm/mach-ebsa110/include/mach/memory.h index 8e49066ad850..866f8a1c6ff7 100644 --- a/arch/arm/mach-ebsa110/include/mach/memory.h +++ b/arch/arm/mach-ebsa110/include/mach/memory.h @@ -17,11 +17,6 @@ #define __ASM_ARCH_MEMORY_H /* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - -/* * Cache flushing area - SRAM */ #define FLUSH_BASE_PHYS 0x40000000 diff --git a/arch/arm/mach-ep93xx/crunch-bits.S b/arch/arm/mach-ep93xx/crunch-bits.S index e96923a3017b..ee0be2af5c61 100644 --- a/arch/arm/mach-ep93xx/crunch-bits.S +++ b/arch/arm/mach-ep93xx/crunch-bits.S @@ -198,7 +198,7 @@ crunch_load: get_thread_info r10 #endif 2: dec_preempt_count r10, r3 - mov pc, lr + ret lr /* * Back up crunch regs to save area and disable access to them @@ -277,7 +277,7 @@ ENTRY(crunch_task_copy) mov r3, lr @ preserve return address bl crunch_save msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 /* * Restore crunch state from given memory address @@ -310,4 +310,4 @@ ENTRY(crunch_task_restore) mov r3, lr @ preserve return address bl crunch_load msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 + ret r3 diff --git a/arch/arm/mach-ep93xx/include/mach/memory.h b/arch/arm/mach-ep93xx/include/mach/memory.h deleted file mode 100644 index c9400cf0051c..000000000000 --- a/arch/arm/mach-ep93xx/include/mach/memory.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * arch/arm/mach-ep93xx/include/mach/memory.h - */ - -#ifndef __ASM_ARCH_MEMORY_H -#define __ASM_ARCH_MEMORY_H - -#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0x00000000) -#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xc0000000) -#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xd0000000) -#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xe0000000) -#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET) -#define PLAT_PHYS_OFFSET UL(0xf0000000) -#else -#error "Kconfig bug: No EP93xx PHYS_OFFSET set" -#endif - -#endif diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 8f9b66c4ac78..5d4ff6571dcd 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -119,6 +119,7 @@ config EXYNOS5420_MCPM bool "Exynos5420 Multi-Cluster PM support" depends on MCPM && SOC_EXYNOS5420 select ARM_CCI + select ARM_CPU_SUSPEND help This is needed to provide CPU and cluster power management on Exynos5420 implementing big.LITTLE. diff --git a/arch/arm/mach-exynos/hotplug.c b/arch/arm/mach-exynos/hotplug.c index 69fa48397394..8a134d019cb3 100644 --- a/arch/arm/mach-exynos/hotplug.c +++ b/arch/arm/mach-exynos/hotplug.c @@ -46,13 +46,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious) if (cpu == 1) exynos_cpu_power_down(cpu); - /* - * here's the WFI - */ - asm(".word 0xe320f003\n" - : - : - : "memory", "cc"); + wfi(); if (pen_release == cpu_logical_map(cpu)) { /* diff --git a/arch/arm/mach-exynos/mcpm-exynos.c b/arch/arm/mach-exynos/mcpm-exynos.c index 0498d0b887ef..a96b78f93f2b 100644 --- a/arch/arm/mach-exynos/mcpm-exynos.c +++ b/arch/arm/mach-exynos/mcpm-exynos.c @@ -25,7 +25,6 @@ #define EXYNOS5420_CPUS_PER_CLUSTER 4 #define EXYNOS5420_NR_CLUSTERS 2 -#define MCPM_BOOT_ADDR_OFFSET 0x1c /* * The common v7_exit_coherency_flush API could not be used because of the @@ -197,7 +196,7 @@ static void exynos_power_down(void) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&exynos_mcpm_lock); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) { + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { /* * On the Cortex-A15 we need to disable * L2 prefetching before flushing the cache. @@ -290,6 +289,19 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level) "b cci_enable_port_for_self"); } +static void __init exynos_cache_off(void) +{ + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { + /* disable L2 prefetching on the Cortex-A15 */ + asm volatile( + "mcr p15, 1, %0, c15, c0, 3\n\t" + "isb\n\t" + "dsb" + : : "r" (0x400)); + } + exynos_v7_exit_coherency_flush(all); +} + static const struct of_device_id exynos_dt_mcpm_match[] = { { .compatible = "samsung,exynos5420" }, { .compatible = "samsung,exynos5800" }, @@ -333,6 +345,8 @@ static int __init exynos_mcpm_init(void) ret = mcpm_platform_register(&exynos_power_ops); if (!ret) ret = mcpm_sync_init(exynos_pm_power_up_setup); + if (!ret) + ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */ if (ret) { iounmap(ns_sram_base_addr); return ret; @@ -343,11 +357,13 @@ static int __init exynos_mcpm_init(void) pr_info("Exynos MCPM support installed\n"); /* - * Future entries into the kernel can now go - * through the cluster entry vectors. + * U-Boot SPL is hardcoded to jump to the start of ns_sram_base_addr + * as part of secondary_cpu_start(). Let's redirect it to the + * mcpm_entry_point(). */ - __raw_writel(virt_to_phys(mcpm_entry_point), - ns_sram_base_addr + MCPM_BOOT_ADDR_OFFSET); + __raw_writel(0xe59f0000, ns_sram_base_addr); /* ldr r0, [pc, #0] */ + __raw_writel(0xe12fff10, ns_sram_base_addr + 4); /* bx r0 */ + __raw_writel(virt_to_phys(mcpm_entry_point), ns_sram_base_addr + 8); iounmap(ns_sram_base_addr); diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c index 1c8d31e39520..8dc1d3a3a8bf 100644 --- a/arch/arm/mach-exynos/platsmp.c +++ b/arch/arm/mach-exynos/platsmp.c @@ -188,7 +188,7 @@ static void __init exynos_smp_init_cpus(void) void __iomem *scu_base = scu_base_addr(); unsigned int i, ncores; - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) ncores = scu_base ? scu_get_core_count(scu_base) : 1; else /* @@ -214,7 +214,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus) exynos_sysram_init(); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) scu_enable(scu_base_addr()); /* diff --git a/arch/arm/mach-exynos/pm.c b/arch/arm/mach-exynos/pm.c index 87c0d34c7fba..67d383de614f 100644 --- a/arch/arm/mach-exynos/pm.c +++ b/arch/arm/mach-exynos/pm.c @@ -300,7 +300,7 @@ static int exynos_pm_suspend(void) tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0); __raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION); - if (!soc_is_exynos5250()) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_cpu_save_register(); return 0; @@ -334,7 +334,7 @@ static void exynos_pm_resume(void) if (exynos_pm_central_resume()) goto early_wakeup; - if (!soc_is_exynos5250()) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) exynos_cpu_restore_register(); /* For release retention */ @@ -353,7 +353,7 @@ static void exynos_pm_resume(void) s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save)); - if (!soc_is_exynos5250()) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) scu_enable(S5P_VA_SCU); early_wakeup: @@ -440,15 +440,17 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self, case CPU_PM_ENTER: if (cpu == 0) { exynos_pm_central_suspend(); - exynos_cpu_save_register(); + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) + exynos_cpu_save_register(); } break; case CPU_PM_EXIT: if (cpu == 0) { - if (!soc_is_exynos5250()) + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) { scu_enable(S5P_VA_SCU); - exynos_cpu_restore_register(); + exynos_cpu_restore_register(); + } exynos_pm_central_resume(); } break; diff --git a/arch/arm/mach-footbridge/include/mach/memory.h b/arch/arm/mach-footbridge/include/mach/memory.h index 5c6df377f969..6f2ecccdf323 100644 --- a/arch/arm/mach-footbridge/include/mach/memory.h +++ b/arch/arm/mach-footbridge/include/mach/memory.h @@ -59,11 +59,6 @@ extern unsigned long __bus_to_pfn(unsigned long); */ #define FLUSH_BASE 0xf9000000 -/* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - #define FLUSH_BASE_PHYS 0x50000000 #endif diff --git a/arch/arm/mach-imx/clk-imx6sl.c b/arch/arm/mach-imx/clk-imx6sl.c index 21cf06cebade..5408ca70c8d6 100644 --- a/arch/arm/mach-imx/clk-imx6sl.c +++ b/arch/arm/mach-imx/clk-imx6sl.c @@ -312,6 +312,7 @@ static void __init imx6sl_clocks_init(struct device_node *ccm_node) clks[IMX6SL_CLK_ECSPI2] = imx_clk_gate2("ecspi2", "ecspi_root", base + 0x6c, 2); clks[IMX6SL_CLK_ECSPI3] = imx_clk_gate2("ecspi3", "ecspi_root", base + 0x6c, 4); clks[IMX6SL_CLK_ECSPI4] = imx_clk_gate2("ecspi4", "ecspi_root", base + 0x6c, 6); + clks[IMX6SL_CLK_ENET] = imx_clk_gate2("enet", "ipg", base + 0x6c, 10); clks[IMX6SL_CLK_EPIT1] = imx_clk_gate2("epit1", "perclk", base + 0x6c, 12); clks[IMX6SL_CLK_EPIT2] = imx_clk_gate2("epit2", "perclk", base + 0x6c, 14); clks[IMX6SL_CLK_EXTERN_AUDIO] = imx_clk_gate2("extern_audio", "extern_audio_podf", base + 0x6c, 16); diff --git a/arch/arm/mach-imx/suspend-imx6.S b/arch/arm/mach-imx/suspend-imx6.S index fe123b079c05..74b50f1982db 100644 --- a/arch/arm/mach-imx/suspend-imx6.S +++ b/arch/arm/mach-imx/suspend-imx6.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/hardware/cache-l2x0.h> #include "hardware.h" @@ -301,7 +302,7 @@ rbc_loop: resume_mmdc /* return to suspend finish */ - mov pc, lr + ret lr resume: /* invalidate L1 I-cache first */ @@ -325,7 +326,7 @@ resume: mov r5, #0x1 resume_mmdc - mov pc, lr + ret lr ENDPROC(imx6_suspend) /* diff --git a/arch/arm/mach-integrator/include/mach/memory.h b/arch/arm/mach-integrator/include/mach/memory.h index 334d5e271889..7268cb50ded0 100644 --- a/arch/arm/mach-integrator/include/mach/memory.h +++ b/arch/arm/mach-integrator/include/mach/memory.h @@ -20,11 +20,6 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -/* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - #define BUS_OFFSET UL(0x80000000) #define __virt_to_bus(x) ((x) - PAGE_OFFSET + BUS_OFFSET) #define __bus_to_virt(x) ((x) - BUS_OFFSET + PAGE_OFFSET) diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index dd0cc677d596..660ca6feff40 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -480,25 +480,18 @@ static const struct of_device_id ebi_match[] = { static void __init ap_init_of(void) { unsigned long sc_dec; - struct device_node *root; struct device_node *syscon; struct device_node *ebi; struct device *parent; struct soc_device *soc_dev; struct soc_device_attribute *soc_dev_attr; u32 ap_sc_id; - int err; int i; - /* Here we create an SoC device for the root node */ - root = of_find_node_by_path("/"); - if (!root) - return; - - syscon = of_find_matching_node(root, ap_syscon_match); + syscon = of_find_matching_node(NULL, ap_syscon_match); if (!syscon) return; - ebi = of_find_matching_node(root, ebi_match); + ebi = of_find_matching_node(NULL, ebi_match); if (!ebi) return; @@ -509,19 +502,17 @@ static void __init ap_init_of(void) if (!ebi_base) return; + of_platform_populate(NULL, of_default_bus_match_table, + ap_auxdata_lookup, NULL); + ap_sc_id = readl(ap_syscon_base); soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); if (!soc_dev_attr) return; - err = of_property_read_string(root, "compatible", - &soc_dev_attr->soc_id); - if (err) - return; - err = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (err) - return; + soc_dev_attr->soc_id = "XVC"; + soc_dev_attr->machine = "Integrator/AP"; soc_dev_attr->family = "Integrator"; soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%c", 'A' + (ap_sc_id & 0x0f)); @@ -536,9 +527,6 @@ static void __init ap_init_of(void) parent = soc_device_to_device(soc_dev); integrator_init_sysfs(parent, ap_sc_id); - of_platform_populate(root, of_default_bus_match_table, - ap_auxdata_lookup, parent); - sc_dec = readl(ap_syscon_base + INTEGRATOR_SC_DEC_OFFSET); for (i = 0; i < 4; i++) { struct lm_device *lmdev; diff --git a/arch/arm/mach-integrator/integrator_cp.c b/arch/arm/mach-integrator/integrator_cp.c index a938242b0c95..0e57f8f820a5 100644 --- a/arch/arm/mach-integrator/integrator_cp.c +++ b/arch/arm/mach-integrator/integrator_cp.c @@ -279,20 +279,13 @@ static const struct of_device_id intcp_syscon_match[] = { static void __init intcp_init_of(void) { - struct device_node *root; struct device_node *cpcon; struct device *parent; struct soc_device *soc_dev; struct soc_device_attribute *soc_dev_attr; u32 intcp_sc_id; - int err; - /* Here we create an SoC device for the root node */ - root = of_find_node_by_path("/"); - if (!root) - return; - - cpcon = of_find_matching_node(root, intcp_syscon_match); + cpcon = of_find_matching_node(NULL, intcp_syscon_match); if (!cpcon) return; @@ -300,19 +293,17 @@ static void __init intcp_init_of(void) if (!intcp_con_base) return; + of_platform_populate(NULL, of_default_bus_match_table, + intcp_auxdata_lookup, NULL); + intcp_sc_id = readl(intcp_con_base); soc_dev_attr = kzalloc(sizeof(*soc_dev_attr), GFP_KERNEL); if (!soc_dev_attr) return; - err = of_property_read_string(root, "compatible", - &soc_dev_attr->soc_id); - if (err) - return; - err = of_property_read_string(root, "model", &soc_dev_attr->machine); - if (err) - return; + soc_dev_attr->soc_id = "XCV"; + soc_dev_attr->machine = "Integrator/CP"; soc_dev_attr->family = "Integrator"; soc_dev_attr->revision = kasprintf(GFP_KERNEL, "%c", 'A' + (intcp_sc_id & 0x0f)); @@ -326,8 +317,6 @@ static void __init intcp_init_of(void) parent = soc_device_to_device(soc_dev); integrator_init_sysfs(parent, intcp_sc_id); - of_platform_populate(root, of_default_bus_match_table, - intcp_auxdata_lookup, parent); } static const char * intcp_dt_board_compat[] = { diff --git a/arch/arm/mach-iop13xx/include/mach/iop13xx.h b/arch/arm/mach-iop13xx/include/mach/iop13xx.h index 17b40279e0a4..9311ee2126d6 100644 --- a/arch/arm/mach-iop13xx/include/mach/iop13xx.h +++ b/arch/arm/mach-iop13xx/include/mach/iop13xx.h @@ -3,7 +3,7 @@ #ifndef __ASSEMBLY__ -#include <linux/reboot.h> +enum reboot_mode; /* The ATU offsets can change based on the strapping */ extern u32 iop13xx_atux_pmmr_offset; diff --git a/arch/arm/mach-iop13xx/include/mach/memory.h b/arch/arm/mach-iop13xx/include/mach/memory.h index 7c032d0ab24a..59307e787588 100644 --- a/arch/arm/mach-iop13xx/include/mach/memory.h +++ b/arch/arm/mach-iop13xx/include/mach/memory.h @@ -3,11 +3,6 @@ #include <mach/hardware.h> -/* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x00000000) - #ifndef __ASSEMBLY__ #if defined(CONFIG_ARCH_IOP13XX) diff --git a/arch/arm/mach-iop13xx/setup.c b/arch/arm/mach-iop13xx/setup.c index bca96f433495..53c316f7301e 100644 --- a/arch/arm/mach-iop13xx/setup.c +++ b/arch/arm/mach-iop13xx/setup.c @@ -20,6 +20,7 @@ #include <linux/dma-mapping.h> #include <linux/serial_8250.h> #include <linux/io.h> +#include <linux/reboot.h> #ifdef CONFIG_MTD_PHYSMAP #include <linux/mtd/physmap.h> #endif diff --git a/arch/arm/mach-ks8695/include/mach/memory.h b/arch/arm/mach-ks8695/include/mach/memory.h index 95e731a7ed6a..ab0d27fa8969 100644 --- a/arch/arm/mach-ks8695/include/mach/memory.h +++ b/arch/arm/mach-ks8695/include/mach/memory.h @@ -15,11 +15,6 @@ #include <mach/hardware.h> -/* - * Physical SRAM offset. - */ -#define PLAT_PHYS_OFFSET KS8695_SDRAM_PA - #ifndef __ASSEMBLY__ #ifdef CONFIG_PCI diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index 4a7c250c9a30..b9bc599a5fd0 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -10,6 +10,7 @@ menuconfig ARCH_MVEBU select ZONE_DMA if ARM_LPAE select ARCH_REQUIRE_GPIOLIB select PCI_QUIRKS if PCI + select OF_ADDRESS_PCI if ARCH_MVEBU @@ -17,6 +18,7 @@ config MACH_MVEBU_V7 bool select ARMADA_370_XP_TIMER select CACHE_L2X0 + select ARM_CPU_SUSPEND config MACH_ARMADA_370 bool "Marvell Armada 370 boards" if ARCH_MULTI_V7 diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S index 510c29e079ca..f5d881b5d0f7 100644 --- a/arch/arm/mach-mvebu/coherency_ll.S +++ b/arch/arm/mach-mvebu/coherency_ll.S @@ -46,7 +46,7 @@ ENTRY(ll_get_coherency_base) ldr r1, =coherency_base ldr r1, [r1] 2: - mov pc, lr + ret lr ENDPROC(ll_get_coherency_base) /* @@ -63,7 +63,7 @@ ENTRY(ll_get_coherency_cpumask) mov r2, #(1 << 24) lsl r3, r2, r3 ARM_BE8(rev r3, r3) - mov pc, lr + ret lr ENDPROC(ll_get_coherency_cpumask) /* @@ -94,7 +94,7 @@ ENTRY(ll_add_cpu_to_smp_group) strex r1, r2, [r0] cmp r1, #0 bne 1b - mov pc, lr + ret lr ENDPROC(ll_add_cpu_to_smp_group) ENTRY(ll_enable_coherency) @@ -118,7 +118,7 @@ ENTRY(ll_enable_coherency) bne 1b dsb mov r0, #0 - mov pc, lr + ret lr ENDPROC(ll_enable_coherency) ENTRY(ll_disable_coherency) @@ -141,7 +141,7 @@ ENTRY(ll_disable_coherency) cmp r1, #0 bne 1b dsb - mov pc, lr + ret lr ENDPROC(ll_disable_coherency) .align 2 diff --git a/arch/arm/mach-mvebu/headsmp-a9.S b/arch/arm/mach-mvebu/headsmp-a9.S index 5925366bc03c..7c91ddb6f1f7 100644 --- a/arch/arm/mach-mvebu/headsmp-a9.S +++ b/arch/arm/mach-mvebu/headsmp-a9.S @@ -14,6 +14,7 @@ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> __CPUINIT #define CPU_RESUME_ADDR_REG 0xf10182d4 @@ -24,7 +25,7 @@ armada_375_smp_cpu1_enable_code_start: ldr r0, [pc, #4] ldr r1, [r0] - mov pc, r1 + ret r1 .word CPU_RESUME_ADDR_REG armada_375_smp_cpu1_enable_code_end: diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 3c2530523111..058a4f7d44c5 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -6,11 +6,6 @@ #define __ASM_ARCH_MEMORY_H /* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x10000000) - -/* * Bus address is physical address, except for OMAP-1510 Local Bus. * OMAP-1510 bus address is translated into a Local Bus address if the * OMAP bus type is lbus. We do the address translation based on the diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index 9086ce03ae12..b84a0122d823 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/smp_scu.h> #include <asm/memory.h> #include <asm/hardware/cache-l2x0.h> @@ -334,7 +335,7 @@ ENDPROC(omap4_cpu_resume) #ifndef CONFIG_OMAP4_ERRATA_I688 ENTRY(omap_bus_sync) - mov pc, lr + ret lr ENDPROC(omap_bus_sync) #endif diff --git a/arch/arm/mach-omap2/sram242x.S b/arch/arm/mach-omap2/sram242x.S index 680a7c56cc3e..2c88ff2d0236 100644 --- a/arch/arm/mach-omap2/sram242x.S +++ b/arch/arm/mach-omap2/sram242x.S @@ -101,7 +101,7 @@ i_dll_wait: i_dll_delay: subs r4, r4, #0x1 bne i_dll_delay - mov pc, lr + ret lr /* * shift up or down voltage, use R9 as input to tell level. @@ -125,7 +125,7 @@ volt_delay: ldr r7, [r3] @ get timer value cmp r5, r7 @ time up? bhi volt_delay @ not yet->branch - mov pc, lr @ back to caller. + ret lr @ back to caller. omap242x_sdi_cm_clksel2_pll: .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) @@ -220,7 +220,7 @@ volt_delay_c: ldr r7, [r10] @ get timer value cmp r8, r7 @ time up? bhi volt_delay_c @ not yet->branch - mov pc, lr @ back to caller + ret lr @ back to caller omap242x_srs_cm_clksel2_pll: .word OMAP2420_CM_REGADDR(PLL_MOD, CM_CLKSEL2) diff --git a/arch/arm/mach-omap2/sram243x.S b/arch/arm/mach-omap2/sram243x.S index a1e9edd673f4..d5deb9761fc7 100644 --- a/arch/arm/mach-omap2/sram243x.S +++ b/arch/arm/mach-omap2/sram243x.S @@ -101,7 +101,7 @@ i_dll_wait: i_dll_delay: subs r4, r4, #0x1 bne i_dll_delay - mov pc, lr + ret lr /* * shift up or down voltage, use R9 as input to tell level. @@ -125,7 +125,7 @@ volt_delay: ldr r7, [r3] @ get timer value cmp r5, r7 @ time up? bhi volt_delay @ not yet->branch - mov pc, lr @ back to caller. + ret lr @ back to caller. omap243x_sdi_cm_clksel2_pll: .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) @@ -220,7 +220,7 @@ volt_delay_c: ldr r7, [r10] @ get timer value cmp r8, r7 @ time up? bhi volt_delay_c @ not yet->branch - mov pc, lr @ back to caller + ret lr @ back to caller omap243x_srs_cm_clksel2_pll: .word OMAP2430_CM_REGADDR(PLL_MOD, CM_CLKSEL2) diff --git a/arch/arm/mach-pxa/mioa701_bootresume.S b/arch/arm/mach-pxa/mioa701_bootresume.S index 324d25a48c85..81591491ab94 100644 --- a/arch/arm/mach-pxa/mioa701_bootresume.S +++ b/arch/arm/mach-pxa/mioa701_bootresume.S @@ -29,7 +29,7 @@ ENTRY(mioa701_jumpaddr) str r1, [r0] @ Early disable resume for next boot ldr r0, mioa701_jumpaddr @ (Murphy's Law) ldr r0, [r0] - mov pc, r0 + ret r0 2: ENTRY(mioa701_bootstrap_lg) diff --git a/arch/arm/mach-pxa/standby.S b/arch/arm/mach-pxa/standby.S index 29f5f5c180b7..eab1645bb4ad 100644 --- a/arch/arm/mach-pxa/standby.S +++ b/arch/arm/mach-pxa/standby.S @@ -29,7 +29,7 @@ ENTRY(pxa_cpu_standby) .align 5 1: mcr p14, 0, r2, c7, c0, 0 @ put the system into Standby str r1, [r0] @ make sure PSSR_PH/STS are clear - mov pc, lr + ret lr #endif @@ -108,7 +108,7 @@ ENTRY(pm_enter_standby_start) bic r0, r0, #0x20000000 str r0, [r1, #PXA3_DMCIER] - mov pc, lr + ret lr ENTRY(pm_enter_standby_end) #endif diff --git a/arch/arm/mach-realview/include/mach/memory.h b/arch/arm/mach-realview/include/mach/memory.h index db09170e3832..23e7a313f75d 100644 --- a/arch/arm/mach-realview/include/mach/memory.h +++ b/arch/arm/mach-realview/include/mach/memory.h @@ -20,15 +20,6 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -/* - * Physical DRAM offset. - */ -#ifdef CONFIG_REALVIEW_HIGH_PHYS_OFFSET -#define PLAT_PHYS_OFFSET UL(0x70000000) -#else -#define PLAT_PHYS_OFFSET UL(0x00000000) -#endif - #ifdef CONFIG_SPARSEMEM /* diff --git a/arch/arm/mach-rpc/include/mach/memory.h b/arch/arm/mach-rpc/include/mach/memory.h index 18a221093bf5..b7e49571417d 100644 --- a/arch/arm/mach-rpc/include/mach/memory.h +++ b/arch/arm/mach-rpc/include/mach/memory.h @@ -19,11 +19,6 @@ #define __ASM_ARCH_MEMORY_H /* - * Physical DRAM offset. - */ -#define PLAT_PHYS_OFFSET UL(0x10000000) - -/* * Cache flushing area - ROM */ #define FLUSH_BASE_PHYS 0x00000000 diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2410.S b/arch/arm/mach-s3c24xx/sleep-s3c2410.S index c9b91223697c..875ba8911127 100644 --- a/arch/arm/mach-s3c24xx/sleep-s3c2410.S +++ b/arch/arm/mach-s3c24xx/sleep-s3c2410.S @@ -66,4 +66,4 @@ s3c2410_do_sleep: streq r8, [r5] @ SDRAM power-down config streq r9, [r6] @ CPU sleep 1: beq 1b - mov pc, r14 + ret lr diff --git a/arch/arm/mach-s3c24xx/sleep-s3c2412.S b/arch/arm/mach-s3c24xx/sleep-s3c2412.S index 5adaceb7da13..6bf5b4d8743c 100644 --- a/arch/arm/mach-s3c24xx/sleep-s3c2412.S +++ b/arch/arm/mach-s3c24xx/sleep-s3c2412.S @@ -65,4 +65,4 @@ s3c2412_sleep_enter1: strne r9, [r3] bne s3c2412_sleep_enter1 - mov pc, r14 + ret lr diff --git a/arch/arm/mach-s5pv210/include/mach/memory.h b/arch/arm/mach-s5pv210/include/mach/memory.h index 2d3cfa221d5f..d584fac9156b 100644 --- a/arch/arm/mach-s5pv210/include/mach/memory.h +++ b/arch/arm/mach-s5pv210/include/mach/memory.h @@ -13,8 +13,6 @@ #ifndef __ASM_ARCH_MEMORY_H #define __ASM_ARCH_MEMORY_H -#define PLAT_PHYS_OFFSET UL(0x20000000) - /* * Sparsemem support * Physical memory can be located from 0x20000000 to 0x7fffffff, diff --git a/arch/arm/mach-sa1100/include/mach/memory.h b/arch/arm/mach-sa1100/include/mach/memory.h index 12d376795abc..2054051eb797 100644 --- a/arch/arm/mach-sa1100/include/mach/memory.h +++ b/arch/arm/mach-sa1100/include/mach/memory.h @@ -10,11 +10,6 @@ #include <asm/sizes.h> /* - * Physical DRAM offset is 0xc0000000 on the SA1100 - */ -#define PLAT_PHYS_OFFSET UL(0xc0000000) - -/* * Because of the wide memory address space between physical RAM banks on the * SA1100, it's much convenient to use Linux's SparseMEM support to implement * our memory map representation. Assuming all memory nodes have equal access diff --git a/arch/arm/mach-shmobile/headsmp.S b/arch/arm/mach-shmobile/headsmp.S index e5be5c88644b..293007579b8e 100644 --- a/arch/arm/mach-shmobile/headsmp.S +++ b/arch/arm/mach-shmobile/headsmp.S @@ -12,6 +12,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/memory.h> ENTRY(shmobile_invalidate_start) @@ -75,7 +76,7 @@ shmobile_smp_boot_next: shmobile_smp_boot_found: ldr r0, [r7, r1, lsl #2] - mov pc, r9 + ret r9 ENDPROC(shmobile_smp_boot) ENTRY(shmobile_smp_sleep) diff --git a/arch/arm/mach-tegra/sleep-tegra20.S b/arch/arm/mach-tegra/sleep-tegra20.S index aaaf3abd2688..be4bc5f853f5 100644 --- a/arch/arm/mach-tegra/sleep-tegra20.S +++ b/arch/arm/mach-tegra/sleep-tegra20.S @@ -78,7 +78,7 @@ ENTRY(tegra20_hotplug_shutdown) /* Put this CPU down */ cpu_id r0 bl tegra20_cpu_shutdown - mov pc, lr @ should never get here + ret lr @ should never get here ENDPROC(tegra20_hotplug_shutdown) /* @@ -96,7 +96,7 @@ ENDPROC(tegra20_hotplug_shutdown) */ ENTRY(tegra20_cpu_shutdown) cmp r0, #0 - moveq pc, lr @ must not be called for CPU 0 + reteq lr @ must not be called for CPU 0 mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 mov r12, #CPU_RESETTABLE str r12, [r1] @@ -117,7 +117,7 @@ ENTRY(tegra20_cpu_shutdown) cpu_id r3 cmp r3, r0 beq . - mov pc, lr + ret lr ENDPROC(tegra20_cpu_shutdown) #endif @@ -164,7 +164,7 @@ ENTRY(tegra_pen_lock) cmpeq r12, r0 @ !turn == cpu? beq 1b @ while !turn == cpu && flag[!cpu] == 1 - mov pc, lr @ locked + ret lr @ locked ENDPROC(tegra_pen_lock) ENTRY(tegra_pen_unlock) @@ -176,7 +176,7 @@ ENTRY(tegra_pen_unlock) addne r2, r3, #PMC_SCRATCH39 mov r12, #0 str r12, [r2] - mov pc, lr + ret lr ENDPROC(tegra_pen_unlock) /* @@ -189,7 +189,7 @@ ENTRY(tegra20_cpu_clear_resettable) mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 mov r12, #CPU_NOT_RESETTABLE str r12, [r1] - mov pc, lr + ret lr ENDPROC(tegra20_cpu_clear_resettable) /* @@ -202,7 +202,7 @@ ENTRY(tegra20_cpu_set_resettable_soon) mov32 r1, TEGRA_PMC_VIRT + PMC_SCRATCH41 mov r12, #CPU_RESETTABLE_SOON str r12, [r1] - mov pc, lr + ret lr ENDPROC(tegra20_cpu_set_resettable_soon) /* @@ -217,7 +217,7 @@ ENTRY(tegra20_cpu_is_resettable_soon) cmp r12, #CPU_RESETTABLE_SOON moveq r0, #1 movne r0, #0 - mov pc, lr + ret lr ENDPROC(tegra20_cpu_is_resettable_soon) /* @@ -239,7 +239,7 @@ ENTRY(tegra20_sleep_core_finish) mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA add r0, r0, r1 - mov pc, r3 + ret r3 ENDPROC(tegra20_sleep_core_finish) /* @@ -402,7 +402,7 @@ exit_selfrefresh_loop: mov32 r0, TEGRA_PMC_BASE ldr r0, [r0, #PMC_SCRATCH41] - mov pc, r0 @ jump to tegra_resume + ret r0 @ jump to tegra_resume ENDPROC(tegra20_lp1_reset) /* @@ -455,7 +455,7 @@ tegra20_switch_cpu_to_clk32k: mov r0, #0 /* brust policy = 32KHz */ str r0, [r5, #CLK_RESET_SCLK_BURST] - mov pc, lr + ret lr /* * tegra20_enter_sleep @@ -535,7 +535,7 @@ padsave_done: adr r2, tegra20_sclk_save str r0, [r2] dsb - mov pc, lr + ret lr tegra20_sdram_pad_address: .word TEGRA_APB_MISC_BASE + APB_MISC_XM2CFGCPADCTRL diff --git a/arch/arm/mach-tegra/sleep-tegra30.S b/arch/arm/mach-tegra/sleep-tegra30.S index b16d4a57fa59..09cad9b071de 100644 --- a/arch/arm/mach-tegra/sleep-tegra30.S +++ b/arch/arm/mach-tegra/sleep-tegra30.S @@ -142,7 +142,7 @@ ENTRY(tegra30_hotplug_shutdown) /* Powergate this CPU */ mov r0, #TEGRA30_POWER_HOTPLUG_SHUTDOWN bl tegra30_cpu_shutdown - mov pc, lr @ should never get here + ret lr @ should never get here ENDPROC(tegra30_hotplug_shutdown) /* @@ -161,7 +161,7 @@ ENTRY(tegra30_cpu_shutdown) bne _no_cpu0_chk @ It's not Tegra30 cmp r3, #0 - moveq pc, lr @ Must never be called for CPU 0 + reteq lr @ Must never be called for CPU 0 _no_cpu0_chk: ldr r12, =TEGRA_FLOW_CTRL_VIRT @@ -266,7 +266,7 @@ ENTRY(tegra30_sleep_core_finish) mov32 r1, TEGRA_IRAM_LPx_RESUME_AREA add r0, r0, r1 - mov pc, r3 + ret r3 ENDPROC(tegra30_sleep_core_finish) /* @@ -285,7 +285,7 @@ ENTRY(tegra30_sleep_cpu_secondary_finish) mov r0, #0 @ power mode flags (!hotplug) bl tegra30_cpu_shutdown mov r0, #1 @ never return here - mov pc, r7 + ret r7 ENDPROC(tegra30_sleep_cpu_secondary_finish) /* @@ -529,7 +529,7 @@ __no_dual_emc_chanl: mov32 r0, TEGRA_PMC_BASE ldr r0, [r0, #PMC_SCRATCH41] - mov pc, r0 @ jump to tegra_resume + ret r0 @ jump to tegra_resume ENDPROC(tegra30_lp1_reset) .align L1_CACHE_SHIFT @@ -659,7 +659,7 @@ _no_pll_in_iddq: mov r0, #0 /* brust policy = 32KHz */ str r0, [r5, #CLK_RESET_SCLK_BURST] - mov pc, lr + ret lr /* * tegra30_enter_sleep @@ -819,7 +819,7 @@ pmc_io_dpd_skip: dsb - mov pc, lr + ret lr .ltorg /* dummy symbol for end of IRAM */ diff --git a/arch/arm/mach-tegra/sleep.S b/arch/arm/mach-tegra/sleep.S index 8d06213fbc47..f024a5109e8e 100644 --- a/arch/arm/mach-tegra/sleep.S +++ b/arch/arm/mach-tegra/sleep.S @@ -87,7 +87,7 @@ ENTRY(tegra_init_l2_for_a15) mcrne p15, 0x1, r0, c9, c0, 2 _exit_init_l2_a15: - mov pc, lr + ret lr ENDPROC(tegra_init_l2_for_a15) /* @@ -111,7 +111,7 @@ ENTRY(tegra_sleep_cpu_finish) add r3, r3, r0 mov r0, r1 - mov pc, r3 + ret r3 ENDPROC(tegra_sleep_cpu_finish) /* @@ -139,7 +139,7 @@ ENTRY(tegra_shut_off_mmu) moveq r3, #0 streq r3, [r2, #L2X0_CTRL] #endif - mov pc, r0 + ret r0 ENDPROC(tegra_shut_off_mmu) .popsection @@ -156,6 +156,6 @@ ENTRY(tegra_switch_cpu_to_pllp) str r0, [r5, #CLK_RESET_CCLK_BURST] mov r0, #0 str r0, [r5, #CLK_RESET_CCLK_DIVIDER] - mov pc, lr + ret lr ENDPROC(tegra_switch_cpu_to_pllp) #endif diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c index b743a0ae02ce..2fb78b4648cb 100644 --- a/arch/arm/mach-vexpress/tc2_pm.c +++ b/arch/arm/mach-vexpress/tc2_pm.c @@ -152,7 +152,7 @@ static void tc2_pm_down(u64 residency) if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) { arch_spin_unlock(&tc2_pm_lock); - if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) { + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { /* * On the Cortex-A15 we need to disable * L2 prefetching before flushing the cache. @@ -323,6 +323,21 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level) " b cci_enable_port_for_self "); } +static void __init tc2_cache_off(void) +{ + pr_info("TC2: disabling cache during MCPM loopback test\n"); + if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) { + /* disable L2 prefetching on the Cortex-A15 */ + asm volatile( + "mcr p15, 1, %0, c15, c0, 3 \n\t" + "isb \n\t" + "dsb " + : : "r" (0x400) ); + } + v7_exit_coherency_flush(all); + cci_disable_port_by_cpu(read_cpuid_mpidr()); +} + static int __init tc2_pm_init(void) { int ret, irq; @@ -370,6 +385,8 @@ static int __init tc2_pm_init(void) ret = mcpm_platform_register(&tc2_pm_power_ops); if (!ret) { mcpm_sync_init(tc2_pm_power_up_setup); + /* test if we can (re)enable the CCI on our own */ + BUG_ON(mcpm_loopback(tc2_cache_off) != 0); pr_info("TC2 power management initialized\n"); } return ret; diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index c348eaee7ee2..df46620206af 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -907,8 +907,8 @@ config PL310_ERRATA_588369 They are architecturally defined to behave as the execution of a clean operation followed immediately by an invalidate operation, both performing to the same memory location. This functionality - is not correctly implemented in PL310 as clean lines are not - invalidated as a result of these operations. + is not correctly implemented in PL310 prior to r2p0 (fixed in r2p0) + as clean lines are not invalidated as a result of these operations. config PL310_ERRATA_727915 bool "PL310 errata: Background Clean & Invalidate by Way operation can cause data corruption" @@ -918,7 +918,8 @@ config PL310_ERRATA_727915 PL310 can handle normal accesses while it is in progress. Under very rare circumstances, due to this erratum, write data can be lost when PL310 treats a cacheable write transaction during a Clean & - Invalidate by Way operation. + Invalidate by Way operation. Revisions prior to r3p1 are affected by + this errata (fixed in r3p1). config PL310_ERRATA_753970 bool "PL310 errata: cache sync operation may be faulty" diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index b8cb1a2688a0..0c1ab49e5f7b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -76,6 +76,7 @@ static unsigned long ai_user; static unsigned long ai_sys; +static void *ai_sys_last_pc; static unsigned long ai_skipped; static unsigned long ai_half; static unsigned long ai_word; @@ -130,7 +131,7 @@ static const char *usermode_action[] = { static int alignment_proc_show(struct seq_file *m, void *v) { seq_printf(m, "User:\t\t%lu\n", ai_user); - seq_printf(m, "System:\t\t%lu\n", ai_sys); + seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc); seq_printf(m, "Skipped:\t%lu\n", ai_skipped); seq_printf(m, "Half:\t\t%lu\n", ai_half); seq_printf(m, "Word:\t\t%lu\n", ai_word); @@ -794,6 +795,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) goto user; ai_sys += 1; + ai_sys_last_pc = (void *)instruction_pointer(regs); fixup: diff --git a/arch/arm/mm/cache-fa.S b/arch/arm/mm/cache-fa.S index e505befe51b5..2f0c58836ae7 100644 --- a/arch/arm/mm/cache-fa.S +++ b/arch/arm/mm/cache-fa.S @@ -15,6 +15,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/memory.h> #include <asm/page.h> @@ -45,7 +46,7 @@ ENTRY(fa_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(fa_flush_icache_all) /* @@ -71,7 +72,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -99,7 +100,7 @@ ENTRY(fa_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -135,7 +136,7 @@ ENTRY(fa_coherent_user_range) mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -155,7 +156,7 @@ ENTRY(fa_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -181,7 +182,7 @@ fa_dma_inv_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -199,7 +200,7 @@ fa_dma_clean_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_flush_range(start,end) @@ -214,7 +215,7 @@ ENTRY(fa_dma_flush_range) blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -237,7 +238,7 @@ ENDPROC(fa_dma_map_area) * - dir - DMA direction */ ENTRY(fa_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(fa_dma_unmap_area) .globl fa_flush_kern_cache_louis diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 7c3fb41a462e..5f2c988a06ac 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -665,7 +665,7 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) { unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; - bool cortex_a9 = read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9; + bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; if (rev >= L310_CACHE_ID_RTL_R2P0) { if (cortex_a9) { diff --git a/arch/arm/mm/cache-nop.S b/arch/arm/mm/cache-nop.S index 8e12ddca0031..f1cc9861031f 100644 --- a/arch/arm/mm/cache-nop.S +++ b/arch/arm/mm/cache-nop.S @@ -5,11 +5,12 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include "proc-macros.S" ENTRY(nop_flush_icache_all) - mov pc, lr + ret lr ENDPROC(nop_flush_icache_all) .globl nop_flush_kern_cache_all @@ -29,7 +30,7 @@ ENDPROC(nop_flush_icache_all) ENTRY(nop_coherent_user_range) mov r0, 0 - mov pc, lr + ret lr ENDPROC(nop_coherent_user_range) .globl nop_flush_kern_dcache_area diff --git a/arch/arm/mm/cache-v4.S b/arch/arm/mm/cache-v4.S index a7ba68f59f0c..91e3adf155cb 100644 --- a/arch/arm/mm/cache-v4.S +++ b/arch/arm/mm/cache-v4.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -18,7 +19,7 @@ * Unconditionally clean and invalidate the entire icache. */ ENTRY(v4_flush_icache_all) - mov pc, lr + ret lr ENDPROC(v4_flush_icache_all) /* @@ -40,7 +41,7 @@ ENTRY(v4_flush_kern_cache_all) #ifdef CONFIG_CPU_CP15 mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache - mov pc, lr + ret lr #else /* FALLTHROUGH */ #endif @@ -59,7 +60,7 @@ ENTRY(v4_flush_user_cache_range) #ifdef CONFIG_CPU_CP15 mov ip, #0 mcr p15, 0, ip, c7, c7, 0 @ flush ID cache - mov pc, lr + ret lr #else /* FALLTHROUGH */ #endif @@ -89,7 +90,7 @@ ENTRY(v4_coherent_kern_range) */ ENTRY(v4_coherent_user_range) mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -116,7 +117,7 @@ ENTRY(v4_dma_flush_range) mov r0, #0 mcr p15, 0, r0, c7, c7, 0 @ flush ID cache #endif - mov pc, lr + ret lr /* * dma_unmap_area(start, size, dir) @@ -136,7 +137,7 @@ ENTRY(v4_dma_unmap_area) * - dir - DMA direction */ ENTRY(v4_dma_map_area) - mov pc, lr + ret lr ENDPROC(v4_dma_unmap_area) ENDPROC(v4_dma_map_area) diff --git a/arch/arm/mm/cache-v4wb.S b/arch/arm/mm/cache-v4wb.S index cd4945321407..2522f8c8fbb1 100644 --- a/arch/arm/mm/cache-v4wb.S +++ b/arch/arm/mm/cache-v4wb.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/memory.h> #include <asm/page.h> #include "proc-macros.S" @@ -58,7 +59,7 @@ flush_base: ENTRY(v4wb_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(v4wb_flush_icache_all) /* @@ -94,7 +95,7 @@ __flush_whole_cache: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -122,7 +123,7 @@ ENTRY(v4wb_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -170,7 +171,7 @@ ENTRY(v4wb_coherent_user_range) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* @@ -195,7 +196,7 @@ v4wb_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -212,7 +213,7 @@ v4wb_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -248,7 +249,7 @@ ENDPROC(v4wb_dma_map_area) * - dir - DMA direction */ ENTRY(v4wb_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(v4wb_dma_unmap_area) .globl v4wb_flush_kern_cache_louis diff --git a/arch/arm/mm/cache-v4wt.S b/arch/arm/mm/cache-v4wt.S index 11e5e5838bc5..a0982ce49007 100644 --- a/arch/arm/mm/cache-v4wt.S +++ b/arch/arm/mm/cache-v4wt.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/page.h> #include "proc-macros.S" @@ -48,7 +49,7 @@ ENTRY(v4wt_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(v4wt_flush_icache_all) /* @@ -71,7 +72,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -94,7 +95,7 @@ ENTRY(v4wt_flush_user_cache_range) add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -126,7 +127,7 @@ ENTRY(v4wt_coherent_user_range) cmp r0, r1 blo 1b mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -160,7 +161,7 @@ v4wt_dma_inv_range: add r0, r0, #CACHE_DLINESIZE cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -192,7 +193,7 @@ ENTRY(v4wt_dma_unmap_area) * - dir - DMA direction */ ENTRY(v4wt_dma_map_area) - mov pc, lr + ret lr ENDPROC(v4wt_dma_unmap_area) ENDPROC(v4wt_dma_map_area) diff --git a/arch/arm/mm/cache-v6.S b/arch/arm/mm/cache-v6.S index d8fd4d4bd3d4..24659952c278 100644 --- a/arch/arm/mm/cache-v6.S +++ b/arch/arm/mm/cache-v6.S @@ -51,7 +51,7 @@ ENTRY(v6_flush_icache_all) #else mcr p15, 0, r0, c7, c5, 0 @ invalidate I-cache #endif - mov pc, lr + ret lr ENDPROC(v6_flush_icache_all) /* @@ -73,7 +73,7 @@ ENTRY(v6_flush_kern_cache_all) #else mcr p15, 0, r0, c7, c15, 0 @ Cache clean+invalidate #endif - mov pc, lr + ret lr /* * v6_flush_cache_all() @@ -98,7 +98,7 @@ ENTRY(v6_flush_user_cache_all) * - we have a VIPT cache. */ ENTRY(v6_flush_user_cache_range) - mov pc, lr + ret lr /* * v6_coherent_kern_range(start,end) @@ -150,7 +150,7 @@ ENTRY(v6_coherent_user_range) #else mcr p15, 0, r0, c7, c5, 6 @ invalidate BTB #endif - mov pc, lr + ret lr /* * Fault handling for the cache operation above. If the virtual address in r0 @@ -158,7 +158,7 @@ ENTRY(v6_coherent_user_range) */ 9001: mov r0, #-EFAULT - mov pc, lr + ret lr UNWIND(.fnend ) ENDPROC(v6_coherent_user_range) ENDPROC(v6_coherent_kern_range) @@ -188,7 +188,7 @@ ENTRY(v6_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 #endif - mov pc, lr + ret lr /* @@ -239,7 +239,7 @@ v6_dma_inv_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * v6_dma_clean_range(start,end) @@ -262,7 +262,7 @@ v6_dma_clean_range: blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * v6_dma_flush_range(start,end) @@ -290,7 +290,7 @@ ENTRY(v6_dma_flush_range) blo 1b mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -323,7 +323,7 @@ ENTRY(v6_dma_unmap_area) teq r2, #DMA_TO_DEVICE bne v6_dma_inv_range #endif - mov pc, lr + ret lr ENDPROC(v6_dma_unmap_area) .globl v6_flush_kern_cache_louis diff --git a/arch/arm/mm/cache-v7.S b/arch/arm/mm/cache-v7.S index 615c99e38ba1..b966656d2c2d 100644 --- a/arch/arm/mm/cache-v7.S +++ b/arch/arm/mm/cache-v7.S @@ -61,7 +61,7 @@ ENTRY(v7_invalidate_l1) bgt 1b dsb st isb - mov pc, lr + ret lr ENDPROC(v7_invalidate_l1) /* @@ -76,7 +76,7 @@ ENTRY(v7_flush_icache_all) mov r0, #0 ALT_SMP(mcr p15, 0, r0, c7, c1, 0) @ invalidate I-cache inner shareable ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate - mov pc, lr + ret lr ENDPROC(v7_flush_icache_all) /* @@ -94,7 +94,7 @@ ENTRY(v7_flush_dcache_louis) ALT_UP(ands r3, r0, #(7 << 27)) @ extract LoUU from clidr #ifdef CONFIG_ARM_ERRATA_643719 ALT_SMP(mrceq p15, 0, r2, c0, c0, 0) @ read main ID register - ALT_UP(moveq pc, lr) @ LoUU is zero, so nothing to do + ALT_UP(reteq lr) @ LoUU is zero, so nothing to do ldreq r1, =0x410fc090 @ ID of ARM Cortex A9 r0p? biceq r2, r2, #0x0000000f @ clear minor revision number teqeq r2, r1 @ test for errata affected core and if so... @@ -102,7 +102,7 @@ ENTRY(v7_flush_dcache_louis) #endif ALT_SMP(mov r3, r3, lsr #20) @ r3 = LoUIS * 2 ALT_UP(mov r3, r3, lsr #26) @ r3 = LoUU * 2 - moveq pc, lr @ return if level == 0 + reteq lr @ return if level == 0 mov r10, #0 @ r10 (starting level) = 0 b flush_levels @ start flushing cache levels ENDPROC(v7_flush_dcache_louis) @@ -168,7 +168,7 @@ finished: mcr p15, 2, r10, c0, c0, 0 @ select current cache level in cssr dsb st isb - mov pc, lr + ret lr ENDPROC(v7_flush_dcache_all) /* @@ -191,7 +191,7 @@ ENTRY(v7_flush_kern_cache_all) ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - mov pc, lr + ret lr ENDPROC(v7_flush_kern_cache_all) /* @@ -209,7 +209,7 @@ ENTRY(v7_flush_kern_cache_louis) ALT_UP(mcr p15, 0, r0, c7, c5, 0) @ I+BTB cache invalidate ARM( ldmfd sp!, {r4-r5, r7, r9-r11, lr} ) THUMB( ldmfd sp!, {r4-r7, r9-r11, lr} ) - mov pc, lr + ret lr ENDPROC(v7_flush_kern_cache_louis) /* @@ -235,7 +235,7 @@ ENTRY(v7_flush_user_cache_all) * - we have a VIPT cache. */ ENTRY(v7_flush_user_cache_range) - mov pc, lr + ret lr ENDPROC(v7_flush_user_cache_all) ENDPROC(v7_flush_user_cache_range) @@ -296,7 +296,7 @@ ENTRY(v7_coherent_user_range) ALT_UP(mcr p15, 0, r0, c7, c5, 6) @ invalidate BTB dsb ishst isb - mov pc, lr + ret lr /* * Fault handling for the cache operation above. If the virtual address in r0 @@ -307,7 +307,7 @@ ENTRY(v7_coherent_user_range) dsb #endif mov r0, #-EFAULT - mov pc, lr + ret lr UNWIND(.fnend ) ENDPROC(v7_coherent_kern_range) ENDPROC(v7_coherent_user_range) @@ -336,7 +336,7 @@ ENTRY(v7_flush_kern_dcache_area) cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_flush_kern_dcache_area) /* @@ -369,7 +369,7 @@ v7_dma_inv_range: cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_dma_inv_range) /* @@ -391,7 +391,7 @@ v7_dma_clean_range: cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_dma_clean_range) /* @@ -413,7 +413,7 @@ ENTRY(v7_dma_flush_range) cmp r0, r1 blo 1b dsb st - mov pc, lr + ret lr ENDPROC(v7_dma_flush_range) /* @@ -439,7 +439,7 @@ ENTRY(v7_dma_unmap_area) add r1, r1, r0 teq r2, #DMA_TO_DEVICE bne v7_dma_inv_range - mov pc, lr + ret lr ENDPROC(v7_dma_unmap_area) __INITDATA diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index c508f41a43bc..59424937e52b 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -126,8 +126,8 @@ static const struct prot_bits section_bits[] = { .val = PMD_SECT_USER, .set = "USR", }, { - .mask = PMD_SECT_RDONLY, - .val = PMD_SECT_RDONLY, + .mask = L_PMD_SECT_RDONLY, + .val = L_PMD_SECT_RDONLY, .set = "ro", .clear = "RW", #elif __LINUX_ARM_ARCH__ >= 6 diff --git a/arch/arm/mm/l2c-l2x0-resume.S b/arch/arm/mm/l2c-l2x0-resume.S index 99b05f21a59a..fda415e4ca8f 100644 --- a/arch/arm/mm/l2c-l2x0-resume.S +++ b/arch/arm/mm/l2c-l2x0-resume.S @@ -6,6 +6,7 @@ * This code can only be used to if you are running in the secure world. */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/hardware/cache-l2x0.h> .text @@ -27,7 +28,7 @@ ENTRY(l2c310_early_resume) @ Check that the address has been initialised teq r1, #0 - moveq pc, lr + reteq lr @ The prefetch and power control registers are revision dependent @ and can be written whether or not the L2 cache is enabled @@ -41,7 +42,7 @@ ENTRY(l2c310_early_resume) @ Don't setup the L2 cache if it is already enabled ldr r0, [r1, #L2X0_CTRL] tst r0, #L2X0_CTRL_EN - movne pc, lr + retne lr str r3, [r1, #L310_TAG_LATENCY_CTRL] str r4, [r1, #L310_DATA_LATENCY_CTRL] @@ -51,7 +52,7 @@ ENTRY(l2c310_early_resume) str r2, [r1, #L2X0_AUX_CTRL] mov r9, #L2X0_CTRL_EN str r9, [r1, #L2X0_CTRL] - mov pc, lr + ret lr ENDPROC(l2c310_early_resume) .align diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6e3ba8d112a2..8348ed6b2efe 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1434,23 +1434,64 @@ void __init early_paging_init(const struct machine_desc *mdesc, dsb(ishst); isb(); - /* remap level 1 table */ + /* + * FIXME: This code is not architecturally compliant: we modify + * the mappings in-place, indeed while they are in use by this + * very same code. This may lead to unpredictable behaviour of + * the CPU. + * + * Even modifying the mappings in a separate page table does + * not resolve this. + * + * The architecture strongly recommends that when a mapping is + * changed, that it is changed by first going via an invalid + * mapping and back to the new mapping. This is to ensure that + * no TLB conflicts (caused by the TLB having more than one TLB + * entry match a translation) can occur. However, doing that + * here will result in unmapping the code we are running. + */ + pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + + /* + * Remap level 1 table. This changes the physical addresses + * used to refer to the level 2 page tables to the high + * physical address alias, leaving everything else the same. + */ for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { set_pud(pud0, __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); pmd0 += PTRS_PER_PMD; } - /* remap pmds for kernel mapping */ + /* + * Remap the level 2 table, pointing the mappings at the high + * physical address alias of these pages. + */ phys = __pa(map_start); do { *pmdk++ = __pmd(phys | pmdprot); phys += PMD_SIZE; } while (phys < map_end); + /* + * Ensure that the above updates are flushed out of the cache. + * This is not strictly correct; on a system where the caches + * are coherent with each other, but the MMU page table walks + * may not be coherent, flush_cache_all() may be a no-op, and + * this will fail. + */ flush_cache_all(); + + /* + * Re-write the TTBR values to point them at the high physical + * alias of the page tables. We expect __va() will work on + * cpu_get_pgd(), which returns the value of TTBR0. + */ cpu_switch_mm(pgd0, &init_mm); cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + + /* Finally flush any stale TLB values. */ local_flush_bp_all(); local_flush_tlb_all(); } diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index d1a2d05971e0..86ee5d47ce3c 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -73,7 +73,7 @@ * cpu_arm1020_proc_init() */ ENTRY(cpu_arm1020_proc_init) - mov pc, lr + ret lr /* * cpu_arm1020_proc_fin() @@ -83,7 +83,7 @@ ENTRY(cpu_arm1020_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1020_reset(loc) @@ -107,7 +107,7 @@ ENTRY(cpu_arm1020_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1020_reset) .popsection @@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020_reset) .align 5 ENTRY(cpu_arm1020_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -133,7 +133,7 @@ ENTRY(arm1020_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1020_flush_icache_all) /* @@ -169,7 +169,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -200,7 +200,7 @@ ENTRY(arm1020_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -242,7 +242,7 @@ ENTRY(arm1020_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -264,7 +264,7 @@ ENTRY(arm1020_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -297,7 +297,7 @@ arm1020_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -320,7 +320,7 @@ arm1020_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -342,7 +342,7 @@ ENTRY(arm1020_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -365,7 +365,7 @@ ENDPROC(arm1020_dma_map_area) * - dir - DMA direction */ ENTRY(arm1020_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1020_dma_unmap_area) .globl arm1020_flush_kern_cache_louis @@ -384,7 +384,7 @@ ENTRY(cpu_arm1020_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -423,7 +423,7 @@ ENTRY(cpu_arm1020_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif /* CONFIG_MMU */ - mov pc, lr + ret lr /* * cpu_arm1020_set_pte(ptep, pte) @@ -441,7 +441,7 @@ ENTRY(cpu_arm1020_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1020_setup, #function __arm1020_setup: @@ -460,7 +460,7 @@ __arm1020_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif - mov pc, lr + ret lr .size __arm1020_setup, . - __arm1020_setup /* diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 9d89405c3d03..a6331d78601f 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -73,7 +73,7 @@ * cpu_arm1020e_proc_init() */ ENTRY(cpu_arm1020e_proc_init) - mov pc, lr + ret lr /* * cpu_arm1020e_proc_fin() @@ -83,7 +83,7 @@ ENTRY(cpu_arm1020e_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1020e_reset(loc) @@ -107,7 +107,7 @@ ENTRY(cpu_arm1020e_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1020e_reset) .popsection @@ -117,7 +117,7 @@ ENDPROC(cpu_arm1020e_reset) .align 5 ENTRY(cpu_arm1020e_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -133,7 +133,7 @@ ENTRY(arm1020e_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1020e_flush_icache_all) /* @@ -168,7 +168,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -197,7 +197,7 @@ ENTRY(arm1020e_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -236,7 +236,7 @@ ENTRY(arm1020e_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -257,7 +257,7 @@ ENTRY(arm1020e_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -286,7 +286,7 @@ arm1020e_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -308,7 +308,7 @@ arm1020e_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -328,7 +328,7 @@ ENTRY(arm1020e_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -351,7 +351,7 @@ ENDPROC(arm1020e_dma_map_area) * - dir - DMA direction */ ENTRY(arm1020e_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1020e_dma_unmap_area) .globl arm1020e_flush_kern_cache_louis @@ -369,7 +369,7 @@ ENTRY(cpu_arm1020e_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -407,7 +407,7 @@ ENTRY(cpu_arm1020e_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm1020e_set_pte(ptep, pte) @@ -423,7 +423,7 @@ ENTRY(cpu_arm1020e_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1020e_setup, #function __arm1020e_setup: @@ -441,7 +441,7 @@ __arm1020e_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif - mov pc, lr + ret lr .size __arm1020e_setup, . - __arm1020e_setup /* diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 6f01a0ae3b30..a126b7a59928 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -62,7 +62,7 @@ * cpu_arm1022_proc_init() */ ENTRY(cpu_arm1022_proc_init) - mov pc, lr + ret lr /* * cpu_arm1022_proc_fin() @@ -72,7 +72,7 @@ ENTRY(cpu_arm1022_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1022_reset(loc) @@ -96,7 +96,7 @@ ENTRY(cpu_arm1022_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1022_reset) .popsection @@ -106,7 +106,7 @@ ENDPROC(cpu_arm1022_reset) .align 5 ENTRY(cpu_arm1022_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -122,7 +122,7 @@ ENTRY(arm1022_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1022_flush_icache_all) /* @@ -156,7 +156,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -185,7 +185,7 @@ ENTRY(arm1022_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -225,7 +225,7 @@ ENTRY(arm1022_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -246,7 +246,7 @@ ENTRY(arm1022_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -275,7 +275,7 @@ arm1022_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -297,7 +297,7 @@ arm1022_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -317,7 +317,7 @@ ENTRY(arm1022_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -340,7 +340,7 @@ ENDPROC(arm1022_dma_map_area) * - dir - DMA direction */ ENTRY(arm1022_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1022_dma_unmap_area) .globl arm1022_flush_kern_cache_louis @@ -358,7 +358,7 @@ ENTRY(cpu_arm1022_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -389,7 +389,7 @@ ENTRY(cpu_arm1022_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm1022_set_pte_ext(ptep, pte, ext) @@ -405,7 +405,7 @@ ENTRY(cpu_arm1022_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1022_setup, #function __arm1022_setup: @@ -423,7 +423,7 @@ __arm1022_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.............. #endif - mov pc, lr + ret lr .size __arm1022_setup, . - __arm1022_setup /* diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 4799a24b43e6..fc294067e977 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -62,7 +62,7 @@ * cpu_arm1026_proc_init() */ ENTRY(cpu_arm1026_proc_init) - mov pc, lr + ret lr /* * cpu_arm1026_proc_fin() @@ -72,7 +72,7 @@ ENTRY(cpu_arm1026_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm1026_reset(loc) @@ -96,7 +96,7 @@ ENTRY(cpu_arm1026_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm1026_reset) .popsection @@ -106,7 +106,7 @@ ENDPROC(cpu_arm1026_reset) .align 5 ENTRY(cpu_arm1026_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -122,7 +122,7 @@ ENTRY(arm1026_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache #endif - mov pc, lr + ret lr ENDPROC(arm1026_flush_icache_all) /* @@ -151,7 +151,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -180,7 +180,7 @@ ENTRY(arm1026_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache #endif mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -219,7 +219,7 @@ ENTRY(arm1026_coherent_user_range) blo 1b mcr p15, 0, ip, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -240,7 +240,7 @@ ENTRY(arm1026_flush_kern_dcache_area) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -269,7 +269,7 @@ arm1026_dma_inv_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -291,7 +291,7 @@ arm1026_dma_clean_range: blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -311,7 +311,7 @@ ENTRY(arm1026_dma_flush_range) blo 1b #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -334,7 +334,7 @@ ENDPROC(arm1026_dma_map_area) * - dir - DMA direction */ ENTRY(arm1026_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm1026_dma_unmap_area) .globl arm1026_flush_kern_cache_louis @@ -352,7 +352,7 @@ ENTRY(cpu_arm1026_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -378,7 +378,7 @@ ENTRY(cpu_arm1026_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, r1, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm1026_set_pte_ext(ptep, pte, ext) @@ -394,7 +394,7 @@ ENTRY(cpu_arm1026_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry #endif #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm1026_setup, #function __arm1026_setup: @@ -417,7 +417,7 @@ __arm1026_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .R.. .... .... .... #endif - mov pc, lr + ret lr .size __arm1026_setup, . - __arm1026_setup /* diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index d42c37f9f5bc..2baa66b3ac9b 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -51,14 +51,14 @@ */ ENTRY(cpu_arm720_dcache_clean_area) ENTRY(cpu_arm720_proc_init) - mov pc, lr + ret lr ENTRY(cpu_arm720_proc_fin) mrc p15, 0, r0, c1, c0, 0 bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * Function: arm720_proc_do_idle(void) @@ -66,7 +66,7 @@ ENTRY(cpu_arm720_proc_fin) * Purpose : put the processor in proper idle mode */ ENTRY(cpu_arm720_do_idle) - mov pc, lr + ret lr /* * Function: arm720_switch_mm(unsigned long pgd_phys) @@ -81,7 +81,7 @@ ENTRY(cpu_arm720_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ update page table ptr mcr p15, 0, r1, c8, c7, 0 @ flush TLB (v4) #endif - mov pc, lr + ret lr /* * Function: arm720_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext) @@ -94,7 +94,7 @@ ENTRY(cpu_arm720_set_pte_ext) #ifdef CONFIG_MMU armv3_set_pte_ext wc_disable=0 #endif - mov pc, lr + ret lr /* * Function: arm720_reset @@ -112,7 +112,7 @@ ENTRY(cpu_arm720_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x2100 @ ..v....s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm720_reset) .popsection @@ -128,7 +128,7 @@ __arm710_setup: bic r0, r0, r5 ldr r5, arm710_cr1_set orr r0, r0, r5 - mov pc, lr @ __ret (head.S) + ret lr @ __ret (head.S) .size __arm710_setup, . - __arm710_setup /* @@ -156,7 +156,7 @@ __arm720_setup: mrc p15, 0, r0, c1, c0 @ get control register bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr @ __ret (head.S) + ret lr @ __ret (head.S) .size __arm720_setup, . - __arm720_setup /* diff --git a/arch/arm/mm/proc-arm740.S b/arch/arm/mm/proc-arm740.S index 9b0ae90cbf17..ac1ea6b3bce4 100644 --- a/arch/arm/mm/proc-arm740.S +++ b/arch/arm/mm/proc-arm740.S @@ -32,7 +32,7 @@ ENTRY(cpu_arm740_proc_init) ENTRY(cpu_arm740_do_idle) ENTRY(cpu_arm740_dcache_clean_area) ENTRY(cpu_arm740_switch_mm) - mov pc, lr + ret lr /* * cpu_arm740_proc_fin() @@ -42,7 +42,7 @@ ENTRY(cpu_arm740_proc_fin) bic r0, r0, #0x3f000000 @ bank/f/lock/s bic r0, r0, #0x0000000c @ w-buffer/cache mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm740_reset(loc) @@ -56,7 +56,7 @@ ENTRY(cpu_arm740_reset) mrc p15, 0, ip, c1, c0, 0 @ get ctrl register bic ip, ip, #0x0000000c @ ............wc.. mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm740_reset) .popsection @@ -115,7 +115,7 @@ __arm740_setup: @ need some benchmark orr r0, r0, #0x0000000d @ MPU/Cache/WB - mov pc, lr + ret lr .size __arm740_setup, . - __arm740_setup diff --git a/arch/arm/mm/proc-arm7tdmi.S b/arch/arm/mm/proc-arm7tdmi.S index f6cc3f63ce39..bf6ba4bc30ff 100644 --- a/arch/arm/mm/proc-arm7tdmi.S +++ b/arch/arm/mm/proc-arm7tdmi.S @@ -32,13 +32,13 @@ ENTRY(cpu_arm7tdmi_proc_init) ENTRY(cpu_arm7tdmi_do_idle) ENTRY(cpu_arm7tdmi_dcache_clean_area) ENTRY(cpu_arm7tdmi_switch_mm) - mov pc, lr + ret lr /* * cpu_arm7tdmi_proc_fin() */ ENTRY(cpu_arm7tdmi_proc_fin) - mov pc, lr + ret lr /* * Function: cpu_arm7tdmi_reset(loc) @@ -47,13 +47,13 @@ ENTRY(cpu_arm7tdmi_proc_fin) */ .pushsection .idmap.text, "ax" ENTRY(cpu_arm7tdmi_reset) - mov pc, r0 + ret r0 ENDPROC(cpu_arm7tdmi_reset) .popsection .type __arm7tdmi_setup, #function __arm7tdmi_setup: - mov pc, lr + ret lr .size __arm7tdmi_setup, . - __arm7tdmi_setup __INITDATA diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 549557df6d57..22bf8dde4f84 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -63,7 +63,7 @@ * cpu_arm920_proc_init() */ ENTRY(cpu_arm920_proc_init) - mov pc, lr + ret lr /* * cpu_arm920_proc_fin() @@ -73,7 +73,7 @@ ENTRY(cpu_arm920_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm920_reset(loc) @@ -97,7 +97,7 @@ ENTRY(cpu_arm920_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm920_reset) .popsection @@ -107,7 +107,7 @@ ENDPROC(cpu_arm920_reset) .align 5 ENTRY(cpu_arm920_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -120,7 +120,7 @@ ENTRY(cpu_arm920_do_idle) ENTRY(arm920_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm920_flush_icache_all) /* @@ -151,7 +151,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -177,7 +177,7 @@ ENTRY(arm920_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -211,7 +211,7 @@ ENTRY(arm920_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -231,7 +231,7 @@ ENTRY(arm920_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -257,7 +257,7 @@ arm920_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -276,7 +276,7 @@ arm920_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -293,7 +293,7 @@ ENTRY(arm920_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -316,7 +316,7 @@ ENDPROC(arm920_dma_map_area) * - dir - DMA direction */ ENTRY(arm920_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm920_dma_unmap_area) .globl arm920_flush_kern_cache_louis @@ -332,7 +332,7 @@ ENTRY(cpu_arm920_dcache_clean_area) add r0, r0, #CACHE_DLINESIZE subs r1, r1, #CACHE_DLINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -367,7 +367,7 @@ ENTRY(cpu_arm920_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm920_set_pte(ptep, pte, ext) @@ -382,7 +382,7 @@ ENTRY(cpu_arm920_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm920_suspend_size @@ -423,7 +423,7 @@ __arm920_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __arm920_setup, . - __arm920_setup /* diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 2a758b06c6f6..0c6d5ac5a6d4 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -65,7 +65,7 @@ * cpu_arm922_proc_init() */ ENTRY(cpu_arm922_proc_init) - mov pc, lr + ret lr /* * cpu_arm922_proc_fin() @@ -75,7 +75,7 @@ ENTRY(cpu_arm922_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm922_reset(loc) @@ -99,7 +99,7 @@ ENTRY(cpu_arm922_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm922_reset) .popsection @@ -109,7 +109,7 @@ ENDPROC(cpu_arm922_reset) .align 5 ENTRY(cpu_arm922_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr #ifndef CONFIG_CPU_DCACHE_WRITETHROUGH @@ -122,7 +122,7 @@ ENTRY(cpu_arm922_do_idle) ENTRY(arm922_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm922_flush_icache_all) /* @@ -153,7 +153,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -179,7 +179,7 @@ ENTRY(arm922_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -213,7 +213,7 @@ ENTRY(arm922_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -233,7 +233,7 @@ ENTRY(arm922_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -259,7 +259,7 @@ arm922_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -278,7 +278,7 @@ arm922_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -295,7 +295,7 @@ ENTRY(arm922_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -318,7 +318,7 @@ ENDPROC(arm922_dma_map_area) * - dir - DMA direction */ ENTRY(arm922_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm922_dma_unmap_area) .globl arm922_flush_kern_cache_louis @@ -336,7 +336,7 @@ ENTRY(cpu_arm922_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b #endif - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -371,7 +371,7 @@ ENTRY(cpu_arm922_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm922_set_pte_ext(ptep, pte, ext) @@ -386,7 +386,7 @@ ENTRY(cpu_arm922_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm922_setup, #function __arm922_setup: @@ -401,7 +401,7 @@ __arm922_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __arm922_setup, . - __arm922_setup /* diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ba0d58e1a2a2..c32d073282ea 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -86,7 +86,7 @@ * cpu_arm925_proc_init() */ ENTRY(cpu_arm925_proc_init) - mov pc, lr + ret lr /* * cpu_arm925_proc_fin() @@ -96,7 +96,7 @@ ENTRY(cpu_arm925_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm925_reset(loc) @@ -129,7 +129,7 @@ ENDPROC(cpu_arm925_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 /* * cpu_arm925_do_idle() @@ -145,7 +145,7 @@ ENTRY(cpu_arm925_do_idle) mcr p15, 0, r2, c1, c0, 0 @ Disable I cache mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable - mov pc, lr + ret lr /* * flush_icache_all() @@ -155,7 +155,7 @@ ENTRY(cpu_arm925_do_idle) ENTRY(arm925_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm925_flush_icache_all) /* @@ -188,7 +188,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -225,7 +225,7 @@ ENTRY(arm925_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -259,7 +259,7 @@ ENTRY(arm925_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -279,7 +279,7 @@ ENTRY(arm925_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -307,7 +307,7 @@ arm925_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -328,7 +328,7 @@ arm925_dma_clean_range: blo 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -350,7 +350,7 @@ ENTRY(arm925_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -373,7 +373,7 @@ ENDPROC(arm925_dma_map_area) * - dir - DMA direction */ ENTRY(arm925_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm925_dma_unmap_area) .globl arm925_flush_kern_cache_louis @@ -390,7 +390,7 @@ ENTRY(cpu_arm925_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -419,7 +419,7 @@ ENTRY(cpu_arm925_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm925_set_pte_ext(ptep, pte, ext) @@ -436,7 +436,7 @@ ENTRY(cpu_arm925_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif /* CONFIG_MMU */ - mov pc, lr + ret lr .type __arm925_setup, #function __arm925_setup: @@ -469,7 +469,7 @@ __arm925_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .1.. .... .... .... #endif - mov pc, lr + ret lr .size __arm925_setup, . - __arm925_setup /* diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 0f098f407c9f..252b2503038d 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -55,7 +55,7 @@ * cpu_arm926_proc_init() */ ENTRY(cpu_arm926_proc_init) - mov pc, lr + ret lr /* * cpu_arm926_proc_fin() @@ -65,7 +65,7 @@ ENTRY(cpu_arm926_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm926_reset(loc) @@ -89,7 +89,7 @@ ENTRY(cpu_arm926_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm926_reset) .popsection @@ -111,7 +111,7 @@ ENTRY(cpu_arm926_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt mcr p15, 0, r1, c1, c0, 0 @ Restore ICache enable msr cpsr_c, r3 @ Restore FIQ state - mov pc, lr + ret lr /* * flush_icache_all() @@ -121,7 +121,7 @@ ENTRY(cpu_arm926_do_idle) ENTRY(arm926_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm926_flush_icache_all) /* @@ -151,7 +151,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -188,7 +188,7 @@ ENTRY(arm926_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -222,7 +222,7 @@ ENTRY(arm926_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -242,7 +242,7 @@ ENTRY(arm926_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -270,7 +270,7 @@ arm926_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -291,7 +291,7 @@ arm926_dma_clean_range: blo 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -313,7 +313,7 @@ ENTRY(arm926_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -336,7 +336,7 @@ ENDPROC(arm926_dma_map_area) * - dir - DMA direction */ ENTRY(arm926_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm926_dma_unmap_area) .globl arm926_flush_kern_cache_louis @@ -353,7 +353,7 @@ ENTRY(cpu_arm926_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -380,7 +380,7 @@ ENTRY(cpu_arm926_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif - mov pc, lr + ret lr /* * cpu_arm926_set_pte_ext(ptep, pte, ext) @@ -397,7 +397,7 @@ ENTRY(cpu_arm926_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/plat-s3c24xx/sleep.S */ .globl cpu_arm926_suspend_size @@ -448,7 +448,7 @@ __arm926_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x4000 @ .1.. .... .... .... #endif - mov pc, lr + ret lr .size __arm926_setup, . - __arm926_setup /* diff --git a/arch/arm/mm/proc-arm940.S b/arch/arm/mm/proc-arm940.S index 1c39a704ff6e..e5212d489377 100644 --- a/arch/arm/mm/proc-arm940.S +++ b/arch/arm/mm/proc-arm940.S @@ -31,7 +31,7 @@ */ ENTRY(cpu_arm940_proc_init) ENTRY(cpu_arm940_switch_mm) - mov pc, lr + ret lr /* * cpu_arm940_proc_fin() @@ -41,7 +41,7 @@ ENTRY(cpu_arm940_proc_fin) bic r0, r0, #0x00001000 @ i-cache bic r0, r0, #0x00000004 @ d-cache mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm940_reset(loc) @@ -58,7 +58,7 @@ ENTRY(cpu_arm940_reset) bic ip, ip, #0x00000005 @ .............c.p bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm940_reset) .popsection @@ -68,7 +68,7 @@ ENDPROC(cpu_arm940_reset) .align 5 ENTRY(cpu_arm940_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -78,7 +78,7 @@ ENTRY(cpu_arm940_do_idle) ENTRY(arm940_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm940_flush_icache_all) /* @@ -122,7 +122,7 @@ ENTRY(arm940_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -170,7 +170,7 @@ ENTRY(arm940_flush_kern_dcache_area) bcs 1b @ segments 7 to 0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -191,7 +191,7 @@ arm940_dma_inv_range: subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -215,7 +215,7 @@ ENTRY(cpu_arm940_dcache_clean_area) bcs 1b @ segments 7 to 0 #endif mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -241,7 +241,7 @@ ENTRY(arm940_dma_flush_range) subs r1, r1, #1 << 4 bcs 1b @ segments 7 to 0 mcr p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -264,7 +264,7 @@ ENDPROC(arm940_dma_map_area) * - dir - DMA direction */ ENTRY(arm940_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm940_dma_unmap_area) .globl arm940_flush_kern_cache_louis @@ -337,7 +337,7 @@ __arm940_setup: orr r0, r0, #0x00001000 @ I-cache orr r0, r0, #0x00000005 @ MPU/D-cache - mov pc, lr + ret lr .size __arm940_setup, . - __arm940_setup diff --git a/arch/arm/mm/proc-arm946.S b/arch/arm/mm/proc-arm946.S index 0289cd905e73..b3dd9b2d0b8e 100644 --- a/arch/arm/mm/proc-arm946.S +++ b/arch/arm/mm/proc-arm946.S @@ -38,7 +38,7 @@ */ ENTRY(cpu_arm946_proc_init) ENTRY(cpu_arm946_switch_mm) - mov pc, lr + ret lr /* * cpu_arm946_proc_fin() @@ -48,7 +48,7 @@ ENTRY(cpu_arm946_proc_fin) bic r0, r0, #0x00001000 @ i-cache bic r0, r0, #0x00000004 @ d-cache mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_arm946_reset(loc) @@ -65,7 +65,7 @@ ENTRY(cpu_arm946_reset) bic ip, ip, #0x00000005 @ .............c.p bic ip, ip, #0x00001000 @ i-cache mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_arm946_reset) .popsection @@ -75,7 +75,7 @@ ENDPROC(cpu_arm946_reset) .align 5 ENTRY(cpu_arm946_do_idle) mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -85,7 +85,7 @@ ENTRY(cpu_arm946_do_idle) ENTRY(arm946_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(arm946_flush_icache_all) /* @@ -117,7 +117,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ flush I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -156,7 +156,7 @@ ENTRY(arm946_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -191,7 +191,7 @@ ENTRY(arm946_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -212,7 +212,7 @@ ENTRY(arm946_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -239,7 +239,7 @@ arm946_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -260,7 +260,7 @@ arm946_dma_clean_range: blo 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -284,7 +284,7 @@ ENTRY(arm946_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -307,7 +307,7 @@ ENDPROC(arm946_dma_map_area) * - dir - DMA direction */ ENTRY(arm946_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(arm946_dma_unmap_area) .globl arm946_flush_kern_cache_louis @@ -324,7 +324,7 @@ ENTRY(cpu_arm946_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .type __arm946_setup, #function __arm946_setup: @@ -392,7 +392,7 @@ __arm946_setup: #ifdef CONFIG_CPU_CACHE_ROUND_ROBIN orr r0, r0, #0x00004000 @ .1.. .... .... .... #endif - mov pc, lr + ret lr .size __arm946_setup, . - __arm946_setup diff --git a/arch/arm/mm/proc-arm9tdmi.S b/arch/arm/mm/proc-arm9tdmi.S index f51197ba754a..8227322bbb8f 100644 --- a/arch/arm/mm/proc-arm9tdmi.S +++ b/arch/arm/mm/proc-arm9tdmi.S @@ -32,13 +32,13 @@ ENTRY(cpu_arm9tdmi_proc_init) ENTRY(cpu_arm9tdmi_do_idle) ENTRY(cpu_arm9tdmi_dcache_clean_area) ENTRY(cpu_arm9tdmi_switch_mm) - mov pc, lr + ret lr /* * cpu_arm9tdmi_proc_fin() */ ENTRY(cpu_arm9tdmi_proc_fin) - mov pc, lr + ret lr /* * Function: cpu_arm9tdmi_reset(loc) @@ -47,13 +47,13 @@ ENTRY(cpu_arm9tdmi_proc_fin) */ .pushsection .idmap.text, "ax" ENTRY(cpu_arm9tdmi_reset) - mov pc, r0 + ret r0 ENDPROC(cpu_arm9tdmi_reset) .popsection .type __arm9tdmi_setup, #function __arm9tdmi_setup: - mov pc, lr + ret lr .size __arm9tdmi_setup, . - __arm9tdmi_setup __INITDATA diff --git a/arch/arm/mm/proc-fa526.S b/arch/arm/mm/proc-fa526.S index 2dfc0f1d3bfd..c494886892ba 100644 --- a/arch/arm/mm/proc-fa526.S +++ b/arch/arm/mm/proc-fa526.S @@ -32,7 +32,7 @@ * cpu_fa526_proc_init() */ ENTRY(cpu_fa526_proc_init) - mov pc, lr + ret lr /* * cpu_fa526_proc_fin() @@ -44,7 +44,7 @@ ENTRY(cpu_fa526_proc_fin) mcr p15, 0, r0, c1, c0, 0 @ disable caches nop nop - mov pc, lr + ret lr /* * cpu_fa526_reset(loc) @@ -72,7 +72,7 @@ ENTRY(cpu_fa526_reset) mcr p15, 0, ip, c1, c0, 0 @ ctrl register nop nop - mov pc, r0 + ret r0 ENDPROC(cpu_fa526_reset) .popsection @@ -81,7 +81,7 @@ ENDPROC(cpu_fa526_reset) */ .align 4 ENTRY(cpu_fa526_do_idle) - mov pc, lr + ret lr ENTRY(cpu_fa526_dcache_clean_area) @@ -90,7 +90,7 @@ ENTRY(cpu_fa526_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -117,7 +117,7 @@ ENTRY(cpu_fa526_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate UTLB #endif - mov pc, lr + ret lr /* * cpu_fa526_set_pte_ext(ptep, pte, ext) @@ -133,7 +133,7 @@ ENTRY(cpu_fa526_set_pte_ext) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr .type __fa526_setup, #function __fa526_setup: @@ -162,7 +162,7 @@ __fa526_setup: bic r0, r0, r5 ldr r5, fa526_cr1_set orr r0, r0, r5 - mov pc, lr + ret lr .size __fa526_setup, . - __fa526_setup /* diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S index db79b62c92fb..03a1b75f2e16 100644 --- a/arch/arm/mm/proc-feroceon.S +++ b/arch/arm/mm/proc-feroceon.S @@ -69,7 +69,7 @@ ENTRY(cpu_feroceon_proc_init) movne r2, r2, lsr #2 @ turned into # of sets sub r2, r2, #(1 << 5) stmia r1, {r2, r3} - mov pc, lr + ret lr /* * cpu_feroceon_proc_fin() @@ -86,7 +86,7 @@ ENTRY(cpu_feroceon_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_feroceon_reset(loc) @@ -110,7 +110,7 @@ ENTRY(cpu_feroceon_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_feroceon_reset) .popsection @@ -124,7 +124,7 @@ ENTRY(cpu_feroceon_do_idle) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ Drain write buffer mcr p15, 0, r0, c7, c0, 4 @ Wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -134,7 +134,7 @@ ENTRY(cpu_feroceon_do_idle) ENTRY(feroceon_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(feroceon_flush_icache_all) /* @@ -169,7 +169,7 @@ __flush_whole_cache: mov ip, #0 mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -198,7 +198,7 @@ ENTRY(feroceon_flush_user_cache_range) tst r2, #VM_EXEC mov ip, #0 mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -233,7 +233,7 @@ ENTRY(feroceon_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -254,7 +254,7 @@ ENTRY(feroceon_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 ENTRY(feroceon_range_flush_kern_dcache_area) @@ -268,7 +268,7 @@ ENTRY(feroceon_range_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -295,7 +295,7 @@ feroceon_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 feroceon_range_dma_inv_range: @@ -311,7 +311,7 @@ feroceon_range_dma_inv_range: mcr p15, 5, r0, c15, c14, 0 @ D inv range start mcr p15, 5, r1, c15, c14, 1 @ D inv range top msr cpsr_c, r2 @ restore interrupts - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -331,7 +331,7 @@ feroceon_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 feroceon_range_dma_clean_range: @@ -344,7 +344,7 @@ feroceon_range_dma_clean_range: mcr p15, 5, r1, c15, c13, 1 @ D clean range top msr cpsr_c, r2 @ restore interrupts mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -362,7 +362,7 @@ ENTRY(feroceon_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .align 5 ENTRY(feroceon_range_dma_flush_range) @@ -375,7 +375,7 @@ ENTRY(feroceon_range_dma_flush_range) mcr p15, 5, r1, c15, c15, 1 @ D clean/inv range top msr cpsr_c, r2 @ restore interrupts mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -412,7 +412,7 @@ ENDPROC(feroceon_range_dma_map_area) * - dir - DMA direction */ ENTRY(feroceon_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(feroceon_dma_unmap_area) .globl feroceon_flush_kern_cache_louis @@ -461,7 +461,7 @@ ENTRY(cpu_feroceon_dcache_clean_area) bhi 1b #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -490,9 +490,9 @@ ENTRY(cpu_feroceon_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mov pc, r2 + ret r2 #else - mov pc, lr + ret lr #endif /* @@ -512,7 +512,7 @@ ENTRY(cpu_feroceon_set_pte_ext) #endif mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/mm/proc-arm926.S */ .globl cpu_feroceon_suspend_size @@ -554,7 +554,7 @@ __feroceon_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __feroceon_setup, . - __feroceon_setup /* diff --git a/arch/arm/mm/proc-mohawk.S b/arch/arm/mm/proc-mohawk.S index 40acba595731..53d393455f13 100644 --- a/arch/arm/mm/proc-mohawk.S +++ b/arch/arm/mm/proc-mohawk.S @@ -45,7 +45,7 @@ * cpu_mohawk_proc_init() */ ENTRY(cpu_mohawk_proc_init) - mov pc, lr + ret lr /* * cpu_mohawk_proc_fin() @@ -55,7 +55,7 @@ ENTRY(cpu_mohawk_proc_fin) bic r0, r0, #0x1800 @ ...iz........... bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_mohawk_reset(loc) @@ -79,7 +79,7 @@ ENTRY(cpu_mohawk_reset) bic ip, ip, #0x0007 @ .............cam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_mohawk_reset) .popsection @@ -93,7 +93,7 @@ ENTRY(cpu_mohawk_do_idle) mov r0, #0 mcr p15, 0, r0, c7, c10, 4 @ drain write buffer mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt - mov pc, lr + ret lr /* * flush_icache_all() @@ -103,7 +103,7 @@ ENTRY(cpu_mohawk_do_idle) ENTRY(mohawk_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(mohawk_flush_icache_all) /* @@ -128,7 +128,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache mcrne p15, 0, ip, c7, c10, 0 @ drain write buffer - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, flags) @@ -158,7 +158,7 @@ ENTRY(mohawk_flush_user_cache_range) blo 1b tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -194,7 +194,7 @@ ENTRY(mohawk_coherent_user_range) blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB mov r0, #0 - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -214,7 +214,7 @@ ENTRY(mohawk_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -240,7 +240,7 @@ mohawk_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -259,7 +259,7 @@ mohawk_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -277,7 +277,7 @@ ENTRY(mohawk_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -300,7 +300,7 @@ ENDPROC(mohawk_dma_map_area) * - dir - DMA direction */ ENTRY(mohawk_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(mohawk_dma_unmap_area) .globl mohawk_flush_kern_cache_louis @@ -315,7 +315,7 @@ ENTRY(cpu_mohawk_dcache_clean_area) subs r1, r1, #CACHE_DLINESIZE bhi 1b mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr /* * cpu_mohawk_switch_mm(pgd) @@ -333,7 +333,7 @@ ENTRY(cpu_mohawk_switch_mm) orr r0, r0, #0x18 @ cache the page table in L2 mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mov pc, lr + ret lr /* * cpu_mohawk_set_pte_ext(ptep, pte, ext) @@ -346,7 +346,7 @@ ENTRY(cpu_mohawk_set_pte_ext) mov r0, r0 mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB - mov pc, lr + ret lr .globl cpu_mohawk_suspend_size .equ cpu_mohawk_suspend_size, 4 * 6 @@ -400,7 +400,7 @@ __mohawk_setup: mrc p15, 0, r0, c1, c0 @ get control register bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __mohawk_setup, . - __mohawk_setup diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index c45319c8f1d9..8008a0461cf5 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -38,7 +38,7 @@ ENTRY(cpu_sa110_proc_init) mov r0, #0 mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching - mov pc, lr + ret lr /* * cpu_sa110_proc_fin() @@ -50,7 +50,7 @@ ENTRY(cpu_sa110_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_sa110_reset(loc) @@ -74,7 +74,7 @@ ENTRY(cpu_sa110_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_sa110_reset) .popsection @@ -103,7 +103,7 @@ ENTRY(cpu_sa110_do_idle) mov r0, r0 @ safety mov r0, r0 @ safety mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -121,7 +121,7 @@ ENTRY(cpu_sa110_dcache_clean_area) add r0, r0, #DCACHELINESIZE subs r1, r1, #DCACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -141,7 +141,7 @@ ENTRY(cpu_sa110_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs ldr pc, [sp], #4 #else - mov pc, lr + ret lr #endif /* @@ -157,7 +157,7 @@ ENTRY(cpu_sa110_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr .type __sa110_setup, #function __sa110_setup: @@ -173,7 +173,7 @@ __sa110_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __sa110_setup, . - __sa110_setup /* diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index 09d241ae2dbe..89f97ac648a9 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -43,7 +43,7 @@ ENTRY(cpu_sa1100_proc_init) mov r0, #0 mcr p15, 0, r0, c15, c1, 2 @ Enable clock switching mcr p15, 0, r0, c9, c0, 5 @ Allow read-buffer operations from userland - mov pc, lr + ret lr /* * cpu_sa1100_proc_fin() @@ -58,7 +58,7 @@ ENTRY(cpu_sa1100_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x000e @ ............wca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_sa1100_reset(loc) @@ -82,7 +82,7 @@ ENTRY(cpu_sa1100_reset) bic ip, ip, #0x000f @ ............wcam bic ip, ip, #0x1100 @ ...i...s........ mcr p15, 0, ip, c1, c0, 0 @ ctrl register - mov pc, r0 + ret r0 ENDPROC(cpu_sa1100_reset) .popsection @@ -113,7 +113,7 @@ ENTRY(cpu_sa1100_do_idle) mcr p15, 0, r0, c15, c8, 2 @ wait for interrupt mov r0, r0 @ safety mcr p15, 0, r0, c15, c1, 2 @ enable clock switching - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -131,7 +131,7 @@ ENTRY(cpu_sa1100_dcache_clean_area) add r0, r0, #DCACHELINESIZE subs r1, r1, #DCACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -152,7 +152,7 @@ ENTRY(cpu_sa1100_switch_mm) mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs ldr pc, [sp], #4 #else - mov pc, lr + ret lr #endif /* @@ -168,7 +168,7 @@ ENTRY(cpu_sa1100_set_pte_ext) mcr p15, 0, r0, c7, c10, 1 @ clean D entry mcr p15, 0, r0, c7, c10, 4 @ drain WB #endif - mov pc, lr + ret lr .globl cpu_sa1100_suspend_size .equ cpu_sa1100_suspend_size, 4 * 3 @@ -211,7 +211,7 @@ __sa1100_setup: mrc p15, 0, r0, c1, c0 @ get control register v4 bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __sa1100_setup, . - __sa1100_setup /* diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 32b3558321c4..d0390f4b3f18 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -36,14 +36,14 @@ #define PMD_FLAGS_SMP PMD_SECT_WBWA|PMD_SECT_S ENTRY(cpu_v6_proc_init) - mov pc, lr + ret lr ENTRY(cpu_v6_proc_fin) mrc p15, 0, r0, c1, c0, 0 @ ctrl register bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_v6_reset(loc) @@ -62,7 +62,7 @@ ENTRY(cpu_v6_reset) mcr p15, 0, r1, c1, c0, 0 @ disable MMU mov r1, #0 mcr p15, 0, r1, c7, c5, 4 @ ISB - mov pc, r0 + ret r0 ENDPROC(cpu_v6_reset) .popsection @@ -77,14 +77,14 @@ ENTRY(cpu_v6_do_idle) mov r1, #0 mcr p15, 0, r1, c7, c10, 4 @ DWB - WFI may enter a low-power mode mcr p15, 0, r1, c7, c0, 4 @ wait for interrupt - mov pc, lr + ret lr ENTRY(cpu_v6_dcache_clean_area) 1: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, #D_CACHE_LINE_SIZE subs r1, r1, #D_CACHE_LINE_SIZE bhi 1b - mov pc, lr + ret lr /* * cpu_v6_switch_mm(pgd_phys, tsk) @@ -113,7 +113,7 @@ ENTRY(cpu_v6_switch_mm) #endif mcr p15, 0, r1, c13, c0, 1 @ set context ID #endif - mov pc, lr + ret lr /* * cpu_v6_set_pte_ext(ptep, pte, ext) @@ -131,7 +131,7 @@ ENTRY(cpu_v6_set_pte_ext) #ifdef CONFIG_MMU armv6_set_pte_ext cpu_v6 #endif - mov pc, lr + ret lr /* Suspend/resume support: taken from arch/arm/mach-s3c64xx/sleep.S */ .globl cpu_v6_suspend_size @@ -241,7 +241,7 @@ __v6_setup: mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg orreq r0, r0, #(1 << 21) @ low interrupt latency configuration #endif - mov pc, lr @ return to head.S:__ret + ret lr @ return to head.S:__ret /* * V X F I D LR diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S index 1f52915f2b28..ed448d8a596b 100644 --- a/arch/arm/mm/proc-v7-2level.S +++ b/arch/arm/mm/proc-v7-2level.S @@ -59,7 +59,7 @@ ENTRY(cpu_v7_switch_mm) mcr p15, 0, r0, c2, c0, 0 @ set TTB 0 isb #endif - mov pc, lr + bx lr ENDPROC(cpu_v7_switch_mm) /* @@ -106,7 +106,7 @@ ENTRY(cpu_v7_set_pte_ext) ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif - mov pc, lr + bx lr ENDPROC(cpu_v7_set_pte_ext) /* diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S index 22e3ad63500c..e4c8acfc1323 100644 --- a/arch/arm/mm/proc-v7-3level.S +++ b/arch/arm/mm/proc-v7-3level.S @@ -19,6 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <asm/assembler.h> #define TTB_IRGN_NC (0 << 8) #define TTB_IRGN_WBWA (1 << 8) @@ -61,7 +62,7 @@ ENTRY(cpu_v7_switch_mm) mcrr p15, 0, rpgdl, rpgdh, c2 @ set TTB 0 isb #endif - mov pc, lr + ret lr ENDPROC(cpu_v7_switch_mm) #ifdef __ARMEB__ @@ -86,13 +87,18 @@ ENTRY(cpu_v7_set_pte_ext) tst rh, #1 << (57 - 32) @ L_PTE_NONE bicne rl, #L_PTE_VALID bne 1f - tst rh, #1 << (55 - 32) @ L_PTE_DIRTY - orreq rl, #L_PTE_RDONLY + + eor ip, rh, #1 << (55 - 32) @ toggle L_PTE_DIRTY in temp reg to + @ test for !L_PTE_DIRTY || L_PTE_RDONLY + tst ip, #1 << (55 - 32) | 1 << (58 - 32) + orrne rl, #PTE_AP2 + biceq rl, #PTE_AP2 + 1: strd r2, r3, [r0] ALT_SMP(W(nop)) ALT_UP (mcr p15, 0, r0, c7, c10, 1) @ flush_pte #endif - mov pc, lr + ret lr ENDPROC(cpu_v7_set_pte_ext) /* diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3db2c2f04a30..b5d67db20897 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -26,7 +26,7 @@ #endif ENTRY(cpu_v7_proc_init) - mov pc, lr + ret lr ENDPROC(cpu_v7_proc_init) ENTRY(cpu_v7_proc_fin) @@ -34,7 +34,7 @@ ENTRY(cpu_v7_proc_fin) bic r0, r0, #0x1000 @ ...i............ bic r0, r0, #0x0006 @ .............ca. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr ENDPROC(cpu_v7_proc_fin) /* @@ -71,20 +71,20 @@ ENDPROC(cpu_v7_reset) ENTRY(cpu_v7_do_idle) dsb @ WFI may enter a low-power mode wfi - mov pc, lr + ret lr ENDPROC(cpu_v7_do_idle) ENTRY(cpu_v7_dcache_clean_area) ALT_SMP(W(nop)) @ MP extensions imply L1 PTW ALT_UP_B(1f) - mov pc, lr + ret lr 1: dcache_line_size r2, r3 2: mcr p15, 0, r0, c7, c10, 1 @ clean D entry add r0, r0, r2 subs r1, r1, r2 bhi 2b dsb ishst - mov pc, lr + ret lr ENDPROC(cpu_v7_dcache_clean_area) string cpu_v7_name, "ARMv7 Processor" @@ -152,6 +152,40 @@ ENTRY(cpu_v7_do_resume) ENDPROC(cpu_v7_do_resume) #endif +/* + * Cortex-A9 processor functions + */ + globl_equ cpu_ca9mp_proc_init, cpu_v7_proc_init + globl_equ cpu_ca9mp_proc_fin, cpu_v7_proc_fin + globl_equ cpu_ca9mp_reset, cpu_v7_reset + globl_equ cpu_ca9mp_do_idle, cpu_v7_do_idle + globl_equ cpu_ca9mp_dcache_clean_area, cpu_v7_dcache_clean_area + globl_equ cpu_ca9mp_switch_mm, cpu_v7_switch_mm + globl_equ cpu_ca9mp_set_pte_ext, cpu_v7_set_pte_ext +.globl cpu_ca9mp_suspend_size +.equ cpu_ca9mp_suspend_size, cpu_v7_suspend_size + 4 * 2 +#ifdef CONFIG_ARM_CPU_SUSPEND +ENTRY(cpu_ca9mp_do_suspend) + stmfd sp!, {r4 - r5} + mrc p15, 0, r4, c15, c0, 1 @ Diagnostic register + mrc p15, 0, r5, c15, c0, 0 @ Power register + stmia r0!, {r4 - r5} + ldmfd sp!, {r4 - r5} + b cpu_v7_do_suspend +ENDPROC(cpu_ca9mp_do_suspend) + +ENTRY(cpu_ca9mp_do_resume) + ldmia r0!, {r4 - r5} + mrc p15, 0, r10, c15, c0, 1 @ Read Diagnostic register + teq r4, r10 @ Already restored? + mcrne p15, 0, r4, c15, c0, 1 @ No, so restore it + mrc p15, 0, r10, c15, c0, 0 @ Read Power register + teq r5, r10 @ Already restored? + mcrne p15, 0, r5, c15, c0, 0 @ No, so restore it + b cpu_v7_do_resume +ENDPROC(cpu_ca9mp_do_resume) +#endif + #ifdef CONFIG_CPU_PJ4B globl_equ cpu_pj4b_switch_mm, cpu_v7_switch_mm globl_equ cpu_pj4b_set_pte_ext, cpu_v7_set_pte_ext @@ -163,7 +197,7 @@ ENTRY(cpu_pj4b_do_idle) dsb @ WFI may enter a low-power mode wfi dsb @barrier - mov pc, lr + ret lr ENDPROC(cpu_pj4b_do_idle) #else globl_equ cpu_pj4b_do_idle, cpu_v7_do_idle @@ -184,16 +218,16 @@ ENDPROC(cpu_pj4b_do_suspend) ENTRY(cpu_pj4b_do_resume) ldmia r0!, {r6 - r10} - mcr p15, 1, r6, c15, c1, 0 @ save CP15 - extra features - mcr p15, 1, r7, c15, c2, 0 @ save CP15 - Aux Func Modes Ctrl 0 - mcr p15, 1, r8, c15, c1, 2 @ save CP15 - Aux Debug Modes Ctrl 2 - mcr p15, 1, r9, c15, c1, 1 @ save CP15 - Aux Debug Modes Ctrl 1 - mcr p15, 0, r10, c9, c14, 0 @ save CP15 - PMC + mcr p15, 1, r6, c15, c1, 0 @ restore CP15 - extra features + mcr p15, 1, r7, c15, c2, 0 @ restore CP15 - Aux Func Modes Ctrl 0 + mcr p15, 1, r8, c15, c1, 2 @ restore CP15 - Aux Debug Modes Ctrl 2 + mcr p15, 1, r9, c15, c1, 1 @ restore CP15 - Aux Debug Modes Ctrl 1 + mcr p15, 0, r10, c9, c14, 0 @ restore CP15 - PMC b cpu_v7_do_resume ENDPROC(cpu_pj4b_do_resume) #endif .globl cpu_pj4b_suspend_size -.equ cpu_pj4b_suspend_size, 4 * 14 +.equ cpu_pj4b_suspend_size, cpu_v7_suspend_size + 4 * 5 #endif @@ -216,6 +250,7 @@ __v7_cr7mp_setup: __v7_ca7mp_setup: __v7_ca12mp_setup: __v7_ca15mp_setup: +__v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 1: @@ -407,7 +442,7 @@ __v7_setup: bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions - mov pc, lr @ return to head.S:__ret + ret lr @ return to head.S:__ret ENDPROC(__v7_setup) .align 2 @@ -418,6 +453,7 @@ __v7_setup_stack: @ define struct processor (see <asm/proc-fns.h> and proc-macros.S) define_processor_functions v7, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 + define_processor_functions ca9mp, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #ifdef CONFIG_CPU_PJ4B define_processor_functions pj4b, dabort=v7_early_abort, pabort=v7_pabort, suspend=1 #endif @@ -470,7 +506,7 @@ __v7_ca5mp_proc_info: __v7_ca9mp_proc_info: .long 0x410fc090 .long 0xff0ffff0 - __v7_proc __v7_ca9mp_setup + __v7_proc __v7_ca9mp_setup, proc_fns = ca9mp_processor_functions .size __v7_ca9mp_proc_info, . - __v7_ca9mp_proc_info #endif /* CONFIG_ARM_LPAE */ @@ -528,6 +564,16 @@ __v7_ca15mp_proc_info: .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* + * Broadcom Corporation Brahma-B15 processor. + */ + .type __v7_b15mp_proc_info, #object +__v7_b15mp_proc_info: + .long 0x420f00f0 + .long 0xff0ffff0 + __v7_proc __v7_b15mp_setup, hwcaps = HWCAP_IDIV + .size __v7_b15mp_proc_info, . - __v7_b15mp_proc_info + + /* * ARM Ltd. Cortex A17 processor. */ .type __v7_ca17mp_proc_info, #object diff --git a/arch/arm/mm/proc-v7m.S b/arch/arm/mm/proc-v7m.S index 1ca37c72f12f..d1e68b553d3b 100644 --- a/arch/arm/mm/proc-v7m.S +++ b/arch/arm/mm/proc-v7m.S @@ -16,11 +16,11 @@ #include "proc-macros.S" ENTRY(cpu_v7m_proc_init) - mov pc, lr + ret lr ENDPROC(cpu_v7m_proc_init) ENTRY(cpu_v7m_proc_fin) - mov pc, lr + ret lr ENDPROC(cpu_v7m_proc_fin) /* @@ -34,7 +34,7 @@ ENDPROC(cpu_v7m_proc_fin) */ .align 5 ENTRY(cpu_v7m_reset) - mov pc, r0 + ret r0 ENDPROC(cpu_v7m_reset) /* @@ -46,18 +46,18 @@ ENDPROC(cpu_v7m_reset) */ ENTRY(cpu_v7m_do_idle) wfi - mov pc, lr + ret lr ENDPROC(cpu_v7m_do_idle) ENTRY(cpu_v7m_dcache_clean_area) - mov pc, lr + ret lr ENDPROC(cpu_v7m_dcache_clean_area) /* * There is no MMU, so here is nothing to do. */ ENTRY(cpu_v7m_switch_mm) - mov pc, lr + ret lr ENDPROC(cpu_v7m_switch_mm) .globl cpu_v7m_suspend_size @@ -65,11 +65,11 @@ ENDPROC(cpu_v7m_switch_mm) #ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v7m_do_suspend) - mov pc, lr + ret lr ENDPROC(cpu_v7m_do_suspend) ENTRY(cpu_v7m_do_resume) - mov pc, lr + ret lr ENDPROC(cpu_v7m_do_resume) #endif @@ -120,7 +120,7 @@ __v7m_setup: ldr r12, [r0, V7M_SCB_CCR] @ system control register orr r12, #V7M_SCB_CCR_STKALIGN str r12, [r0, V7M_SCB_CCR] - mov pc, lr + ret lr ENDPROC(__v7m_setup) .align 2 diff --git a/arch/arm/mm/proc-xsc3.S b/arch/arm/mm/proc-xsc3.S index dc1645890042..f8acdfece036 100644 --- a/arch/arm/mm/proc-xsc3.S +++ b/arch/arm/mm/proc-xsc3.S @@ -83,7 +83,7 @@ * Nothing too exciting at the moment */ ENTRY(cpu_xsc3_proc_init) - mov pc, lr + ret lr /* * cpu_xsc3_proc_fin() @@ -93,7 +93,7 @@ ENTRY(cpu_xsc3_proc_fin) bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_xsc3_reset(loc) @@ -119,7 +119,7 @@ ENTRY(cpu_xsc3_reset) @ CAUTION: MMU turned off from this point. We count on the pipeline @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I and D TLBs - mov pc, r0 + ret r0 ENDPROC(cpu_xsc3_reset) .popsection @@ -138,7 +138,7 @@ ENDPROC(cpu_xsc3_reset) ENTRY(cpu_xsc3_do_idle) mov r0, #1 mcr p14, 0, r0, c7, c0, 0 @ go to idle - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -150,7 +150,7 @@ ENTRY(cpu_xsc3_do_idle) ENTRY(xsc3_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(xsc3_flush_icache_all) /* @@ -176,7 +176,7 @@ __flush_whole_cache: mcrne p15, 0, ip, c7, c5, 0 @ invalidate L1 I cache and BTB mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, vm_flags) @@ -205,7 +205,7 @@ ENTRY(xsc3_flush_user_cache_range) mcrne p15, 0, ip, c7, c5, 6 @ invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ data write barrier mcrne p15, 0, ip, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -232,7 +232,7 @@ ENTRY(xsc3_coherent_user_range) mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -253,7 +253,7 @@ ENTRY(xsc3_flush_kern_dcache_area) mcr p15, 0, r0, c7, c5, 0 @ invalidate L1 I cache and BTB mcr p15, 0, r0, c7, c10, 4 @ data write barrier mcr p15, 0, r0, c7, c5, 4 @ prefetch flush - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -277,7 +277,7 @@ xsc3_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -294,7 +294,7 @@ xsc3_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -311,7 +311,7 @@ ENTRY(xsc3_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -334,7 +334,7 @@ ENDPROC(xsc3_dma_map_area) * - dir - DMA direction */ ENTRY(xsc3_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(xsc3_dma_unmap_area) .globl xsc3_flush_kern_cache_louis @@ -348,7 +348,7 @@ ENTRY(cpu_xsc3_dcache_clean_area) add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -406,7 +406,7 @@ ENTRY(cpu_xsc3_set_pte_ext) orr r2, r2, ip xscale_set_pte_ext_epilogue - mov pc, lr + ret lr .ltorg .align @@ -478,7 +478,7 @@ __xsc3_setup: bic r0, r0, r5 @ ..V. ..R. .... ..A. orr r0, r0, r6 @ ..VI Z..S .... .C.M (mmu) @ ...I Z..S .... .... (uc) - mov pc, lr + ret lr .size __xsc3_setup, . - __xsc3_setup diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index d19b1cfcad91..23259f104c66 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -118,7 +118,7 @@ ENTRY(cpu_xscale_proc_init) mrc p15, 0, r1, c1, c0, 1 bic r1, r1, #1 mcr p15, 0, r1, c1, c0, 1 - mov pc, lr + ret lr /* * cpu_xscale_proc_fin() @@ -128,7 +128,7 @@ ENTRY(cpu_xscale_proc_fin) bic r0, r0, #0x1800 @ ...IZ........... bic r0, r0, #0x0006 @ .............CA. mcr p15, 0, r0, c1, c0, 0 @ disable caches - mov pc, lr + ret lr /* * cpu_xscale_reset(loc) @@ -160,7 +160,7 @@ ENTRY(cpu_xscale_reset) @ CAUTION: MMU turned off from this point. We count on the pipeline @ already containing those two last instructions to survive. mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs - mov pc, r0 + ret r0 ENDPROC(cpu_xscale_reset) .popsection @@ -179,7 +179,7 @@ ENDPROC(cpu_xscale_reset) ENTRY(cpu_xscale_do_idle) mov r0, #1 mcr p14, 0, r0, c7, c0, 0 @ Go to IDLE - mov pc, lr + ret lr /* ================================= CACHE ================================ */ @@ -191,7 +191,7 @@ ENTRY(cpu_xscale_do_idle) ENTRY(xscale_flush_icache_all) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache - mov pc, lr + ret lr ENDPROC(xscale_flush_icache_all) /* @@ -216,7 +216,7 @@ __flush_whole_cache: tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * flush_user_cache_range(start, end, vm_flags) @@ -245,7 +245,7 @@ ENTRY(xscale_flush_user_cache_range) tst r2, #VM_EXEC mcrne p15, 0, ip, c7, c5, 6 @ Invalidate BTB mcrne p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * coherent_kern_range(start, end) @@ -269,7 +269,7 @@ ENTRY(xscale_coherent_kern_range) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * coherent_user_range(start, end) @@ -291,7 +291,7 @@ ENTRY(xscale_coherent_user_range) mov r0, #0 mcr p15, 0, r0, c7, c5, 6 @ Invalidate BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * flush_kern_dcache_area(void *addr, size_t size) @@ -312,7 +312,7 @@ ENTRY(xscale_flush_kern_dcache_area) mov r0, #0 mcr p15, 0, r0, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_inv_range(start, end) @@ -336,7 +336,7 @@ xscale_dma_inv_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_clean_range(start, end) @@ -353,7 +353,7 @@ xscale_dma_clean_range: cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_flush_range(start, end) @@ -371,7 +371,7 @@ ENTRY(xscale_dma_flush_range) cmp r0, r1 blo 1b mcr p15, 0, r0, c7, c10, 4 @ Drain Write (& Fill) Buffer - mov pc, lr + ret lr /* * dma_map_area(start, size, dir) @@ -407,7 +407,7 @@ ENDPROC(xscale_80200_A0_A1_dma_map_area) * - dir - DMA direction */ ENTRY(xscale_dma_unmap_area) - mov pc, lr + ret lr ENDPROC(xscale_dma_unmap_area) .globl xscale_flush_kern_cache_louis @@ -458,7 +458,7 @@ ENTRY(cpu_xscale_dcache_clean_area) add r0, r0, #CACHELINESIZE subs r1, r1, #CACHELINESIZE bhi 1b - mov pc, lr + ret lr /* =============================== PageTable ============================== */ @@ -521,7 +521,7 @@ ENTRY(cpu_xscale_set_pte_ext) orr r2, r2, ip xscale_set_pte_ext_epilogue - mov pc, lr + ret lr .ltorg .align @@ -572,7 +572,7 @@ __xscale_setup: mrc p15, 0, r0, c1, c0, 0 @ get control register bic r0, r0, r5 orr r0, r0, r6 - mov pc, lr + ret lr .size __xscale_setup, . - __xscale_setup /* diff --git a/arch/arm/mm/tlb-fa.S b/arch/arm/mm/tlb-fa.S index d3ddcf9a76ca..d2d9ecbe0aac 100644 --- a/arch/arm/mm/tlb-fa.S +++ b/arch/arm/mm/tlb-fa.S @@ -18,6 +18,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -37,7 +38,7 @@ ENTRY(fa_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB bic r0, r0, #0x0ff @@ -47,7 +48,7 @@ ENTRY(fa_flush_user_tlb_range) cmp r0, r1 blo 1b mcr p15, 0, r3, c7, c10, 4 @ data write barrier - mov pc, lr + ret lr ENTRY(fa_flush_kern_tlb_range) @@ -61,7 +62,7 @@ ENTRY(fa_flush_kern_tlb_range) blo 1b mcr p15, 0, r3, c7, c10, 4 @ data write barrier mcr p15, 0, r3, c7, c5, 4 @ prefetch flush (isb) - mov pc, lr + ret lr __INITDATA diff --git a/arch/arm/mm/tlb-v4.S b/arch/arm/mm/tlb-v4.S index 17a025ade573..a2b5dca42048 100644 --- a/arch/arm/mm/tlb-v4.S +++ b/arch/arm/mm/tlb-v4.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -33,7 +34,7 @@ ENTRY(v4_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything .v4_flush_kern_tlb_range: bic r0, r0, #0x0ff bic r0, r0, #0xf00 @@ -41,7 +42,7 @@ ENTRY(v4_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * v4_flush_kern_tlb_range(start, end) diff --git a/arch/arm/mm/tlb-v4wb.S b/arch/arm/mm/tlb-v4wb.S index c04598fa4d4a..5a093b458dbc 100644 --- a/arch/arm/mm/tlb-v4wb.S +++ b/arch/arm/mm/tlb-v4wb.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -33,7 +34,7 @@ ENTRY(v4wb_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything vma_vm_flags r2, r2 mcr p15, 0, r3, c7, c10, 4 @ drain WB tst r2, #VM_EXEC @@ -44,7 +45,7 @@ ENTRY(v4wb_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr /* * v4_flush_kern_tlb_range(start, end) @@ -65,7 +66,7 @@ ENTRY(v4wb_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr __INITDATA diff --git a/arch/arm/mm/tlb-v4wbi.S b/arch/arm/mm/tlb-v4wbi.S index 1f6062b6c1c1..058861548f68 100644 --- a/arch/arm/mm/tlb-v4wbi.S +++ b/arch/arm/mm/tlb-v4wbi.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> #include <linux/init.h> +#include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -32,7 +33,7 @@ ENTRY(v4wbi_flush_user_tlb_range) vma_vm_mm ip, r2 act_mm r3 @ get current->active_mm eors r3, ip, r3 @ == mm ? - movne pc, lr @ no, we dont do anything + retne lr @ no, we dont do anything mov r3, #0 mcr p15, 0, r3, c7, c10, 4 @ drain WB vma_vm_flags r2, r2 @@ -44,7 +45,7 @@ ENTRY(v4wbi_flush_user_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr ENTRY(v4wbi_flush_kern_tlb_range) mov r3, #0 @@ -56,7 +57,7 @@ ENTRY(v4wbi_flush_kern_tlb_range) add r0, r0, #PAGE_SZ cmp r0, r1 blo 1b - mov pc, lr + ret lr __INITDATA diff --git a/arch/arm/mm/tlb-v6.S b/arch/arm/mm/tlb-v6.S index eca07f550a0b..6f689be638bd 100644 --- a/arch/arm/mm/tlb-v6.S +++ b/arch/arm/mm/tlb-v6.S @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/assembler.h> #include <asm/page.h> #include <asm/tlbflush.h> #include "proc-macros.S" @@ -55,7 +56,7 @@ ENTRY(v6wbi_flush_user_tlb_range) cmp r0, r1 blo 1b mcr p15, 0, ip, c7, c10, 4 @ data synchronization barrier - mov pc, lr + ret lr /* * v6wbi_flush_kern_tlb_range(start,end) @@ -84,7 +85,7 @@ ENTRY(v6wbi_flush_kern_tlb_range) blo 1b mcr p15, 0, r2, c7, c10, 4 @ data synchronization barrier mcr p15, 0, r2, c7, c5, 4 @ prefetch flush (isb) - mov pc, lr + ret lr __INIT diff --git a/arch/arm/mm/tlb-v7.S b/arch/arm/mm/tlb-v7.S index 355308767bae..e5101a3bc57c 100644 --- a/arch/arm/mm/tlb-v7.S +++ b/arch/arm/mm/tlb-v7.S @@ -57,7 +57,7 @@ ENTRY(v7wbi_flush_user_tlb_range) cmp r0, r1 blo 1b dsb ish - mov pc, lr + ret lr ENDPROC(v7wbi_flush_user_tlb_range) /* @@ -86,7 +86,7 @@ ENTRY(v7wbi_flush_kern_tlb_range) blo 1b dsb ish isb - mov pc, lr + ret lr ENDPROC(v7wbi_flush_kern_tlb_range) __INIT diff --git a/arch/arm/nwfpe/entry.S b/arch/arm/nwfpe/entry.S index d18dde95b8aa..5d65be1f1e8a 100644 --- a/arch/arm/nwfpe/entry.S +++ b/arch/arm/nwfpe/entry.S @@ -19,7 +19,7 @@ along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - +#include <asm/assembler.h> #include <asm/opcodes.h> /* This is the kernel's entry point into the floating point emulator. @@ -92,7 +92,7 @@ emulate: mov r0, r6 @ prepare for EmulateAll() bl EmulateAll @ emulate the instruction cmp r0, #0 @ was emulation successful - moveq pc, r4 @ no, return failure + reteq r4 @ no, return failure next: .Lx1: ldrt r6, [r5], #4 @ get the next instruction and @@ -102,7 +102,7 @@ next: teq r2, #0x0C000000 teqne r2, #0x0D000000 teqne r2, #0x0E000000 - movne pc, r9 @ return ok if not a fp insn + retne r9 @ return ok if not a fp insn str r5, [sp, #S_PC] @ update PC copy in regs @@ -115,7 +115,7 @@ next: @ plain LDR instruction. Weird, but it seems harmless. .pushsection .fixup,"ax" .align 2 -.Lfix: mov pc, r9 @ let the user eat segfaults +.Lfix: ret r9 @ let the user eat segfaults .popsection .pushsection __ex_table,"a" diff --git a/arch/arm/oprofile/common.c b/arch/arm/oprofile/common.c index 99c63d4b6af8..e6a3c4c92163 100644 --- a/arch/arm/oprofile/common.c +++ b/arch/arm/oprofile/common.c @@ -107,10 +107,7 @@ static void arm_backtrace(struct pt_regs * const regs, unsigned int depth) if (!user_mode(regs)) { struct stackframe frame; - frame.fp = regs->ARM_fp; - frame.sp = regs->ARM_sp; - frame.lr = regs->ARM_lr; - frame.pc = regs->ARM_pc; + arm_get_current_stackframe(regs, &frame); walk_stackframe(&frame, report_trace, &depth); return; } diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c index b5608b1f9fbd..1c98659bbf89 100644 --- a/arch/arm/plat-omap/dma.c +++ b/arch/arm/plat-omap/dma.c @@ -698,6 +698,8 @@ int omap_request_dma(int dev_id, const char *dev_name, unsigned long flags; struct omap_dma_lch *chan; + WARN(strcmp(dev_name, "DMA engine"), "Using deprecated platform DMA API - please update to DMA engine"); + spin_lock_irqsave(&dma_chan_lock, flags); for (ch = 0; ch < dma_chan_count; ch++) { if (free_ch == -1 && dma_chan[ch].dev_id == -1) { diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index fe6ca574d093..2e78760f3495 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -34,7 +34,7 @@ ENDPROC(do_vfp) ENTRY(vfp_null_entry) dec_preempt_count_ti r10, r4 - mov pc, lr + ret lr ENDPROC(vfp_null_entry) .align 2 @@ -49,7 +49,7 @@ ENTRY(vfp_testing_entry) dec_preempt_count_ti r10, r4 ldr r0, VFP_arch_address str r0, [r0] @ set to non-zero value - mov pc, r9 @ we have handled the fault + ret r9 @ we have handled the fault ENDPROC(vfp_testing_entry) .align 2 diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index be807625ed8c..cda654cbf2c2 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -183,7 +183,7 @@ vfp_hw_state_valid: @ always subtract 4 from the following @ instruction address. dec_preempt_count_ti r10, r4 - mov pc, r9 @ we think we have handled things + ret r9 @ we think we have handled things look_for_VFP_exceptions: @@ -202,7 +202,7 @@ look_for_VFP_exceptions: DBGSTR "not VFP" dec_preempt_count_ti r10, r4 - mov pc, lr + ret lr process_exception: DBGSTR "bounce" @@ -234,7 +234,7 @@ ENTRY(vfp_save_state) VFPFMRX r12, FPINST2 @ FPINST2 if needed (and present) 1: stmia r0, {r1, r2, r3, r12} @ save FPEXC, FPSCR, FPINST, FPINST2 - mov pc, lr + ret lr ENDPROC(vfp_save_state) .align @@ -245,7 +245,7 @@ vfp_current_hw_state_address: #ifdef CONFIG_THUMB2_KERNEL adr \tmp, 1f add \tmp, \tmp, \base, lsl \shift - mov pc, \tmp + ret \tmp #else add pc, pc, \base, lsl \shift mov r0, r0 @@ -257,10 +257,10 @@ ENTRY(vfp_get_float) tbl_branch r0, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mrc p10, 0, r0, c\dr, c0, 0 @ fmrs r0, s0 - mov pc, lr + ret lr .org 1b + 8 1: mrc p10, 0, r0, c\dr, c0, 4 @ fmrs r0, s1 - mov pc, lr + ret lr .org 1b + 8 .endr ENDPROC(vfp_get_float) @@ -269,10 +269,10 @@ ENTRY(vfp_put_float) tbl_branch r1, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mcr p10, 0, r0, c\dr, c0, 0 @ fmsr r0, s0 - mov pc, lr + ret lr .org 1b + 8 1: mcr p10, 0, r0, c\dr, c0, 4 @ fmsr r0, s1 - mov pc, lr + ret lr .org 1b + 8 .endr ENDPROC(vfp_put_float) @@ -281,14 +281,14 @@ ENTRY(vfp_get_double) tbl_branch r0, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: fmrrd r0, r1, d\dr - mov pc, lr + ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mrrc p11, 3, r0, r1, c\dr @ fmrrd r0, r1, d\dr - mov pc, lr + ret lr .org 1b + 8 .endr #endif @@ -296,21 +296,21 @@ ENTRY(vfp_get_double) @ virtual register 16 (or 32 if VFPv3) for compare with zero mov r0, #0 mov r1, #0 - mov pc, lr + ret lr ENDPROC(vfp_get_double) ENTRY(vfp_put_double) tbl_branch r2, r3, #3 .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: fmdrr d\dr, r0, r1 - mov pc, lr + ret lr .org 1b + 8 .endr #ifdef CONFIG_VFPv3 @ d16 - d31 registers .irp dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 1: mcrr p11, 3, r0, r1, c\dr @ fmdrr r0, r1, d\dr - mov pc, lr + ret lr .org 1b + 8 .endr #endif diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index 44e3a5f10c4c..f00e08075938 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -58,7 +58,7 @@ ENTRY(HYPERVISOR_##hypercall) \ mov r12, #__HYPERVISOR_##hypercall; \ __HVC(XEN_IMM); \ - mov pc, lr; \ + ret lr; \ ENDPROC(HYPERVISOR_##hypercall) #define HYPERCALL0 HYPERCALL_SIMPLE @@ -74,7 +74,7 @@ ENTRY(HYPERVISOR_##hypercall) \ mov r12, #__HYPERVISOR_##hypercall; \ __HVC(XEN_IMM); \ ldm sp!, {r4} \ - mov pc, lr \ + ret lr \ ENDPROC(HYPERVISOR_##hypercall) .text @@ -101,5 +101,5 @@ ENTRY(privcmd_call) ldr r4, [sp, #4] __HVC(XEN_IMM) ldm sp!, {r4} - mov pc, lr + ret lr ENDPROC(privcmd_call); diff --git a/arch/ia64/include/uapi/asm/fcntl.h b/arch/ia64/include/uapi/asm/fcntl.h index 1dd275dc8f65..7b485876cad4 100644 --- a/arch/ia64/include/uapi/asm/fcntl.h +++ b/arch/ia64/include/uapi/asm/fcntl.h @@ -8,6 +8,7 @@ #define force_o_largefile() \ (personality(current->personality) != PER_LINUX32) +#include <linux/personality.h> #include <asm-generic/fcntl.h> #endif /* _ASM_IA64_FCNTL_H */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 7a469acee33c..4e238e6e661c 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -269,6 +269,7 @@ config LANTIQ config LASAT bool "LASAT Networks platforms" select CEVT_R4K + select CRC32 select CSRC_R4K select DMA_NONCOHERENT select SYS_HAS_EARLY_PRINTK diff --git a/arch/mips/include/asm/sigcontext.h b/arch/mips/include/asm/sigcontext.h index f54bdbe85c0d..eeeb0f48c767 100644 --- a/arch/mips/include/asm/sigcontext.h +++ b/arch/mips/include/asm/sigcontext.h @@ -32,8 +32,6 @@ struct sigcontext32 { __u32 sc_lo2; __u32 sc_hi3; __u32 sc_lo3; - __u64 sc_msaregs[32]; /* Most significant 64 bits */ - __u32 sc_msa_csr; }; #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 || _MIPS_SIM == _MIPS_SIM_NABI32 */ #endif /* _ASM_SIGCONTEXT_H */ diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h index f8d63b3b40b4..708c5d414905 100644 --- a/arch/mips/include/asm/uasm.h +++ b/arch/mips/include/asm/uasm.h @@ -67,6 +67,9 @@ void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c) #define Ip_u2s3u1(op) \ void ISAOPC(op)(u32 **buf, unsigned int a, signed int b, unsigned int c) +#define Ip_s3s1s2(op) \ +void ISAOPC(op)(u32 **buf, int a, int b, int c) + #define Ip_u2u1s3(op) \ void ISAOPC(op)(u32 **buf, unsigned int a, unsigned int b, signed int c) @@ -147,6 +150,7 @@ Ip_u2s3u1(_scd); Ip_u2s3u1(_sd); Ip_u2u1u3(_sll); Ip_u3u2u1(_sllv); +Ip_s3s1s2(_slt); Ip_u2u1s3(_sltiu); Ip_u3u1u2(_sltu); Ip_u2u1u3(_sra); diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index 4b7160259292..4bfdb9d4c186 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -273,6 +273,7 @@ enum mm_32a_minor_op { mm_and_op = 0x250, mm_or32_op = 0x290, mm_xor32_op = 0x310, + mm_slt_op = 0x350, mm_sltu_op = 0x390, }; diff --git a/arch/mips/include/uapi/asm/sigcontext.h b/arch/mips/include/uapi/asm/sigcontext.h index 681c17603a48..6c9906f59c6e 100644 --- a/arch/mips/include/uapi/asm/sigcontext.h +++ b/arch/mips/include/uapi/asm/sigcontext.h @@ -12,10 +12,6 @@ #include <linux/types.h> #include <asm/sgidefs.h> -/* Bits which may be set in sc_used_math */ -#define USEDMATH_FP (1 << 0) -#define USEDMATH_MSA (1 << 1) - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* @@ -41,8 +37,6 @@ struct sigcontext { unsigned long sc_lo2; unsigned long sc_hi3; unsigned long sc_lo3; - unsigned long long sc_msaregs[32]; /* Most significant 64 bits */ - unsigned long sc_msa_csr; }; #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ @@ -76,8 +70,6 @@ struct sigcontext { __u32 sc_used_math; __u32 sc_dsp; __u32 sc_reserved; - __u64 sc_msaregs[32]; - __u32 sc_msa_csr; }; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 02f075df8f2e..4bb5107511e2 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -293,7 +293,6 @@ void output_sc_defines(void) OFFSET(SC_LO2, sigcontext, sc_lo2); OFFSET(SC_HI3, sigcontext, sc_hi3); OFFSET(SC_LO3, sigcontext, sc_lo3); - OFFSET(SC_MSAREGS, sigcontext, sc_msaregs); BLANK(); } #endif @@ -308,7 +307,6 @@ void output_sc_defines(void) OFFSET(SC_MDLO, sigcontext, sc_mdlo); OFFSET(SC_PC, sigcontext, sc_pc); OFFSET(SC_FPC_CSR, sigcontext, sc_fpc_csr); - OFFSET(SC_MSAREGS, sigcontext, sc_msaregs); BLANK(); } #endif @@ -320,7 +318,6 @@ void output_sc32_defines(void) OFFSET(SC32_FPREGS, sigcontext32, sc_fpregs); OFFSET(SC32_FPC_CSR, sigcontext32, sc_fpc_csr); OFFSET(SC32_FPC_EIR, sigcontext32, sc_fpc_eir); - OFFSET(SC32_MSAREGS, sigcontext32, sc_msaregs); BLANK(); } #endif diff --git a/arch/mips/kernel/irq-msc01.c b/arch/mips/kernel/irq-msc01.c index 4858642d543d..a734b2c2f9ea 100644 --- a/arch/mips/kernel/irq-msc01.c +++ b/arch/mips/kernel/irq-msc01.c @@ -126,7 +126,7 @@ void __init init_msc_irqs(unsigned long icubase, unsigned int irqbase, msc_irqma board_bind_eic_interrupt = &msc_bind_eic_interrupt; - for (; nirq >= 0; nirq--, imp++) { + for (; nirq > 0; nirq--, imp++) { int n = imp->im_irq; switch (imp->im_type) { diff --git a/arch/mips/kernel/pm-cps.c b/arch/mips/kernel/pm-cps.c index 5aa4c6f8cf83..c4c2069d3a20 100644 --- a/arch/mips/kernel/pm-cps.c +++ b/arch/mips/kernel/pm-cps.c @@ -101,7 +101,7 @@ static void coupled_barrier(atomic_t *a, unsigned online) if (!coupled_coherence) return; - smp_mb__before_atomic_inc(); + smp_mb__before_atomic(); atomic_inc(a); while (atomic_read(a) < online) @@ -158,7 +158,7 @@ int cps_pm_enter_state(enum cps_pm_state state) /* Indicate that this CPU might not be coherent */ cpumask_clear_cpu(cpu, &cpu_coherent_mask); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); /* Create a non-coherent mapping of the core ready_count */ core_ready_count = per_cpu(ready_count, core); diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 71814272d148..8352523568e6 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -13,7 +13,6 @@ * Copyright (C) 1999, 2001 Silicon Graphics, Inc. */ #include <asm/asm.h> -#include <asm/asmmacro.h> #include <asm/errno.h> #include <asm/fpregdef.h> #include <asm/mipsregs.h> @@ -246,218 +245,6 @@ LEAF(_restore_fp_context32) END(_restore_fp_context32) #endif -#ifdef CONFIG_CPU_HAS_MSA - - .macro save_sc_msareg wr, off, sc, tmp -#ifdef CONFIG_64BIT - copy_u_d \tmp, \wr, 1 - EX sd \tmp, (\off+(\wr*8))(\sc) -#elif defined(CONFIG_CPU_LITTLE_ENDIAN) - copy_u_w \tmp, \wr, 2 - EX sw \tmp, (\off+(\wr*8)+0)(\sc) - copy_u_w \tmp, \wr, 3 - EX sw \tmp, (\off+(\wr*8)+4)(\sc) -#else /* CONFIG_CPU_BIG_ENDIAN */ - copy_u_w \tmp, \wr, 2 - EX sw \tmp, (\off+(\wr*8)+4)(\sc) - copy_u_w \tmp, \wr, 3 - EX sw \tmp, (\off+(\wr*8)+0)(\sc) -#endif - .endm - -/* - * int _save_msa_context(struct sigcontext *sc) - * - * Save the upper 64 bits of each vector register along with the MSA_CSR - * register into sc. Returns zero on success, else non-zero. - */ -LEAF(_save_msa_context) - save_sc_msareg 0, SC_MSAREGS, a0, t0 - save_sc_msareg 1, SC_MSAREGS, a0, t0 - save_sc_msareg 2, SC_MSAREGS, a0, t0 - save_sc_msareg 3, SC_MSAREGS, a0, t0 - save_sc_msareg 4, SC_MSAREGS, a0, t0 - save_sc_msareg 5, SC_MSAREGS, a0, t0 - save_sc_msareg 6, SC_MSAREGS, a0, t0 - save_sc_msareg 7, SC_MSAREGS, a0, t0 - save_sc_msareg 8, SC_MSAREGS, a0, t0 - save_sc_msareg 9, SC_MSAREGS, a0, t0 - save_sc_msareg 10, SC_MSAREGS, a0, t0 - save_sc_msareg 11, SC_MSAREGS, a0, t0 - save_sc_msareg 12, SC_MSAREGS, a0, t0 - save_sc_msareg 13, SC_MSAREGS, a0, t0 - save_sc_msareg 14, SC_MSAREGS, a0, t0 - save_sc_msareg 15, SC_MSAREGS, a0, t0 - save_sc_msareg 16, SC_MSAREGS, a0, t0 - save_sc_msareg 17, SC_MSAREGS, a0, t0 - save_sc_msareg 18, SC_MSAREGS, a0, t0 - save_sc_msareg 19, SC_MSAREGS, a0, t0 - save_sc_msareg 20, SC_MSAREGS, a0, t0 - save_sc_msareg 21, SC_MSAREGS, a0, t0 - save_sc_msareg 22, SC_MSAREGS, a0, t0 - save_sc_msareg 23, SC_MSAREGS, a0, t0 - save_sc_msareg 24, SC_MSAREGS, a0, t0 - save_sc_msareg 25, SC_MSAREGS, a0, t0 - save_sc_msareg 26, SC_MSAREGS, a0, t0 - save_sc_msareg 27, SC_MSAREGS, a0, t0 - save_sc_msareg 28, SC_MSAREGS, a0, t0 - save_sc_msareg 29, SC_MSAREGS, a0, t0 - save_sc_msareg 30, SC_MSAREGS, a0, t0 - save_sc_msareg 31, SC_MSAREGS, a0, t0 - jr ra - li v0, 0 - END(_save_msa_context) - -#ifdef CONFIG_MIPS32_COMPAT - -/* - * int _save_msa_context32(struct sigcontext32 *sc) - * - * Save the upper 64 bits of each vector register along with the MSA_CSR - * register into sc. Returns zero on success, else non-zero. - */ -LEAF(_save_msa_context32) - save_sc_msareg 0, SC32_MSAREGS, a0, t0 - save_sc_msareg 1, SC32_MSAREGS, a0, t0 - save_sc_msareg 2, SC32_MSAREGS, a0, t0 - save_sc_msareg 3, SC32_MSAREGS, a0, t0 - save_sc_msareg 4, SC32_MSAREGS, a0, t0 - save_sc_msareg 5, SC32_MSAREGS, a0, t0 - save_sc_msareg 6, SC32_MSAREGS, a0, t0 - save_sc_msareg 7, SC32_MSAREGS, a0, t0 - save_sc_msareg 8, SC32_MSAREGS, a0, t0 - save_sc_msareg 9, SC32_MSAREGS, a0, t0 - save_sc_msareg 10, SC32_MSAREGS, a0, t0 - save_sc_msareg 11, SC32_MSAREGS, a0, t0 - save_sc_msareg 12, SC32_MSAREGS, a0, t0 - save_sc_msareg 13, SC32_MSAREGS, a0, t0 - save_sc_msareg 14, SC32_MSAREGS, a0, t0 - save_sc_msareg 15, SC32_MSAREGS, a0, t0 - save_sc_msareg 16, SC32_MSAREGS, a0, t0 - save_sc_msareg 17, SC32_MSAREGS, a0, t0 - save_sc_msareg 18, SC32_MSAREGS, a0, t0 - save_sc_msareg 19, SC32_MSAREGS, a0, t0 - save_sc_msareg 20, SC32_MSAREGS, a0, t0 - save_sc_msareg 21, SC32_MSAREGS, a0, t0 - save_sc_msareg 22, SC32_MSAREGS, a0, t0 - save_sc_msareg 23, SC32_MSAREGS, a0, t0 - save_sc_msareg 24, SC32_MSAREGS, a0, t0 - save_sc_msareg 25, SC32_MSAREGS, a0, t0 - save_sc_msareg 26, SC32_MSAREGS, a0, t0 - save_sc_msareg 27, SC32_MSAREGS, a0, t0 - save_sc_msareg 28, SC32_MSAREGS, a0, t0 - save_sc_msareg 29, SC32_MSAREGS, a0, t0 - save_sc_msareg 30, SC32_MSAREGS, a0, t0 - save_sc_msareg 31, SC32_MSAREGS, a0, t0 - jr ra - li v0, 0 - END(_save_msa_context32) - -#endif /* CONFIG_MIPS32_COMPAT */ - - .macro restore_sc_msareg wr, off, sc, tmp -#ifdef CONFIG_64BIT - EX ld \tmp, (\off+(\wr*8))(\sc) - insert_d \wr, 1, \tmp -#elif defined(CONFIG_CPU_LITTLE_ENDIAN) - EX lw \tmp, (\off+(\wr*8)+0)(\sc) - insert_w \wr, 2, \tmp - EX lw \tmp, (\off+(\wr*8)+4)(\sc) - insert_w \wr, 3, \tmp -#else /* CONFIG_CPU_BIG_ENDIAN */ - EX lw \tmp, (\off+(\wr*8)+4)(\sc) - insert_w \wr, 2, \tmp - EX lw \tmp, (\off+(\wr*8)+0)(\sc) - insert_w \wr, 3, \tmp -#endif - .endm - -/* - * int _restore_msa_context(struct sigcontext *sc) - */ -LEAF(_restore_msa_context) - restore_sc_msareg 0, SC_MSAREGS, a0, t0 - restore_sc_msareg 1, SC_MSAREGS, a0, t0 - restore_sc_msareg 2, SC_MSAREGS, a0, t0 - restore_sc_msareg 3, SC_MSAREGS, a0, t0 - restore_sc_msareg 4, SC_MSAREGS, a0, t0 - restore_sc_msareg 5, SC_MSAREGS, a0, t0 - restore_sc_msareg 6, SC_MSAREGS, a0, t0 - restore_sc_msareg 7, SC_MSAREGS, a0, t0 - restore_sc_msareg 8, SC_MSAREGS, a0, t0 - restore_sc_msareg 9, SC_MSAREGS, a0, t0 - restore_sc_msareg 10, SC_MSAREGS, a0, t0 - restore_sc_msareg 11, SC_MSAREGS, a0, t0 - restore_sc_msareg 12, SC_MSAREGS, a0, t0 - restore_sc_msareg 13, SC_MSAREGS, a0, t0 - restore_sc_msareg 14, SC_MSAREGS, a0, t0 - restore_sc_msareg 15, SC_MSAREGS, a0, t0 - restore_sc_msareg 16, SC_MSAREGS, a0, t0 - restore_sc_msareg 17, SC_MSAREGS, a0, t0 - restore_sc_msareg 18, SC_MSAREGS, a0, t0 - restore_sc_msareg 19, SC_MSAREGS, a0, t0 - restore_sc_msareg 20, SC_MSAREGS, a0, t0 - restore_sc_msareg 21, SC_MSAREGS, a0, t0 - restore_sc_msareg 22, SC_MSAREGS, a0, t0 - restore_sc_msareg 23, SC_MSAREGS, a0, t0 - restore_sc_msareg 24, SC_MSAREGS, a0, t0 - restore_sc_msareg 25, SC_MSAREGS, a0, t0 - restore_sc_msareg 26, SC_MSAREGS, a0, t0 - restore_sc_msareg 27, SC_MSAREGS, a0, t0 - restore_sc_msareg 28, SC_MSAREGS, a0, t0 - restore_sc_msareg 29, SC_MSAREGS, a0, t0 - restore_sc_msareg 30, SC_MSAREGS, a0, t0 - restore_sc_msareg 31, SC_MSAREGS, a0, t0 - jr ra - li v0, 0 - END(_restore_msa_context) - -#ifdef CONFIG_MIPS32_COMPAT - -/* - * int _restore_msa_context32(struct sigcontext32 *sc) - */ -LEAF(_restore_msa_context32) - restore_sc_msareg 0, SC32_MSAREGS, a0, t0 - restore_sc_msareg 1, SC32_MSAREGS, a0, t0 - restore_sc_msareg 2, SC32_MSAREGS, a0, t0 - restore_sc_msareg 3, SC32_MSAREGS, a0, t0 - restore_sc_msareg 4, SC32_MSAREGS, a0, t0 - restore_sc_msareg 5, SC32_MSAREGS, a0, t0 - restore_sc_msareg 6, SC32_MSAREGS, a0, t0 - restore_sc_msareg 7, SC32_MSAREGS, a0, t0 - restore_sc_msareg 8, SC32_MSAREGS, a0, t0 - restore_sc_msareg 9, SC32_MSAREGS, a0, t0 - restore_sc_msareg 10, SC32_MSAREGS, a0, t0 - restore_sc_msareg 11, SC32_MSAREGS, a0, t0 - restore_sc_msareg 12, SC32_MSAREGS, a0, t0 - restore_sc_msareg 13, SC32_MSAREGS, a0, t0 - restore_sc_msareg 14, SC32_MSAREGS, a0, t0 - restore_sc_msareg 15, SC32_MSAREGS, a0, t0 - restore_sc_msareg 16, SC32_MSAREGS, a0, t0 - restore_sc_msareg 17, SC32_MSAREGS, a0, t0 - restore_sc_msareg 18, SC32_MSAREGS, a0, t0 - restore_sc_msareg 19, SC32_MSAREGS, a0, t0 - restore_sc_msareg 20, SC32_MSAREGS, a0, t0 - restore_sc_msareg 21, SC32_MSAREGS, a0, t0 - restore_sc_msareg 22, SC32_MSAREGS, a0, t0 - restore_sc_msareg 23, SC32_MSAREGS, a0, t0 - restore_sc_msareg 24, SC32_MSAREGS, a0, t0 - restore_sc_msareg 25, SC32_MSAREGS, a0, t0 - restore_sc_msareg 26, SC32_MSAREGS, a0, t0 - restore_sc_msareg 27, SC32_MSAREGS, a0, t0 - restore_sc_msareg 28, SC32_MSAREGS, a0, t0 - restore_sc_msareg 29, SC32_MSAREGS, a0, t0 - restore_sc_msareg 30, SC32_MSAREGS, a0, t0 - restore_sc_msareg 31, SC32_MSAREGS, a0, t0 - jr ra - li v0, 0 - END(_restore_msa_context32) - -#endif /* CONFIG_MIPS32_COMPAT */ - -#endif /* CONFIG_CPU_HAS_MSA */ - .set reorder .type fault@function diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index 33133d3df3e5..9e60d117e41e 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -31,7 +31,6 @@ #include <linux/bitops.h> #include <asm/cacheflush.h> #include <asm/fpu.h> -#include <asm/msa.h> #include <asm/sim.h> #include <asm/ucontext.h> #include <asm/cpu-features.h> @@ -48,9 +47,6 @@ static int (*restore_fp_context)(struct sigcontext __user *sc); extern asmlinkage int _save_fp_context(struct sigcontext __user *sc); extern asmlinkage int _restore_fp_context(struct sigcontext __user *sc); -extern asmlinkage int _save_msa_context(struct sigcontext __user *sc); -extern asmlinkage int _restore_msa_context(struct sigcontext __user *sc); - struct sigframe { u32 sf_ass[4]; /* argument save space for o32 */ u32 sf_pad[2]; /* Was: signal trampoline */ @@ -100,60 +96,20 @@ static int copy_fp_from_sigcontext(struct sigcontext __user *sc) } /* - * These functions will save only the upper 64 bits of the vector registers, - * since the lower 64 bits have already been saved as the scalar FP context. - */ -static int copy_msa_to_sigcontext(struct sigcontext __user *sc) -{ - int i; - int err = 0; - - for (i = 0; i < NUM_FPU_REGS; i++) { - err |= - __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), - &sc->sc_msaregs[i]); - } - err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr); - - return err; -} - -static int copy_msa_from_sigcontext(struct sigcontext __user *sc) -{ - int i; - int err = 0; - u64 val; - - for (i = 0; i < NUM_FPU_REGS; i++) { - err |= __get_user(val, &sc->sc_msaregs[i]); - set_fpr64(¤t->thread.fpu.fpr[i], 1, val); - } - err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr); - - return err; -} - -/* * Helper routines */ -static int protected_save_fp_context(struct sigcontext __user *sc, - unsigned used_math) +static int protected_save_fp_context(struct sigcontext __user *sc) { int err; - bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA); #ifndef CONFIG_EVA while (1) { lock_fpu_owner(); if (is_fpu_owner()) { err = save_fp_context(sc); - if (save_msa && !err) - err = _save_msa_context(sc); unlock_fpu_owner(); } else { unlock_fpu_owner(); err = copy_fp_to_sigcontext(sc); - if (save_msa && !err) - err = copy_msa_to_sigcontext(sc); } if (likely(!err)) break; @@ -169,38 +125,24 @@ static int protected_save_fp_context(struct sigcontext __user *sc, * EVA does not have FPU EVA instructions so saving fpu context directly * does not work. */ - disable_msa(); lose_fpu(1); err = save_fp_context(sc); /* this might fail */ - if (save_msa && !err) - err = copy_msa_to_sigcontext(sc); #endif return err; } -static int protected_restore_fp_context(struct sigcontext __user *sc, - unsigned used_math) +static int protected_restore_fp_context(struct sigcontext __user *sc) { int err, tmp __maybe_unused; - bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA); #ifndef CONFIG_EVA while (1) { lock_fpu_owner(); if (is_fpu_owner()) { err = restore_fp_context(sc); - if (restore_msa && !err) { - enable_msa(); - err = _restore_msa_context(sc); - } else { - /* signal handler may have used MSA */ - disable_msa(); - } unlock_fpu_owner(); } else { unlock_fpu_owner(); err = copy_fp_from_sigcontext(sc); - if (!err && (used_math & USEDMATH_MSA)) - err = copy_msa_from_sigcontext(sc); } if (likely(!err)) break; @@ -216,11 +158,8 @@ static int protected_restore_fp_context(struct sigcontext __user *sc, * EVA does not have FPU EVA instructions so restoring fpu context * directly does not work. */ - enable_msa(); lose_fpu(0); err = restore_fp_context(sc); /* this might fail */ - if (restore_msa && !err) - err = copy_msa_from_sigcontext(sc); #endif return err; } @@ -252,8 +191,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) err |= __put_user(rddsp(DSP_MASK), &sc->sc_dsp); } - used_math = used_math() ? USEDMATH_FP : 0; - used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0; + used_math = !!used_math(); err |= __put_user(used_math, &sc->sc_used_math); if (used_math) { @@ -261,7 +199,7 @@ int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) * Save FPU state to signal context. Signal handler * will "inherit" current FPU state. */ - err |= protected_save_fp_context(sc, used_math); + err |= protected_save_fp_context(sc); } return err; } @@ -286,14 +224,14 @@ int fpcsr_pending(unsigned int __user *fpcsr) } static int -check_and_restore_fp_context(struct sigcontext __user *sc, unsigned used_math) +check_and_restore_fp_context(struct sigcontext __user *sc) { int err, sig; err = sig = fpcsr_pending(&sc->sc_fpc_csr); if (err > 0) err = 0; - err |= protected_restore_fp_context(sc, used_math); + err |= protected_restore_fp_context(sc); return err ?: sig; } @@ -333,10 +271,9 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) if (used_math) { /* restore fpu context if we have used it before */ if (!err) - err = check_and_restore_fp_context(sc, used_math); + err = check_and_restore_fp_context(sc); } else { - /* signal handler may have used FPU or MSA. Disable them. */ - disable_msa(); + /* signal handler may have used FPU. Give it up. */ lose_fpu(0); } diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index 299f956e4db3..bae2e6ee2109 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -30,7 +30,6 @@ #include <asm/sim.h> #include <asm/ucontext.h> #include <asm/fpu.h> -#include <asm/msa.h> #include <asm/war.h> #include <asm/vdso.h> #include <asm/dsp.h> @@ -43,9 +42,6 @@ static int (*restore_fp_context32)(struct sigcontext32 __user *sc); extern asmlinkage int _save_fp_context32(struct sigcontext32 __user *sc); extern asmlinkage int _restore_fp_context32(struct sigcontext32 __user *sc); -extern asmlinkage int _save_msa_context32(struct sigcontext32 __user *sc); -extern asmlinkage int _restore_msa_context32(struct sigcontext32 __user *sc); - /* * Including <asm/unistd.h> would give use the 64-bit syscall numbers ... */ @@ -115,59 +111,19 @@ static int copy_fp_from_sigcontext32(struct sigcontext32 __user *sc) } /* - * These functions will save only the upper 64 bits of the vector registers, - * since the lower 64 bits have already been saved as the scalar FP context. - */ -static int copy_msa_to_sigcontext32(struct sigcontext32 __user *sc) -{ - int i; - int err = 0; - - for (i = 0; i < NUM_FPU_REGS; i++) { - err |= - __put_user(get_fpr64(¤t->thread.fpu.fpr[i], 1), - &sc->sc_msaregs[i]); - } - err |= __put_user(current->thread.fpu.msacsr, &sc->sc_msa_csr); - - return err; -} - -static int copy_msa_from_sigcontext32(struct sigcontext32 __user *sc) -{ - int i; - int err = 0; - u64 val; - - for (i = 0; i < NUM_FPU_REGS; i++) { - err |= __get_user(val, &sc->sc_msaregs[i]); - set_fpr64(¤t->thread.fpu.fpr[i], 1, val); - } - err |= __get_user(current->thread.fpu.msacsr, &sc->sc_msa_csr); - - return err; -} - -/* * sigcontext handlers */ -static int protected_save_fp_context32(struct sigcontext32 __user *sc, - unsigned used_math) +static int protected_save_fp_context32(struct sigcontext32 __user *sc) { int err; - bool save_msa = cpu_has_msa && (used_math & USEDMATH_MSA); while (1) { lock_fpu_owner(); if (is_fpu_owner()) { err = save_fp_context32(sc); - if (save_msa && !err) - err = _save_msa_context32(sc); unlock_fpu_owner(); } else { unlock_fpu_owner(); err = copy_fp_to_sigcontext32(sc); - if (save_msa && !err) - err = copy_msa_to_sigcontext32(sc); } if (likely(!err)) break; @@ -181,28 +137,17 @@ static int protected_save_fp_context32(struct sigcontext32 __user *sc, return err; } -static int protected_restore_fp_context32(struct sigcontext32 __user *sc, - unsigned used_math) +static int protected_restore_fp_context32(struct sigcontext32 __user *sc) { int err, tmp __maybe_unused; - bool restore_msa = cpu_has_msa && (used_math & USEDMATH_MSA); while (1) { lock_fpu_owner(); if (is_fpu_owner()) { err = restore_fp_context32(sc); - if (restore_msa && !err) { - enable_msa(); - err = _restore_msa_context32(sc); - } else { - /* signal handler may have used MSA */ - disable_msa(); - } unlock_fpu_owner(); } else { unlock_fpu_owner(); err = copy_fp_from_sigcontext32(sc); - if (restore_msa && !err) - err = copy_msa_from_sigcontext32(sc); } if (likely(!err)) break; @@ -241,8 +186,7 @@ static int setup_sigcontext32(struct pt_regs *regs, err |= __put_user(mflo3(), &sc->sc_lo3); } - used_math = used_math() ? USEDMATH_FP : 0; - used_math |= thread_msa_context_live() ? USEDMATH_MSA : 0; + used_math = !!used_math(); err |= __put_user(used_math, &sc->sc_used_math); if (used_math) { @@ -250,21 +194,20 @@ static int setup_sigcontext32(struct pt_regs *regs, * Save FPU state to signal context. Signal handler * will "inherit" current FPU state. */ - err |= protected_save_fp_context32(sc, used_math); + err |= protected_save_fp_context32(sc); } return err; } static int -check_and_restore_fp_context32(struct sigcontext32 __user *sc, - unsigned used_math) +check_and_restore_fp_context32(struct sigcontext32 __user *sc) { int err, sig; err = sig = fpcsr_pending(&sc->sc_fpc_csr); if (err > 0) err = 0; - err |= protected_restore_fp_context32(sc, used_math); + err |= protected_restore_fp_context32(sc); return err ?: sig; } @@ -301,10 +244,9 @@ static int restore_sigcontext32(struct pt_regs *regs, if (used_math) { /* restore fpu context if we have used it before */ if (!err) - err = check_and_restore_fp_context32(sc, used_math); + err = check_and_restore_fp_context32(sc); } else { - /* signal handler may have used FPU or MSA. Disable them. */ - disable_msa(); + /* signal handler may have used FPU. Give it up. */ lose_fpu(0); } diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index df0598d9bfdd..949f2c6827a0 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -301,7 +301,7 @@ static int cps_cpu_disable(void) core_cfg = &mips_cps_core_bootcfg[current_cpu_data.core]; atomic_sub(1 << cpu_vpe_id(¤t_cpu_data), &core_cfg->vpe_mask); - smp_mb__after_atomic_dec(); + smp_mb__after_atomic(); set_cpu_online(cpu, false); cpu_clear(cpu, cpu_callin_map); diff --git a/arch/mips/math-emu/ieee754.c b/arch/mips/math-emu/ieee754.c index 53f1d2287084..8e97acbbe22c 100644 --- a/arch/mips/math-emu/ieee754.c +++ b/arch/mips/math-emu/ieee754.c @@ -34,13 +34,22 @@ * Special constants */ -#define DPCNST(s, b, m) \ +/* + * Older GCC requires the inner braces for initialization of union ieee754dp's + * anonymous struct member. Without an error will result. + */ +#define xPCNST(s, b, m, ebias) \ { \ - .sign = (s), \ - .bexp = (b) + DP_EBIAS, \ - .mant = (m) \ + { \ + .sign = (s), \ + .bexp = (b) + ebias, \ + .mant = (m) \ + } \ } +#define DPCNST(s, b, m) \ + xPCNST(s, b, m, DP_EBIAS) + const union ieee754dp __ieee754dp_spcvals[] = { DPCNST(0, DP_EMIN - 1, 0x0000000000000ULL), /* + zero */ DPCNST(1, DP_EMIN - 1, 0x0000000000000ULL), /* - zero */ @@ -62,11 +71,7 @@ const union ieee754dp __ieee754dp_spcvals[] = { }; #define SPCNST(s, b, m) \ -{ \ - .sign = (s), \ - .bexp = (b) + SP_EBIAS, \ - .mant = (m) \ -} + xPCNST(s, b, m, SP_EBIAS) const union ieee754sp __ieee754sp_spcvals[] = { SPCNST(0, SP_EMIN - 1, 0x000000), /* + zero */ diff --git a/arch/mips/mm/uasm-micromips.c b/arch/mips/mm/uasm-micromips.c index 775c2800cba2..8399ddf03a02 100644 --- a/arch/mips/mm/uasm-micromips.c +++ b/arch/mips/mm/uasm-micromips.c @@ -102,6 +102,7 @@ static struct insn insn_table_MM[] = { { insn_sd, 0, 0 }, { insn_sll, M(mm_pool32a_op, 0, 0, 0, 0, mm_sll32_op), RT | RS | RD }, { insn_sllv, M(mm_pool32a_op, 0, 0, 0, 0, mm_sllv32_op), RT | RS | RD }, + { insn_slt, M(mm_pool32a_op, 0, 0, 0, 0, mm_slt_op), RT | RS | RD }, { insn_sltiu, M(mm_sltiu32_op, 0, 0, 0, 0, 0), RT | RS | SIMM }, { insn_sltu, M(mm_pool32a_op, 0, 0, 0, 0, mm_sltu_op), RT | RS | RD }, { insn_sra, M(mm_pool32a_op, 0, 0, 0, 0, mm_sra_op), RT | RS | RD }, diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c index 38792c2364f5..6708a2dbf934 100644 --- a/arch/mips/mm/uasm-mips.c +++ b/arch/mips/mm/uasm-mips.c @@ -89,7 +89,7 @@ static struct insn insn_table[] = { { insn_lb, M(lb_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_ld, M(ld_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_ldx, M(spec3_op, 0, 0, 0, ldx_op, lx_op), RS | RT | RD }, - { insn_lh, M(lw_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, + { insn_lh, M(lh_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_lld, M(lld_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_ll, M(ll_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_lui, M(lui_op, 0, 0, 0, 0, 0), RT | SIMM }, @@ -110,6 +110,7 @@ static struct insn insn_table[] = { { insn_sd, M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_sll, M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE }, { insn_sllv, M(spec_op, 0, 0, 0, 0, sllv_op), RS | RT | RD }, + { insn_slt, M(spec_op, 0, 0, 0, 0, slt_op), RS | RT | RD }, { insn_sltiu, M(sltiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM }, { insn_sltu, M(spec_op, 0, 0, 0, 0, sltu_op), RS | RT | RD }, { insn_sra, M(spec_op, 0, 0, 0, 0, sra_op), RT | RD | RE }, diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c index 00515805fe41..a01b0d6cedd2 100644 --- a/arch/mips/mm/uasm.c +++ b/arch/mips/mm/uasm.c @@ -53,7 +53,7 @@ enum opcode { insn_ld, insn_ldx, insn_lh, insn_ll, insn_lld, insn_lui, insn_lw, insn_lwx, insn_mfc0, insn_mfhi, insn_mflo, insn_mtc0, insn_mul, insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sc, insn_scd, - insn_sd, insn_sll, insn_sllv, insn_sltiu, insn_sltu, insn_sra, + insn_sd, insn_sll, insn_sllv, insn_slt, insn_sltiu, insn_sltu, insn_sra, insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh, insn_xor, insn_xori, insn_yield, @@ -139,6 +139,13 @@ Ip_u1u2u3(op) \ } \ UASM_EXPORT_SYMBOL(uasm_i##op); +#define I_s3s1s2(op) \ +Ip_s3s1s2(op) \ +{ \ + build_insn(buf, insn##op, b, c, a); \ +} \ +UASM_EXPORT_SYMBOL(uasm_i##op); + #define I_u2u1u3(op) \ Ip_u2u1u3(op) \ { \ @@ -289,6 +296,7 @@ I_u2s3u1(_scd) I_u2s3u1(_sd) I_u2u1u3(_sll) I_u3u2u1(_sllv) +I_s3s1s2(_slt) I_u2u1s3(_sltiu) I_u3u1u2(_sltu) I_u2u1u3(_sra) diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index a67b9753330b..b87390a56a2f 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -119,8 +119,6 @@ /* Arguments used by JIT */ #define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */ -#define FLAG_NEED_X_RESET (1 << 0) - #define SBIT(x) (1 << (x)) /* Signed version of BIT() */ /** @@ -153,6 +151,8 @@ static inline int optimize_div(u32 *k) return 0; } +static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx); + /* Simply emit the instruction if the JIT memory space has been allocated */ #define emit_instr(ctx, func, ...) \ do { \ @@ -166,9 +166,7 @@ do { \ /* Determine if immediate is within the 16-bit signed range */ static inline bool is_range16(s32 imm) { - if (imm >= SBIT(15) || imm < -SBIT(15)) - return true; - return false; + return !(imm >= SBIT(15) || imm < -SBIT(15)); } static inline void emit_addu(unsigned int dst, unsigned int src1, @@ -187,7 +185,7 @@ static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) { if (ctx->target != NULL) { /* addiu can only handle s16 */ - if (is_range16(imm)) { + if (!is_range16(imm)) { u32 *p = &ctx->target[ctx->idx]; uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16); p = &ctx->target[ctx->idx + 1]; @@ -199,7 +197,7 @@ static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx) } ctx->idx++; - if (is_range16(imm)) + if (!is_range16(imm)) ctx->idx++; } @@ -240,7 +238,7 @@ static inline void emit_daddiu(unsigned int dst, unsigned int src, static inline void emit_addiu(unsigned int dst, unsigned int src, u32 imm, struct jit_ctx *ctx) { - if (is_range16(imm)) { + if (!is_range16(imm)) { emit_load_imm(r_tmp, imm, ctx); emit_addu(dst, r_tmp, src, ctx); } else { @@ -313,8 +311,11 @@ static inline void emit_sll(unsigned int dst, unsigned int src, unsigned int sa, struct jit_ctx *ctx) { /* sa is 5-bits long */ - BUG_ON(sa >= BIT(5)); - emit_instr(ctx, sll, dst, src, sa); + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, sll, dst, src, sa); } static inline void emit_srlv(unsigned int dst, unsigned int src, @@ -327,8 +328,17 @@ static inline void emit_srl(unsigned int dst, unsigned int src, unsigned int sa, struct jit_ctx *ctx) { /* sa is 5-bits long */ - BUG_ON(sa >= BIT(5)); - emit_instr(ctx, srl, dst, src, sa); + if (sa >= BIT(5)) + /* Shifting >= 32 results in zero */ + emit_jit_reg_move(dst, r_zero, ctx); + else + emit_instr(ctx, srl, dst, src, sa); +} + +static inline void emit_slt(unsigned int dst, unsigned int src1, + unsigned int src2, struct jit_ctx *ctx) +{ + emit_instr(ctx, slt, dst, src1, src2); } static inline void emit_sltu(unsigned int dst, unsigned int src1, @@ -341,7 +351,7 @@ static inline void emit_sltiu(unsigned dst, unsigned int src, unsigned int imm, struct jit_ctx *ctx) { /* 16 bit immediate */ - if (is_range16((s32)imm)) { + if (!is_range16((s32)imm)) { emit_load_imm(r_tmp, imm, ctx); emit_sltu(dst, src, r_tmp, ctx); } else { @@ -408,7 +418,7 @@ static inline void emit_div(unsigned int dst, unsigned int src, u32 *p = &ctx->target[ctx->idx]; uasm_i_divu(&p, dst, src); p = &ctx->target[ctx->idx + 1]; - uasm_i_mfhi(&p, dst); + uasm_i_mflo(&p, dst); } ctx->idx += 2; /* 2 insts */ } @@ -443,6 +453,17 @@ static inline void emit_wsbh(unsigned int dst, unsigned int src, emit_instr(ctx, wsbh, dst, src); } +/* load pointer to register */ +static inline void emit_load_ptr(unsigned int dst, unsigned int src, + int imm, struct jit_ctx *ctx) +{ + /* src contains the base addr of the 32/64-pointer */ + if (config_enabled(CONFIG_64BIT)) + emit_instr(ctx, ld, dst, imm, src); + else + emit_instr(ctx, lw, dst, imm, src); +} + /* load a function pointer to register */ static inline void emit_load_func(unsigned int reg, ptr imm, struct jit_ctx *ctx) @@ -545,29 +566,13 @@ static inline u16 align_sp(unsigned int num) return num; } -static inline void update_on_xread(struct jit_ctx *ctx) -{ - if (!(ctx->flags & SEEN_X)) - ctx->flags |= FLAG_NEED_X_RESET; - - ctx->flags |= SEEN_X; -} - static bool is_load_to_a(u16 inst) { switch (inst) { - case BPF_S_LD_W_LEN: - case BPF_S_LD_W_ABS: - case BPF_S_LD_H_ABS: - case BPF_S_LD_B_ABS: - case BPF_S_ANC_CPU: - case BPF_S_ANC_IFINDEX: - case BPF_S_ANC_MARK: - case BPF_S_ANC_PROTOCOL: - case BPF_S_ANC_RXHASH: - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: - case BPF_S_ANC_QUEUE: + case BPF_LD | BPF_W | BPF_LEN: + case BPF_LD | BPF_W | BPF_ABS: + case BPF_LD | BPF_H | BPF_ABS: + case BPF_LD | BPF_B | BPF_ABS: return true; default: return false; @@ -618,7 +623,10 @@ static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset) if (ctx->flags & SEEN_MEM) { if (real_off % (RSIZE * 2)) real_off += RSIZE; - emit_addiu(r_M, r_sp, real_off, ctx); + if (config_enabled(CONFIG_64BIT)) + emit_daddiu(r_M, r_sp, real_off, ctx); + else + emit_addiu(r_M, r_sp, real_off, ctx); } } @@ -705,11 +713,11 @@ static void build_prologue(struct jit_ctx *ctx) if (ctx->flags & SEEN_SKB) emit_reg_move(r_skb, MIPS_R_A0, ctx); - if (ctx->flags & FLAG_NEED_X_RESET) + if (ctx->flags & SEEN_X) emit_jit_reg_move(r_X, r_zero, ctx); /* Do not leak kernel data to userspace */ - if ((first_inst != BPF_S_RET_K) && !(is_load_to_a(first_inst))) + if ((first_inst != (BPF_RET | BPF_K)) && !(is_load_to_a(first_inst))) emit_jit_reg_move(r_A, r_zero, ctx); } @@ -757,13 +765,17 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) return (u64)err << 32 | ntohl(ret); } -#define PKT_TYPE_MAX 7 +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif static int pkt_type_offset(void) { struct sk_buff skb_probe = { .pkt_type = ~0, }; - char *ct = (char *)&skb_probe; + u8 *ct = (u8 *)&skb_probe; unsigned int off; for (off = 0; off < sizeof(struct sk_buff); off++) { @@ -783,46 +795,62 @@ static int build_body(struct jit_ctx *ctx) u32 k, b_off __maybe_unused; for (i = 0; i < prog->len; i++) { + u16 code; + inst = &(prog->insns[i]); pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n", __func__, inst->code, inst->jt, inst->jf, inst->k); k = inst->k; + code = bpf_anc_helper(inst); if (ctx->target == NULL) ctx->offsets[i] = ctx->idx * 4; - switch (inst->code) { - case BPF_S_LD_IMM: + switch (code) { + case BPF_LD | BPF_IMM: /* A <- k ==> li r_A, k */ ctx->flags |= SEEN_A; emit_load_imm(r_A, k, ctx); break; - case BPF_S_LD_W_LEN: + case BPF_LD | BPF_W | BPF_LEN: BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); /* A <- len ==> lw r_A, offset(skb) */ ctx->flags |= SEEN_SKB | SEEN_A; off = offsetof(struct sk_buff, len); emit_load(r_A, r_skb, off, ctx); break; - case BPF_S_LD_MEM: + case BPF_LD | BPF_MEM: /* A <- M[k] ==> lw r_A, offset(M) */ ctx->flags |= SEEN_MEM | SEEN_A; emit_load(r_A, r_M, SCRATCH_OFF(k), ctx); break; - case BPF_S_LD_W_ABS: + case BPF_LD | BPF_W | BPF_ABS: /* A <- P[k:4] */ load_order = 2; goto load; - case BPF_S_LD_H_ABS: + case BPF_LD | BPF_H | BPF_ABS: /* A <- P[k:2] */ load_order = 1; goto load; - case BPF_S_LD_B_ABS: + case BPF_LD | BPF_B | BPF_ABS: /* A <- P[k:1] */ load_order = 0; load: + /* the interpreter will deal with the negative K */ + if ((int)k < 0) + return -ENOTSUPP; + emit_load_imm(r_off, k, ctx); load_common: + /* + * We may got here from the indirect loads so + * return if offset is negative. + */ + emit_slt(r_s0, r_off, r_zero, ctx); + emit_bcond(MIPS_COND_NE, r_s0, r_zero, + b_imm(prog->len, ctx), ctx); + emit_reg_move(r_ret, r_zero, ctx); + ctx->flags |= SEEN_CALL | SEEN_OFF | SEEN_S0 | SEEN_SKB | SEEN_A; @@ -852,39 +880,42 @@ load_common: emit_b(b_imm(prog->len, ctx), ctx); emit_reg_move(r_ret, r_zero, ctx); break; - case BPF_S_LD_W_IND: + case BPF_LD | BPF_W | BPF_IND: /* A <- P[X + k:4] */ load_order = 2; goto load_ind; - case BPF_S_LD_H_IND: + case BPF_LD | BPF_H | BPF_IND: /* A <- P[X + k:2] */ load_order = 1; goto load_ind; - case BPF_S_LD_B_IND: + case BPF_LD | BPF_B | BPF_IND: /* A <- P[X + k:1] */ load_order = 0; load_ind: - update_on_xread(ctx); ctx->flags |= SEEN_OFF | SEEN_X; emit_addiu(r_off, r_X, k, ctx); goto load_common; - case BPF_S_LDX_IMM: + case BPF_LDX | BPF_IMM: /* X <- k */ ctx->flags |= SEEN_X; emit_load_imm(r_X, k, ctx); break; - case BPF_S_LDX_MEM: + case BPF_LDX | BPF_MEM: /* X <- M[k] */ ctx->flags |= SEEN_X | SEEN_MEM; emit_load(r_X, r_M, SCRATCH_OFF(k), ctx); break; - case BPF_S_LDX_W_LEN: + case BPF_LDX | BPF_W | BPF_LEN: /* X <- len */ ctx->flags |= SEEN_X | SEEN_SKB; off = offsetof(struct sk_buff, len); emit_load(r_X, r_skb, off, ctx); break; - case BPF_S_LDX_B_MSH: + case BPF_LDX | BPF_B | BPF_MSH: + /* the interpreter will deal with the negative K */ + if ((int)k < 0) + return -ENOTSUPP; + /* X <- 4 * (P[k:1] & 0xf) */ ctx->flags |= SEEN_X | SEEN_CALL | SEEN_S0 | SEEN_SKB; /* Load offset to a1 */ @@ -917,50 +948,49 @@ load_ind: emit_b(b_imm(prog->len, ctx), ctx); emit_load_imm(r_ret, 0, ctx); /* delay slot */ break; - case BPF_S_ST: + case BPF_ST: /* M[k] <- A */ ctx->flags |= SEEN_MEM | SEEN_A; emit_store(r_A, r_M, SCRATCH_OFF(k), ctx); break; - case BPF_S_STX: + case BPF_STX: /* M[k] <- X */ ctx->flags |= SEEN_MEM | SEEN_X; emit_store(r_X, r_M, SCRATCH_OFF(k), ctx); break; - case BPF_S_ALU_ADD_K: + case BPF_ALU | BPF_ADD | BPF_K: /* A += K */ ctx->flags |= SEEN_A; emit_addiu(r_A, r_A, k, ctx); break; - case BPF_S_ALU_ADD_X: + case BPF_ALU | BPF_ADD | BPF_X: /* A += X */ ctx->flags |= SEEN_A | SEEN_X; emit_addu(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_SUB_K: + case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */ ctx->flags |= SEEN_A; emit_addiu(r_A, r_A, -k, ctx); break; - case BPF_S_ALU_SUB_X: + case BPF_ALU | BPF_SUB | BPF_X: /* A -= X */ ctx->flags |= SEEN_A | SEEN_X; emit_subu(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_MUL_K: + case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */ /* Load K to scratch register before MUL */ ctx->flags |= SEEN_A | SEEN_S0; emit_load_imm(r_s0, k, ctx); emit_mul(r_A, r_A, r_s0, ctx); break; - case BPF_S_ALU_MUL_X: + case BPF_ALU | BPF_MUL | BPF_X: /* A *= X */ - update_on_xread(ctx); ctx->flags |= SEEN_A | SEEN_X; emit_mul(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_DIV_K: + case BPF_ALU | BPF_DIV | BPF_K: /* A /= k */ if (k == 1) break; @@ -973,7 +1003,7 @@ load_ind: emit_load_imm(r_s0, k, ctx); emit_div(r_A, r_s0, ctx); break; - case BPF_S_ALU_MOD_K: + case BPF_ALU | BPF_MOD | BPF_K: /* A %= k */ if (k == 1 || optimize_div(&k)) { ctx->flags |= SEEN_A; @@ -984,9 +1014,8 @@ load_ind: emit_mod(r_A, r_s0, ctx); } break; - case BPF_S_ALU_DIV_X: + case BPF_ALU | BPF_DIV | BPF_X: /* A /= X */ - update_on_xread(ctx); ctx->flags |= SEEN_X | SEEN_A; /* Check if r_X is zero */ emit_bcond(MIPS_COND_EQ, r_X, r_zero, @@ -994,9 +1023,8 @@ load_ind: emit_load_imm(r_val, 0, ctx); /* delay slot */ emit_div(r_A, r_X, ctx); break; - case BPF_S_ALU_MOD_X: + case BPF_ALU | BPF_MOD | BPF_X: /* A %= X */ - update_on_xread(ctx); ctx->flags |= SEEN_X | SEEN_A; /* Check if r_X is zero */ emit_bcond(MIPS_COND_EQ, r_X, r_zero, @@ -1004,94 +1032,89 @@ load_ind: emit_load_imm(r_val, 0, ctx); /* delay slot */ emit_mod(r_A, r_X, ctx); break; - case BPF_S_ALU_OR_K: + case BPF_ALU | BPF_OR | BPF_K: /* A |= K */ ctx->flags |= SEEN_A; emit_ori(r_A, r_A, k, ctx); break; - case BPF_S_ALU_OR_X: + case BPF_ALU | BPF_OR | BPF_X: /* A |= X */ - update_on_xread(ctx); ctx->flags |= SEEN_A; emit_ori(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_XOR_K: + case BPF_ALU | BPF_XOR | BPF_K: /* A ^= k */ ctx->flags |= SEEN_A; emit_xori(r_A, r_A, k, ctx); break; - case BPF_S_ANC_ALU_XOR_X: - case BPF_S_ALU_XOR_X: + case BPF_ANC | SKF_AD_ALU_XOR_X: + case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */ - update_on_xread(ctx); ctx->flags |= SEEN_A; emit_xor(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_AND_K: + case BPF_ALU | BPF_AND | BPF_K: /* A &= K */ ctx->flags |= SEEN_A; emit_andi(r_A, r_A, k, ctx); break; - case BPF_S_ALU_AND_X: + case BPF_ALU | BPF_AND | BPF_X: /* A &= X */ - update_on_xread(ctx); ctx->flags |= SEEN_A | SEEN_X; emit_and(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_LSH_K: + case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */ ctx->flags |= SEEN_A; emit_sll(r_A, r_A, k, ctx); break; - case BPF_S_ALU_LSH_X: + case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X */ ctx->flags |= SEEN_A | SEEN_X; - update_on_xread(ctx); emit_sllv(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_RSH_K: + case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K */ ctx->flags |= SEEN_A; emit_srl(r_A, r_A, k, ctx); break; - case BPF_S_ALU_RSH_X: + case BPF_ALU | BPF_RSH | BPF_X: ctx->flags |= SEEN_A | SEEN_X; - update_on_xread(ctx); emit_srlv(r_A, r_A, r_X, ctx); break; - case BPF_S_ALU_NEG: + case BPF_ALU | BPF_NEG: /* A = -A */ ctx->flags |= SEEN_A; emit_neg(r_A, ctx); break; - case BPF_S_JMP_JA: + case BPF_JMP | BPF_JA: /* pc += K */ emit_b(b_imm(i + k + 1, ctx), ctx); emit_nop(ctx); break; - case BPF_S_JMP_JEQ_K: + case BPF_JMP | BPF_JEQ | BPF_K: /* pc += ( A == K ) ? pc->jt : pc->jf */ condt = MIPS_COND_EQ | MIPS_COND_K; goto jmp_cmp; - case BPF_S_JMP_JEQ_X: + case BPF_JMP | BPF_JEQ | BPF_X: ctx->flags |= SEEN_X; /* pc += ( A == X ) ? pc->jt : pc->jf */ condt = MIPS_COND_EQ | MIPS_COND_X; goto jmp_cmp; - case BPF_S_JMP_JGE_K: + case BPF_JMP | BPF_JGE | BPF_K: /* pc += ( A >= K ) ? pc->jt : pc->jf */ condt = MIPS_COND_GE | MIPS_COND_K; goto jmp_cmp; - case BPF_S_JMP_JGE_X: + case BPF_JMP | BPF_JGE | BPF_X: ctx->flags |= SEEN_X; /* pc += ( A >= X ) ? pc->jt : pc->jf */ condt = MIPS_COND_GE | MIPS_COND_X; goto jmp_cmp; - case BPF_S_JMP_JGT_K: + case BPF_JMP | BPF_JGT | BPF_K: /* pc += ( A > K ) ? pc->jt : pc->jf */ condt = MIPS_COND_GT | MIPS_COND_K; goto jmp_cmp; - case BPF_S_JMP_JGT_X: + case BPF_JMP | BPF_JGT | BPF_X: ctx->flags |= SEEN_X; /* pc += ( A > X ) ? pc->jt : pc->jf */ condt = MIPS_COND_GT | MIPS_COND_X; @@ -1109,7 +1132,7 @@ jmp_cmp: } /* A < (K|X) ? r_scrach = 1 */ b_off = b_imm(i + inst->jf + 1, ctx); - emit_bcond(MIPS_COND_GT, r_s0, r_zero, b_off, + emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx); emit_nop(ctx); /* A > (K|X) ? scratch = 0 */ @@ -1167,7 +1190,7 @@ jmp_cmp: } } break; - case BPF_S_JMP_JSET_K: + case BPF_JMP | BPF_JSET | BPF_K: ctx->flags |= SEEN_S0 | SEEN_S1 | SEEN_A; /* pc += (A & K) ? pc -> jt : pc -> jf */ emit_load_imm(r_s1, k, ctx); @@ -1181,7 +1204,7 @@ jmp_cmp: emit_b(b_off, ctx); emit_nop(ctx); break; - case BPF_S_JMP_JSET_X: + case BPF_JMP | BPF_JSET | BPF_X: ctx->flags |= SEEN_S0 | SEEN_X | SEEN_A; /* pc += (A & X) ? pc -> jt : pc -> jf */ emit_and(r_s0, r_A, r_X, ctx); @@ -1194,7 +1217,7 @@ jmp_cmp: emit_b(b_off, ctx); emit_nop(ctx); break; - case BPF_S_RET_A: + case BPF_RET | BPF_A: ctx->flags |= SEEN_A; if (i != prog->len - 1) /* @@ -1204,7 +1227,7 @@ jmp_cmp: emit_b(b_imm(prog->len, ctx), ctx); emit_reg_move(r_ret, r_A, ctx); /* delay slot */ break; - case BPF_S_RET_K: + case BPF_RET | BPF_K: /* * It can emit two instructions so it does not fit on * the delay slot. @@ -1219,19 +1242,18 @@ jmp_cmp: emit_nop(ctx); } break; - case BPF_S_MISC_TAX: + case BPF_MISC | BPF_TAX: /* X = A */ ctx->flags |= SEEN_X | SEEN_A; emit_jit_reg_move(r_X, r_A, ctx); break; - case BPF_S_MISC_TXA: + case BPF_MISC | BPF_TXA: /* A = X */ ctx->flags |= SEEN_A | SEEN_X; - update_on_xread(ctx); emit_jit_reg_move(r_A, r_X, ctx); break; /* AUX */ - case BPF_S_ANC_PROTOCOL: + case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol */ ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, @@ -1256,7 +1278,7 @@ jmp_cmp: } #endif break; - case BPF_S_ANC_CPU: + case BPF_ANC | SKF_AD_CPU: ctx->flags |= SEEN_A | SEEN_OFF; /* A = current_thread_info()->cpu */ BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, @@ -1265,11 +1287,12 @@ jmp_cmp: /* $28/gp points to the thread_info struct */ emit_load(r_A, 28, off, ctx); break; - case BPF_S_ANC_IFINDEX: + case BPF_ANC | SKF_AD_IFINDEX: /* A = skb->dev->ifindex */ ctx->flags |= SEEN_SKB | SEEN_A | SEEN_S0; off = offsetof(struct sk_buff, dev); - emit_load(r_s0, r_skb, off, ctx); + /* Load *dev pointer */ + emit_load_ptr(r_s0, r_skb, off, ctx); /* error (0) in the delay slot */ emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_imm(prog->len, ctx), ctx); @@ -1279,31 +1302,36 @@ jmp_cmp: off = offsetof(struct net_device, ifindex); emit_load(r_A, r_s0, off, ctx); break; - case BPF_S_ANC_MARK: + case BPF_ANC | SKF_AD_MARK: ctx->flags |= SEEN_SKB | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4); off = offsetof(struct sk_buff, mark); emit_load(r_A, r_skb, off, ctx); break; - case BPF_S_ANC_RXHASH: + case BPF_ANC | SKF_AD_RXHASH: ctx->flags |= SEEN_SKB | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); off = offsetof(struct sk_buff, hash); emit_load(r_A, r_skb, off, ctx); break; - case BPF_S_ANC_VLAN_TAG: - case BPF_S_ANC_VLAN_TAG_PRESENT: + case BPF_ANC | SKF_AD_VLAN_TAG: + case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT: ctx->flags |= SEEN_SKB | SEEN_S0 | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2); off = offsetof(struct sk_buff, vlan_tci); emit_half_load(r_s0, r_skb, off, ctx); - if (inst->code == BPF_S_ANC_VLAN_TAG) - emit_and(r_A, r_s0, VLAN_VID_MASK, ctx); - else - emit_and(r_A, r_s0, VLAN_TAG_PRESENT, ctx); + if (code == (BPF_ANC | SKF_AD_VLAN_TAG)) { + emit_andi(r_A, r_s0, (u16)~VLAN_TAG_PRESENT, ctx); + } else { + emit_andi(r_A, r_s0, VLAN_TAG_PRESENT, ctx); + /* return 1 if present */ + emit_sltu(r_A, r_zero, r_A, ctx); + } break; - case BPF_S_ANC_PKTTYPE: + case BPF_ANC | SKF_AD_PKTTYPE: + ctx->flags |= SEEN_SKB; + off = pkt_type_offset(); if (off < 0) @@ -1311,8 +1339,12 @@ jmp_cmp: emit_load_byte(r_tmp, r_skb, off, ctx); /* Keep only the last 3 bits */ emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); +#ifdef __BIG_ENDIAN_BITFIELD + /* Get the actual packet type to the lower 3 bits */ + emit_srl(r_A, r_A, 5, ctx); +#endif break; - case BPF_S_ANC_QUEUE: + case BPF_ANC | SKF_AD_QUEUE: ctx->flags |= SEEN_SKB | SEEN_A; BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2); @@ -1322,8 +1354,8 @@ jmp_cmp: emit_half_load(r_A, r_skb, off, ctx); break; default: - pr_warn("%s: Unhandled opcode: 0x%02x\n", __FILE__, - inst->code); + pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__, + inst->code); return -1; } } diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 790352f93700..35d16bd2760b 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -303,7 +303,6 @@ config PPC_EARLY_DEBUG_OPAL_VTERMNO This correspond to which /dev/hvcN you want to use for early debug. - On OPAL v1 (takeover) this should always be 0 On OPAL v2, this will be 0 for network console and 1 or 2 for the machine built-in serial ports. diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index 37991e154ef8..840a5509b3f1 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -88,4 +88,15 @@ static inline unsigned long ppc_function_entry(void *func) #endif } +static inline unsigned long ppc_global_function_entry(void *func) +{ +#if defined(CONFIG_PPC64) && defined(_CALL_ELF) && _CALL_ELF == 2 + /* PPC64 ABIv2 the global entry point is at the address */ + return (unsigned long)func; +#else + /* All other cases there is no change vs ppc_function_entry() */ + return ppc_function_entry(func); +#endif +} + #endif /* _ASM_POWERPC_CODE_PATCHING_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 460018889ba9..0da1dbd42e02 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -12,27 +12,7 @@ #ifndef __OPAL_H #define __OPAL_H -/****** Takeover interface ********/ - -/* PAPR H-Call used to querty the HAL existence and/or instanciate - * it from within pHyp (tech preview only). - * - * This is exclusively used in prom_init.c - */ - #ifndef __ASSEMBLY__ - -struct opal_takeover_args { - u64 k_image; /* r4 */ - u64 k_size; /* r5 */ - u64 k_entry; /* r6 */ - u64 k_entry2; /* r7 */ - u64 hal_addr; /* r8 */ - u64 rd_image; /* r9 */ - u64 rd_size; /* r10 */ - u64 rd_loc; /* r11 */ -}; - /* * SG entry * @@ -55,15 +35,6 @@ struct opal_sg_list { /* We calculate number of sg entries based on PAGE_SIZE */ #define SG_ENTRIES_PER_NODE ((PAGE_SIZE - 16) / sizeof(struct opal_sg_entry)) -extern long opal_query_takeover(u64 *hal_size, u64 *hal_align); - -extern long opal_do_takeover(struct opal_takeover_args *args); - -struct rtas_args; -extern int opal_enter_rtas(struct rtas_args *args, - unsigned long data, - unsigned long entry); - #endif /* __ASSEMBLY__ */ /****** OPAL APIs ******/ diff --git a/arch/powerpc/include/asm/swab.h b/arch/powerpc/include/asm/swab.h index b9bd1ca944d0..96f59de61855 100644 --- a/arch/powerpc/include/asm/swab.h +++ b/arch/powerpc/include/asm/swab.h @@ -9,10 +9,6 @@ #include <uapi/asm/swab.h> -#ifdef __GNUC__ -#ifndef __powerpc64__ -#endif /* __powerpc64__ */ - static __inline__ __u16 ld_le16(const volatile __u16 *addr) { __u16 val; @@ -20,19 +16,12 @@ static __inline__ __u16 ld_le16(const volatile __u16 *addr) __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); return val; } -#define __arch_swab16p ld_le16 static __inline__ void st_le16(volatile __u16 *addr, const __u16 val) { __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); } -static inline void __arch_swab16s(__u16 *addr) -{ - st_le16(addr, *addr); -} -#define __arch_swab16s __arch_swab16s - static __inline__ __u32 ld_le32(const volatile __u32 *addr) { __u32 val; @@ -40,42 +29,10 @@ static __inline__ __u32 ld_le32(const volatile __u32 *addr) __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (addr), "m" (*addr)); return val; } -#define __arch_swab32p ld_le32 static __inline__ void st_le32(volatile __u32 *addr, const __u32 val) { __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*addr) : "r" (val), "r" (addr)); } -static inline void __arch_swab32s(__u32 *addr) -{ - st_le32(addr, *addr); -} -#define __arch_swab32s __arch_swab32s - -static inline __attribute_const__ __u16 __arch_swab16(__u16 value) -{ - __u16 result; - - __asm__("rlwimi %0,%1,8,16,23" - : "=r" (result) - : "r" (value), "0" (value >> 8)); - return result; -} -#define __arch_swab16 __arch_swab16 - -static inline __attribute_const__ __u32 __arch_swab32(__u32 value) -{ - __u32 result; - - __asm__("rlwimi %0,%1,24,16,23\n\t" - "rlwimi %0,%1,8,8,15\n\t" - "rlwimi %0,%1,24,0,7" - : "=r" (result) - : "r" (value), "0" (value >> 24)); - return result; -} -#define __arch_swab32 __arch_swab32 - -#endif /* __GNUC__ */ #endif /* _ASM_POWERPC_SWAB_H */ diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c index f202d0731b06..d178834fe508 100644 --- a/arch/powerpc/kernel/ftrace.c +++ b/arch/powerpc/kernel/ftrace.c @@ -10,6 +10,8 @@ * */ +#define pr_fmt(fmt) "ftrace-powerpc: " fmt + #include <linux/spinlock.h> #include <linux/hardirq.h> #include <linux/uaccess.h> @@ -105,7 +107,7 @@ __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned int op; - unsigned long ptr; + unsigned long entry, ptr; unsigned long ip = rec->ip; void *tramp; @@ -115,7 +117,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", op); return -EINVAL; } @@ -125,21 +127,21 @@ __ftrace_make_nop(struct module *mod, pr_devel("ip:%lx jumps to %p", ip, tramp); if (!is_module_trampoline(tramp)) { - printk(KERN_ERR "Not a trampoline\n"); + pr_err("Not a trampoline\n"); return -EINVAL; } if (module_trampoline_target(mod, tramp, &ptr)) { - printk(KERN_ERR "Failed to get trampoline target\n"); + pr_err("Failed to get trampoline target\n"); return -EFAULT; } pr_devel("trampoline target %lx", ptr); + entry = ppc_global_function_entry((void *)addr); /* This should match what was called */ - if (ptr != ppc_function_entry((void *)addr)) { - printk(KERN_ERR "addr %lx does not match expected %lx\n", - ptr, ppc_function_entry((void *)addr)); + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); return -EINVAL; } @@ -179,7 +181,7 @@ __ftrace_make_nop(struct module *mod, /* Make sure that that this is still a 24bit jump */ if (!is_bl_op(op)) { - printk(KERN_ERR "Not expected bl: opcode is %x\n", op); + pr_err("Not expected bl: opcode is %x\n", op); return -EINVAL; } @@ -198,7 +200,7 @@ __ftrace_make_nop(struct module *mod, /* Find where the trampoline jumps to */ if (probe_kernel_read(jmp, (void *)tramp, sizeof(jmp))) { - printk(KERN_ERR "Failed to read %lx\n", tramp); + pr_err("Failed to read %lx\n", tramp); return -EFAULT; } @@ -209,7 +211,7 @@ __ftrace_make_nop(struct module *mod, ((jmp[1] & 0xffff0000) != 0x398c0000) || (jmp[2] != 0x7d8903a6) || (jmp[3] != 0x4e800420)) { - printk(KERN_ERR "Not a trampoline\n"); + pr_err("Not a trampoline\n"); return -EINVAL; } @@ -221,8 +223,7 @@ __ftrace_make_nop(struct module *mod, pr_devel(" %lx ", tramp); if (tramp != addr) { - printk(KERN_ERR - "Trampoline location %08lx does not match addr\n", + pr_err("Trampoline location %08lx does not match addr\n", tramp); return -EINVAL; } @@ -263,15 +264,13 @@ int ftrace_make_nop(struct module *mod, */ if (!rec->arch.mod) { if (!mod) { - printk(KERN_ERR "No module loaded addr=%lx\n", - addr); + pr_err("No module loaded addr=%lx\n", addr); return -EFAULT; } rec->arch.mod = mod; } else if (mod) { if (mod != rec->arch.mod) { - printk(KERN_ERR - "Record mod %p not equal to passed in mod %p\n", + pr_err("Record mod %p not equal to passed in mod %p\n", rec->arch.mod, mod); return -EINVAL; } @@ -307,26 +306,25 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * The load offset is different depending on the ABI. For simplicity * just mask it out when doing the compare. */ - if ((op[0] != 0x48000008) || ((op[1] & 0xffff00000) != 0xe8410000)) { - printk(KERN_ERR "Unexpected call sequence: %x %x\n", - op[0], op[1]); + if ((op[0] != 0x48000008) || ((op[1] & 0xffff0000) != 0xe8410000)) { + pr_err("Unexpected call sequence: %x %x\n", op[0], op[1]); return -EINVAL; } /* If we never set up a trampoline to ftrace_caller, then bail */ if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); + pr_err("No ftrace trampoline\n"); return -EINVAL; } /* Ensure branch is within 24 bits */ - if (create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { - printk(KERN_ERR "Branch out of range"); + if (!create_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { + pr_err("Branch out of range\n"); return -EINVAL; } if (patch_branch(ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK)) { - printk(KERN_ERR "REL24 out of range!\n"); + pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -345,13 +343,13 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) /* It should be pointing to a nop */ if (op != PPC_INST_NOP) { - printk(KERN_ERR "Expected NOP but have %x\n", op); + pr_err("Expected NOP but have %x\n", op); return -EINVAL; } /* If we never set up a trampoline to ftrace_caller, then bail */ if (!rec->arch.mod->arch.tramp) { - printk(KERN_ERR "No ftrace trampoline\n"); + pr_err("No ftrace trampoline\n"); return -EINVAL; } @@ -359,7 +357,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) op = create_branch((unsigned int *)ip, rec->arch.mod->arch.tramp, BRANCH_SET_LINK); if (!op) { - printk(KERN_ERR "REL24 out of range!\n"); + pr_err("REL24 out of range!\n"); return -EINVAL; } @@ -397,7 +395,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * already have a module defined. */ if (!rec->arch.mod) { - printk(KERN_ERR "No module loaded\n"); + pr_err("No module loaded\n"); return -EINVAL; } diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index b82227e7e21b..12e48d56f771 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -23,7 +23,7 @@ unsigned int ioread16(void __iomem *addr) } unsigned int ioread16be(void __iomem *addr) { - return in_be16(addr); + return readw_be(addr); } unsigned int ioread32(void __iomem *addr) { @@ -31,7 +31,7 @@ unsigned int ioread32(void __iomem *addr) } unsigned int ioread32be(void __iomem *addr) { - return in_be32(addr); + return readl_be(addr); } EXPORT_SYMBOL(ioread8); EXPORT_SYMBOL(ioread16); @@ -49,7 +49,7 @@ void iowrite16(u16 val, void __iomem *addr) } void iowrite16be(u16 val, void __iomem *addr) { - out_be16(addr, val); + writew_be(val, addr); } void iowrite32(u32 val, void __iomem *addr) { @@ -57,7 +57,7 @@ void iowrite32(u32 val, void __iomem *addr) } void iowrite32be(u32 val, void __iomem *addr) { - out_be32(addr, val); + writel_be(val, addr); } EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -75,15 +75,15 @@ EXPORT_SYMBOL(iowrite32be); */ void ioread8_rep(void __iomem *addr, void *dst, unsigned long count) { - _insb((u8 __iomem *) addr, dst, count); + readsb(addr, dst, count); } void ioread16_rep(void __iomem *addr, void *dst, unsigned long count) { - _insw_ns((u16 __iomem *) addr, dst, count); + readsw(addr, dst, count); } void ioread32_rep(void __iomem *addr, void *dst, unsigned long count) { - _insl_ns((u32 __iomem *) addr, dst, count); + readsl(addr, dst, count); } EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); @@ -91,15 +91,15 @@ EXPORT_SYMBOL(ioread32_rep); void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsb((u8 __iomem *) addr, src, count); + writesb(addr, src, count); } void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsw_ns((u16 __iomem *) addr, src, count); + writesw(addr, src, count); } void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count) { - _outsl_ns((u32 __iomem *) addr, src, count); + writesl(addr, src, count); } EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 90fab64d911d..2f72af82513c 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -32,6 +32,7 @@ #include <linux/module.h> #include <linux/kdebug.h> #include <linux/slab.h> +#include <asm/code-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/uaccess.h> @@ -491,12 +492,10 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, return ret; } -#ifdef CONFIG_PPC64 unsigned long arch_deref_entry_point(void *entry) { - return ((func_descr_t *)entry)->entry; + return ppc_global_function_entry(entry); } -#endif int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { @@ -508,8 +507,12 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) /* setup return addr to the jprobe handler routine */ regs->nip = arch_deref_entry_point(jp->entry); #ifdef CONFIG_PPC64 +#if defined(_CALL_ELF) && _CALL_ELF == 2 + regs->gpr[12] = (unsigned long)jp->entry; +#else regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc); #endif +#endif return 1; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 077d2ce6c5a7..d807ee626af9 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -315,8 +315,17 @@ static void dedotify_versions(struct modversion_info *vers, struct modversion_info *end; for (end = (void *)vers + size; vers < end; vers++) - if (vers->name[0] == '.') + if (vers->name[0] == '.') { memmove(vers->name, vers->name+1, strlen(vers->name)); +#ifdef ARCH_RELOCATES_KCRCTAB + /* The TOC symbol has no CRC computed. To avoid CRC + * check failing, we must force it to the expected + * value (see CRC check in module.c). + */ + if (!strcmp(vers->name, "TOC.")) + vers->crc = -(unsigned long)reloc_start; +#endif + } } /* Undefined symbols which refer to .funcname, hack to funcname (or .TOC.) */ diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 613a860a203c..b694b0730971 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -662,13 +662,6 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL); #endif - /* Pre-initialize the cmd_line with the content of boot_commmand_line, - * which will be empty except when the content of the variable has - * been overriden by a bootloading mechanism. This happens typically - * with HAL takeover - */ - strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); - /* Retrieve various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 078145acf7fb..1a85d8f96739 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1268,201 +1268,6 @@ static u64 __initdata prom_opal_base; static u64 __initdata prom_opal_entry; #endif -#ifdef __BIG_ENDIAN__ -/* XXX Don't change this structure without updating opal-takeover.S */ -static struct opal_secondary_data { - s64 ack; /* 0 */ - u64 go; /* 8 */ - struct opal_takeover_args args; /* 16 */ -} opal_secondary_data; - -static u64 __initdata prom_opal_align; -static u64 __initdata prom_opal_size; -static int __initdata prom_rtas_start_cpu; -static u64 __initdata prom_rtas_data; -static u64 __initdata prom_rtas_entry; - -extern char opal_secondary_entry; - -static void __init prom_query_opal(void) -{ - long rc; - - /* We must not query for OPAL presence on a machine that - * supports TNK takeover (970 blades), as this uses the same - * h-call with different arguments and will crash - */ - if (PHANDLE_VALID(call_prom("finddevice", 1, 1, - ADDR("/tnk-memory-map")))) { - prom_printf("TNK takeover detected, skipping OPAL check\n"); - return; - } - - prom_printf("Querying for OPAL presence... "); - - rc = opal_query_takeover(&prom_opal_size, - &prom_opal_align); - prom_debug("(rc = %ld) ", rc); - if (rc != 0) { - prom_printf("not there.\n"); - return; - } - of_platform = PLATFORM_OPAL; - prom_printf(" there !\n"); - prom_debug(" opal_size = 0x%lx\n", prom_opal_size); - prom_debug(" opal_align = 0x%lx\n", prom_opal_align); - if (prom_opal_align < 0x10000) - prom_opal_align = 0x10000; -} - -static int __init prom_rtas_call(int token, int nargs, int nret, - int *outputs, ...) -{ - struct rtas_args rtas_args; - va_list list; - int i; - - rtas_args.token = token; - rtas_args.nargs = nargs; - rtas_args.nret = nret; - rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]); - va_start(list, outputs); - for (i = 0; i < nargs; ++i) - rtas_args.args[i] = va_arg(list, rtas_arg_t); - va_end(list); - - for (i = 0; i < nret; ++i) - rtas_args.rets[i] = 0; - - opal_enter_rtas(&rtas_args, prom_rtas_data, - prom_rtas_entry); - - if (nret > 1 && outputs != NULL) - for (i = 0; i < nret-1; ++i) - outputs[i] = rtas_args.rets[i+1]; - return (nret > 0)? rtas_args.rets[0]: 0; -} - -static void __init prom_opal_hold_cpus(void) -{ - int i, cnt, cpu, rc; - long j; - phandle node; - char type[64]; - u32 servers[8]; - void *entry = (unsigned long *)&opal_secondary_entry; - struct opal_secondary_data *data = &opal_secondary_data; - - prom_debug("prom_opal_hold_cpus: start...\n"); - prom_debug(" - entry = 0x%x\n", entry); - prom_debug(" - data = 0x%x\n", data); - - data->ack = -1; - data->go = 0; - - /* look for cpus */ - for (node = 0; prom_next_node(&node); ) { - type[0] = 0; - prom_getprop(node, "device_type", type, sizeof(type)); - if (strcmp(type, "cpu") != 0) - continue; - - /* Skip non-configured cpus. */ - if (prom_getprop(node, "status", type, sizeof(type)) > 0) - if (strcmp(type, "okay") != 0) - continue; - - cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, - sizeof(servers)); - if (cnt == PROM_ERROR) - break; - cnt >>= 2; - for (i = 0; i < cnt; i++) { - cpu = servers[i]; - prom_debug("CPU %d ... ", cpu); - if (cpu == prom.cpu) { - prom_debug("booted !\n"); - continue; - } - prom_debug("starting ... "); - - /* Init the acknowledge var which will be reset by - * the secondary cpu when it awakens from its OF - * spinloop. - */ - data->ack = -1; - rc = prom_rtas_call(prom_rtas_start_cpu, 3, 1, - NULL, cpu, entry, data); - prom_debug("rtas rc=%d ...", rc); - - for (j = 0; j < 100000000 && data->ack == -1; j++) { - HMT_low(); - mb(); - } - HMT_medium(); - if (data->ack != -1) - prom_debug("done, PIR=0x%x\n", data->ack); - else - prom_debug("timeout !\n"); - } - } - prom_debug("prom_opal_hold_cpus: end...\n"); -} - -static void __init prom_opal_takeover(void) -{ - struct opal_secondary_data *data = &opal_secondary_data; - struct opal_takeover_args *args = &data->args; - u64 align = prom_opal_align; - u64 top_addr, opal_addr; - - args->k_image = (u64)_stext; - args->k_size = _end - _stext; - args->k_entry = 0; - args->k_entry2 = 0x60; - - top_addr = _ALIGN_UP(args->k_size, align); - - if (prom_initrd_start != 0) { - args->rd_image = prom_initrd_start; - args->rd_size = prom_initrd_end - args->rd_image; - args->rd_loc = top_addr; - top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); - } - - /* Pickup an address for the HAL. We want to go really high - * up to avoid problem with future kexecs. On the other hand - * we don't want to be all over the TCEs on P5IOC2 machines - * which are going to be up there too. We assume the machine - * has plenty of memory, and we ask for the HAL for now to - * be just below the 1G point, or above the initrd - */ - opal_addr = _ALIGN_DOWN(0x40000000 - prom_opal_size, align); - if (opal_addr < top_addr) - opal_addr = top_addr; - args->hal_addr = opal_addr; - - /* Copy the command line to the kernel image */ - strlcpy(boot_command_line, prom_cmd_line, - COMMAND_LINE_SIZE); - - prom_debug(" k_image = 0x%lx\n", args->k_image); - prom_debug(" k_size = 0x%lx\n", args->k_size); - prom_debug(" k_entry = 0x%lx\n", args->k_entry); - prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2); - prom_debug(" hal_addr = 0x%lx\n", args->hal_addr); - prom_debug(" rd_image = 0x%lx\n", args->rd_image); - prom_debug(" rd_size = 0x%lx\n", args->rd_size); - prom_debug(" rd_loc = 0x%lx\n", args->rd_loc); - prom_printf("Performing OPAL takeover,this can take a few minutes..\n"); - prom_close_stdin(); - mb(); - data->go = 1; - for (;;) - opal_do_takeover(args); -} -#endif /* __BIG_ENDIAN__ */ - /* * Allocate room for and instantiate OPAL */ @@ -1597,12 +1402,6 @@ static void __init prom_instantiate_rtas(void) &val, sizeof(val)) != PROM_ERROR) rtas_has_query_cpu_stopped = true; -#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) - /* PowerVN takeover hack */ - prom_rtas_data = base; - prom_rtas_entry = entry; - prom_getprop(rtas_node, "start-cpu", &prom_rtas_start_cpu, 4); -#endif prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); prom_debug("rtas size = 0x%x\n", (long)size); @@ -3027,16 +2826,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_instantiate_rtas(); #ifdef CONFIG_PPC_POWERNV -#ifdef __BIG_ENDIAN__ - /* Detect HAL and try instanciating it & doing takeover */ - if (of_platform == PLATFORM_PSERIES_LPAR) { - prom_query_opal(); - if (of_platform == PLATFORM_OPAL) { - prom_opal_hold_cpus(); - prom_opal_takeover(); - } - } else -#endif /* __BIG_ENDIAN__ */ if (of_platform == PLATFORM_OPAL) prom_instantiate_opal(); #endif /* CONFIG_PPC_POWERNV */ diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 77aa1e95e904..fe8e54b9ef7d 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -21,9 +21,7 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext -opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry -boot_command_line __prom_init_toc_start __prom_init_toc_end -btext_setup_display TOC." +__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC." NM="$1" OBJ="$2" diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index e239df3768ac..e5b022c55ccd 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -469,9 +469,17 @@ void __init smp_setup_cpu_maps(void) } for (j = 0; j < nthreads && cpu < nr_cpu_ids; j++) { + bool avail; + DBG(" thread %d -> cpu %d (hard id %d)\n", j, cpu, be32_to_cpu(intserv[j])); - set_cpu_present(cpu, of_device_is_available(dn)); + + avail = of_device_is_available(dn); + if (!avail) + avail = !of_property_match_string(dn, + "enable-method", "spin-table"); + + set_cpu_present(cpu, avail); set_hard_smp_processor_id(cpu, be32_to_cpu(intserv[j])); set_cpu_possible(cpu, true); cpu++; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 4e47db686b5d..1bc5a1755ed4 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -54,7 +54,6 @@ #include "signal.h" -#undef DEBUG_SIG #ifdef CONFIG_PPC64 #define sys_rt_sigreturn compat_sys_rt_sigreturn @@ -1063,10 +1062,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, return 1; badframe: -#ifdef DEBUG_SIG - printk("badframe in handle_rt_signal, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_rt_signal32: " @@ -1484,10 +1479,6 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, return 1; badframe: -#ifdef DEBUG_SIG - printk("badframe in handle_signal, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(KERN_INFO "%s[%d]: bad frame in handle_signal32: " diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index d501dc4dc3e6..97c1e4b683fc 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -38,7 +38,6 @@ #include "signal.h" -#define DEBUG_SIG 0 #define GP_REGS_SIZE min(sizeof(elf_gregset_t), sizeof(struct pt_regs)) #define FP_REGS_SIZE sizeof(elf_fpregset_t) @@ -700,10 +699,6 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5, return 0; badframe: -#if DEBUG_SIG - printk("badframe in sys_rt_sigreturn, regs=%p uc=%p &uc->uc_mcontext=%p\n", - regs, uc, &uc->uc_mcontext); -#endif if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "rt_sigreturn", @@ -809,10 +804,6 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, return 1; badframe: -#if DEBUG_SIG - printk("badframe in setup_rt_frame, regs=%p frame=%p newsp=%lx\n", - regs, frame, newsp); -#endif if (show_unhandled_signals) printk_ratelimited(regs->msr & MSR_64BIT ? fmt64 : fmt32, current->comm, current->pid, "setup_rt_frame", diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c index 94560db788bf..2c15ff094483 100644 --- a/arch/powerpc/platforms/cell/cbe_thermal.c +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -125,7 +125,7 @@ static ssize_t show_throttle(struct cbe_pmd_regs __iomem *pmd_regs, char *buf, i static ssize_t store_throttle(struct cbe_pmd_regs __iomem *pmd_regs, const char *buf, size_t size, int pos) { u64 reg_value; - int temp; + unsigned int temp; u64 new_value; int ret; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index d55891f89a2c..4ad227d04c1a 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,4 +1,4 @@ -obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o +obj-y += setup.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o diff --git a/arch/powerpc/platforms/powernv/opal-takeover.S b/arch/powerpc/platforms/powernv/opal-takeover.S deleted file mode 100644 index 11a3169ee583..000000000000 --- a/arch/powerpc/platforms/powernv/opal-takeover.S +++ /dev/null @@ -1,140 +0,0 @@ -/* - * PowerNV OPAL takeover assembly code, for use by prom_init.c - * - * Copyright 2011 IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/ppc_asm.h> -#include <asm/hvcall.h> -#include <asm/asm-offsets.h> -#include <asm/opal.h> - -#define H_HAL_TAKEOVER 0x5124 -#define H_HAL_TAKEOVER_QUERY_MAGIC -1 - - .text -_GLOBAL(opal_query_takeover) - mfcr r0 - stw r0,8(r1) - stdu r1,-STACKFRAMESIZE(r1) - std r3,STK_PARAM(R3)(r1) - std r4,STK_PARAM(R4)(r1) - li r3,H_HAL_TAKEOVER - li r4,H_HAL_TAKEOVER_QUERY_MAGIC - HVSC - addi r1,r1,STACKFRAMESIZE - ld r10,STK_PARAM(R3)(r1) - std r4,0(r10) - ld r10,STK_PARAM(R4)(r1) - std r5,0(r10) - lwz r0,8(r1) - mtcrf 0xff,r0 - blr - -_GLOBAL(opal_do_takeover) - mfcr r0 - stw r0,8(r1) - mflr r0 - std r0,16(r1) - bl __opal_do_takeover - ld r0,16(r1) - mtlr r0 - lwz r0,8(r1) - mtcrf 0xff,r0 - blr - -__opal_do_takeover: - ld r4,0(r3) - ld r5,0x8(r3) - ld r6,0x10(r3) - ld r7,0x18(r3) - ld r8,0x20(r3) - ld r9,0x28(r3) - ld r10,0x30(r3) - ld r11,0x38(r3) - li r3,H_HAL_TAKEOVER - HVSC - blr - - .globl opal_secondary_entry -opal_secondary_entry: - mr r31,r3 - mfmsr r11 - li r12,(MSR_SF | MSR_ISF)@highest - sldi r12,r12,48 - or r11,r11,r12 - mtmsrd r11 - isync - mfspr r4,SPRN_PIR - std r4,0(r3) -1: HMT_LOW - ld r4,8(r3) - cmpli cr0,r4,0 - beq 1b - HMT_MEDIUM -1: addi r3,r31,16 - bl __opal_do_takeover - b 1b - -_GLOBAL(opal_enter_rtas) - mflr r0 - std r0,16(r1) - stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */ - - /* Because PROM is running in 32b mode, it clobbers the high order half - * of all registers that it saves. We therefore save those registers - * PROM might touch to the stack. (r0, r3-r13 are caller saved) - */ - SAVE_GPR(2, r1) - SAVE_GPR(13, r1) - SAVE_8GPRS(14, r1) - SAVE_10GPRS(22, r1) - mfcr r10 - mfmsr r11 - std r10,_CCR(r1) - std r11,_MSR(r1) - - /* Get the PROM entrypoint */ - mtlr r5 - - /* Switch MSR to 32 bits mode - */ - li r12,1 - rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) - andc r11,r11,r12 - li r12,1 - rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) - andc r11,r11,r12 - mtmsrd r11 - isync - - /* Enter RTAS here... */ - blrl - - /* Just make sure that r1 top 32 bits didn't get - * corrupt by OF - */ - rldicl r1,r1,0,32 - - /* Restore the MSR (back to 64 bits) */ - ld r0,_MSR(r1) - MTMSRD(r0) - isync - - /* Restore other registers */ - REST_GPR(2, r1) - REST_GPR(13, r1) - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) - ld r4,_CCR(r1) - mtcr r4 - - addi r1,r1,PROM_FRAME_SIZE - ld r0,16(r1) - mtlr r0 - blr diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 62c47bb76517..9e5353ff6d1b 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -476,6 +476,11 @@ void __init alloc_dart_table(void) */ dart_tablebase = (unsigned long) __va(memblock_alloc_base(1UL<<24, 1UL<<24, 0x80000000L)); + /* + * The DART space is later unmapped from the kernel linear mapping and + * accessing dart_tablebase during kmemleak scanning will fault. + */ + kmemleak_no_scan((void *)dart_tablebase); printk(KERN_INFO "DART table allocated at: %lx\n", dart_tablebase); } diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h index 375cffcf7dbd..91d219381306 100644 --- a/arch/sparc/include/asm/irq_64.h +++ b/arch/sparc/include/asm/irq_64.h @@ -89,7 +89,7 @@ static inline unsigned long get_softint(void) return retval; } -void arch_trigger_all_cpu_backtrace(void); +void arch_trigger_all_cpu_backtrace(bool); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace extern void *hardirq_stack[NR_CPUS]; diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index b2988f25e230..027e09986194 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp) } } -void arch_trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(bool include_self) { struct thread_info *tp = current_thread_info(); struct pt_regs *regs = get_irq_regs(); @@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void) spin_lock_irqsave(&global_cpu_snapshot_lock, flags); - memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); - this_cpu = raw_smp_processor_id(); - __global_reg_self(tp, regs, this_cpu); + memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot)); + + if (include_self) + __global_reg_self(tp, regs, this_cpu); smp_fetch_global_regs(); for_each_online_cpu(cpu) { - struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg; + struct global_reg_snapshot *gp; + + if (!include_self && cpu == this_cpu) + continue; + + gp = &global_cpu_snapshot[cpu].reg; __global_reg_poll(gp); @@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void) static void sysrq_handle_globreg(int key) { - arch_trigger_all_cpu_backtrace(); + arch_trigger_all_cpu_backtrace(true); } static struct sysrq_key_op sparc_globalreg_op = { diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index cb6cfcd034cf..a80cbb88ea91 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); #ifdef CONFIG_X86_LOCAL_APIC -void arch_trigger_all_cpu_backtrace(void); +void arch_trigger_all_cpu_backtrace(bool); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace #endif diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index c3fcb5de5083..6a1e71bde323 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; /* "in progress" flag of arch_trigger_all_cpu_backtrace */ static unsigned long backtrace_flag; -void arch_trigger_all_cpu_backtrace(void) +void arch_trigger_all_cpu_backtrace(bool include_self) { int i; + int cpu = get_cpu(); - if (test_and_set_bit(0, &backtrace_flag)) + if (test_and_set_bit(0, &backtrace_flag)) { /* * If there is already a trigger_all_cpu_backtrace() in progress * (backtrace_flag == 1), don't output double cpu dump infos. */ + put_cpu(); return; + } cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); + if (!include_self) + cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); - printk(KERN_INFO "sending NMI to all CPUs:\n"); - apic->send_IPI_all(NMI_VECTOR); + if (!cpumask_empty(to_cpumask(backtrace_mask))) { + pr_info("sending NMI to %s CPUs:\n", + (include_self ? "all" : "other")); + apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR); + } /* Wait for up to 10 seconds for all CPUs to do the backtrace */ for (i = 0; i < 10 * 1000; i++) { if (cpumask_empty(to_cpumask(backtrace_mask))) break; mdelay(1); + touch_softlockup_watchdog(); } clear_bit(0, &backtrace_flag); smp_mb__after_atomic(); + put_cpu(); } static int diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index f0da82b8e634..dbaa23e78b36 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -423,9 +423,10 @@ sysenter_past_esp: jnz sysenter_audit sysenter_do_call: cmpl $(NR_syscalls), %eax - jae syscall_badsys + jae sysenter_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) +sysenter_after_call: LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF @@ -675,7 +676,12 @@ END(syscall_fault) syscall_badsys: movl $-ENOSYS,PT_EAX(%esp) - jmp resume_userspace + jmp syscall_exit +END(syscall_badsys) + +sysenter_badsys: + movl $-ENOSYS,PT_EAX(%esp) + jmp sysenter_after_call END(syscall_badsys) CFI_ENDPROC diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index a0da58db43a8..2851d63c1202 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -363,7 +363,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, /* Set up to return from userspace. */ restorer = current->mm->context.vdso + - selected_vdso32->sym___kernel_sigreturn; + selected_vdso32->sym___kernel_rt_sigreturn; if (ksig->ka.sa.sa_flags & SA_RESTORER) restorer = ksig->ka.sa.sa_restorer; put_user_ex(restorer, &frame->pretcode); diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 3c0809a0631f..61b04fe36e66 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -11,7 +11,6 @@ VDSO32-$(CONFIG_COMPAT) := y # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o vdso-fakesections.o -vobjs-nox32 := vdso-fakesections.o # files to link into kernel obj-y += vma.o @@ -67,7 +66,8 @@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso2c FORCE # CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) \ - -fno-omit-frame-pointer -foptimize-sibling-calls + -fno-omit-frame-pointer -foptimize-sibling-calls \ + -DDISABLE_BRANCH_PROFILING $(vobjs): KBUILD_CFLAGS += $(CFL) @@ -134,7 +134,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) -targets += vdso32/vclock_gettime.o +targets += vdso32/vclock_gettime.o vdso32/vdso-fakesections.o $(obj)/vdso32.o: $(vdso32-images:%=$(obj)/%) @@ -150,11 +150,13 @@ KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) KBUILD_CFLAGS_32 += -fno-omit-frame-pointer +KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ + $(obj)/vdso32/vdso-fakesections.o \ $(obj)/vdso32/note.o \ $(obj)/vdso32/%.o $(call if_changed,vdso) @@ -169,14 +171,24 @@ quiet_cmd_vdso = VDSO $@ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ - -Wl,-Bsymbolic $(LTO_CFLAGS) + $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic $(LTO_CFLAGS) GCOV_PROFILE := n # -# Install the unstripped copies of vdso*.so. +# Install the unstripped copies of vdso*.so. If our toolchain supports +# build-id, install .build-id links as well. # quiet_cmd_vdso_install = INSTALL $(@:install_%=%) - cmd_vdso_install = cp $< $(MODLIB)/vdso/$(@:install_%=%) +define cmd_vdso_install + cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ + if readelf -n $< |grep -q 'Build ID'; then \ + buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ + first=`echo $$buildid | cut -b-2`; \ + last=`echo $$buildid | cut -b3-`; \ + mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ + ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ + fi +endef vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index b2e4f493e5b0..9793322751e0 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -11,9 +11,6 @@ * Check with readelf after changing. */ -/* Disable profiling for userspace code: */ -#define DISABLE_BRANCH_PROFILING - #include <uapi/linux/time.h> #include <asm/vgtod.h> #include <asm/hpet.h> diff --git a/arch/x86/vdso/vdso-fakesections.c b/arch/x86/vdso/vdso-fakesections.c index cb8a8d72c24b..aa5fbfab20a5 100644 --- a/arch/x86/vdso/vdso-fakesections.c +++ b/arch/x86/vdso/vdso-fakesections.c @@ -2,31 +2,20 @@ * Copyright 2014 Andy Lutomirski * Subject to the GNU Public License, v.2 * - * Hack to keep broken Go programs working. - * - * The Go runtime had a couple of bugs: it would read the section table to try - * to figure out how many dynamic symbols there were (it shouldn't have looked - * at the section table at all) and, if there were no SHT_SYNDYM section table - * entry, it would use an uninitialized value for the number of symbols. As a - * workaround, we supply a minimal section table. vdso2c will adjust the - * in-memory image so that "vdso_fake_sections" becomes the section table. - * - * The bug was introduced by: - * https://code.google.com/p/go/source/detail?r=56ea40aac72b (2012-08-31) - * and is being addressed in the Go runtime in this issue: - * https://code.google.com/p/go/issues/detail?id=8197 + * String table for loadable section headers. See vdso2c.h for why + * this exists. */ -#ifndef __x86_64__ -#error This hack is specific to the 64-bit vDSO -#endif - -#include <linux/elf.h> - -extern const __visible struct elf64_shdr vdso_fake_sections[]; -const __visible struct elf64_shdr vdso_fake_sections[] = { - { - .sh_type = SHT_DYNSYM, - .sh_entsize = sizeof(Elf64_Sym), - } -}; +const char fake_shstrtab[] __attribute__((section(".fake_shstrtab"))) = + ".hash\0" + ".dynsym\0" + ".dynstr\0" + ".gnu.version\0" + ".gnu.version_d\0" + ".dynamic\0" + ".rodata\0" + ".fake_shstrtab\0" /* Yay, self-referential code. */ + ".note\0" + ".eh_frame_hdr\0" + ".eh_frame\0" + ".text"; diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S index 2ec72f651ebf..9197544eea9a 100644 --- a/arch/x86/vdso/vdso-layout.lds.S +++ b/arch/x86/vdso/vdso-layout.lds.S @@ -6,6 +6,16 @@ * This script controls its layout. */ +#if defined(BUILD_VDSO64) +# define SHDR_SIZE 64 +#elif defined(BUILD_VDSO32) || defined(BUILD_VDSOX32) +# define SHDR_SIZE 40 +#else +# error unknown VDSO target +#endif + +#define NUM_FAKE_SHDRS 13 + SECTIONS { . = SIZEOF_HEADERS; @@ -18,36 +28,53 @@ SECTIONS .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { + *(.rodata*) + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + + /* + * Ideally this would live in a C file, but that won't + * work cleanly for x32 until we start building the x32 + * C code using an x32 toolchain. + */ + VDSO_FAKE_SECTION_TABLE_START = .; + . = . + NUM_FAKE_SHDRS * SHDR_SIZE; + VDSO_FAKE_SECTION_TABLE_END = .; + } :text + + .fake_shstrtab : { *(.fake_shstrtab) } :text + + .note : { *(.note.*) } :text :note .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text - .dynamic : { *(.dynamic) } :text :dynamic - - .rodata : { *(.rodata*) } :text - .data : { - *(.data*) - *(.sdata*) - *(.got.plt) *(.got) - *(.gnu.linkonce.d.*) - *(.bss*) - *(.dynbss*) - *(.gnu.linkonce.b.*) - } - - .altinstructions : { *(.altinstructions) } - .altinstr_replacement : { *(.altinstr_replacement) } /* - * Align the actual code well away from the non-instruction data. - * This is the best thing for the I-cache. + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. */ - . = ALIGN(0x100); .text : { *(.text*) } :text =0x90909090, /* + * At the end so that eu-elflint stays happy when vdso2c strips + * these. A better implementation would avoid allocating space + * for these. + */ + .altinstructions : { *(.altinstructions) } :text + .altinstr_replacement : { *(.altinstr_replacement) } :text + + /* * The remainder of the vDSO consists of special pages that are * shared between the kernel and userspace. It needs to be at the * end so that it doesn't overlap the mapping of the actual @@ -75,6 +102,7 @@ SECTIONS /DISCARD/ : { *(.discard) *(.discard.*) + *(__bug_table) } } diff --git a/arch/x86/vdso/vdso.lds.S b/arch/x86/vdso/vdso.lds.S index 75e3404c83b1..6807932643c2 100644 --- a/arch/x86/vdso/vdso.lds.S +++ b/arch/x86/vdso/vdso.lds.S @@ -6,6 +6,8 @@ * the DSO. */ +#define BUILD_VDSO64 + #include "vdso-layout.lds.S" /* diff --git a/arch/x86/vdso/vdso2c.c b/arch/x86/vdso/vdso2c.c index 7a6bf50f9165..238dbe82776e 100644 --- a/arch/x86/vdso/vdso2c.c +++ b/arch/x86/vdso/vdso2c.c @@ -23,6 +23,8 @@ enum { sym_vvar_page, sym_hpet_page, sym_end_mapping, + sym_VDSO_FAKE_SECTION_TABLE_START, + sym_VDSO_FAKE_SECTION_TABLE_END, }; const int special_pages[] = { @@ -30,15 +32,26 @@ const int special_pages[] = { sym_hpet_page, }; -char const * const required_syms[] = { - [sym_vvar_page] = "vvar_page", - [sym_hpet_page] = "hpet_page", - [sym_end_mapping] = "end_mapping", - "VDSO32_NOTE_MASK", - "VDSO32_SYSENTER_RETURN", - "__kernel_vsyscall", - "__kernel_sigreturn", - "__kernel_rt_sigreturn", +struct vdso_sym { + const char *name; + bool export; +}; + +struct vdso_sym required_syms[] = { + [sym_vvar_page] = {"vvar_page", true}, + [sym_hpet_page] = {"hpet_page", true}, + [sym_end_mapping] = {"end_mapping", true}, + [sym_VDSO_FAKE_SECTION_TABLE_START] = { + "VDSO_FAKE_SECTION_TABLE_START", false + }, + [sym_VDSO_FAKE_SECTION_TABLE_END] = { + "VDSO_FAKE_SECTION_TABLE_END", false + }, + {"VDSO32_NOTE_MASK", true}, + {"VDSO32_SYSENTER_RETURN", true}, + {"__kernel_vsyscall", true}, + {"__kernel_sigreturn", true}, + {"__kernel_rt_sigreturn", true}, }; __attribute__((format(printf, 1, 2))) __attribute__((noreturn)) @@ -83,37 +96,21 @@ extern void bad_put_le(void); #define NSYMS (sizeof(required_syms) / sizeof(required_syms[0])) -#define BITS 64 -#define GOFUNC go64 -#define Elf_Ehdr Elf64_Ehdr -#define Elf_Shdr Elf64_Shdr -#define Elf_Phdr Elf64_Phdr -#define Elf_Sym Elf64_Sym -#define Elf_Dyn Elf64_Dyn +#define BITSFUNC3(name, bits) name##bits +#define BITSFUNC2(name, bits) BITSFUNC3(name, bits) +#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS) + +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + +#define ELF_BITS 64 #include "vdso2c.h" -#undef BITS -#undef GOFUNC -#undef Elf_Ehdr -#undef Elf_Shdr -#undef Elf_Phdr -#undef Elf_Sym -#undef Elf_Dyn - -#define BITS 32 -#define GOFUNC go32 -#define Elf_Ehdr Elf32_Ehdr -#define Elf_Shdr Elf32_Shdr -#define Elf_Phdr Elf32_Phdr -#define Elf_Sym Elf32_Sym -#define Elf_Dyn Elf32_Dyn +#undef ELF_BITS + +#define ELF_BITS 32 #include "vdso2c.h" -#undef BITS -#undef GOFUNC -#undef Elf_Ehdr -#undef Elf_Shdr -#undef Elf_Phdr -#undef Elf_Sym -#undef Elf_Dyn +#undef ELF_BITS static void go(void *addr, size_t len, FILE *outfile, const char *name) { diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h index c6eefaf389b9..df95a2fdff73 100644 --- a/arch/x86/vdso/vdso2c.h +++ b/arch/x86/vdso/vdso2c.h @@ -4,23 +4,136 @@ * are built for 32-bit userspace. */ -static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) +/* + * We're writing a section table for a few reasons: + * + * The Go runtime had a couple of bugs: it would read the section + * table to try to figure out how many dynamic symbols there were (it + * shouldn't have looked at the section table at all) and, if there + * were no SHT_SYNDYM section table entry, it would use an + * uninitialized value for the number of symbols. An empty DYNSYM + * table would work, but I see no reason not to write a valid one (and + * keep full performance for old Go programs). This hack is only + * needed on x86_64. + * + * The bug was introduced on 2012-08-31 by: + * https://code.google.com/p/go/source/detail?r=56ea40aac72b + * and was fixed on 2014-06-13 by: + * https://code.google.com/p/go/source/detail?r=fc1cd5e12595 + * + * Binutils has issues debugging the vDSO: it reads the section table to + * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which + * would break build-id if we removed the section table. Binutils + * also requires that shstrndx != 0. See: + * https://sourceware.org/bugzilla/show_bug.cgi?id=17064 + * + * elfutils might not look for PT_NOTE if there is a section table at + * all. I don't know whether this matters for any practical purpose. + * + * For simplicity, rather than hacking up a partial section table, we + * just write a mostly complete one. We omit non-dynamic symbols, + * though, since they're rather large. + * + * Once binutils gets fixed, we might be able to drop this for all but + * the 64-bit vdso, since build-id only works in kernel RPMs, and + * systems that update to new enough kernel RPMs will likely update + * binutils in sync. build-id has never worked for home-built kernel + * RPMs without manual symlinking, and I suspect that no one ever does + * that. + */ +struct BITSFUNC(fake_sections) +{ + ELF(Shdr) *table; + unsigned long table_offset; + int count, max_count; + + int in_shstrndx; + unsigned long shstr_offset; + const char *shstrtab; + size_t shstrtab_len; + + int out_shstrndx; +}; + +static unsigned int BITSFUNC(find_shname)(struct BITSFUNC(fake_sections) *out, + const char *name) +{ + const char *outname = out->shstrtab; + while (outname - out->shstrtab < out->shstrtab_len) { + if (!strcmp(name, outname)) + return (outname - out->shstrtab) + out->shstr_offset; + outname += strlen(outname) + 1; + } + + if (*name) + printf("Warning: could not find output name \"%s\"\n", name); + return out->shstr_offset + out->shstrtab_len - 1; /* Use a null. */ +} + +static void BITSFUNC(init_sections)(struct BITSFUNC(fake_sections) *out) +{ + if (!out->in_shstrndx) + fail("didn't find the fake shstrndx\n"); + + memset(out->table, 0, out->max_count * sizeof(ELF(Shdr))); + + if (out->max_count < 1) + fail("we need at least two fake output sections\n"); + + PUT_LE(&out->table[0].sh_type, SHT_NULL); + PUT_LE(&out->table[0].sh_name, BITSFUNC(find_shname)(out, "")); + + out->count = 1; +} + +static void BITSFUNC(copy_section)(struct BITSFUNC(fake_sections) *out, + int in_idx, const ELF(Shdr) *in, + const char *name) +{ + uint64_t flags = GET_LE(&in->sh_flags); + + bool copy = flags & SHF_ALLOC && + strcmp(name, ".altinstructions") && + strcmp(name, ".altinstr_replacement"); + + if (!copy) + return; + + if (out->count >= out->max_count) + fail("too many copied sections (max = %d)\n", out->max_count); + + if (in_idx == out->in_shstrndx) + out->out_shstrndx = out->count; + + out->table[out->count] = *in; + PUT_LE(&out->table[out->count].sh_name, + BITSFUNC(find_shname)(out, name)); + + /* elfutils requires that a strtab have the correct type. */ + if (!strcmp(name, ".fake_shstrtab")) + PUT_LE(&out->table[out->count].sh_type, SHT_STRTAB); + + out->count++; +} + +static void BITSFUNC(go)(void *addr, size_t len, + FILE *outfile, const char *name) { int found_load = 0; unsigned long load_size = -1; /* Work around bogus warning */ unsigned long data_size; - Elf_Ehdr *hdr = (Elf_Ehdr *)addr; + ELF(Ehdr) *hdr = (ELF(Ehdr) *)addr; int i; unsigned long j; - Elf_Shdr *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, + ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, *alt_sec = NULL; - Elf_Dyn *dyn = 0, *dyn_end = 0; + ELF(Dyn) *dyn = 0, *dyn_end = 0; const char *secstrings; uint64_t syms[NSYMS] = {}; - uint64_t fake_sections_value = 0, fake_sections_size = 0; + struct BITSFUNC(fake_sections) fake_sections = {}; - Elf_Phdr *pt = (Elf_Phdr *)(addr + GET_LE(&hdr->e_phoff)); + ELF(Phdr) *pt = (ELF(Phdr) *)(addr + GET_LE(&hdr->e_phoff)); /* Walk the segment table. */ for (i = 0; i < GET_LE(&hdr->e_phnum); i++) { @@ -51,7 +164,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) for (i = 0; dyn + i < dyn_end && GET_LE(&dyn[i].d_tag) != DT_NULL; i++) { typeof(dyn[i].d_tag) tag = GET_LE(&dyn[i].d_tag); - if (tag == DT_REL || tag == DT_RELSZ || + if (tag == DT_REL || tag == DT_RELSZ || tag == DT_RELA || tag == DT_RELENT || tag == DT_TEXTREL) fail("vdso image contains dynamic relocations\n"); } @@ -61,7 +174,7 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) GET_LE(&hdr->e_shentsize)*GET_LE(&hdr->e_shstrndx); secstrings = addr + GET_LE(&secstrings_hdr->sh_offset); for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { - Elf_Shdr *sh = addr + GET_LE(&hdr->e_shoff) + + ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) + GET_LE(&hdr->e_shentsize) * i; if (GET_LE(&sh->sh_type) == SHT_SYMTAB) symtab_hdr = sh; @@ -82,29 +195,63 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); i++) { int k; - Elf_Sym *sym = addr + GET_LE(&symtab_hdr->sh_offset) + + ELF(Sym) *sym = addr + GET_LE(&symtab_hdr->sh_offset) + GET_LE(&symtab_hdr->sh_entsize) * i; const char *name = addr + GET_LE(&strtab_hdr->sh_offset) + GET_LE(&sym->st_name); for (k = 0; k < NSYMS; k++) { - if (!strcmp(name, required_syms[k])) { + if (!strcmp(name, required_syms[k].name)) { if (syms[k]) { fail("duplicate symbol %s\n", - required_syms[k]); + required_syms[k].name); } syms[k] = GET_LE(&sym->st_value); } } - if (!strcmp(name, "vdso_fake_sections")) { - if (fake_sections_value) - fail("duplicate vdso_fake_sections\n"); - fake_sections_value = GET_LE(&sym->st_value); - fake_sections_size = GET_LE(&sym->st_size); + if (!strcmp(name, "fake_shstrtab")) { + ELF(Shdr) *sh; + + fake_sections.in_shstrndx = GET_LE(&sym->st_shndx); + fake_sections.shstrtab = addr + GET_LE(&sym->st_value); + fake_sections.shstrtab_len = GET_LE(&sym->st_size); + sh = addr + GET_LE(&hdr->e_shoff) + + GET_LE(&hdr->e_shentsize) * + fake_sections.in_shstrndx; + fake_sections.shstr_offset = GET_LE(&sym->st_value) - + GET_LE(&sh->sh_addr); } } + /* Build the output section table. */ + if (!syms[sym_VDSO_FAKE_SECTION_TABLE_START] || + !syms[sym_VDSO_FAKE_SECTION_TABLE_END]) + fail("couldn't find fake section table\n"); + if ((syms[sym_VDSO_FAKE_SECTION_TABLE_END] - + syms[sym_VDSO_FAKE_SECTION_TABLE_START]) % sizeof(ELF(Shdr))) + fail("fake section table size isn't a multiple of sizeof(Shdr)\n"); + fake_sections.table = addr + syms[sym_VDSO_FAKE_SECTION_TABLE_START]; + fake_sections.table_offset = syms[sym_VDSO_FAKE_SECTION_TABLE_START]; + fake_sections.max_count = (syms[sym_VDSO_FAKE_SECTION_TABLE_END] - + syms[sym_VDSO_FAKE_SECTION_TABLE_START]) / + sizeof(ELF(Shdr)); + + BITSFUNC(init_sections)(&fake_sections); + for (i = 0; i < GET_LE(&hdr->e_shnum); i++) { + ELF(Shdr) *sh = addr + GET_LE(&hdr->e_shoff) + + GET_LE(&hdr->e_shentsize) * i; + BITSFUNC(copy_section)(&fake_sections, i, sh, + secstrings + GET_LE(&sh->sh_name)); + } + if (!fake_sections.out_shstrndx) + fail("didn't generate shstrndx?!?\n"); + + PUT_LE(&hdr->e_shoff, fake_sections.table_offset); + PUT_LE(&hdr->e_shentsize, sizeof(ELF(Shdr))); + PUT_LE(&hdr->e_shnum, fake_sections.count); + PUT_LE(&hdr->e_shstrndx, fake_sections.out_shstrndx); + /* Validate mapping addresses. */ for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) { if (!syms[i]) @@ -112,25 +259,17 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) if (syms[i] % 4096) fail("%s must be a multiple of 4096\n", - required_syms[i]); + required_syms[i].name); if (syms[i] < data_size) fail("%s must be after the text mapping\n", - required_syms[i]); + required_syms[i].name); if (syms[sym_end_mapping] < syms[i] + 4096) - fail("%s overruns end_mapping\n", required_syms[i]); + fail("%s overruns end_mapping\n", + required_syms[i].name); } if (syms[sym_end_mapping] % 4096) fail("end_mapping must be a multiple of 4096\n"); - /* Remove sections or use fakes */ - if (fake_sections_size % sizeof(Elf_Shdr)) - fail("vdso_fake_sections size is not a multiple of %ld\n", - (long)sizeof(Elf_Shdr)); - PUT_LE(&hdr->e_shoff, fake_sections_value); - PUT_LE(&hdr->e_shentsize, fake_sections_value ? sizeof(Elf_Shdr) : 0); - PUT_LE(&hdr->e_shnum, fake_sections_size / sizeof(Elf_Shdr)); - PUT_LE(&hdr->e_shstrndx, SHN_UNDEF); - if (!name) { fwrite(addr, load_size, 1, outfile); return; @@ -168,9 +307,9 @@ static void GOFUNC(void *addr, size_t len, FILE *outfile, const char *name) (unsigned long)GET_LE(&alt_sec->sh_size)); } for (i = 0; i < NSYMS; i++) { - if (syms[i]) + if (required_syms[i].export && syms[i]) fprintf(outfile, "\t.sym_%s = 0x%" PRIx64 ",\n", - required_syms[i], syms[i]); + required_syms[i].name, syms[i]); } fprintf(outfile, "};\n"); } diff --git a/arch/x86/vdso/vdso32/vdso-fakesections.c b/arch/x86/vdso/vdso32/vdso-fakesections.c new file mode 100644 index 000000000000..541468e25265 --- /dev/null +++ b/arch/x86/vdso/vdso32/vdso-fakesections.c @@ -0,0 +1 @@ +#include "../vdso-fakesections.c" diff --git a/arch/x86/vdso/vdsox32.lds.S b/arch/x86/vdso/vdsox32.lds.S index 46b991b578a8..697c11ece90c 100644 --- a/arch/x86/vdso/vdsox32.lds.S +++ b/arch/x86/vdso/vdsox32.lds.S @@ -6,6 +6,8 @@ * the DSO. */ +#define BUILD_VDSOX32 + #include "vdso-layout.lds.S" /* |