diff options
Diffstat (limited to 'arch/arm/lib')
-rw-r--r-- | arch/arm/lib/copy_template.S | 12 | ||||
-rw-r--r-- | arch/arm/lib/memmove.S | 14 | ||||
-rw-r--r-- | arch/arm/lib/memset.S | 46 | ||||
-rw-r--r-- | arch/arm/lib/memzero.S | 44 |
4 files changed, 95 insertions, 21 deletions
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S index cab355c0c1f..139cce64605 100644 --- a/arch/arm/lib/copy_template.S +++ b/arch/arm/lib/copy_template.S @@ -13,14 +13,6 @@ */ /* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - -/* * Theory of operation * ------------------- * @@ -82,7 +74,7 @@ stmfd sp!, {r5 - r8} blt 5f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( rsb r3, ip, #32 ) CALGN( sbcnes r4, r3, r2 ) @ C is always set here CALGN( bcs 2f ) @@ -168,7 +160,7 @@ subs r2, r2, #28 blt 14f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( rsb ip, ip, #32 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index ef7fddc14ac..2e301b7bd8f 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -13,14 +13,6 @@ #include <linux/linkage.h> #include <asm/assembler.h> -/* - * This can be used to enable code to cacheline align the source pointer. - * Experiments on tested architectures (StrongARM and XScale) didn't show - * this a worthwhile thing to do. That might be different in the future. - */ -//#define CALGN(code...) code -#define CALGN(code...) - .text /* @@ -55,11 +47,12 @@ ENTRY(memmove) stmfd sp!, {r5 - r8} blt 5f - CALGN( ands ip, r1, #31 ) + CALGN( ands ip, r0, #31 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( bcs 2f ) CALGN( adr r4, 6f ) CALGN( subs r2, r2, ip ) @ C is set here + CALGN( rsb ip, ip, #32 ) CALGN( add pc, r4, ip ) PLD( pld [r1, #-4] ) @@ -138,8 +131,7 @@ ENTRY(memmove) subs r2, r2, #28 blt 14f - CALGN( ands ip, r1, #31 ) - CALGN( rsb ip, ip, #32 ) + CALGN( ands ip, r0, #31 ) CALGN( sbcnes r4, ip, r2 ) @ C is always set here CALGN( subcc r2, r2, ip ) CALGN( bcc 15f ) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 95b110b07a8..b477d4ac88e 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -39,6 +39,9 @@ ENTRY(memset) mov r3, r1 cmp r2, #16 blt 4f + +#if ! CALGN(1)+0 + /* * We need an extra register for this loop - save the return address and * use the LR @@ -64,6 +67,49 @@ ENTRY(memset) stmneia r0!, {r1, r3, ip, lr} ldr lr, [sp], #4 +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r7, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov ip, r1 + mov lr, r1 + + cmp r2, #96 + tstgt r0, #31 + ble 3f + + and ip, r0, #31 + rsb ip, ip, #32 + sub r2, r2, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + tst ip, #(1 << 30) + mov ip, r1 + strne r1, [r0], #4 + +3: subs r2, r2, #64 + stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia r0!, {r1, r3-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r2, #32 + stmneia r0!, {r1, r3-r7, ip, lr} + tst r2, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + 4: tst r2, #8 stmneia r0!, {r1, r3} tst r2, #4 diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S index abf2508e822..b8f79d80ee9 100644 --- a/arch/arm/lib/memzero.S +++ b/arch/arm/lib/memzero.S @@ -39,6 +39,9 @@ ENTRY(__memzero) */ cmp r1, #16 @ 1 we can skip this chunk if we blt 4f @ 1 have < 16 bytes + +#if ! CALGN(1)+0 + /* * We need an extra register for this loop - save the return address and * use the LR @@ -64,6 +67,47 @@ ENTRY(__memzero) stmneia r0!, {r2, r3, ip, lr} @ 4 ldr lr, [sp], #4 @ 1 +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r7, lr} + mov r4, r2 + mov r5, r2 + mov r6, r2 + mov r7, r2 + mov ip, r2 + mov lr, r2 + + cmp r1, #96 + andgts ip, r0, #31 + ble 3f + + rsb ip, ip, #32 + sub r1, r1, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + movs ip, ip, lsl #2 + strcs r2, [r0], #4 + +3: subs r1, r1, #64 + stmgeia r0!, {r2-r7, ip, lr} + stmgeia r0!, {r2-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r1, #32 + stmneia r0!, {r2-r7, ip, lr} + tst r1, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + 4: tst r1, #8 @ 1 8 bytes or more? stmneia r0!, {r2, r3} @ 2 tst r1, #4 @ 1 4 bytes or more? |