diff options
author | Jan Beulich <JBeulich@suse.com> | 2012-01-20 16:22:04 +0000 |
---|---|---|
committer | H. Peter Anvin <hpa@linux.intel.com> | 2012-01-20 17:29:49 -0800 |
commit | cb8095bba6d24118135a5683a956f4f4fb5f17bb (patch) | |
tree | 25eff3732e8471e314591d0bc6ea41d96857c18b /arch/x86/lib | |
parent | 819165fb34b9777f852429f2c6d6f79fbb71b9eb (diff) | |
download | linux-3.10-cb8095bba6d24118135a5683a956f4f4fb5f17bb.tar.gz linux-3.10-cb8095bba6d24118135a5683a956f4f4fb5f17bb.tar.bz2 linux-3.10-cb8095bba6d24118135a5683a956f4f4fb5f17bb.zip |
x86: atomic64 assembly improvements
In the "xchg" implementation, %ebx and %ecx don't need to be copied
into %eax and %edx respectively (this is only necessary when desiring
to only read the stored value).
In the "add_unless" implementation, swapping the use of %ecx and %esi
for passing arguments allows %esi to become an input only (i.e.
permitting the register to be re-used to address the same object
without reload).
In "{add,sub}_return", doing the initial read64 through the passed in
%ecx decreases a register dependency.
In "inc_not_zero", a branch can be eliminated by or-ing together the
two halves of the current (64-bit) value, and code size can be further
reduced by adjusting the arithmetic slightly.
v2: Undo the folding of "xchg" and "set".
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/4F19A2BC020000780006E0DC@nat28.tlf.novell.com
Cc: Luca Barbieri <luca@luca-barbieri.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/atomic64_386_32.S | 6 | ||||
-rw-r--r-- | arch/x86/lib/atomic64_cx8_32.S | 29 |
2 files changed, 14 insertions, 21 deletions
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index e8e7e0d06f4..00933d5e992 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -137,13 +137,13 @@ BEGIN(dec_return) RET_ENDP #undef v -#define v %ecx +#define v %esi BEGIN(add_unless) - addl %eax, %esi + addl %eax, %ecx adcl %edx, %edi addl (v), %eax adcl 4(v), %edx - cmpl %eax, %esi + cmpl %eax, %ecx je 3f 1: movl %eax, (v) diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 391a083674b..f5cc9eb1d51 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -55,8 +55,6 @@ ENDPROC(atomic64_set_cx8) ENTRY(atomic64_xchg_cx8) CFI_STARTPROC - movl %ebx, %eax - movl %ecx, %edx 1: LOCK_PREFIX cmpxchg8b (%esi) @@ -78,7 +76,7 @@ ENTRY(atomic64_\func\()_return_cx8) movl %edx, %edi movl %ecx, %ebp - read64 %ebp + read64 %ecx 1: movl %eax, %ebx movl %edx, %ecx @@ -159,23 +157,22 @@ ENTRY(atomic64_add_unless_cx8) SAVE ebx /* these just push these two parameters on the stack */ SAVE edi - SAVE esi + SAVE ecx - movl %ecx, %ebp - movl %eax, %esi + movl %eax, %ebp movl %edx, %edi - read64 %ebp + read64 %esi 1: cmpl %eax, 0(%esp) je 4f 2: movl %eax, %ebx movl %edx, %ecx - addl %esi, %ebx + addl %ebp, %ebx adcl %edi, %ecx LOCK_PREFIX - cmpxchg8b (%ebp) + cmpxchg8b (%esi) jne 1b movl $1, %eax @@ -199,13 +196,13 @@ ENTRY(atomic64_inc_not_zero_cx8) read64 %esi 1: - testl %eax, %eax - je 4f -2: + movl %eax, %ecx + orl %edx, %ecx + jz 3f movl %eax, %ebx - movl %edx, %ecx + xorl %ecx, %ecx addl $1, %ebx - adcl $0, %ecx + adcl %edx, %ecx LOCK_PREFIX cmpxchg8b (%esi) jne 1b @@ -214,9 +211,5 @@ ENTRY(atomic64_inc_not_zero_cx8) 3: RESTORE ebx ret -4: - testl %edx, %edx - jne 2b - jmp 3b CFI_ENDPROC ENDPROC(atomic64_inc_not_zero_cx8) |