summary | refs | log | tree | commit | diff
path: root/beecrypt
diff options
context:
space:
mode:
author: jbj <devnull@localhost> 2003-04-28 15:06:30 +0000
committer: jbj <devnull@localhost> 2003-04-28 15:06:30 +0000
commit: a08149369a67ef068bd205164c941768a8e97937 (patch)
tree: 2f28c2f88031789014a2fdf0b1a1cdadf0354ce2 /beecrypt
parent: 6099f7ee0e858d8831726ff1e430e4f80d63ba62 (diff)
download: rpm-a08149369a67ef068bd205164c941768a8e97937.tar.gz
rpm-a08149369a67ef068bd205164c941768a8e97937.tar.bz2
rpm-a08149369a67ef068bd205164c941768a8e97937.zip
beecrypt-3.0.0 merge: bring in latest asm code.
CVS patchset: 6783 CVS date: 2003/04/28 15:06:30
Diffstat (limited to 'beecrypt')
-rw-r--r-- beecrypt/gas/Makefile.am 6
-rw-r--r-- beecrypt/gas/aesopt.i586.S 688
-rw-r--r-- beecrypt/gas/aesopt.i586.m4 580
-rw-r--r-- beecrypt/gas/aesopt.powerpc.S 683
-rw-r--r-- beecrypt/gas/aesopt.ppc.m4 616
-rw-r--r-- beecrypt/gas/alpha.m4 34
-rw-r--r-- beecrypt/gas/asmdefs.m4 41
-rw-r--r-- beecrypt/gas/blowfishopt.i586.S 178
-rw-r--r-- beecrypt/gas/blowfishopt.i586.m4 162
-rw-r--r-- beecrypt/gas/blowfishopt.powerpc.S 242
-rw-r--r-- beecrypt/gas/blowfishopt.ppc.m4 161
-rw-r--r-- beecrypt/gas/fips180opt.ia64.S (renamed from beecrypt/gas/sha1opt.ia64.S) 14
-rw-r--r-- beecrypt/gas/ia64.m4 35
-rw-r--r-- beecrypt/gas/m68k.m4 34
-rw-r--r-- beecrypt/gas/mp32opt.arm.S 180
-rw-r--r-- beecrypt/gas/mp32opt.ia64.S 260
-rw-r--r-- beecrypt/gas/mp32opt.sparcv8.S 114
-rw-r--r-- beecrypt/gas/mp64opt.ia64.S 322
-rw-r--r-- beecrypt/gas/mpopt.alpha.m4 159
-rw-r--r-- beecrypt/gas/mpopt.arm.m4 83
-rw-r--r-- beecrypt/gas/mpopt.ia64.m4 187
-rw-r--r-- beecrypt/gas/mpopt.m68k.m4 158
-rw-r--r-- beecrypt/gas/mpopt.ppc.m4 (renamed from beecrypt/gas/mp32opt.powerpc.S) 83
-rw-r--r-- beecrypt/gas/mpopt.ppc64.m4 195
-rw-r--r-- beecrypt/gas/mpopt.sparcv8.m4 90
-rw-r--r-- beecrypt/gas/mpopt.sparcv8plus.m4 (renamed from beecrypt/gas/mp32opt.sparcv9.S) 110
-rw-r--r-- beecrypt/gas/mpopt.x86.m4 (renamed from beecrypt/gas/mp32opt.i386.S) 193
-rw-r--r-- beecrypt/gas/ppc.m4 85
-rw-r--r-- beecrypt/gas/ppc64.m4 71
-rw-r--r-- beecrypt/gas/sha1opt.i586.S 292
-rw-r--r-- beecrypt/gas/sha1opt.i586.m4 280
-rw-r--r-- beecrypt/gas/sha1opt.powerpc.S 287
-rw-r--r-- beecrypt/gas/sparc.m4 30
-rw-r--r-- beecrypt/gas/x86.m4 23
34 files changed, 3221 insertions, 3455 deletions
diff --git a/beecrypt/gas/Makefile.am b/beecrypt/gas/Makefile.am
index 8b80be914..4bcbc7cd5 100644
--- a/beecrypt/gas/Makefile.am
+++ b/beecrypt/gas/Makefile.am
@@ -1,9 +1,9 @@
#
# Makefile.am's purpose is to add the GNU Assembler sources to the dist
#
-# Copyright (c) 2001, 2002 Virtual Unlimited B.V.
+# Copyright (c) 2001, 2002, 2003 Virtual Unlimited B.V.
#
-# Author: Bob Deblier <bob@virtualunlimited.com>
+# Author: Bob Deblier <bob.deblier@pandora.be>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
@@ -22,4 +22,4 @@
AUTOMAKE_OPTIONS = gnu no-dependencies
-EXTRA_DIST = aesopt.i586.S aesopt.powerpc.S blowfishopt.i586.S blowfishopt.powerpc.S mp32opt.arm.S mp32opt.i386.S mp32opt.ia64.S mp32opt.powerpc.S mp32opt.sparcv8.S mp32opt.sparcv9.S mp64opt.ia64.S sha1opt.i586.S sha1opt.ia64.S sha1opt.powerpc.S
+EXTRA_DIST = aesopt.i586.m4 aesopt.ppc.m4 alpha.m4 asmdefs.m4 blowfishopt.i586.m4 blowfishopt.ppc.m4 ia64.m4 mpopt.alpha.m4 mpopt.arm.m4 mpopt.ia64.m4 mpopt.m68k.m4 mpopt.ppc.m4 mpopt.ppc64.m4 mpopt.sparcv8.m4 mpopt.sparcv8plus.m4 mpopt.x86.m4 ppc.m4 ppc64.m4 sha1opt.i586.m4 sparc.m4 x86.m4
diff --git a/beecrypt/gas/aesopt.i586.S b/beecrypt/gas/aesopt.i586.S
deleted file mode 100644
index deb0853b7..000000000
--- a/beecrypt/gas/aesopt.i586.S
+++ /dev/null
@@ -1,688 +0,0 @@
-/*
- * aesopt.i586.asm
- *
- * Assembler optimized AES routines for Intel Pentium processors
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2002 Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "aesopt.i586.S"
-
- .text
-
- .macro sxrk
- movl (%esi),%eax
- movl 4(%esi),%ebx
- movl 8(%esi),%ecx
- movl 12(%esi),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- xorl (%ebp),%eax
- xorl 4(%ebp),%ebx
- xorl 8(%ebp),%ecx
- xorl 12(%ebp),%edx
- movl %eax, (%esp)
- movl %ebx, 4(%esp)
- movl %ecx, 8(%esp)
- movl %edx,12(%esp)
- .endm
-
- .macro etfs offset
- movl \offset+ 0(%ebp),%ecx
- movl \offset+ 4(%ebp),%edx
-
- movzbl 3(%esp),%eax
- movzbl 7(%esp),%ebx
- xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx
-
- movzbl 6(%esp),%eax
- movzbl 10(%esp),%ebx
- xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx
-
- movzbl 9(%esp),%eax
- movzbl 13(%esp),%ebx
- xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx
-
- movzbl 12(%esp),%eax
- movzbl (%esp),%ebx
- xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx
-
- movl %ecx,16(%esp)
- movl %edx,20(%esp)
-
- movl \offset+ 8(%ebp),%ecx
- movl \offset+12(%ebp),%edx
-
- movzbl 11(%esp),%eax
- movzbl 15(%esp),%ebx
- xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx
-
- movzbl 14(%esp),%eax
- movzbl 2(%esp),%ebx
- xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx
-
- movzbl 1(%esp),%eax
- movzbl 5(%esp),%ebx
- xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx
-
- movzbl 4(%esp),%eax
- movzbl 8(%esp),%ebx
- xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx
-
- movl %ecx,24(%esp)
- movl %edx,28(%esp)
- .endm
-
- .macro esft offset
- movl \offset+ 0(%ebp),%ecx
- movl \offset+ 4(%ebp),%edx
-
- movzbl 19(%esp),%eax
- movzbl 23(%esp),%ebx
- xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx
-
- movzbl 22(%esp),%eax
- movzbl 26(%esp),%ebx
- xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx
-
- movzbl 25(%esp),%eax
- movzbl 29(%esp),%ebx
- xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx
-
- movzbl 28(%esp),%eax
- movzbl 16(%esp),%ebx
- xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx
-
- movl %ecx, (%esp)
- movl %edx, 4(%esp)
-
- movl \offset+ 8(%ebp),%ecx
- movl \offset+12(%ebp),%edx
-
- movzbl 27(%esp),%eax
- movzbl 31(%esp),%ebx
- xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx
-
- movzbl 30(%esp),%eax
- movzbl 18(%esp),%ebx
- xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx
-
- movzbl 17(%esp),%eax
- movzbl 21(%esp),%ebx
- xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx
-
- movzbl 20(%esp),%eax
- movzbl 24(%esp),%ebx
- xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx
-
- movl %ecx, 8(%esp)
- movl %edx,12(%esp)
- .endm
-
- .macro elr
- movl 0(%ebp),%ecx
- movl 4(%ebp),%edx
-
- movzbl 19(%esp),%eax
- movzbl 23(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff000000,%eax
- andl $0xff000000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 22(%esp),%eax
- movzbl 26(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff0000,%eax
- andl $0xff0000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 25(%esp),%eax
- movzbl 29(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff00,%eax
- andl $0xff00,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 28(%esp),%eax
- movzbl 16(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff,%eax
- andl $0xff,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movl %ecx, (%esp)
- movl %edx, 4(%esp)
-
- movl 8(%ebp),%ecx
- movl 12(%ebp),%edx
-
- movzbl 27(%esp),%eax
- movzbl 31(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff000000,%eax
- andl $0xff000000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 30(%esp),%eax
- movzbl 18(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff0000,%eax
- andl $0xff0000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 17(%esp),%eax
- movzbl 21(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff00,%eax
- andl $0xff00,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 20(%esp),%eax
- movzbl 24(%esp),%ebx
- movl SYMBOL_NAME(_ae4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx
- andl $0xff,%eax
- andl $0xff,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movl %ecx, 8(%esp)
- movl %edx,12(%esp)
- .endm
-
- .macro eblock label
- sxrk
-
- etfs 16
- esft 32
- etfs 48
- esft 64
- etfs 80
- esft 96
- etfs 112
- esft 128
- etfs 144
-
- movl 256(%ebp),%eax
- cmp $10,%eax
- je \label
-
- esft 160
- etfs 176
-
- movl 256(%ebp),%eax
- cmp $12,%eax
- je \label
-
- esft 192
- etfs 208
-
- movl 256(%ebp),%eax
-
- .align 4
-\label:
- sall $4,%eax
- addl %eax,%ebp
-
- elr
- .endm
-
- .macro dtfs offset
- movl \offset+0(%ebp),%ecx
- movl \offset+4(%ebp),%edx
-
- movzbl 3(%esp),%eax
- movzbl 7(%esp),%ebx
- xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx
-
- movzbl 14(%esp),%eax
- movzbl 2(%esp),%ebx
- xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx
-
- movzbl 9(%esp),%eax
- movzbl 13(%esp),%ebx
- xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx
-
- movzbl 4(%esp),%eax
- movzbl 8(%esp),%ebx
- xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx
-
- movl %ecx,16(%esp)
- movl %edx,20(%esp)
-
- movl \offset+ 8(%ebp),%ecx
- movl \offset+12(%ebp),%edx
-
- movzbl 11(%esp),%eax
- movzbl 15(%esp),%ebx
- xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx
-
- movzbl 6(%esp),%eax
- movzbl 10(%esp),%ebx
- xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx
-
- movzbl 1(%esp),%eax
- movzbl 5(%esp),%ebx
- xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx
-
- movzbl 12(%esp),%eax
- movzbl (%esp),%ebx
- xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx
-
- movl %ecx,24(%esp)
- movl %edx,28(%esp)
- .endm
-
- .macro dsft offset
- movl \offset+ 0(%ebp),%ecx
- movl \offset+ 4(%ebp),%edx
-
- movzbl 19(%esp),%eax
- movzbl 23(%esp),%ebx
- xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx
-
- movzbl 30(%esp),%eax
- movzbl 18(%esp),%ebx
- xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx
-
- movzbl 25(%esp),%eax
- movzbl 29(%esp),%ebx
- xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx
-
- movzbl 20(%esp),%eax
- movzbl 24(%esp),%ebx
- xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx
-
- movl %ecx, (%esp)
- movl %edx, 4(%esp)
-
- movl \offset+ 8(%ebp),%ecx
- movl \offset+12(%ebp),%edx
-
- movzbl 27(%esp),%eax
- movzbl 31(%esp),%ebx
- xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx
-
- movzbl 22(%esp),%eax
- movzbl 26(%esp),%ebx
- xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx
-
- movzbl 17(%esp),%eax
- movzbl 21(%esp),%ebx
- xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx
-
- movzbl 28(%esp),%eax
- movzbl 16(%esp),%ebx
- xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx
- xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx
-
- movl %ecx, 8(%esp)
- movl %edx,12(%esp)
- .endm
-
- .macro dlr
- movl 0(%ebp),%ecx
- movl 4(%ebp),%edx
-
- movzbl 19(%esp),%eax
- movzbl 23(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff000000,%eax
- andl $0xff000000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 30(%esp),%eax
- movzbl 18(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff0000,%eax
- andl $0xff0000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 25(%esp),%eax
- movzbl 29(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff00,%eax
- andl $0xff00,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 20(%esp),%eax
- movzbl 24(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff,%eax
- andl $0xff,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movl %ecx, (%esp)
- movl %edx, 4(%esp)
-
- movl 8(%ebp),%ecx
- movl 12(%ebp),%edx
-
- movzbl 27(%esp),%eax
- movzbl 31(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff000000,%eax
- andl $0xff000000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 22(%esp),%eax
- movzbl 26(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff0000,%eax
- andl $0xff0000,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 17(%esp),%eax
- movzbl 21(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff00,%eax
- andl $0xff00,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movzbl 28(%esp),%eax
- movzbl 16(%esp),%ebx
- movl SYMBOL_NAME(_ad4)(,%eax,4),%eax
- movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx
- andl $0xff,%eax
- andl $0xff,%ebx
- xorl %eax,%ecx
- xorl %ebx,%edx
-
- movl %ecx, 8(%esp)
- movl %edx,12(%esp)
- .endm
-
- .macro dblock label
- sxrk
-
- dtfs 16
- dsft 32
- dtfs 48
- dsft 64
- dtfs 80
- dsft 96
- dtfs 112
- dsft 128
- dtfs 144
-
- movl 256(%ebp),%eax
- cmp $10,%eax
- je \label
-
- dsft 160
- dtfs 176
-
- movl 256(%ebp),%eax
- cmp $12,%eax
- je \label
-
- dsft 192
- dtfs 208
-
- movl 256(%ebp),%eax
-
- .align 4
-\label:
- sall $4,%eax
- addl %eax,%ebp
-
- dlr
- .endm
-
-C_FUNCTION_BEGIN(aesEncrypt)
-LABEL(aesEncrypt)
- pushl %edi
- pushl %esi
- pushl %ebp
- pushl %ebx
-
- movl 20(%esp),%ebp
- movl 24(%esp),%edi
- movl 28(%esp),%esi
-
- subl $32,%esp
-
- eblock LOCAL(00)
-
- movl (%esp),%eax
- movl 4(%esp),%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax, (%edi)
- movl %ebx, 4(%edi)
- movl %ecx, 8(%edi)
- movl %edx,12(%edi)
-
- addl $32,%esp
-
- xorl %eax,%eax
-
- popl %ebx
- popl %ebp
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(aesEncrypt, LOCAL(aesEncrypt_size))
-
-
-C_FUNCTION_BEGIN(aesDecrypt)
-LABEL(aesDecrypt)
- pushl %edi
- pushl %esi
- pushl %ebp
- pushl %ebx
-
- movl 20(%esp),%ebp
- movl 24(%esp),%edi
- movl 28(%esp),%esi
-
- subl $32,%esp
-
- dblock LOCAL(01)
-
- movl (%esp),%eax
- movl 4(%esp),%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax, (%edi)
- movl %ebx, 4(%edi)
- movl %ecx, 8(%edi)
- movl %edx,12(%edi)
-
- addl $32,%esp
-
- xorl %eax,%eax
-
- popl %ebx
- popl %ebp
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(aesDecrypt, LOCAL(aesDecrypt_size))
-
-
-C_FUNCTION_BEGIN(aesECBEncrypt)
-LABEL(aesECBEncrypt)
- pushl %edi
- pushl %esi
- pushl %ebp
- pushl %ebx
-
- movl 28(%esp),%edi
- movl 32(%esp),%esi
-
- subl $32,%esp
-
- .align 4
-LOCAL(02):
- movl 52(%esp),%ebp
-
- eblock LOCAL(03)
-
- movl (%esp),%eax
- movl 4(%esp),%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax, (%edi)
- movl %ebx, 4(%edi)
- movl %ecx, 8(%edi)
- movl %edx,12(%edi)
-
- addl $16,%esi
- addl $16,%edi
-
- decl 56(%esp)
- jnz LOCAL(02)
-
- addl $32,%esp
-
- xorl %eax,%eax
-
- popl %ebx
- popl %ebp
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(aesECBEncrypt, LOCAL(aesECBEncrypt_size))
-
-
-C_FUNCTION_BEGIN(aesECBDecrypt)
-LABEL(aesECBDecrypt)
- pushl %edi
- pushl %esi
- pushl %ebp
- pushl %ebx
-
- movl 28(%esp),%edi
- movl 32(%esp),%esi
-
- subl $32,%esp
-
- .align 4
-LOCAL(04):
- movl 52(%esp),%ebp
-
- dblock LOCAL(05)
-
- movl (%esp),%eax
- movl 4(%esp),%ebx
- movl 8(%esp),%ecx
- movl 12(%esp),%edx
- bswap %eax
- bswap %ebx
- bswap %ecx
- bswap %edx
- movl %eax, (%edi)
- movl %ebx, 4(%edi)
- movl %ecx, 8(%edi)
- movl %edx,12(%edi)
-
- addl $16,%esi
- addl $16,%edi
-
- decl 56(%esp)
- jnz LOCAL(04)
-
- addl $32,%esp
-
- xorl %eax,%eax
-
- popl %ebx
- popl %ebp
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(aesECBDecrypt, LOCAL(aesECBDecrypt_size))
diff --git a/beecrypt/gas/aesopt.i586.m4 b/beecrypt/gas/aesopt.i586.m4
new file mode 100644
index 000000000..e8dbf1da5
--- /dev/null
+++ b/beecrypt/gas/aesopt.i586.m4
@@ -0,0 +1,580 @@
+dnl aesopt.i586.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/x86.m4)
+
+define(`sxrk',`
+ movl (%esi),%eax
+ movl 4(%esi),%ebx
+ movl 8(%esi),%ecx
+ movl 12(%esi),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ xorl (%ebp),%eax
+ xorl 4(%ebp),%ebx
+ xorl 8(%ebp),%ecx
+ xorl 12(%ebp),%edx
+ movl %eax, (%esp)
+ movl %ebx, 4(%esp)
+ movl %ecx, 8(%esp)
+ movl %edx,12(%esp)
+')
+
+define(`etfs',`
+ movl $1+0(%ebp),%ecx
+ movl $1+4(%ebp),%edx
+
+ movzbl 3(%esp),%eax
+ movzbl 7(%esp),%ebx
+ xorl SYMNAME(_ae0)(,%eax,4),%ecx
+ xorl SYMNAME(_ae0)(,%ebx,4),%edx
+
+ movzbl 6(%esp),%eax
+ movzbl 10(%esp),%ebx
+ xorl SYMNAME(_ae1)(,%eax,4),%ecx
+ xorl SYMNAME(_ae1)(,%ebx,4),%edx
+
+ movzbl 9(%esp),%eax
+ movzbl 13(%esp),%ebx
+ xorl SYMNAME(_ae2)(,%eax,4),%ecx
+ xorl SYMNAME(_ae2)(,%ebx,4),%edx
+
+ movzbl 12(%esp),%eax
+ movzbl (%esp),%ebx
+ xorl SYMNAME(_ae3)(,%eax,4),%ecx
+ xorl SYMNAME(_ae3)(,%ebx,4),%edx
+
+ movl %ecx,16(%esp)
+ movl %edx,20(%esp)
+
+ movl $1+ 8(%ebp),%ecx
+ movl $1+12(%ebp),%edx
+
+ movzbl 11(%esp),%eax
+ movzbl 15(%esp),%ebx
+ xorl SYMNAME(_ae0)(,%eax,4),%ecx
+ xorl SYMNAME(_ae0)(,%ebx,4),%edx
+
+ movzbl 14(%esp),%eax
+ movzbl 2(%esp),%ebx
+ xorl SYMNAME(_ae1)(,%eax,4),%ecx
+ xorl SYMNAME(_ae1)(,%ebx,4),%edx
+
+ movzbl 1(%esp),%eax
+ movzbl 5(%esp),%ebx
+ xorl SYMNAME(_ae2)(,%eax,4),%ecx
+ xorl SYMNAME(_ae2)(,%ebx,4),%edx
+
+ movzbl 4(%esp),%eax
+ movzbl 8(%esp),%ebx
+ xorl SYMNAME(_ae3)(,%eax,4),%ecx
+ xorl SYMNAME(_ae3)(,%ebx,4),%edx
+
+ movl %ecx,24(%esp)
+ movl %edx,28(%esp)
+')
+
+define(`esft',`
+ movl $1+0(%ebp),%ecx
+ movl $1+4(%ebp),%edx
+
+ movzbl 19(%esp),%eax
+ movzbl 23(%esp),%ebx
+ xorl SYMNAME(_ae0)(,%eax,4),%ecx
+ xorl SYMNAME(_ae0)(,%ebx,4),%edx
+
+ movzbl 22(%esp),%eax
+ movzbl 26(%esp),%ebx
+ xorl SYMNAME(_ae1)(,%eax,4),%ecx
+ xorl SYMNAME(_ae1)(,%ebx,4),%edx
+
+ movzbl 25(%esp),%eax
+ movzbl 29(%esp),%ebx
+ xorl SYMNAME(_ae2)(,%eax,4),%ecx
+ xorl SYMNAME(_ae2)(,%ebx,4),%edx
+
+ movzbl 28(%esp),%eax
+ movzbl 16(%esp),%ebx
+ xorl SYMNAME(_ae3)(,%eax,4),%ecx
+ xorl SYMNAME(_ae3)(,%ebx,4),%edx
+
+ movl %ecx, (%esp)
+ movl %edx, 4(%esp)
+
+ movl $1+ 8(%ebp),%ecx
+ movl $1+12(%ebp),%edx
+
+ movzbl 27(%esp),%eax
+ movzbl 31(%esp),%ebx
+ xorl SYMNAME(_ae0)(,%eax,4),%ecx
+ xorl SYMNAME(_ae0)(,%ebx,4),%edx
+
+ movzbl 30(%esp),%eax
+ movzbl 18(%esp),%ebx
+ xorl SYMNAME(_ae1)(,%eax,4),%ecx
+ xorl SYMNAME(_ae1)(,%ebx,4),%edx
+
+ movzbl 17(%esp),%eax
+ movzbl 21(%esp),%ebx
+ xorl SYMNAME(_ae2)(,%eax,4),%ecx
+ xorl SYMNAME(_ae2)(,%ebx,4),%edx
+
+ movzbl 20(%esp),%eax
+ movzbl 24(%esp),%ebx
+ xorl SYMNAME(_ae3)(,%eax,4),%ecx
+ xorl SYMNAME(_ae3)(,%ebx,4),%edx
+
+ movl %ecx, 8(%esp)
+ movl %edx,12(%esp)
+')
+
+define(`elr',`
+ movl 0(%ebp),%ecx
+ movl 4(%ebp),%edx
+
+ movzbl 19(%esp),%eax
+ movzbl 23(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff000000,%eax
+ andl `$'0xff000000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 22(%esp),%eax
+ movzbl 26(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff0000,%eax
+ andl `$'0xff0000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 25(%esp),%eax
+ movzbl 29(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff00,%eax
+ andl `$'0xff00,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 28(%esp),%eax
+ movzbl 16(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff,%eax
+ andl `$'0xff,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movl %ecx, (%esp)
+ movl %edx, 4(%esp)
+
+ movl 8(%ebp),%ecx
+ movl 12(%ebp),%edx
+
+ movzbl 27(%esp),%eax
+ movzbl 31(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff000000,%eax
+ andl `$'0xff000000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 30(%esp),%eax
+ movzbl 18(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff0000,%eax
+ andl `$'0xff0000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 17(%esp),%eax
+ movzbl 21(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff00,%eax
+ andl `$'0xff00,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 20(%esp),%eax
+ movzbl 24(%esp),%ebx
+ movl SYMNAME(_ae4)(,%eax,4),%eax
+ movl SYMNAME(_ae4)(,%ebx,4),%ebx
+ andl `$'0xff,%eax
+ andl `$'0xff,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movl %ecx, 8(%esp)
+ movl %edx,12(%esp)
+')
+
+define(`eblock',`
+ sxrk
+
+ etfs(16)
+ esft(32)
+ etfs(48)
+ esft(64)
+ etfs(80)
+ esft(96)
+ etfs(112)
+ esft(128)
+ etfs(144)
+
+ movl 256(%ebp),%eax
+ cmp `$'10,%eax
+ je $1
+
+ esft(160)
+ etfs(176)
+
+ movl 256(%ebp),%eax
+ cmp `$'12,%eax
+ je $1
+
+ esft(192)
+ etfs(208)
+
+ movl 256(%ebp),%eax
+
+ .align 4
+$1:
+ sall `$'4,%eax
+ addl %eax,%ebp
+
+ elr
+')
+
+define(`dtfs',`
+ movl $1+0(%ebp),%ecx
+ movl $1+4(%ebp),%edx
+
+ movzbl 3(%esp),%eax
+ movzbl 7(%esp),%ebx
+ xorl SYMNAME(_ad0)(,%eax,4),%ecx
+ xorl SYMNAME(_ad0)(,%ebx,4),%edx
+
+ movzbl 14(%esp),%eax
+ movzbl 2(%esp),%ebx
+ xorl SYMNAME(_ad1)(,%eax,4),%ecx
+ xorl SYMNAME(_ad1)(,%ebx,4),%edx
+
+ movzbl 9(%esp),%eax
+ movzbl 13(%esp),%ebx
+ xorl SYMNAME(_ad2)(,%eax,4),%ecx
+ xorl SYMNAME(_ad2)(,%ebx,4),%edx
+
+ movzbl 4(%esp),%eax
+ movzbl 8(%esp),%ebx
+ xorl SYMNAME(_ad3)(,%eax,4),%ecx
+ xorl SYMNAME(_ad3)(,%ebx,4),%edx
+
+ movl %ecx,16(%esp)
+ movl %edx,20(%esp)
+
+ movl $1+ 8(%ebp),%ecx
+ movl $1+12(%ebp),%edx
+
+ movzbl 11(%esp),%eax
+ movzbl 15(%esp),%ebx
+ xorl SYMNAME(_ad0)(,%eax,4),%ecx
+ xorl SYMNAME(_ad0)(,%ebx,4),%edx
+
+ movzbl 6(%esp),%eax
+ movzbl 10(%esp),%ebx
+ xorl SYMNAME(_ad1)(,%eax,4),%ecx
+ xorl SYMNAME(_ad1)(,%ebx,4),%edx
+
+ movzbl 1(%esp),%eax
+ movzbl 5(%esp),%ebx
+ xorl SYMNAME(_ad2)(,%eax,4),%ecx
+ xorl SYMNAME(_ad2)(,%ebx,4),%edx
+
+ movzbl 12(%esp),%eax
+ movzbl (%esp),%ebx
+ xorl SYMNAME(_ad3)(,%eax,4),%ecx
+ xorl SYMNAME(_ad3)(,%ebx,4),%edx
+
+ movl %ecx,24(%esp)
+ movl %edx,28(%esp)
+')
+
+define(`dsft',`
+ movl $1+0(%ebp),%ecx
+ movl $1+4(%ebp),%edx
+
+ movzbl 19(%esp),%eax
+ movzbl 23(%esp),%ebx
+ xorl SYMNAME(_ad0)(,%eax,4),%ecx
+ xorl SYMNAME(_ad0)(,%ebx,4),%edx
+
+ movzbl 30(%esp),%eax
+ movzbl 18(%esp),%ebx
+ xorl SYMNAME(_ad1)(,%eax,4),%ecx
+ xorl SYMNAME(_ad1)(,%ebx,4),%edx
+
+ movzbl 25(%esp),%eax
+ movzbl 29(%esp),%ebx
+ xorl SYMNAME(_ad2)(,%eax,4),%ecx
+ xorl SYMNAME(_ad2)(,%ebx,4),%edx
+
+ movzbl 20(%esp),%eax
+ movzbl 24(%esp),%ebx
+ xorl SYMNAME(_ad3)(,%eax,4),%ecx
+ xorl SYMNAME(_ad3)(,%ebx,4),%edx
+
+ movl %ecx, (%esp)
+ movl %edx, 4(%esp)
+
+ movl $1+ 8(%ebp),%ecx
+ movl $1+12(%ebp),%edx
+
+ movzbl 27(%esp),%eax
+ movzbl 31(%esp),%ebx
+ xorl SYMNAME(_ad0)(,%eax,4),%ecx
+ xorl SYMNAME(_ad0)(,%ebx,4),%edx
+
+ movzbl 22(%esp),%eax
+ movzbl 26(%esp),%ebx
+ xorl SYMNAME(_ad1)(,%eax,4),%ecx
+ xorl SYMNAME(_ad1)(,%ebx,4),%edx
+
+ movzbl 17(%esp),%eax
+ movzbl 21(%esp),%ebx
+ xorl SYMNAME(_ad2)(,%eax,4),%ecx
+ xorl SYMNAME(_ad2)(,%ebx,4),%edx
+
+ movzbl 28(%esp),%eax
+ movzbl 16(%esp),%ebx
+ xorl SYMNAME(_ad3)(,%eax,4),%ecx
+ xorl SYMNAME(_ad3)(,%ebx,4),%edx
+
+ movl %ecx, 8(%esp)
+ movl %edx,12(%esp)
+')
+
+define(`dlr',`
+ movl 0(%ebp),%ecx
+ movl 4(%ebp),%edx
+
+ movzbl 19(%esp),%eax
+ movzbl 23(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff000000,%eax
+ andl `$'0xff000000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 30(%esp),%eax
+ movzbl 18(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff0000,%eax
+ andl `$'0xff0000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 25(%esp),%eax
+ movzbl 29(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff00,%eax
+ andl `$'0xff00,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 20(%esp),%eax
+ movzbl 24(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff,%eax
+ andl `$'0xff,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movl %ecx, (%esp)
+ movl %edx, 4(%esp)
+
+ movl 8(%ebp),%ecx
+ movl 12(%ebp),%edx
+
+ movzbl 27(%esp),%eax
+ movzbl 31(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff000000,%eax
+ andl `$'0xff000000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 22(%esp),%eax
+ movzbl 26(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff0000,%eax
+ andl `$'0xff0000,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 17(%esp),%eax
+ movzbl 21(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff00,%eax
+ andl `$'0xff00,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movzbl 28(%esp),%eax
+ movzbl 16(%esp),%ebx
+ movl SYMNAME(_ad4)(,%eax,4),%eax
+ movl SYMNAME(_ad4)(,%ebx,4),%ebx
+ andl `$'0xff,%eax
+ andl `$'0xff,%ebx
+ xorl %eax,%ecx
+ xorl %ebx,%edx
+
+ movl %ecx, 8(%esp)
+ movl %edx,12(%esp)
+')
+
+define(`dblock',`
+ sxrk
+
+ dtfs(16)
+ dsft(32)
+ dtfs(48)
+ dsft(64)
+ dtfs(80)
+ dsft(96)
+ dtfs(112)
+ dsft(128)
+ dtfs(144)
+
+ movl 256(%ebp),%eax
+ cmp `$'10,%eax
+ je $1
+
+ dsft(160)
+ dtfs(176)
+
+ movl 256(%ebp),%eax
+ cmp `$'12,%eax
+ je $1
+
+ dsft(192)
+ dtfs(208)
+
+ movl 256(%ebp),%eax
+
+ .align 4
+$1:
+ sall `$'4,%eax
+ addl %eax,%ebp
+
+ dlr
+')
+
+C_FUNCTION_BEGIN(aesEncrypt)
+ pushl %edi
+ pushl %esi
+ pushl %ebp
+ pushl %ebx
+
+ movl 20(%esp),%ebp
+ movl 24(%esp),%edi
+ movl 28(%esp),%esi
+
+ subl `$'32,%esp
+
+ eblock(LOCAL(00))
+
+ movl (%esp),%eax
+ movl 4(%esp),%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax, (%edi)
+ movl %ebx, 4(%edi)
+ movl %ecx, 8(%edi)
+ movl %edx,12(%edi)
+
+ addl `$'32,%esp
+
+ xorl %eax,%eax
+
+ popl %ebx
+ popl %ebp
+ popl %esi
+ popl %edi
+ ret
+C_FUNCTION_END(aesEncrypt)
+
+
+C_FUNCTION_BEGIN(aesDecrypt)
+ pushl %edi
+ pushl %esi
+ pushl %ebp
+ pushl %ebx
+
+ movl 20(%esp),%ebp
+ movl 24(%esp),%edi
+ movl 28(%esp),%esi
+
+ subl `$'32,%esp
+
+ dblock(LOCAL(01))
+
+ movl (%esp),%eax
+ movl 4(%esp),%ebx
+ movl 8(%esp),%ecx
+ movl 12(%esp),%edx
+ bswap %eax
+ bswap %ebx
+ bswap %ecx
+ bswap %edx
+ movl %eax, (%edi)
+ movl %ebx, 4(%edi)
+ movl %ecx, 8(%edi)
+ movl %edx,12(%edi)
+
+ addl `$'32,%esp
+
+ xorl %eax,%eax
+
+ popl %ebx
+ popl %ebp
+ popl %esi
+ popl %edi
+ ret
+C_FUNCTION_END(aesDecrypt)
diff --git a/beecrypt/gas/aesopt.powerpc.S b/beecrypt/gas/aesopt.powerpc.S
deleted file mode 100644
index 17e36dc41..000000000
--- a/beecrypt/gas/aesopt.powerpc.S
+++ /dev/null
@@ -1,683 +0,0 @@
-/*
- * aesopt.powerpc.asm
- *
- * Assembler optimized AES routines for PowerPC processors
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2002 Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "aesopt.powerpc.S"
-
- .text
-
-/*
- * ae0 in r11
- * r7 thru r10 scratch
- * four registers for s: r24 r25 r26 r27
- * four registers for t: r28 r29 r30 r31
- */
-
-#define s0 r24
-#define s1 r25
-#define s2 r26
-#define s3 r27
-#define t0 r28
-#define t1 r29
-#define t2 r30
-#define t3 r31
-
- .macro sxrk rk src
- #if WORDS_BIGENDIAN
- lwz s0, 0(\src)
- lwz s1, 4(\src)
- lwz s2, 8(\src)
- lwz s3,12(\src)
- lwz r7, 0(\rk)
- lwz r8, 4(\rk)
- lwz r9, 8(\rk)
- lwz r10,12(\rk)
- #else
- # error ppc little-endian not implemented
- #endif
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
- .endm
-
- .macro etfs rk offset
- lwz t0,\offset+ 0(\rk)
- lwz t1,\offset+ 4(\rk)
- lwz t2,\offset+ 8(\rk)
- lwz t3,\offset+12(\rk)
-
- rlwinm r7,s0,10,22,29 /* ((s0 >> 24) & 0xff) * 4 */
- rlwinm r8,s1,10,22,29 /* ((s1 >> 24) & 0xff) * 4 */
- rlwinm r9,s2,10,22,29 /* ((s2 >> 24) & 0xff) * 4 */
- rlwinm r10,s3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,s1,18,22,29 /* ((s1 >> 16) & 0xff) * 4 */
- rlwinm r8,s2,18,22,29 /* ((s2 >> 16) & 0xff) * 4 */
- rlwinm r9,s3,18,22,29 /* ((s3 >> 16) & 0xff) * 4 */
- rlwinm r10,s0,18,22,29 /* ((s0 >> 16) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,s2,26,22,29 /* ((s2 >> 8) & 0xff) * 4 */
- rlwinm r8,s3,26,22,29 /* ((s3 >> 8) & 0xff) * 4 */
- rlwinm r9,s0,26,22,29 /* ((s0 >> 8) & 0xff) * 4 */
- rlwinm r10,s1,26,22,29 /* ((s1 >> 8) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,s3,2,22,29 /* ((s3 >> 0) & 0xff) * 4 */
- rlwinm r8,s0,2,22,29 /* ((s0 >> 0) & 0xff) * 4 */
- rlwinm r9,s1,2,22,29 /* ((s1 >> 0) & 0xff) * 4 */
- rlwinm r10,s2,2,22,29 /* ((s2 >> 0) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,-3072(r12)
- .endm
-
- .macro esft rk offset
- lwz s0,\offset+ 0(\rk)
- lwz s1,\offset+ 4(\rk)
- lwz s2,\offset+ 8(\rk)
- lwz s3,\offset+12(\rk)
-
- rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */
- rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */
- rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */
- rlwinm r10,t3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */
- rlwinm r8,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */
- rlwinm r9,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */
- rlwinm r10,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */
- rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */
- rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */
- rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */
- rlwinm r8,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */
- rlwinm r9,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */
- rlwinm r10,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,-3072(r12)
- .endm
-
- .macro elr rk
- lwz s0, 0(\rk)
- lwz s1, 4(\rk)
- lwz s2, 8(\rk)
- lwz s3,12(\rk)
-
- la r12,4096(r12)
-
- rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */
- rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */
- rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */
- rlwinm r10,t3,10,22,29 /* ((t3 >> 24) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,0,7 /* & 0xff000000 */
- rlwinm r8,r8,0,0,7 /* & 0xff000000 */
- rlwinm r9,r9,0,0,7 /* & 0xff000000 */
- rlwinm r10,r10,0,0,7 /* & 0xff000000 */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- rlwinm r7,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */
- rlwinm r8,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */
- rlwinm r9,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */
- rlwinm r10,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,8,15 /* & 0xff0000 */
- rlwinm r8,r8,0,8,15 /* & 0xff0000 */
- rlwinm r9,r9,0,8,15 /* & 0xff0000 */
- rlwinm r10,r10,0,8,15 /* & 0xff0000 */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */
- rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */
- rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */
- rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,16,23 /* & 0xff00 */
- rlwinm r8,r8,0,16,23 /* & 0xff00 */
- rlwinm r9,r9,0,16,23 /* & 0xff00 */
- rlwinm r10,r10,0,16,23 /* & 0xff00 */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- rlwinm r7,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */
- rlwinm r8,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */
- rlwinm r9,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */
- rlwinm r10,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,24,31 /* & 0xff */
- rlwinm r8,r8,0,24,31 /* & 0xff */
- rlwinm r9,r9,0,24,31 /* & 0xff */
- rlwinm r10,r10,0,24,31 /* & 0xff */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,-4096(r12)
- .endm
-
- .macro eblock rk src label
- sxrk \rk \src
-
- etfs \rk 16
- esft \rk 32
- etfs \rk 48
- esft \rk 64
- etfs \rk 80
- esft \rk 96
- etfs \rk 112
- esft \rk 128
- etfs \rk 144
-
- lwz r11,256(\rk)
- cmpwi r11,10
- beq \label
-
- esft \rk 160
- etfs \rk 176
-
- cmpwi r11,12
- beq \label
-
- esft \rk 192
- etfs \rk 208
-
-\label:
- slwi r11,r11,4
- add \rk,\rk,r11
-
- elr \rk
- .endm
-
- .macro dtfs rk offset
- lwz t0,\offset+ 0(\rk)
- lwz t1,\offset+ 4(\rk)
- lwz t2,\offset+ 8(\rk)
- lwz t3,\offset+12(\rk)
-
- rlwinm r7,s0,10,22,29 /* ((s0 >> 24) & 0xff) * 4 */
- rlwinm r8,s1,10,22,29 /* ((s1 >> 24) & 0xff) * 4 */
- rlwinm r9,s2,10,22,29 /* ((s2 >> 24) & 0xff) * 4 */
- rlwinm r10,s3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,s3,18,22,29 /* ((s3 >> 16) & 0xff) * 4 */
- rlwinm r8,s0,18,22,29 /* ((s0 >> 16) & 0xff) * 4 */
- rlwinm r9,s1,18,22,29 /* ((s1 >> 16) & 0xff) * 4 */
- rlwinm r10,s2,18,22,29 /* ((s2 >> 16) & 0xff) * 4 */
-
-/* start here */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,s2,26,22,29 /* ((s2 >> 8) & 0xff) * 4 */
- rlwinm r8,s3,26,22,29 /* ((s3 >> 8) & 0xff) * 4 */
- rlwinm r9,s0,26,22,29 /* ((s0 >> 8) & 0xff) * 4 */
- rlwinm r10,s1,26,22,29 /* ((s1 >> 8) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,s1,2,22,29 /* ((s1 >> 0) & 0xff) * 4 */
- rlwinm r8,s2,2,22,29 /* ((s2 >> 0) & 0xff) * 4 */
- rlwinm r9,s3,2,22,29 /* ((s3 >> 0) & 0xff) * 4 */
- rlwinm r10,s0,2,22,29 /* ((s0 >> 0) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor t0,t0,r7
- xor t1,t1,r8
- xor t2,t2,r9
- xor t3,t3,r10
-
- la r12,-3072(r12)
- .endm
-
- .macro dsft rk offset
- lwz s0,\offset+ 0(\rk)
- lwz s1,\offset+ 4(\rk)
- lwz s2,\offset+ 8(\rk)
- lwz s3,\offset+12(\rk)
-
- rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */
- rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */
- rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */
- rlwinm r10,t3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */
- rlwinm r8,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */
- rlwinm r9,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */
- rlwinm r10,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */
- rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */
- rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */
- rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,1024(r12)
-
- rlwinm r7,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */
- rlwinm r8,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */
- rlwinm r9,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */
- rlwinm r10,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,-3072(r12)
- .endm
-
- .macro dlr rk
- lwz s0, 0(\rk)
- lwz s1, 4(\rk)
- lwz s2, 8(\rk)
- lwz s3,12(\rk)
-
- la r12,4096(r12)
-
- rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */
- rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */
- rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */
- rlwinm r10,t3,10,22,29 /* ((t3 >> 24) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,0,7 /* & 0xff000000 */
- rlwinm r8,r8,0,0,7 /* & 0xff000000 */
- rlwinm r9,r9,0,0,7 /* & 0xff000000 */
- rlwinm r10,r10,0,0,7 /* & 0xff000000 */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- rlwinm r7,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */
- rlwinm r8,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */
- rlwinm r9,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */
- rlwinm r10,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,8,15 /* & 0xff0000 */
- rlwinm r8,r8,0,8,15 /* & 0xff0000 */
- rlwinm r9,r9,0,8,15 /* & 0xff0000 */
- rlwinm r10,r10,0,8,15 /* & 0xff0000 */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */
- rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */
- rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */
- rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,16,23 /* & 0xff00 */
- rlwinm r8,r8,0,16,23 /* & 0xff00 */
- rlwinm r9,r9,0,16,23 /* & 0xff00 */
- rlwinm r10,r10,0,16,23 /* & 0xff00 */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- rlwinm r7,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */
- rlwinm r8,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */
- rlwinm r9,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */
- rlwinm r10,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */
- lwzx r7,r7,r12
- lwzx r8,r8,r12
- lwzx r9,r9,r12
- lwzx r10,r10,r12
- rlwinm r7,r7,0,24,31 /* & 0xff */
- rlwinm r8,r8,0,24,31 /* & 0xff */
- rlwinm r9,r9,0,24,31 /* & 0xff */
- rlwinm r10,r10,0,24,31 /* & 0xff */
- xor s0,s0,r7
- xor s1,s1,r8
- xor s2,s2,r9
- xor s3,s3,r10
-
- la r12,-4096(r12)
- .endm
-
- .macro dblock rk src label
- sxrk \rk \src
-
- dtfs \rk 16
- dsft \rk 32
- dtfs \rk 48
- dsft \rk 64
- dtfs \rk 80
- dsft \rk 96
- dtfs \rk 112
- dsft \rk 128
- dtfs \rk 144
-
- lwz r11,256(\rk)
- cmpwi r11,10
- beq \label
-
- dsft \rk 160
- dtfs \rk 176
-
- cmpwi r11,12
- beq \label
-
- dsft \rk 192
- dtfs \rk 208
-
-\label:
- slwi r11,r11,4
- add \rk,\rk,r11
-
- dlr \rk
- .endm
-
-C_FUNCTION_BEGIN(aesEncrypt)
-LABEL(aesEncrypt)
- subi r1,r1,32
- stmw r24,0(r1)
-
- lis r12,_ae0@ha
- la r12,_ae0@l(r12)
-
- eblock rk=r3 src=r5 label=LOCAL(00)
-
- #if WORDS_BIGENDIAN
- stw s0, 0(r4)
- stw s1, 4(r4)
- stw s2, 8(r4)
- stw s3,12(r4)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- li r3,0
- lmw r24,0(r1)
- addi r1,r1,32
- blr
-C_FUNCTION_END(aesEncrypt, LOCAL(aesEncrypt_size))
-
-
-C_FUNCTION_BEGIN(aesDecrypt)
-LABEL(aesDecrypt)
- subi r1,r1,32
- stmw r24,0(r1)
-
- lis r12,_ad0@ha
- la r12,_ad0@l(r12)
-
- dblock rk=r3 src=r5 label=LOCAL(01)
-
- #if WORDS_BIGENDIAN
- stw s0, 0(r4)
- stw s1, 4(r4)
- stw s2, 8(r4)
- stw s3,12(r4)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- li r3,0
- lmw r24,0(r1)
- addi r1,r1,32
- blr
-C_FUNCTION_END(aesDecrypt, LOCAL(aesDecrypt_size))
-
-C_FUNCTION_BEGIN(aesECBEncrypt)
-LABEL(aesECBEncrypt)
- subi r1,r1,32
- stmw r24,0(r1)
-
- mtctr r4
-
- lis r12,_ae0@ha
- la r12,_ae0@l(r12)
-
-LOCAL(02):
- /* copy r3 into r4 */
- mr r4,r3
-
- eblock rk=r4 src=r6 label=LOCAL(03)
-
- #if WORDS_BIGENDIAN
- stw s0, 0(r5)
- stw s1, 4(r5)
- stw s2, 8(r5)
- stw s3,12(r5)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- addi r5,r5,16
- addi r6,r6,16
-
- bdnz LOCAL(02)
-
- li r3,0
- lmw r24,0(r1)
- addi r1,r1,32
- blr
-C_FUNCTION_END(aesECBEncrypt, LOCAL(aesECBEncrypt_size))
-
-
-C_FUNCTION_BEGIN(aesECBDecrypt)
-LABEL(aesECBDecrypt)
- subi r1,r1,32
- stmw r24,0(r1)
-
- mtctr r4
-
- lis r12,_ad0@ha
- la r12,_ad0@l(r12)
-
-LOCAL(04):
- /* copy r3 into r4 */
- mr r4,r3
-
- dblock rk=r4 src=r6 label=LOCAL(05)
-
- #if WORDS_BIGENDIAN
- stw s0, 0(r5)
- stw s1, 4(r5)
- stw s2, 8(r5)
- stw s3,12(r5)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- addi r5,r5,16
- addi r6,r6,16
-
- bdnz LOCAL(04)
-
- li r3,0
- lmw r24,0(r1)
- addi r1,r1,32
- blr
-C_FUNCTION_END(aesECBDecrypt, LOCAL(aesECBDecrypt_size))
diff --git a/beecrypt/gas/aesopt.ppc.m4 b/beecrypt/gas/aesopt.ppc.m4
new file mode 100644
index 000000000..f81f3a50a
--- /dev/null
+++ b/beecrypt/gas/aesopt.ppc.m4
@@ -0,0 +1,616 @@
+dnl aesopt.ppc.m4
+dnl
+dnl NOTE: Only works for big-endian PowerPC!
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/ppc.m4)
+
+define(`s0',`r24')
+define(`s1',`r25')
+define(`s2',`r26')
+define(`s3',`r27')
+define(`t0',`r28')
+define(`t1',`r29')
+define(`t2',`r30')
+define(`t3',`r31')
+
+dnl sxrk(rk, src): load the 16-byte block at $2 into s0..s3 and xor it with
+dnl the four words at 0..12($1).
+dnl
+dnl When ASM_BIGENDIAN is yes the block is read with plain lwz.  Otherwise
+dnl each word is read with lwbrx (byte-reversed load) at offsets 0, 4, 8 and
+dnl 12, using r0 as the index register, so s0..s3 hold the same values as on
+dnl a big-endian machine.  The words at $1 are always read with lwz.
+dnl
+dnl Clobbers r7-r10, and r0 on the byte-reversed path.
+define(`sxrk',`
+ifelse(ASM_BIGENDIAN,yes,`
+ lwz s0, 0($2)
+ lwz s1, 4($2)
+ lwz s2, 8($2)
+ lwz s3,12($2)
+',`
+ li r0,0
+ lwbrx s0,$2,r0
+ li r0,4
+ lwbrx s1,$2,r0
+ li r0,8
+ lwbrx s2,$2,r0
+ li r0,12
+ lwbrx s3,$2,r0
+')
+ lwz r7, 0($1)
+ lwz r8, 4($1)
+ lwz r9, 8($1)
+ lwz r10,12($1)
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+')
+
+dnl etfs(rk, offset): one encryption step reading s0..s3, writing t0..t3.
+dnl
+dnl t0..t3 are first loaded from the four words at offset+0..12(rk).  Four
+dnl groups of table lookups are then xored in.  In each group rlwinm turns
+dnl one byte of a source word into a word-scaled index (byte * 4):
+dnl   rotate 10 -> ((x >> 24) & 0xff) * 4
+dnl   rotate 18 -> ((x >> 16) & 0xff) * 4
+dnl   rotate 26 -> ((x >>  8) & 0xff) * 4
+dnl   rotate  2 -> ((x >>  0) & 0xff) * 4
+dnl and lwzx fetches the table word at r12 + index.  Source words per group,
+dnl for t0,t1,t2,t3: (s0,s1,s2,s3) (s1,s2,s3,s0) (s2,s3,s0,s1) (s3,s0,s1,s2).
+dnl
+dnl r12 is the table base: it is advanced by 1024 between groups and moved
+dnl back by 3072 at the end, so it is unchanged on exit.  Clobbers r7-r10.
+define(`etfs',`
+ lwz t0,$2+ 0($1)
+ lwz t1,$2+ 4($1)
+ lwz t2,$2+ 8($1)
+ lwz t3,$2+12($1)
+
+ rlwinm r7,s0,10,22,29
+ rlwinm r8,s1,10,22,29
+ rlwinm r9,s2,10,22,29
+ rlwinm r10,s3,10,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,s1,18,22,29
+ rlwinm r8,s2,18,22,29
+ rlwinm r9,s3,18,22,29
+ rlwinm r10,s0,18,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,s2,26,22,29
+ rlwinm r8,s3,26,22,29
+ rlwinm r9,s0,26,22,29
+ rlwinm r10,s1,26,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,s3,2,22,29
+ rlwinm r8,s0,2,22,29
+ rlwinm r9,s1,2,22,29
+ rlwinm r10,s2,2,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,-3072(r12)
+')
+
+dnl esft(rk, offset): one encryption step reading t0..t3, writing s0..s3.
+dnl
+dnl Mirror image of etfs with the roles of the s and t registers swapped:
+dnl s0..s3 are loaded from offset+0..12(rk), then four groups of table
+dnl lookups are xored in.  rlwinm with rotate 10/18/26/2 and mask 22..29
+dnl yields ((x >> 24/16/8/0) & 0xff) * 4, and lwzx reads the table word at
+dnl r12 + index.  Source words per group, for s0,s1,s2,s3:
+dnl (t0,t1,t2,t3) (t1,t2,t3,t0) (t2,t3,t0,t1) (t3,t0,t1,t2).
+dnl
+dnl r12 is advanced by 1024 between groups and moved back by 3072 at the
+dnl end, so it is unchanged on exit.  Clobbers r7-r10.
+define(`esft',`
+ lwz s0,$2+ 0($1)
+ lwz s1,$2+ 4($1)
+ lwz s2,$2+ 8($1)
+ lwz s3,$2+12($1)
+
+ rlwinm r7,t0,10,22,29
+ rlwinm r8,t1,10,22,29
+ rlwinm r9,t2,10,22,29
+ rlwinm r10,t3,10,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,t1,18,22,29
+ rlwinm r8,t2,18,22,29
+ rlwinm r9,t3,18,22,29
+ rlwinm r10,t0,18,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,t2,26,22,29
+ rlwinm r8,t3,26,22,29
+ rlwinm r9,t0,26,22,29
+ rlwinm r10,t1,26,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,t3,2,22,29
+ rlwinm r8,t0,2,22,29
+ rlwinm r9,t1,2,22,29
+ rlwinm r10,t2,2,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,-3072(r12)
+')
+
+dnl elr(rk): final encryption step reading t0..t3, writing s0..s3.
+dnl
+dnl s0..s3 are loaded from 0..12(rk).  r12 is moved forward by 4096 for the
+dnl duration of the macro and moved back at the end, so all lookups here use
+dnl the table 4096 bytes past the base and r12 is unchanged on exit.
+dnl
+dnl Byte extraction and source-word order are the same as in esft, but each
+dnl looked-up word is masked to a single byte lane before it is xored in:
+dnl   group 1: rlwinm 0,0,7   -> & 0xff000000
+dnl   group 2: rlwinm 0,8,15  -> & 0x00ff0000
+dnl   group 3: rlwinm 0,16,23 -> & 0x0000ff00
+dnl   group 4: rlwinm 0,24,31 -> & 0x000000ff
+dnl Clobbers r7-r10.
+define(`elr',`
+ lwz s0, 0($1)
+ lwz s1, 4($1)
+ lwz s2, 8($1)
+ lwz s3,12($1)
+
+ la r12,4096(r12)
+
+ rlwinm r7,t0,10,22,29
+ rlwinm r8,t1,10,22,29
+ rlwinm r9,t2,10,22,29
+ rlwinm r10,t3,10,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,0,7
+ rlwinm r8,r8,0,0,7
+ rlwinm r9,r9,0,0,7
+ rlwinm r10,r10,0,0,7
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ rlwinm r7,t1,18,22,29
+ rlwinm r8,t2,18,22,29
+ rlwinm r9,t3,18,22,29
+ rlwinm r10,t0,18,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,8,15
+ rlwinm r8,r8,0,8,15
+ rlwinm r9,r9,0,8,15
+ rlwinm r10,r10,0,8,15
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ rlwinm r7,t2,26,22,29
+ rlwinm r8,t3,26,22,29
+ rlwinm r9,t0,26,22,29
+ rlwinm r10,t1,26,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,16,23
+ rlwinm r8,r8,0,16,23
+ rlwinm r9,r9,0,16,23
+ rlwinm r10,r10,0,16,23
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ rlwinm r7,t3,2,22,29
+ rlwinm r8,t0,2,22,29
+ rlwinm r9,t1,2,22,29
+ rlwinm r10,t2,2,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,24,31
+ rlwinm r8,r8,0,24,31
+ rlwinm r9,r9,0,24,31
+ rlwinm r10,r10,0,24,31
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,-4096(r12)
+')
+
+dnl eblock(rk, src, label): complete encryption of one block into s0..s3.
+dnl
+dnl Expands sxrk(rk, src), then alternates etfs/esft over offsets 16..144.
+dnl The word at 256(rk) is loaded into r11: if it is 10 control branches to
+dnl the label at once; if it is 12 it branches after two further steps
+dnl (160, 176); otherwise the steps at 192 and 208 run as well.
+dnl At the label r11 is shifted left by 4 (times 16) and added to the rk
+dnl register, which is therefore modified, and elr finishes the block.
+dnl
+dnl $3 is defined as a label inside the body, so each expansion needs its
+dnl own unique label.  r12 must already hold the table base used by the
+dnl step macros.  Clobbers r7-r11 plus whatever sxrk clobbers.
+dnl NOTE(review): 256(rk) is presumably the round count - confirm.
+define(`eblock',`
+ sxrk($1,$2)
+
+ etfs($1,16)
+ esft($1,32)
+ etfs($1,48)
+ esft($1,64)
+ etfs($1,80)
+ esft($1,96)
+ etfs($1,112)
+ esft($1,128)
+ etfs($1,144)
+
+ lwz r11,256($1)
+ cmpwi r11,10
+ beq $3
+
+ esft($1,160)
+ etfs($1,176)
+
+ cmpwi r11,12
+ beq $3
+
+ esft($1,192)
+ etfs($1,208)
+
+$3:
+ slwi r11,r11,4
+ add $1,$1,r11
+
+ elr($1)
+')
+
+dnl dtfs(rk, offset): one decryption step reading s0..s3, writing t0..t3.
+dnl
+dnl t0..t3 are loaded from offset+0..12(rk), then four groups of table
+dnl lookups are xored in.  rlwinm with rotate 10/18/26/2 and mask 22..29
+dnl yields ((x >> 24/16/8/0) & 0xff) * 4, and lwzx reads the table word at
+dnl r12 + index.  Source words per group, for t0,t1,t2,t3:
+dnl (s0,s1,s2,s3) (s3,s0,s1,s2) (s2,s3,s0,s1) (s1,s2,s3,s0)
+dnl i.e. the second and fourth groups rotate the opposite way from etfs.
+dnl
+dnl r12 is advanced by 1024 between groups and moved back by 3072 at the
+dnl end, so it is unchanged on exit.  Clobbers r7-r10.
+define(`dtfs',`
+ lwz t0,$2+ 0($1)
+ lwz t1,$2+ 4($1)
+ lwz t2,$2+ 8($1)
+ lwz t3,$2+12($1)
+
+ rlwinm r7,s0,10,22,29
+ rlwinm r8,s1,10,22,29
+ rlwinm r9,s2,10,22,29
+ rlwinm r10,s3,10,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,s3,18,22,29
+ rlwinm r8,s0,18,22,29
+ rlwinm r9,s1,18,22,29
+ rlwinm r10,s2,18,22,29
+
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,s2,26,22,29
+ rlwinm r8,s3,26,22,29
+ rlwinm r9,s0,26,22,29
+ rlwinm r10,s1,26,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,s1,2,22,29
+ rlwinm r8,s2,2,22,29
+ rlwinm r9,s3,2,22,29
+ rlwinm r10,s0,2,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor t0,t0,r7
+ xor t1,t1,r8
+ xor t2,t2,r9
+ xor t3,t3,r10
+
+ la r12,-3072(r12)
+')
+
+dnl dsft(rk, offset): one decryption step reading t0..t3, writing s0..s3.
+dnl
+dnl Mirror image of dtfs with the roles of the s and t registers swapped:
+dnl s0..s3 are loaded from offset+0..12(rk), then four groups of table
+dnl lookups are xored in.  rlwinm with rotate 10/18/26/2 and mask 22..29
+dnl yields ((x >> 24/16/8/0) & 0xff) * 4, and lwzx reads the table word at
+dnl r12 + index.  Source words per group, for s0,s1,s2,s3:
+dnl (t0,t1,t2,t3) (t3,t0,t1,t2) (t2,t3,t0,t1) (t1,t2,t3,t0).
+dnl
+dnl r12 is advanced by 1024 between groups and moved back by 3072 at the
+dnl end, so it is unchanged on exit.  Clobbers r7-r10.
+define(`dsft',`
+ lwz s0,$2+ 0($1)
+ lwz s1,$2+ 4($1)
+ lwz s2,$2+ 8($1)
+ lwz s3,$2+12($1)
+
+ rlwinm r7,t0,10,22,29
+ rlwinm r8,t1,10,22,29
+ rlwinm r9,t2,10,22,29
+ rlwinm r10,t3,10,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,t3,18,22,29
+ rlwinm r8,t0,18,22,29
+ rlwinm r9,t1,18,22,29
+ rlwinm r10,t2,18,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,t2,26,22,29
+ rlwinm r8,t3,26,22,29
+ rlwinm r9,t0,26,22,29
+ rlwinm r10,t1,26,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,1024(r12)
+
+ rlwinm r7,t1,2,22,29
+ rlwinm r8,t2,2,22,29
+ rlwinm r9,t3,2,22,29
+ rlwinm r10,t0,2,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,-3072(r12)
+')
+
+dnl dlr(rk): final decryption step reading t0..t3, writing s0..s3.
+dnl
+dnl s0..s3 are loaded from 0..12(rk).  r12 is moved forward by 4096 for the
+dnl duration of the macro and moved back at the end, so all lookups here use
+dnl the table 4096 bytes past the base and r12 is unchanged on exit.
+dnl
+dnl Byte extraction and source-word order are the same as in dsft, but each
+dnl looked-up word is masked to a single byte lane before it is xored in:
+dnl   group 1: rlwinm 0,0,7   -> & 0xff000000
+dnl   group 2: rlwinm 0,8,15  -> & 0x00ff0000
+dnl   group 3: rlwinm 0,16,23 -> & 0x0000ff00
+dnl   group 4: rlwinm 0,24,31 -> & 0x000000ff
+dnl Clobbers r7-r10.
+define(`dlr',`
+ lwz s0, 0($1)
+ lwz s1, 4($1)
+ lwz s2, 8($1)
+ lwz s3,12($1)
+
+ la r12,4096(r12)
+
+ rlwinm r7,t0,10,22,29
+ rlwinm r8,t1,10,22,29
+ rlwinm r9,t2,10,22,29
+ rlwinm r10,t3,10,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,0,7
+ rlwinm r8,r8,0,0,7
+ rlwinm r9,r9,0,0,7
+ rlwinm r10,r10,0,0,7
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ rlwinm r7,t3,18,22,29
+ rlwinm r8,t0,18,22,29
+ rlwinm r9,t1,18,22,29
+ rlwinm r10,t2,18,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,8,15
+ rlwinm r8,r8,0,8,15
+ rlwinm r9,r9,0,8,15
+ rlwinm r10,r10,0,8,15
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ rlwinm r7,t2,26,22,29
+ rlwinm r8,t3,26,22,29
+ rlwinm r9,t0,26,22,29
+ rlwinm r10,t1,26,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,16,23
+ rlwinm r8,r8,0,16,23
+ rlwinm r9,r9,0,16,23
+ rlwinm r10,r10,0,16,23
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ rlwinm r7,t1,2,22,29
+ rlwinm r8,t2,2,22,29
+ rlwinm r9,t3,2,22,29
+ rlwinm r10,t0,2,22,29
+ lwzx r7,r7,r12
+ lwzx r8,r8,r12
+ lwzx r9,r9,r12
+ lwzx r10,r10,r12
+ rlwinm r7,r7,0,24,31
+ rlwinm r8,r8,0,24,31
+ rlwinm r9,r9,0,24,31
+ rlwinm r10,r10,0,24,31
+ xor s0,s0,r7
+ xor s1,s1,r8
+ xor s2,s2,r9
+ xor s3,s3,r10
+
+ la r12,-4096(r12)
+')
+
+dnl dblock(rk, src, label): complete decryption of one block into s0..s3.
+dnl
+dnl Expands sxrk(rk, src), then alternates dtfs/dsft over offsets 16..144.
+dnl The word at 256(rk) is loaded into r11: if it is 10 control branches to
+dnl the label at once; if it is 12 it branches after two further steps
+dnl (160, 176); otherwise the steps at 192 and 208 run as well.
+dnl At the label r11 is shifted left by 4 (times 16) and added to the rk
+dnl register, which is therefore modified, and dlr finishes the block.
+dnl
+dnl $3 is defined as a label inside the body, so each expansion needs its
+dnl own unique label.  r12 must already hold the table base used by the
+dnl step macros.  Clobbers r7-r11 plus whatever sxrk clobbers.
+dnl NOTE(review): 256(rk) is presumably the round count - confirm.
+define(`dblock',`
+ sxrk($1,$2)
+
+ dtfs($1,16)
+ dsft($1,32)
+ dtfs($1,48)
+ dsft($1,64)
+ dtfs($1,80)
+ dsft($1,96)
+ dtfs($1,112)
+ dsft($1,128)
+ dtfs($1,144)
+
+ lwz r11,256($1)
+ cmpwi r11,10
+ beq $3
+
+ dsft($1,160)
+ dtfs($1,176)
+
+ cmpwi r11,12
+ beq $3
+
+ dsft($1,192)
+ dtfs($1,208)
+
+$3:
+ slwi r11,r11,4
+ add $1,$1,r11
+
+ dlr($1)
+')
+
+EXTERNAL_VARIABLE(_ae0)
+EXTERNAL_VARIABLE(_ad0)
+
+dnl aesEncrypt: PowerPC entry point.
+dnl   r3 = passed to eblock as rk (eblock reads 256(r3) and advances r3)
+dnl   r4 = destination: s0..s3 are stored to 0..12(r4)
+dnl   r5 = passed to eblock as src
+dnl
+dnl 32 bytes of stack are reserved and r24-r31 (the s/t registers) are saved
+dnl there with stmw and restored with lmw before returning.  r12 is loaded
+dnl with the address of _ae0 as the table base for the step macros.
+dnl The result is written with stw when ASM_BIGENDIAN is yes, otherwise with
+dnl stwbrx (byte-reversed store) at offsets 0, 4, 8 and 12 via r0.
+dnl Always returns 0 in r3.
+C_FUNCTION_BEGIN(aesEncrypt)
+ subi r1,r1,32
+ stmw r24,0(r1)
+
+ LOAD_ADDRESS(_ae0,r12)
+
+ eblock(r3,r5,LOCAL(00))
+
+ifelse(ASM_BIGENDIAN,yes,`
+ stw s0, 0(r4)
+ stw s1, 4(r4)
+ stw s2, 8(r4)
+ stw s3,12(r4)
+',`
+ li r0,0
+ stwbrx s0,r4,r0
+ li r0,4
+ stwbrx s1,r4,r0
+ li r0,8
+ stwbrx s2,r4,r0
+ li r0,12
+ stwbrx s3,r4,r0
+')
+
+ li r3,0
+ lmw r24,0(r1)
+ addi r1,r1,32
+ blr
+C_FUNCTION_END(aesEncrypt)
+
+
+dnl aesDecrypt: PowerPC entry point.
+dnl   r3 = passed to dblock as rk (dblock reads 256(r3) and advances r3)
+dnl   r4 = destination: s0..s3 are stored to 0..12(r4)
+dnl   r5 = passed to dblock as src
+dnl
+dnl 32 bytes of stack are reserved and r24-r31 (the s/t registers) are saved
+dnl there with stmw and restored with lmw before returning.  r12 is loaded
+dnl with the address of _ad0 as the table base for the step macros.
+dnl The result is written with stw when ASM_BIGENDIAN is yes, otherwise with
+dnl stwbrx (byte-reversed store) at offsets 0, 4, 8 and 12 via r0.
+dnl Always returns 0 in r3.
+C_FUNCTION_BEGIN(aesDecrypt)
+ subi r1,r1,32
+ stmw r24,0(r1)
+
+ LOAD_ADDRESS(_ad0,r12)
+
+ dblock(r3,r5,LOCAL(01))
+
+ifelse(ASM_BIGENDIAN,yes,`
+ stw s0, 0(r4)
+ stw s1, 4(r4)
+ stw s2, 8(r4)
+ stw s3,12(r4)
+',`
+ li r0,0
+ stwbrx s0,r4,r0
+ li r0,4
+ stwbrx s1,r4,r0
+ li r0,8
+ stwbrx s2,r4,r0
+ li r0,12
+ stwbrx s3,r4,r0
+')
+
+ li r3,0
+ lmw r24,0(r1)
+ addi r1,r1,32
+ blr
+C_FUNCTION_END(aesDecrypt)
diff --git a/beecrypt/gas/alpha.m4 b/beecrypt/gas/alpha.m4
new file mode 100644
index 000000000..49366dae0
--- /dev/null
+++ b/beecrypt/gas/alpha.m4
@@ -0,0 +1,34 @@
+dnl alpha.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl Alpha replacements for the function begin/end macros: any existing
+dnl definitions are dropped and redefined below.
+dnl
+dnl C_FUNCTION_BEGIN(name) switches to .text, aligns with .align 5, exports
+dnl and opens the procedure with .globl/.ent, places the label, and emits
+dnl .frame $sp, 0, $26 followed by .prologue 0.  The name is used as given;
+dnl no symbol prefix macro is applied here.
+undefine(`C_FUNCTION_BEGIN')
+define(C_FUNCTION_BEGIN,`
+ .text
+ .align 5
+ .globl $1
+ .ent $1
+$1:
+ .frame `$'sp, 0, `$'26
+ .prologue 0
+')
+dnl C_FUNCTION_END(name) closes the procedure opened by .ent with .end.
+undefine(`C_FUNCTION_END')
+define(C_FUNCTION_END,`
+ .end $1
+')
diff --git a/beecrypt/gas/asmdefs.m4 b/beecrypt/gas/asmdefs.m4
new file mode 100644
index 000000000..f8341c95f
--- /dev/null
+++ b/beecrypt/gas/asmdefs.m4
@@ -0,0 +1,41 @@
+dnl asmdefs.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl USE_SIZE_DIRECTIVE is set to yes when the first five characters of
+dnl ASM_OS are "linux"; it selects the C_FUNCTION_END variant below.
+ifelse(substr(ASM_OS,0,5),linux,`
+define(USE_SIZE_DIRECTIVE,yes)
+')
+
+dnl SYMNAME(x) expands to GSYM_PREFIX followed by x; LOCAL(x) expands to
+dnl LSYM_PREFIX followed by x.  Both prefixes are defined outside this file.
+define(SYMNAME,`GSYM_PREFIX`$1'')
+define(LOCAL,`LSYM_PREFIX`$1'')
+
+dnl C_FUNCTION_BEGIN(name): emit TEXTSEG, export SYMNAME(name) with GLOBL
+dnl and place its label.  TEXTSEG and GLOBL are defined outside this file.
+define(C_FUNCTION_BEGIN,`
+ TEXTSEG
+ GLOBL SYMNAME($1)
+SYMNAME($1):
+')
+
+dnl C_FUNCTION_END(name): with USE_SIZE_DIRECTIVE set to yes, place a local
+dnl label LOCAL(name)_size at the end of the function and emit a .size
+dnl directive giving the distance from SYMNAME(name) to that label.
+dnl Otherwise it expands to nothing.
+ifelse(USE_SIZE_DIRECTIVE,yes,`
+define(C_FUNCTION_END,`
+LOCAL($1)_size:
+ .size SYMNAME($1), LOCAL($1)_size - SYMNAME($1)
+')
+',`
+define(C_FUNCTION_END,`')
+')
diff --git a/beecrypt/gas/blowfishopt.i586.S b/beecrypt/gas/blowfishopt.i586.S
deleted file mode 100644
index 69b89bb72..000000000
--- a/beecrypt/gas/blowfishopt.i586.S
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * blowfishopt.i586.S
- *
- * Assembler optimized blowfish routines for Intel Pentium processors
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "blowfishopt.i586.S"
-
- .text
-
- .macro etworounds p # bp in %esi, xl and xr in %ecx and %edx, %eax and %ebx clear
- xorl 0+\p(%esi),%ecx
- roll $16,%ecx
- movb %ch,%al
- movb %cl,%bl
- roll $16,%ecx
- movl 0x000+72(%esi,%eax,4),%edi
- addl 0x400+72(%esi,%ebx,4),%edi
- movb %ch,%al
- movb %cl,%bl
- xorl 0x800+72(%esi,%eax,4),%edi
- addl 0xC00+72(%esi,%ebx,4),%edi
- xorl %edi,%edx
- xorl 4+\p(%esi),%edx
- roll $16,%edx
- movb %dh,%al
- movb %dl,%bl
- roll $16,%edx
- movl 0x000+72(%esi,%eax,4),%edi
- addl 0x400+72(%esi,%ebx,4),%edi
- movb %dh,%al
- movb %dl,%bl
- xorl 0x800+72(%esi,%eax,4),%edi
- addl 0xC00+72(%esi,%ebx,4),%edi
- xorl %edi,%ecx
- .endm
-
- .macro dtworounds p # bp in %esi, xl and xr in %ecx and %edx, %eax and %ebx clear
- xorl 4+\p(%esi),%ecx
- roll $16,%ecx
- movb %ch,%al
- movb %cl,%bl
- roll $16,%ecx
- movl 0x000+72(%esi,%eax,4),%edi
- addl 0x400+72(%esi,%ebx,4),%edi
- movb %ch,%al
- movb %cl,%bl
- xorl 0x800+72(%esi,%eax,4),%edi
- addl 0xC00+72(%esi,%ebx,4),%edi
- xorl %edi,%edx
- xorl 0+\p(%esi),%edx
- roll $16,%edx
- movb %dh,%al
- movb %dl,%bl
- roll $16,%edx
- movl 0x000+72(%esi,%eax,4),%edi
- addl 0x400+72(%esi,%ebx,4),%edi
- movb %dh,%al
- movb %dl,%bl
- xorl 0x800+72(%esi,%eax,4),%edi
- addl 0xC00+72(%esi,%ebx,4),%edi
- xorl %edi,%ecx
- .endm
-
-C_FUNCTION_BEGIN(blowfishEncrypt)
-LABEL(blowfishEncrypt)
- /* parameter one is the blowfish parameters; need to extract bp and set it up in esi */
- pushl %edi
- pushl %esi
- pushl %ebx
-
- movl 16(%esp),%esi # esi now contains bp
- movl 24(%esp),%edi # edi now contains src
-
- xorl %eax,%eax
- xorl %ebx,%ebx
-
- movl 0(%edi),%ecx
- movl 4(%edi),%edx
-
- bswap %ecx
- bswap %edx
-
- etworounds p= 0
- etworounds p= 8
- etworounds p=16
- etworounds p=24
- etworounds p=32
- etworounds p=40
- etworounds p=48
- etworounds p=56
-
- movl 20(%esp),%edi # edi now contains dst
- xorl 64(%esi),%ecx
- xorl 68(%esi),%edx
-
- bswap %ecx
- bswap %edx
-
- movl %ecx,4(%edi)
- movl %edx,0(%edi)
-
- xorl %eax,%eax
- popl %ebx
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(blowfishEncrypt, LOCAL(blowfishEncrypt_size))
-
-
-C_FUNCTION_BEGIN(blowfishDecrypt)
-LABEL(blowfishDecrypt)
- /* parameter one is the blowfish parameters; need to extract bp and set it up in ebp */
- pushl %edi
- pushl %esi
- pushl %ebx
-
- movl 16(%esp),%esi # esi now contains bp
- movl 24(%esp),%edi # edi now contains dst
-
- xorl %eax,%eax
- xorl %ebx,%ebx
-
- movl 0(%edi),%ecx
- movl 4(%edi),%edx
-
- bswap %ecx
- bswap %edx
-
- dtworounds p=64
- dtworounds p=56
- dtworounds p=48
- dtworounds p=40
- dtworounds p=32
- dtworounds p=24
- dtworounds p=16
- dtworounds p= 8
-
- movl 20(%esp),%edi # edi now contains dst
- xorl 4(%esi),%ecx
- xorl 0(%esi),%edx
-
- bswap %ecx
- bswap %edx
-
- movl %ecx,4(%edi)
- movl %edx,0(%edi)
-
- xorl %eax,%eax
-
- popl %ebx
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(blowfishDecrypt, LOCAL(blowfishDecrypt_size))
diff --git a/beecrypt/gas/blowfishopt.i586.m4 b/beecrypt/gas/blowfishopt.i586.m4
new file mode 100644
index 000000000..4233738fc
--- /dev/null
+++ b/beecrypt/gas/blowfishopt.i586.m4
@@ -0,0 +1,162 @@
+dnl blowfishopt.i586.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/x86.m4)
+
+dnl etworounds(p): xor the subkey at p+0(%esi) into %ecx, mix the table lookup of %ecx into %edx, then repeat with p+4 and the roles swapped.
+dnl In: bp in %esi, xl in %ecx, xr in %edx.
+dnl Scratch: %edi, plus %eax and %ebx, which movzx overwrites in full, so they need not be clear on entry.
+define(`etworounds',`
+ xorl $1+0(%esi),%ecx
+ roll `$'16,%ecx
+ movzx %ch,%eax
+ movzx %cl,%ebx
+ roll `$'16,%ecx
+ movl 0x000+72(%esi,%eax,4),%edi
+ addl 0x400+72(%esi,%ebx,4),%edi
+ movzx %ch,%eax
+ movzx %cl,%ebx
+ xorl 0x800+72(%esi,%eax,4),%edi
+ addl 0xC00+72(%esi,%ebx,4),%edi
+ xorl %edi,%edx
+ xorl $1+4(%esi),%edx
+ roll `$'16,%edx
+ movzx %dh,%eax
+ movzx %dl,%ebx
+ roll `$'16,%edx
+ movl 0x000+72(%esi,%eax,4),%edi
+ addl 0x400+72(%esi,%ebx,4),%edi
+ movzx %dh,%eax
+ movzx %dl,%ebx
+ xorl 0x800+72(%esi,%eax,4),%edi
+ addl 0xC00+72(%esi,%ebx,4),%edi
+ xorl %edi,%ecx
+')
+dnl dtworounds(p): xor the subkey at p+4(%esi) into %ecx, mix the table lookup of %ecx into %edx, then repeat with p+0 and the roles swapped.
+dnl In: bp in %esi, xl in %ecx, xr in %edx. Scratch: %edi, %eax, %ebx (movzx overwrites %eax and %ebx in full, so they need not be clear).
+define(`dtworounds',`
+ xorl $1+4(%esi),%ecx
+ roll `$'16,%ecx
+ movzx %ch,%eax
+ movzx %cl,%ebx
+ roll `$'16,%ecx
+ movl 0x000+72(%esi,%eax,4),%edi
+ addl 0x400+72(%esi,%ebx,4),%edi
+ movzx %ch,%eax
+ movzx %cl,%ebx
+ xorl 0x800+72(%esi,%eax,4),%edi
+ addl 0xC00+72(%esi,%ebx,4),%edi
+ xorl %edi,%edx
+ xorl $1+0(%esi),%edx
+ roll `$'16,%edx
+ movzx %dh,%eax
+ movzx %dl,%ebx
+ roll `$'16,%edx
+ movl 0x000+72(%esi,%eax,4),%edi
+ addl 0x400+72(%esi,%ebx,4),%edi
+ movzx %dh,%eax
+ movzx %dl,%ebx
+ xorl 0x800+72(%esi,%eax,4),%edi
+ addl 0xC00+72(%esi,%ebx,4),%edi
+ xorl %edi,%ecx
+')
+dnl blowfishEncrypt: encrypt one block of two 32-bit words; after the three pushes 16(%esp) is bp, 20(%esp) is dst and 24(%esp) is src. Returns 0 in %eax.
+C_FUNCTION_BEGIN(blowfishEncrypt)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+dnl %esi = bp, %edi = src
+ movl 16(%esp),%esi
+ movl 24(%esp),%edi
+dnl %ecx = first source word (xl), %edx = second source word (xr)
+ movl 0(%edi),%ecx
+ movl 4(%edi),%edx
+dnl byte-reverse both words before the rounds
+ bswap %ecx
+ bswap %edx
+dnl sixteen rounds, two per macro call, using the subkeys at offsets 0 through 60 of bp
+ etworounds(0)
+ etworounds(8)
+ etworounds(16)
+ etworounds(24)
+ etworounds(32)
+ etworounds(40)
+ etworounds(48)
+ etworounds(56)
+dnl %edi = dst; xor in the last two subkeys at offsets 64 and 68
+ movl 20(%esp),%edi
+ xorl 64(%esi),%ecx
+ xorl 68(%esi),%edx
+dnl reverse the bytes again before storing
+ bswap %ecx
+ bswap %edx
+dnl the two words are stored swapped: %ecx to dst+4, %edx to dst+0
+ movl %ecx,4(%edi)
+ movl %edx,0(%edi)
+dnl return 0 and restore the saved registers
+ xorl %eax,%eax
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+C_FUNCTION_END(blowfishEncrypt)
+
+dnl blowfishDecrypt: decrypt one block of two 32-bit words; after the three pushes 16(%esp) is bp, 20(%esp) is dst and 24(%esp) is src. Returns 0 in %eax.
+C_FUNCTION_BEGIN(blowfishDecrypt)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+dnl %esi = bp, %edi = src
+ movl 16(%esp),%esi
+ movl 24(%esp),%edi
+dnl %ecx = first source word (xl), %edx = second source word (xr)
+ movl 0(%edi),%ecx
+ movl 4(%edi),%edx
+dnl byte-reverse both words before the rounds
+ bswap %ecx
+ bswap %edx
+dnl sixteen rounds, two per macro call, using the subkeys from offset 68 down to offset 8 of bp
+ dtworounds(64)
+ dtworounds(56)
+ dtworounds(48)
+ dtworounds(40)
+ dtworounds(32)
+ dtworounds(24)
+ dtworounds(16)
+ dtworounds(8)
+dnl %edi = dst; xor in the remaining two subkeys at offsets 4 and 0
+ movl 20(%esp),%edi
+ xorl 4(%esi),%ecx
+ xorl 0(%esi),%edx
+dnl reverse the bytes again before storing
+ bswap %ecx
+ bswap %edx
+dnl the two words are stored swapped: %ecx to dst+4, %edx to dst+0
+ movl %ecx,4(%edi)
+ movl %edx,0(%edi)
+dnl return value is 0
+ xorl %eax,%eax
+dnl restore the saved registers in reverse push order
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+C_FUNCTION_END(blowfishDecrypt)
diff --git a/beecrypt/gas/blowfishopt.powerpc.S b/beecrypt/gas/blowfishopt.powerpc.S
deleted file mode 100644
index 28076f061..000000000
--- a/beecrypt/gas/blowfishopt.powerpc.S
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * blowfishopt.powerpc.asm
- *
- * Assembler optimized Blowfish routines for PowerPC processors
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2002 Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "blowfish.powerpc.S"
-
- .text
-
- .macro round xl xr offset
- lwz r9,\offset(r3)
- xor \xl,\xl,r9
- rlwinm r9,\xl,10,22,29
- rlwinm r10,\xl,18,22,29
- lwzx r9,r9,r28
- lwzx r10,r10,r29
- rlwinm r11,\xl,26,22,29
- add r9,r9,r10
- lwzx r11,r11,r30
- rlwinm r12,\xl,2,22,29
- xor r9,r9,r11
- lwzx r12,r12,r31
- add r9,r9,r12
- xor \xr,\xr,r9
- .endm
-
- .macro eblock
- round xl=r7 xr=r8 offset=0
- round xl=r8 xr=r7 offset=4
- round xl=r7 xr=r8 offset=8
- round xl=r8 xr=r7 offset=12
- round xl=r7 xr=r8 offset=16
- round xl=r8 xr=r7 offset=20
- round xl=r7 xr=r8 offset=24
- round xl=r8 xr=r7 offset=28
- round xl=r7 xr=r8 offset=32
- round xl=r8 xr=r7 offset=36
- round xl=r7 xr=r8 offset=40
- round xl=r8 xr=r7 offset=44
- round xl=r7 xr=r8 offset=48
- round xl=r8 xr=r7 offset=52
- round xl=r7 xr=r8 offset=56
- round xl=r8 xr=r7 offset=60
-
- lwz r9,64(r3)
- lwz r10,68(r3)
- xor r7,r7,r9
- xor r8,r8,r10
- .endm
-
- .macro dblock
- round xl=r7 xr=r8 offset=68
- round xl=r8 xr=r7 offset=64
- round xl=r7 xr=r8 offset=60
- round xl=r8 xr=r7 offset=56
- round xl=r7 xr=r8 offset=52
- round xl=r8 xr=r7 offset=48
- round xl=r7 xr=r8 offset=44
- round xl=r8 xr=r7 offset=40
- round xl=r7 xr=r8 offset=36
- round xl=r8 xr=r7 offset=32
- round xl=r7 xr=r8 offset=28
- round xl=r8 xr=r7 offset=24
- round xl=r7 xr=r8 offset=20
- round xl=r8 xr=r7 offset=16
- round xl=r7 xr=r8 offset=12
- round xl=r8 xr=r7 offset=8
-
- lwz r9,4(r3)
- lwz r10,0(r3)
- xor r7,r7,r9
- xor r8,r8,r10
- .endm
-
-C_FUNCTION_BEGIN(blowfishEncrypt)
-LABEL(blowfishEncrypt)
- la r1,-16(r1)
- stmw r28,0(r1)
-
- la r28,72(r3)
- la r29,1096(r3)
- la r30,2120(r3)
- la r31,3144(r3)
-
- #if WORDS_BIGENDIAN
- lwz r7,0(r5)
- lwz r8,4(r5)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- eblock
-
- #if WORDS_BIGENDIAN
- stw r7,4(r4)
- stw r8,0(r4)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- li r3,0
- lmw r28,0(r1)
- la r1,16(r1)
- blr
-C_FUNCTION_END(blowfishEncrypt, LOCAL(blowfishEncrypt_size))
-
-
-C_FUNCTION_BEGIN(blowfishDecrypt)
-LABEL(blowfishDecrypt)
- la r1,-16(r1)
- stmw r28,0(r1)
-
- la r28,72(r3)
- la r29,1096(r3)
- la r30,2120(r3)
- la r31,3144(r3)
-
- #if WORDS_BIGENDIAN
- lwz r7,0(r5)
- lwz r8,4(r5)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- dblock
-
- #if WORDS_BIGENDIAN
- stw r7,4(r4)
- stw r8,0(r4)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- li r3,0
- lmw r28,0(r1)
- la r1,16(r1)
- blr
-C_FUNCTION_END(blowfishDecrypt, LOCAL(blowfishDecrypt_size))
-
-
-C_FUNCTION_BEGIN(blowfishECBEncrypt)
-LABEL(blowfishECBEncrypt)
- la r1,-16(r1)
- stmw r28,0(r1)
-
- mtctr r4
-
- la r28,72(r3)
- la r29,1096(r3)
- la r30,2120(r3)
- la r31,3144(r3)
-
-LOCAL(00):
- #if WORDS_BIGENDIAN
- lwz r7,0(r6)
- lwz r8,4(r6)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- eblock
-
- #if WORDS_BIGENDIAN
- stw r7,4(r5)
- stw r8,0(r5)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- la r5,8(r5)
- la r6,8(r6)
-
- bdnz LOCAL(00)
-
- li r3,0
- lmw r28,0(r1)
- la r1,16(r1)
- blr
-C_FUNCTION_END(blowfishECBEncrypt, LOCAL(blowfishECBEncrypt_size))
-
-
-C_FUNCTION_BEGIN(blowfishECBDecrypt)
-LABEL(blowfishECBDecrypt)
- la r1,-16(r1)
- stmw r28,0(r1)
-
- mtctr r4
-
- la r28,72(r3)
- la r29,1096(r3)
- la r30,2120(r3)
- la r31,3144(r3)
-
-LOCAL(01):
- #if WORDS_BIGENDIAN
- lwz r7,0(r6)
- lwz r8,4(r6)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- dblock
-
- #if WORDS_BIGENDIAN
- stw r7,4(r5)
- stw r8,0(r5)
- #else
- # error ppc little-endian mode not supported
- #endif
-
- la r5,8(r5)
- la r6,8(r6)
-
- bdnz LOCAL(01)
-
- li r3,0
- lmw r28,0(r1)
- la r1,16(r1)
- blr
-C_FUNCTION_END(blowfishECBDecrypt, LOCAL(blowfishECBDecrypt_size))
diff --git a/beecrypt/gas/blowfishopt.ppc.m4 b/beecrypt/gas/blowfishopt.ppc.m4
new file mode 100644
index 000000000..74214aa9c
--- /dev/null
+++ b/beecrypt/gas/blowfishopt.ppc.m4
@@ -0,0 +1,161 @@
+dnl blowfishopt.ppc.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/ppc.m4)
+dnl round(xl,xr,offset): xl ^= word at offset(r3); xr ^= ((T0[b0] + T1[b1]) ^ T2[b2]) + T3[b3], with b0..b3 the bytes of xl from most to least significant and T0..T3 the word tables addressed by r28..r31. Clobbers r9-r12.
+define(`round',`
+ lwz r9,$3(r3)
+ xor $1,$1,r9
+ rlwinm r9,$1,10,22,29
+ rlwinm r10,$1,18,22,29
+ lwzx r9,r9,r28
+ lwzx r10,r10,r29
+ rlwinm r11,$1,26,22,29
+ add r9,r9,r10
+ lwzx r11,r11,r30
+ rlwinm r12,$1,2,22,29
+ xor r9,r9,r11
+ lwzx r12,r12,r31
+ add r9,r9,r12
+ xor $2,$2,r9
+')
+dnl eblock: sixteen rounds on r7 and r8 with the words at offsets 0 to 60 of r3, swapping the two registers each round, then xor the words at offsets 64 and 68 into r7 and r8. Clobbers r9-r12.
+define(`eblock',`
+ round(r7,r8,0)
+ round(r8,r7,4)
+ round(r7,r8,8)
+ round(r8,r7,12)
+ round(r7,r8,16)
+ round(r8,r7,20)
+ round(r7,r8,24)
+ round(r8,r7,28)
+ round(r7,r8,32)
+ round(r8,r7,36)
+ round(r7,r8,40)
+ round(r8,r7,44)
+ round(r7,r8,48)
+ round(r8,r7,52)
+ round(r7,r8,56)
+ round(r8,r7,60)
+ lwz r9,64(r3)
+ lwz r10,68(r3)
+ xor r7,r7,r9
+ xor r8,r8,r10
+')
+dnl dblock: sixteen rounds on r7 and r8 with the words from offset 68 down to offset 8 of r3, swapping the two registers each round, then xor the words at offsets 4 and 0 into r7 and r8. Clobbers r9-r12.
+define(`dblock',`
+ round(r7,r8,68)
+ round(r8,r7,64)
+ round(r7,r8,60)
+ round(r8,r7,56)
+ round(r7,r8,52)
+ round(r8,r7,48)
+ round(r7,r8,44)
+ round(r8,r7,40)
+ round(r7,r8,36)
+ round(r8,r7,32)
+ round(r7,r8,28)
+ round(r8,r7,24)
+ round(r7,r8,20)
+ round(r8,r7,16)
+ round(r7,r8,12)
+ round(r8,r7,8)
+ lwz r9,4(r3)
+ lwz r10,0(r3)
+ xor r7,r7,r9
+ xor r8,r8,r10
+')
+
+dnl blowfishEncrypt: encrypt the two 32-bit words at r5 using the key data at r3 and store the result at r4; returns 0 in r3.
+C_FUNCTION_BEGIN(blowfishEncrypt)
+ la r1,-16(r1)
+ stmw r28,0(r1)
+dnl r28-r31 were saved above; they now address the four lookup tables read by the round macro
+ la r28,72(r3)
+ la r29,1096(r3)
+ la r30,2120(r3)
+ la r31,3144(r3)
+dnl load the input words into r7 and r8; the byte-reversed variant has to fill both registers as well
+ifelse(ASM_BIGENDIAN,yes,`
+ lwz r7,0(r5)
+ lwz r8,4(r5)
+',`
+ li r0,0
+ lwbrx r7,r5,r0
+ li r0,4
+ lwbrx r8,r5,r0
+')
+
+ eblock
+dnl store r7 at offset 4 and r8 at offset 0 of r4, in either byte order
+ifelse(ASM_BIGENDIAN,yes,`
+ stw r7,4(r4)
+ stw r8,0(r4)
+',`
+ li r0,4
+ stwbrx r7,r4,r0
+ li r0,0
+ stwbrx r8,r4,r0
+')
+dnl return 0, restore r28-r31 and release the 16 bytes of stack
+ li r3,0
+ lmw r28,0(r1)
+ la r1,16(r1)
+ blr
+C_FUNCTION_END(blowfishEncrypt)
+
+dnl blowfishDecrypt: decrypt the two 32-bit words at r5 using the key data at r3 and store the result at r4; returns 0 in r3.
+C_FUNCTION_BEGIN(blowfishDecrypt)
+ la r1,-16(r1)
+ stmw r28,0(r1)
+dnl r28-r31 were saved above; they now address the four lookup tables read by the round macro
+ la r28,72(r3)
+ la r29,1096(r3)
+ la r30,2120(r3)
+ la r31,3144(r3)
+dnl load the input words into r7 and r8; the byte-reversed variant has to fill both registers as well
+ifelse(ASM_BIGENDIAN,yes,`
+ lwz r7,0(r5)
+ lwz r8,4(r5)
+',`
+ li r0,0
+ lwbrx r7,r5,r0
+ li r0,4
+ lwbrx r8,r5,r0
+')
+
+ dblock
+dnl store r7 at offset 4 and r8 at offset 0 of r4, in either byte order
+ifelse(ASM_BIGENDIAN,yes,`
+ stw r7,4(r4)
+ stw r8,0(r4)
+',`
+ li r0,4
+ stwbrx r7,r4,r0
+ li r0,0
+ stwbrx r8,r4,r0
+')
+dnl return 0, restore r28-r31 and release the 16 bytes of stack
+ li r3,0
+ lmw r28,0(r1)
+ la r1,16(r1)
+ blr
+C_FUNCTION_END(blowfishDecrypt)
diff --git a/beecrypt/gas/sha1opt.ia64.S b/beecrypt/gas/fips180opt.ia64.S
index 77a2a975a..a9c6edaa8 100644
--- a/beecrypt/gas/sha1opt.ia64.S
+++ b/beecrypt/gas/fips180opt.ia64.S
@@ -27,7 +27,7 @@
*
*/
-#include "beecrypt.gas.h"
+#include "config.gas.h"
#define saved_pfs r14
#define saved_lc r15
@@ -38,13 +38,13 @@
.text
-#define K00 0x5a827999
-#define K20 0x6ed9eba1
-#define K40 0x8f1bbcdc
-#define K60 0xca62c1d6
+ .equ K00, 0x5a827999
+ .equ K20, 0x6ed9eba1
+ .equ K40, 0x8f1bbcdc
+ .equ K60, 0xca62c1d6
-#define PARAM_H 0
-#define PARAM_DATA 20
+ .equ PARAM_H, 0
+ .equ PARAM_DATA, 20
/* for optimization, I have to see how I can parallellize the code
diff --git a/beecrypt/gas/ia64.m4 b/beecrypt/gas/ia64.m4
new file mode 100644
index 000000000..1ac0898d3
--- /dev/null
+++ b/beecrypt/gas/ia64.m4
@@ -0,0 +1,35 @@
+dnl ia64.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl saved_pfs and saved_lc are symbolic names for the registers r14 and r15.
+define(`saved_pfs',`r14')
+define(`saved_lc',`r15')
+dnl When the first four characters of ASM_OS are hpux, replace C_FUNCTION_BEGIN with a version that emits TEXTSEG, GLOBL, .proc and the label for SYMNAME of its argument; otherwise emit .explicit.
+ifelse(substr(ASM_OS,0,4),hpux,`
+undefine(`C_FUNCTION_BEGIN')
+define(C_FUNCTION_BEGIN,`
+ TEXTSEG
+ GLOBL SYMNAME($1)#
+ .proc SYMNAME($1)#
+SYMNAME($1):
+')
+',`
+ .explicit
+')
diff --git a/beecrypt/gas/m68k.m4 b/beecrypt/gas/m68k.m4
new file mode 100644
index 000000000..1c6bb6e1e
--- /dev/null
+++ b/beecrypt/gas/m68k.m4
@@ -0,0 +1,34 @@
+ifelse(REGISTERS_NEED_PERCENT,yes,`
+define(`d0',``%d0'')
+define(`d1',``%d1'')
+define(`d2',``%d2'')
+define(`d3',``%d3'')
+define(`d4',``%d4'')
+define(`d5',``%d5'')
+define(`d6',``%d6'')
+define(`d7',``%d7'')
+define(`a0',``%a0'')
+define(`a1',``%a1'')
+define(`a2',``%a2'')
+define(`a3',``%a3'')
+define(`a4',``%a4'')
+define(`a5',``%a5'')
+define(`a6',``%a6'')
+define(`a7',``%a7'')
+define(`sp',``%sp'')
+')dnl register aliases: each expansion is double-quoted so the name after the percent sign is not rescanned as the same macro, which would recurse without end
+ifelse(INSTRUCTIONS_NEED_DOT_SIZE_QUALIF,yes,`
+define(addal,adda.l)
+define(addl,add.l)
+define(addql,addq.l)
+define(addxl,addx.l)
+define(clrl,clr.l)
+define(lsll,lsl.l)
+define(movel,move.l)
+define(moveml,movem.l)
+define(moveal,movea.l)
+define(umull,umul.l)
+define(subl,sub.l)
+define(subql,subq.l)
+define(subxl,subx.l)
+')dnl mnemonic aliases: map the suffixed spellings to the forms with a dot before the size letter
diff --git a/beecrypt/gas/mp32opt.arm.S b/beecrypt/gas/mp32opt.arm.S
deleted file mode 100644
index 5908047ba..000000000
--- a/beecrypt/gas/mp32opt.arm.S
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * mp32opt.arm.S
- *
- * Assembler optimized multiprecision integer routines for ARM processors
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2001, 2002 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "mp32opt.arm.S"
-
- .text
-
-/* ARM uses up to four registers for parameter passing */
-
-#if 0
-C_FUNCTION_BEGIN(mp32addw)
-LABEL(mp32addw)
- /* r0 is xsize and must be at least one, r1 = xdata, r2 = y */
- stmfd sp!, {r4, lr}
- add r1, r1, r0, asl #2
- mov r3, #0
-LOCAL(mp32addw_loop):
- ldr r4, [r1, #-4]
- adds r4, r4, r2
- adc r2, r3, r3
- str r4, [r1, #-4]!
- subs r0, r0, #1
- bne LOCAL(mp32addw_loop)
- mov r0, r2
- ldmfd sp!, {r4, pc}
-C_FUNCTION_END(mp32addw, LOCAL(mp32addw_size))
-
-
-C_FUNCTION_BEGIN(mp32subw)
-LABEL(mp32subw)
- /* r0 is xsize and must be at least one, r1 = xdata, r2 = y */
- stmfd sp!, {r4, lr}
- add r1, r1, r0, asl #2
- mov r3, #0
-LOCAL(mp32subw_loop):
- ldr r4, [r1, #-4]
- subs r4, r4, r2
- adc r2, r3, r3
- str r4, [r1, #-4]!
- subs r0, r0, #1
- bne LOCAL(mp32subw_loop)
- mov r0, r2
- ldmfd sp!, {r4, pc}
-C_FUNCTION_END(mp32subw, LOCAL(mp32subw_size))
-
-
-C_FUNCTION_BEGIN(mp32add)
-LABEL(mp32add)
- /* r0 is size, r1 = xdata, r2 = ydata */
- stmfd sp!, {r4, r5, lr}
- /* copy cpsr to r5 and clear the carry bit */
- mrs r5, cpsr
- bic r5, r5, #0x20000000
- /* adjust the addresses */
- add r1, r1, r0, asl #2
- add r2, r2, r0, asl #2
-LOCAL(mp32add_loop):
- /* restore the carry bit */
- msr cpsr_c, r5
- ldr r3, [r1, #-4]!
- ldr r4, [r2, #-4]!
- adcs r3, r3, r2
- str r3, [r1, #0]
- /* save the carry bit */
- mrs r5, cpsr
- subs r0, r0, #1
- bne LOCAL(mp32add_loop)
-
- /* restore the carry bit */
- msr cpsr_c, r5
-
- /* set the result to the proper value */
- adc r0, r0, r0
- ldmfd sp!, {r4, r5, pc}
-C_FUNCTION_END(mp32add, LOCAL(mp32add_size))
-#endif
-
-
-C_FUNCTION_BEGIN(mp32setmul)
-LABEL(mp32setmul)
- stmfd sp!, {r4, r5, lr}
- /* adjust the addresses */
- add r1, r1, r0, asl #2
- add r2, r2, r0, asl #2
- /* r3 is the multiplicand; r4 load from memory, r5 is scratch, ip is carry */
- mov ip, #0
-LOCAL(mp32setmul_loop):
- ldr r4, [r2, #-4]!
- mov r5, #0
- umlal ip, r5, r3, r4
- str ip, [r1, #-4]!
- mov ip, r5
- subs r0, r0, #1
- bne LOCAL(mp32setmul_loop)
- /* return carry */
- mov r0, ip
- ldmfd sp!, {r4, r5, pc}
-C_FUNCTION_END(mp32setmul, LOCAL(mp32setmul_size))
-
-
-C_FUNCTION_BEGIN(mp32addmul)
-LABEL(mp32addmul)
- stmfd sp!, {r4, r5, r6, lr}
- /* adjust the addresses */
- add r1, r1, r0, asl #2
- add r2, r2, r0, asl #2
- /* r3 is the multiplicand; r4 & r5 load from memory, r6 is scratch, ip is carry */
- mov ip, #0
-LOCAL(mp32addmul_loop):
- ldr r4, [r2, #-4]!
- ldr r5, [r1, #-4]
- mov r6, #0
- umlal ip, r6, r3, r4
- adds r5, r5, ip
- adc ip, r6, #0
- str r5, [r1, #-4]!
- subs r0, r0, #1
- bne LOCAL(mp32addmul_loop)
- /* return carry */
- mov r0, ip
- ldmfd sp!, {r4, r5, r6, pc}
-C_FUNCTION_END(mp32addmul, LOCAL(mp32addmul_size))
-
-
-#if 0
-/* this routine needs fixing; it causes a core dump for some reason */
-/* unfortunately the system I test this on has no debugger */
-C_FUNCTION_BEGIN(mp32addsqrtrc)
-LABEL(mp32addsqrtrc):
- stmfd sp!, {r4, r5, r6, lr}
- /* adjust the addresses */
- add r1, r1, r0, asl #2
- add r2, r2, r0, asl #2
- /* r3 is a zero register, ip is the carry */
- mov r3, #0
- mov ip, #0
-LOCAL(mp32addsqrtrc_loop):
- ldr r4, [r2, #-4]!
- mov r6, #0
- umlal ip, r6, r4, r4
- ldr r5, [r1, #-4] /* lo word */
- ldr r4, [r1, #-8] /* hi word */
- adds r5, r5, ip
- adcs r4, r4, r6
- str r5, [r1, #-4]
- str r4, [r1, #-8]!
- adc ip, r3, #0 /* set carry */
- subs r0, r0, #1
- bne LOCAL(mp32addsqrtrc_loop)
- /* return carry */
- mov r0, ip
- ldmfd sp!, {r4, r5, r6, pc}
-C_FUNCTION_END(mp32addsqrtrc, LOCAL(mp32addsqrtrc_size))
-#endif
diff --git a/beecrypt/gas/mp32opt.ia64.S b/beecrypt/gas/mp32opt.ia64.S
deleted file mode 100644
index 520ff4b52..000000000
--- a/beecrypt/gas/mp32opt.ia64.S
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * mp32opt.ia64.S
- *
- * Assembler optimized multiprecision integer routines for ia64 (Intel Itanium)
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-/*
- * I will need to implement 64 bit multiprecision assembler-optimized routines
- * before this platform can be tested adequately. The current 32 bit ones suffer
- * from loading into integer registers, conversion to floating point, doing the xma
- * and converting back to integer; 64 bit values can be loaded directly into
- * floating point registers, which should shave off a lot of cycles.
- */
-
-#include "beecrypt.gas.h"
-
-#define saved_pfs r14
-#define saved_lc r15
-
-#define size r16
-#define dst r17
-#define src r18
-
- .file "mp32opt.ia64.S"
-
- .text
-
- .explicit
-
- .align 32
- .global mpzero#
- .proc mpzero#
-
-mpzero:
- alloc saved_pfs = ar.pfs,2,0,0,0
- mov saved_lc = ar.lc
- sub size = in0,r0,1;;
- mov src = in1
- mov ar.lc = size;;
-.L00:
- st4 [src] = r0,4
- br.ctop.sptk .L00
- ;;
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
- .endp mpzero#
-
-
- .align 32
- .global mpcopy#
- .proc mpcopy#
-
-mpcopy:
- alloc saved_pfs = ar.pfs,3,5,0,8
- mov saved_lc = ar.lc
- sub size = in0,r0,1
- mov dst = in1
- mov src = in2;;
- mov ar.lc = size
- mov ar.ec = 2
- mov pr.rot = (1 << 16);;
-.L01:
- (p17) st4 [dst] = r33,4
- (p16) ld4 r32 = [src],4;;
- br.ctop.sptk .L01;;
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
- .endp mpcopy#
-
-
- .if 0
- .align 32
- .global mpz
- .type mpz,@function
-
-mpz:
- alloc r14 = ar.pfs,2,6,0,8
- mov r15 = ar.lc
- sub r16= in0,r0,1
- mov r17 = in1
- mov r18 = in2;;
- mov r8 = 1
- mov pr.rot = 1 << 16
- mov ar.ec = 2
- mov ar.lc = r16;;
-.L02:
- (p16) ld4 r32 = [r18],4
- (p18) cmp.eq p0,p32 = r34,r0
- (p33) mov r8 = r0
- (p33) br.exit
- .endif
-
-
- .align 32
- .global mpadd#
- .proc mpadd#
-
-mpadd:
- alloc r14 = ar.pfs,3,0,0,0
- mov r15 = ar.lc
- # adjust size by -1
- sub r16 = in0,r0,1
- # clear carry
- mov r8 = r0;;
- # load addresses
- shladd r17 = r16,2,in1
- shladd r18 = r16,2,in2
- # load loop count
- mov ar.lc = r16;;
-.L20:
- ld4 r20 = [r18],-4
- ld4 r19 = [r17]
- tbit.z p1,p2 = r8,32;;
- (p1) add r8 = r19,r20
- (p2) add r8 = r19,r20,1;;
- st4 [r17] = r8,-4
- br.cloop.sptk .L20;;
- extr.u r8 = r8,32,1
- mov ar.lc = r15
- mov ar.pfs = r14
- br.ret.sptk b0
- .endp mpadd#
-
-
- .align 32
- .global mpsub#
- .proc mpsub#
-
-mpsub:
- alloc r14 = ar.pfs,3,0,0,0
- mov r15 = ar.lc
- # adjust size by -1
- sub r16 = in0,r0,1
- # clear carry
- mov r8 = r0;;
- # load addresses
- shladd r17 = r16,2,in1
- shladd r18 = r16,2,in2
- # load loop count
- mov ar.lc = r16;;
-.L30:
- ld4 r20 = [r18],-4
- ld4 r19 = [r17]
- tbit.z p1,p2 = r8,32;;
- (p1) sub r8 = r19,r20
- (p2) sub r8 = r19,r20,1;;
- st4 [r17] = r8,-4
- br.cloop.sptk .L30;;
- extr.u r8 = r8,32,1
- mov ar.lc = r15
- mov ar.pfs = r14
- br.ret.sptk b0
- .endp mpsub#
-
-
- .if 0
-
- .align 32
- .global mpsetmul#
- .proc mpsetmul#
-
-mpsetmul:
- alloc r14 = ar.pfs,4,0,0,0
- mov r15 = ar.lc
- # load mul
- setf.sig f96 = in3
- # adjust size by -1
- sub r16 = in0,r0,1
- # clear carry
- mov r8 = r0;;
- # adjust addresses
- shladd r17 = r16,2,in1
- shladd r18 = r16,2,in2
- # load loop count
- mov ar.lc = r16;;
-.L40:
- ld4 r19 = [r18],-4;;
- setf.sig f98 = r8
- setf.sig f97 = r19;;
- # multiplication can only be done in f registers, but we do have a multiply-add
- xma.l f98 = f96,f97,f98;;
- getf.sig r8 = f98;;
- st4 [r17] = r8,-4
- shr.u r8 = r8,32
- br.cloop.sptk .L40;;
- mov ar.lc = r15
- mov ar.pfs = r14
- br.ret.sptk b0
- .endp mpsetmul#
-
-
- .align 32
- .global mpaddmul#
- .proc mpaddmul#
-
-mpaddmul:
- alloc saved_pfs = ar.pfs,4,0,0,0
- mov saved_lc = ar.lc
- # load mul
- setf.sig f96 = in3
- # adjust size by -1
- sub size = in0,r0,1
- # clear carry
- mov r8 = r0;;
- # adjust addresses
- shladd dst = size,2,in1
- shladd src = size,2,in2
- # load loop count
- mov ar.lc = r16;;
-.L50:
- ld4 r19 = [dst]
- ld4 r20 = [dst],-4;;
- setf.sig f98 = r8
- setf.sig f97 = r20;;
- # multiplication can only be done in f registers, but we do have a multiply-add
- xma.l f98 = f96,f97,f98;;
- getf.sig r8 = f98;;
- add r8 = r8,r19;;
- st4 [r17] = r8,-4
- shr.u r8 = r8,32
- br.cloop.sptk .L50;;
- mov ar.lc = r15
- mov ar.pfs = r14
- br.ret.sptk b0
- .endp mpaddmul#
-
- .endif
-
-
- .if 0
- .align 16
- .global mpaddsqrtrc#
- .proc mpaddsqrtrc#
-
-mpaddsqrtrc:
- .endp mpaddsqrtrc#
- .endif
diff --git a/beecrypt/gas/mp32opt.sparcv8.S b/beecrypt/gas/mp32opt.sparcv8.S
deleted file mode 100644
index 09a94f0d7..000000000
--- a/beecrypt/gas/mp32opt.sparcv8.S
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * mp32opt.sparcv8.S
- *
- * Assembler optimized multiprecision integer routines for Sparc v8
- *
- * Compile target is GNU Assembler, Sun Solaris Assembler
- *
- * Copyright (c) 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "mp32opt.sparcv8.S"
-
- .text
-
-C_FUNCTION_BEGIN(mpsetmul)
-LABEL(mpsetmul)
-
- .register %g2,#scratch
-
- sll %o0,2,%g1
- dec 4,%o2
- clr %o0
-LOCAL(mpsetmul_loop):
- ld [%o2+%g1],%g2
- umul %o3,%g2,%g2
- rd %y,%g3
- addcc %o0,%g2,%g2
- addx %g0,%g3,%o0
- deccc 4,%g1
- bnz LOCAL(mpsetmul_loop)
- st %g2,[%o1+%g1]
- retl
- nop
-C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size))
-
-
-C_FUNCTION_BEGIN(mpaddmul)
-LABEL(mpaddmul)
-
- .register %g2,#scratch
-
- sll %o0,2,%g1
- mov %o1,%o4
- dec 4,%o1
- dec 4,%o2
- clr %o0
-LOCAL(mpaddmul_loop):
- ld [%o2+%g1],%g2
- ld [%o1+%g1],%g3
- umul %o3,%g2,%g2
- rd %y,%g4
- addcc %o0,%g2,%g2
- addx %g0,%g4,%g4
- addcc %g2,%g3,%g2
- addx %g0,%g4,%o0
- deccc 4,%g1
- bnz LOCAL(mpaddmul_loop)
- st %g2,[%o4+%g1]
- retl
- nop
-C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size))
-
-
-C_FUNCTION_BEGIN(mpaddsqrtrc)
-LABEL(mpaddsqrtrc)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
- sll %o0,2,%g1
- add %o1,%g1,%o1
- dec 4,%o2
- add %o1,%g1,%o1
- dec 8,%o1
- clr %o0
-LOCAL(mpaddsqrtrc_loop):
- ld [%o2+%g1],%g2
- ldd [%o1],%o4
- umul %g2,%g2,%g3
- rd %y,%g2
- /* first addition */
- addcc %o5,%g3,%o5
- addxcc %o4,%g2,%o4
- addx %g0,%g0,%o3
- /* second addition */
- addcc %o5,%o0,%o5
- addxcc %o4,%g0,%o4
- addx %o3,%g0,%o0
- std %o4,[%o1]
- deccc 4,%g1
- bnz LOCAL(mpaddsqrtrc_loop)
- sub %o1,8,%o1
- retl
- nop
-C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size))
diff --git a/beecrypt/gas/mp64opt.ia64.S b/beecrypt/gas/mp64opt.ia64.S
deleted file mode 100644
index 581bf4d30..000000000
--- a/beecrypt/gas/mp64opt.ia64.S
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * mp64opt.ia64.S
- *
- * Assembler optimized multiprecision integer routines for ia64 (Intel Itanium)
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
-#define saved_pfs r14
-#define saved_lc r15
-
-#define size r16
-#define dst r17
-#define src r18
-#define alt r19
-
- .text
-
- .explicit
-
-/* functions to add, in order of priority:
- * mp64addsqrtrc
- * mp64neg -> can vectorize
- * mp64multwo -> can vectorize
- * mp32divtwo -> ..
- * mp64fill -> easy
- * mp64z -> vectorizable with br.wtop
- * mp64nz -> vectorizable with br.wtop
- * mp64eq -> ..
- * mp64eqx -> ..
- * mp64ne -> ..
- * mp64nex -> ..
- * mp64gt -> ..
- * mp64gtx -> ..
- * mp64lt -> ..
- * mp64ltx -> ..
- * mp64ge -> substitute with mp64lt with swap of parameters
- * mp64gex -> .. mp64ltx
- * mp64le -> .. mp64gt
- * mp64lex -> .. mp64gtx
- * mp64isone -> vectorizable with br.wtop
- * mp64istwo -> ..
- * mp64leone -> ..
- * mp64size -> ..
-
-/* mp64zero works */
-C_FUNCTION_BEGIN(mp64zero)
- alloc saved_pfs = ar.pfs,2,0,0,0
- mov saved_lc = ar.lc
- sub size = in0,r0,1;;
- mov src = in1
- mov ar.lc = size;;
-
-.Lmp64zero_loop:
- st8 [src] = r0,8
- br.ctop.sptk .Lmp64zero_loop;;
-
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64zero)
-
-
-/* mp64copy works */
-C_FUNCTION_BEGIN(mp64copy)
- alloc saved_pfs = ar.pfs,3,5,0,8
- mov saved_lc = ar.lc
- sub size = in0,r0,1;;
- mov dst = in1
- mov src = in2
- /* prepare loop */
- mov ar.lc = size
- mov ar.ec = 2
- mov pr.rot = (1 << 16);;
-
-.Lmp64copy_loop:
- (p17) st8 [dst] = r33,-8
- (p16) ld8 r32 = [src],-8;;
- br.ctop.sptk .Lmp64copy_loop;;
-
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64copy)
-
-
-#if 0
-/* mp64z is in development */
-C_FUNCTION_BEGIN(mp64z)
- alloc saved_pfs = ar.pfs,2,6,0,8
- mov saved_lc = ar.lc
- sub size = in0,r0,1;;
-
- mov ret0 = 1
- mov src = in1
-
- mov ar.lc = size
- mov ar.ec = 2
- mov pr.rot = ((1 << 16) | (1 << 20));;
-
-.Lmp64z_loop:
- (p16) ld8 r32 = [src],8
- (p17) cmp.ne p1,p0 = r33,r0
- (p1) br.exit.dpnt .Lmp64z_exit;;
- br.ctop.dptk .Lmp64z_loop;;
-.Lmp64z_exit:
- (p1) mov ret0 = r0
-
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64z)
-#endif
-
-
-/* mp64add works */
-C_FUNCTION_BEGIN(mp64add)
- alloc saved_pfs = ar.pfs,3,5,0,8
- mov saved_lc = ar.lc
- sub size = in0,r0,1;;
-
- /* adjust addresses */
- shladd dst = size,3,in1
- shladd src = size,3,in2
- shladd alt = size,3,in1
-
- /* prepare modulo-scheduled loop */
- mov ar.lc = size
- mov ar.ec = 3
- mov pr.rot = ((1 << 16) | (1 << 19));;
-
-.Lmp64add_loop:
- (p16) ld8 r32 = [src],-8
- (p16) ld8 r35 = [alt],-8
- (p20) add r36 = r33,r36 /* no carry add */
- (p22) add r36 = r33,r36,1 /* carry add */
- ;;
- (p20) cmp.leu p19,p21 = r33,r36 /* no previous carry */
- (p22) cmp.ltu p19,p21 = r33,r36 /* previous carry */
- (p18) st8 [dst] = r37,-8
- br.ctop.dptk .Lmp64add_loop;;
-
- /* return carry */
- (p21) add ret0 = r0,r0
- (p23) add ret0 = r0,r0,1
- ;;
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64add)
-
-
-/* mp64sub is in development */
-C_FUNCTION_BEGIN(mp64sub)
- alloc saved_pfs = ar.pfs,3,5,0,8
- mov saved_lc = ar.lc
- sub size = in0,r0,1;;
-
- /* adjust addresses */
- shladd dst = size,3,in1
- shladd src = size,3,in2
- shladd alt = size,3,in1
-
- /* prepare modulo-scheduled loop */
- mov ar.lc = size
- mov ar.ec = 3
- mov pr.rot = ((1 << 16) | (1 << 19));;
-
-.Lmp64sub_loop:
- (p16) ld8 r32 = [src],-8
- (p16) ld8 r35 = [alt],-8
- (p20) sub r36 = r33,r36 /* no carry sub */
- (p22) sub r36 = r33,r36,1 /* carry sub */
- ;;
- (p20) cmp.geu p19,p21 = r33,r36 /* no previous carry */
- (p22) cmp.gtu p19,p21 = r33,r36 /* previous carry */
- (p18) st8 [dst] = r37,-8
- br.ctop.dptk .Lmp64sub_loop;;
-
- /* return carry */
- (p21) add ret0 = r0,r0
- (p23) add ret0 = r0,r0,1
- ;;
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64sub)
-
-
-/* mp64setmul works */
-C_FUNCTION_BEGIN(mp64setmul)
- alloc saved_pfs = ar.pfs,4,4,0,8
- mov saved_lc = ar.lc
-
- setf.sig f6 = in3 /* the multiplier */
- setf.sig f7 = r0 /* the carry */
- sub size = in0,r0,1;;
-
- /* adjust addresses */
- shladd dst = size,3,in1
- shladd src = size,3,in2
-
- /* prepare modulo-scheduled loop */
- mov ar.lc = size
- mov ar.ec = 3
- mov pr.rot = (1 << 16);;
-
-.Lmp64setmul_loop:
- (p16) ldf8 f36 = [src],-8
- (p18) stf8 [dst] = f33,-8
- (p17) xma.lu f32 = f6,f37,f7
- (p17) xma.hu f7 = f6,f37,f7;;
- br.ctop.dptk .Lmp64setmul_loop;;
-
- /* return carry */
- getf.sig ret0 = f7;;
-
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64setmul)
-
-
-/* mp64addmul needs fixing */
-C_FUNCTION_BEGIN(mp64addmul)
- alloc saved_pfs = ar.pfs,4,12,0,16
- mov saved_lc = ar.lc
-
- sub size = in0,r0,1;;
- setf.sig f6 = in3 /* the multiplier */
-
- /* adjust addresses */
- shladd dst = size,3,in1
- shladd src = size,3,in2
- shladd alt = size,3,in1;;
-
- /* prepare the rotate-in carry */
- mov r32 = r0
-
- /* prepare modulo-scheduled loop */
- mov ar.lc = size
- mov ar.ec = 5
- mov pr.rot = ((1 << 16) | (1 << 21));
-
-.Lmp64addmul_loop:
- (p18) getf.sig r33 = f34 /* hi 64 bit word */
- (p24) add r38 = r35,r38
- (p17) xma.lu f37 = f6,f41,f45
- (p18) getf.sig r37 = f38 /* lo 64 bit word */
- (p26) add r38 = r35,r38,1
- (p17) xma.hu f33 = f6,f41,f45
- (p16) ldf8 f40 = [src],-8
- (p16) ldf8 f44 = [alt],-8
- ;;
- /* set carry from this operation */
- (p24) cmp.leu p23,p25 = r35,r38
- (p26) cmp.ltu p23,p25 = r35,r38
- (p20) st8 [dst] = r39,-8
- br.ctop.dptk .Lmp64addmul_loop;;
-
- /* return carry */
- (p25) add ret0 = r36,r0
- (p27) add ret0 = r36,r0,1
-
- mov ar.lc = saved_lc
- mov ar.pfs = saved_pfs
- br.ret.sptk b0
-C_FUNCTION_END(mp64addmul)
-
-/* mp64addsqrtrc will be a little more challenging */
-
-/* the primary loop will look like this:
-
-.Lmp64addsqrtrc_loop:
- /* stage 1 */
- (p16) ldf8 to_square
- (p16) ld8 lo_to_add
- (p16) ld8 hi_to_add
- /* stage 2 */
- (p17) xma.lu to_square,to_square,carry
- (p17) xma.hu to_square,to_square,carry
- /* stage 3 */
- (p18) getf lo xma
- (p18) getf hi xma
- /* stage 4 */
- (p?) add lo no carry
- (p?) add lo carry
- /* stage 5 */
- (p?+1) add hi no carry
- (p?+1) add hi carry
- ;;
- /* also stage 4 */
- (p?) cmp lo for carry
- (p?) cmp lo for carry
- /* also stage 5 */
- (p?+1) cmp hi for carry
- (p?+1) cmp hi for carry
- st8 lo
- st8 hi
- br.ctop
-*/
diff --git a/beecrypt/gas/mpopt.alpha.m4 b/beecrypt/gas/mpopt.alpha.m4
new file mode 100644
index 000000000..55d4b5266
--- /dev/null
+++ b/beecrypt/gas/mpopt.alpha.m4
@@ -0,0 +1,159 @@
+dnl mpopt.alpha.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/alpha.m4)
+
+
+dnl mpadd (Alpha): adds the quadword array at $18 into the quadword array
+dnl at $17, in place. $16 is the word count. Both pointers are first moved
+dnl to the last word, then the loop walks down 8 bytes per iteration.
+dnl $0 is the running carry and holds the final carry when the routine returns.
+dnl The loop body always runs at least once; a zero count is not guarded.
+C_FUNCTION_BEGIN(mpadd)
+ subq `$'16,1,`$'16
+ s8addq `$'16,0,`$'1
+ addq `$'17,`$'1,`$'17
+ addq `$'18,`$'1,`$'18
+ mov `$31',`$'0
+
+ .align 4
+LOCAL(mpadd_loop):
+ ldq `$'1,0(`$'17)
+ ldq `$'2,0(`$'18)
+dnl $3 = destination word + carry-in; $0 = 1 if that addition wrapped
+ addq `$'1,`$'0,`$'3
+ cmpult `$'3,`$'1,`$'0
+dnl $1 = $3 + source word; $2 = 1 if that addition wrapped
+ addq `$3',`$'2,`$'1
+ cmpult `$'1,`$'3,`$'2
+ stq `$'1,0(`$'17)
+dnl merge the two partial carries into the carry-out
+ or `$'2,`$'0,`$'0
+ subq `$'16,1,`$'16
+ subq `$'17,8,`$'17
+ subq `$'18,8,`$'18
+ bge `$'16,LOCAL(mpadd_loop)
+ ret `$'31,(`$'26),1
+C_FUNCTION_END(mpadd)
+
+
+dnl mpsub (Alpha): subtracts the quadword array at $18 from the quadword
+dnl array at $17, in place. $16 is the word count. The loop walks from the
+dnl last word down to the first. $0 is the running borrow and holds the
+dnl final borrow when the routine returns.
+dnl The loop body always runs at least once; a zero count is not guarded.
+C_FUNCTION_BEGIN(mpsub)
+ subq `$'16,1,`$'16
+ s8addq `$'16,0,`$'1
+ addq `$'17,`$'1,`$'17
+ addq `$'18,`$'1,`$'18
+ mov `$31',`$'0
+
+ .align 4
+LOCAL(mpsub_loop):
+ ldq `$'1,0(`$'17)
+ ldq `$'2,0(`$'18)
+dnl $3 = destination word - borrow-in; $0 = 1 if that subtraction wrapped
+ subq `$'1,`$'0,`$'3
+ cmpult `$'1,`$'3,`$'0
+dnl $1 = $3 - source word; $2 = 1 if that subtraction wrapped
+ subq `$'3,`$'2,`$'1
+ cmpult `$'3,`$'1,`$'2
+ stq `$'1,0(`$'17)
+dnl merge the two partial borrows into the borrow-out
+ or `$'2,`$'0,`$'0
+ subq `$'16,1,`$'16
+ subq `$'17,8,`$'17
+ subq `$'18,8,`$'18
+ bge `$'16,LOCAL(mpsub_loop)
+ ret `$'31,(`$'26),1
+C_FUNCTION_END(mpsub)
+
+
+dnl mpsetmul (Alpha): for each of the $16 quadwords at $18, multiplies it by
+dnl $19, adds the running carry, and stores the low 64 bits to the matching
+dnl slot at $17. The loop walks from the last word down to the first.
+dnl $0 is the running carry (high product word plus low-word overflow) and
+dnl holds the final carry when the routine returns.
+C_FUNCTION_BEGIN(mpsetmul)
+ subq `$'16,1,`$'16
+ s8addq `$'16,0,`$'1
+ addq `$'17,`$'1,`$'17
+ addq `$'18,`$'1,`$'18
+ mov `$31',`$'0
+
+ .align 4
+LOCAL(mpsetmul_loop):
+ ldq `$1',0(`$'18)
+dnl $2 = low 64 bits of the product, $3 = high 64 bits
+ mulq `$'19,`$'1,`$'2
+ umulh `$'19,`$'1,`$'3
+dnl add carry-in to the low word; $0 = 1 if that wrapped
+ addq `$'2,`$'0,`$'2
+ cmpult `$'2,`$'0,`$'0
+ stq `$'2,0(`$'17)
+dnl carry-out = high word + overflow of the low word
+ addq `$'3,`$'0,`$'0
+ subq `$'16,1,`$'16
+ subq `$'17,8,`$'17
+ subq `$'18,8,`$'18
+ bge `$'16,LOCAL(mpsetmul_loop)
+ ret `$'31,(`$'26),1
+C_FUNCTION_END(mpsetmul)
+
+
+dnl mpaddmul (Alpha): for each of the $16 quadwords at $18, multiplies it by
+dnl $19 and adds the product plus the running carry into the matching
+dnl quadword at $17. The loop walks from the last word down to the first.
+dnl $0 is the running carry and holds the final carry when the routine returns.
+C_FUNCTION_BEGIN(mpaddmul)
+ subq `$'16,1,`$'16
+ s8addq `$'16,0,`$'1
+ addq `$'17,`$'1,`$'17
+ addq `$'18,`$'1,`$'18
+ mov `$31',`$'0
+
+ .align 4
+LOCAL(mpaddmul_loop):
+ ldq `$'1,0(`$'17)
+ ldq `$'2,0(`$'18)
+dnl $3 = low 64 bits of the product, $4 = high 64 bits
+ mulq `$'19,`$'2,`$'3
+ umulh `$'19,`$'2,`$'4
+dnl low word += carry-in; fold the overflow into the high word
+ addq `$'3,`$'0,`$'3
+ cmpult `$'3,`$'0,`$'0
+ addq `$'4,`$'0,`$'4
+dnl low word += destination word; carry-out = high word + overflow
+ addq `$'3,`$'1,`$'3
+ cmpult `$'3,`$'1,`$'0
+ addq `$'4,`$'0,`$'0
+ stq `$'3,0(`$'17)
+ subq `$'16,1,`$'16
+ subq `$'17,8,`$'17
+ subq `$'18,8,`$'18
+ bge `$'16,LOCAL(mpaddmul_loop)
+ ret `$'31,(`$'26),1
+C_FUNCTION_END(mpaddmul)
+
+
+dnl mpaddsqrtrc (Alpha): for each of the $16 quadwords x at $18, adds the
+dnl 128-bit square x*x plus the running carry into a two-quadword slot at
+dnl $17 (low half at offset 8, high half at offset 0). $17 is advanced by
+dnl twice the scaled count up front and steps down 16 bytes per iteration;
+dnl $18 steps down 8 bytes. $0 is the running carry and holds the final
+dnl carry when the routine returns.
+C_FUNCTION_BEGIN(mpaddsqrtrc)
+ subq `$'16,1,`$'16
+ s8addq `$'16,0,`$'1
+ addq `$'17,`$'1,`$'17
+ addq `$'17,`$'1,`$'17
+ addq `$'18,`$'1,`$'18
+ mov `$31',`$'0
+
+ .align 4
+LOCAL(mpaddsqrtrc_loop):
+ ldq `$'1,0(`$'18)
+dnl $2 = low 64 bits of x*x, $1 = high 64 bits
+ mulq `$1',`$1',`$'2
+ umulh `$1',`$1',`$'1
+dnl low half: add carry-in, then the low result word, folding overflow upward
+ addq `$'2,`$'0,`$'3
+ cmpult `$3',`$'2,`$'0
+ ldq `$'2,8(`$'17)
+ addq `$'1,`$'0,`$'1
+ addq `$'3,`$'2,`$'4
+ cmpult `$'4,`$'3,`$'0
+ ldq `$'3,0(`$'17)
+dnl high half: add overflow from the low half, then the high result word
+ addq `$'1,`$'0,`$'2
+ cmpult `$2',`$'1,`$'0
+ stq `$'4,8(`$'17)
+ addq `$'2,`$'3,`$'1
+ cmpult `$'1,`$'2,`$2'
+ stq `$'1,0(`$'17)
+dnl carry-out = sum of the two high-half overflow bits
+ addq `$'2,`$'0,`$'0
+ subq `$'16,1,`$'16
+ subq `$'17,16,`$'17
+ subq `$'18,8,`$'18
+dnl loop back to this routine's own loop label
+ bge `$'16,LOCAL(mpaddsqrtrc_loop)
+ ret `$'31,(`$'26),1
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mpopt.arm.m4 b/beecrypt/gas/mpopt.arm.m4
new file mode 100644
index 000000000..a7dc677ee
--- /dev/null
+++ b/beecrypt/gas/mpopt.arm.m4
@@ -0,0 +1,83 @@
+dnl mpopt.arm.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+
+
+dnl mpsetmul (ARM): r0 = word count, r1 = destination array, r2 = source
+dnl array, r3 = multiplier. Both pointers are moved one past the last word
+dnl and the loop walks downwards with pre-decrement addressing.
+dnl Each iteration stores the low 32 bits of (source word * r3 + carry);
+dnl ip is the running carry and is returned in r0.
+dnl The loop tests the count only after the first iteration, so r0 must be
+dnl non-zero on entry.
+C_FUNCTION_BEGIN(mpsetmul)
+ stmfd sp!, {r4, r5, lr}
+ add r1, r1, r0, asl #2
+ add r2, r2, r0, asl #2
+ mov ip, #0
+LOCAL(mpsetmul_loop):
+ ldr r4, [r2, #-4]!
+ mov r5, #0
+dnl r5:ip = r3 * r4 + ip (ip enters as carry-in, leaves as the low word)
+ umlal ip, r5, r3, r4
+ str ip, [r1, #-4]!
+dnl the high word becomes the next carry
+ mov ip, r5
+ subs r0, r0, #1
+ bne LOCAL(mpsetmul_loop)
+ mov r0, ip
+ ldmfd sp!, {r4, r5, pc}
+C_FUNCTION_END(mpsetmul)
+
+
+dnl mpaddmul (ARM): r0 = word count, r1 = destination array, r2 = source
+dnl array, r3 = multiplier. For each word, walking from the last down to
+dnl the first, adds (source word * r3 + carry) into the destination word.
+dnl ip is the running carry and is returned in r0.
+dnl The loop tests the count only after the first iteration, so r0 must be
+dnl non-zero on entry.
+C_FUNCTION_BEGIN(mpaddmul)
+ stmfd sp!, {r4, r5, r6, lr}
+ add r1, r1, r0, asl #2
+ add r2, r2, r0, asl #2
+ mov ip, #0
+LOCAL(mpaddmul_loop):
+ ldr r4, [r2, #-4]!
+dnl read the destination word without writeback; the str below steps r1
+ ldr r5, [r1, #-4]
+ mov r6, #0
+dnl r6:ip = r3 * r4 + ip
+ umlal ip, r6, r3, r4
+dnl add the low word into the destination; next carry = high word + C
+ adds r5, r5, ip
+ adc ip, r6, #0
+ str r5, [r1, #-4]!
+ subs r0, r0, #1
+ bne LOCAL(mpaddmul_loop)
+ mov r0, ip
+ ldmfd sp!, {r4, r5, r6, pc}
+C_FUNCTION_END(mpaddmul)
+
+
+dnl mpaddsqrtrc (ARM): r0 = word count, r1 = result array (two words per
+dnl source word, hence the shift by 3), r2 = source array.
+dnl For each source word x, walking downwards, adds the 64-bit value
+dnl x*x + carry into the two result words at [r1,#-8] (high) and [r1,#-4] (low).
+dnl r3 is a constant zero; ip is the running carry and is returned in r0.
+dnl The loop tests the count only after the first iteration, so r0 must be
+dnl non-zero on entry.
+C_FUNCTION_BEGIN(mpaddsqrtrc)
+ stmfd sp!, {r4, r5, r6, lr}
+ add r1, r1, r0, asl #3
+ add r2, r2, r0, asl #2
+ mov r3, #0
+ mov ip, #0
+LOCAL(mpaddsqrtrc_loop):
+ ldr r4, [r2, #-4]!
+ mov r6, #0
+dnl r6:ip = r4 * r4 + ip
+ umlal ip, r6, r4, r4
+ ldr r5, [r1, #-4]
+ ldr r4, [r1, #-8]
+dnl 64-bit add of r6:ip into the result pair; C carries out of the high word
+ adds r5, r5, ip
+ adcs r4, r4, r6
+ str r5, [r1, #-4]
+ str r4, [r1, #-8]!
+dnl next carry = C flag (r3 is zero)
+ adc ip, r3, #0
+ subs r0, r0, #1
+ bne LOCAL(mpaddsqrtrc_loop)
+ mov r0, ip
+ ldmfd sp!, {r4, r5, r6, pc}
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mpopt.ia64.m4 b/beecrypt/gas/mpopt.ia64.m4
new file mode 100644
index 000000000..8486fe2e5
--- /dev/null
+++ b/beecrypt/gas/mpopt.ia64.m4
@@ -0,0 +1,187 @@
+dnl mpopt.ia64.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/ia64.m4)
+
+define(`size',`r16')
+define(`dst',`r17')
+define(`src',`r18')
+define(`alt',`r19')
+
+
+dnl mpadd (IA-64): in0 = word count, in1 = array that is both read (via alt)
+dnl and written (via dst), in2 = array that is only read (via src).
+dnl All three pointers start at the last 64-bit word and post-decrement by 8.
+dnl The loop is software-pipelined with rotating registers and predicates:
+dnl ar.lc = count - 1, ar.ec = 2, and pr.rot seeds p16 (load stage) and p19.
+dnl Two predicated add/cmp pairs are used per word: one without and one with
+dnl a carry-in of 1; each cmp writes the predicate pair p19,p21 that selects
+dnl which add runs for the next word after rotation.
+dnl ret0 is set to 0 or 1 from the final carry predicates.
+C_FUNCTION_BEGIN(mpadd)
+ alloc saved_pfs = ar.pfs,3,5,0,8
+ mov saved_lc = ar.lc
+ sub size = in0,r0,1;;
+
+dnl adjust addresses
+ shladd dst = size,3,in1
+ shladd src = size,3,in2
+ shladd alt = size,3,in1
+
+dnl prepare modulo-scheduled loop
+ mov ar.lc = size
+ mov ar.ec = 2
+ mov pr.rot = ((1 << 16) | (1 << 19));;
+
+LOCAL(mpadd_loop):
+ (p16) ld8 r32 = [alt],-8
+ (p16) ld8 r35 = [src],-8
+ (p20) add r36 = r33,r36
+ (p22) add r36 = r33,r36,1
+ ;;
+ (p20) cmp.leu p19,p21 = r33,r36
+ (p22) cmp.ltu p19,p21 = r33,r36
+ (p18) st8 [dst] = r37,-8
+ br.ctop.dptk LOCAL(mpadd_loop);;
+
+dnl loop epilogue: final store
+ (p18) st8 [dst] = r37,-8
+
+dnl return carry
+ (p20) add ret0 = r0,r0
+ (p22) add ret0 = r0,r0,1
+ ;;
+ mov ar.lc = saved_lc
+ mov ar.pfs = saved_pfs
+ br.ret.sptk b0
+C_FUNCTION_END(mpadd)
+
+
+dnl mpsub (IA-64): in0 = word count, in1 = array that is both read (via alt)
+dnl and written (via dst), in2 = array that is only read (via src).
+dnl All three pointers start at the last 64-bit word and post-decrement by 8.
+dnl The loop is software-pipelined with rotating registers and predicates:
+dnl ar.lc = count - 1, ar.ec = 2, and pr.rot seeds p16 (load stage) and p19.
+dnl Two predicated sub/cmp pairs are used per word: one without and one with
+dnl a borrow-in of 1; each cmp writes the predicate pair p19,p21 that selects
+dnl which sub runs for the next word after rotation.
+dnl ret0 is set to 0 or 1 from the final borrow predicates.
+C_FUNCTION_BEGIN(mpsub)
+ alloc saved_pfs = ar.pfs,3,5,0,8
+ mov saved_lc = ar.lc
+ sub size = in0,r0,1;;
+
+dnl adjust addresses
+ shladd dst = size,3,in1
+ shladd src = size,3,in2
+ shladd alt = size,3,in1
+
+dnl prepare modulo-scheduled loop
+ mov ar.lc = size
+ mov ar.ec = 2
+ mov pr.rot = ((1 << 16) | (1 << 19));;
+
+LOCAL(mpsub_loop):
+ (p16) ld8 r32 = [alt],-8
+ (p16) ld8 r35 = [src],-8
+ (p20) sub r36 = r33,r36
+ (p22) sub r36 = r33,r36,1
+ ;;
+ (p20) cmp.geu p19,p21 = r33,r36
+ (p22) cmp.gtu p19,p21 = r33,r36
+ (p18) st8 [dst] = r37,-8
+ br.ctop.dptk LOCAL(mpsub_loop);;
+
+dnl loop epilogue: final store
+ (p18) st8 [dst] = r37,-8
+
+dnl return carry
+ (p20) add ret0 = r0,r0
+ (p22) add ret0 = r0,r0,1
+ ;;
+ mov ar.lc = saved_lc
+ mov ar.pfs = saved_pfs
+ br.ret.sptk b0
+C_FUNCTION_END(mpsub)
+
+
+dnl mpsetmul (IA-64): in0 = word count, in1 = destination array, in2 =
+dnl source array, in3 = multiplier. The multiplier is moved to f6 and the
+dnl running carry lives in f7 (initially zero).
+dnl Both pointers start at the last 64-bit word and post-decrement by 8.
+dnl The loop is software-pipelined in three stages (ar.ec = 3): load a
+dnl source word (p16), form low and high halves of f6 * word + f7 with
+dnl xma.lu / xma.hu (p17), store the low half (p18). The high half is
+dnl written back to f7 as the next carry.
+dnl The final carry is moved from f7 to ret0.
+C_FUNCTION_BEGIN(mpsetmul)
+ alloc saved_pfs = ar.pfs,4,4,0,8
+ mov saved_lc = ar.lc
+
+ setf.sig f6 = in3
+ setf.sig f7 = r0
+ sub size = in0,r0,1;;
+
+dnl adjust addresses
+ shladd dst = size,3,in1
+ shladd src = size,3,in2
+
+dnl prepare modulo-scheduled loop
+ mov ar.lc = size
+ mov ar.ec = 3
+ mov pr.rot = (1 << 16);;
+
+LOCAL(mpsetmul_loop):
+ (p16) ldf8 f32 = [src],-8
+ (p18) stf8 [dst] = f35,-8
+ (p17) xma.lu f34 = f6,f33,f7
+ (p17) xma.hu f7 = f6,f33,f7;;
+ br.ctop.dptk LOCAL(mpsetmul_loop);;
+
+dnl return carry
+ getf.sig ret0 = f7;;
+
+ mov ar.lc = saved_lc
+ mov ar.pfs = saved_pfs
+ br.ret.sptk b0
+C_FUNCTION_END(mpsetmul)
+
+
+dnl mpaddmul (IA-64): in0 = word count, in1 = array that is both read (via
+dnl alt) and written (via dst), in2 = source array, in3 = multiplier (f6).
+dnl All pointers start at the last 64-bit word and post-decrement by 8.
+dnl The loop is software-pipelined (ar.ec = 4): load source and destination
+dnl words as FP significands (p16), form low/high halves of
+dnl f6 * source + destination with xma.lu / xma.hu (p17), move both halves
+dnl to integer registers (p18), then add the previous high half with or
+dnl without a carry-in of 1 (p24 / p26) and store (p20).
+dnl ret0 receives the last high half, plus 1 when the carry predicate is set.
+dnl NOTE(review): the pr.rot write below ends with a single ';' whereas the
+dnl other routines in this file end that line with the ';;' stop - confirm
+dnl that this is intended.
+C_FUNCTION_BEGIN(mpaddmul)
+ alloc saved_pfs = ar.pfs,4,4,0,8
+ mov saved_lc = ar.lc
+
+ setf.sig f6 = in3
+ sub size = in0,r0,1;;
+
+dnl adjust addresses
+ shladd dst = size,3,in1
+ shladd src = size,3,in2
+ shladd alt = size,3,in1;;
+
+dnl prepare the rotate-in carry
+ mov r32 = r0
+
+dnl prepare modulo-scheduled loop
+ mov ar.lc = size
+ mov ar.ec = 4
+ mov pr.rot = ((1 << 16) | (1 << 21));
+
+LOCAL(mpaddmul_loop):
+ (p18) getf.sig r37 = f35
+ (p24) add r35 = r38,r35
+ (p17) xma.lu f34 = f6,f33,f37
+ (p18) getf.sig r33 = f39
+ (p26) add r35 = r38,r35,1
+ (p17) xma.hu f38 = f6,f33,f37
+ (p16) ldf8 f32 = [src],-8
+ (p16) ldf8 f36 = [alt],-8
+ ;;
+dnl set carry from this operation
+ (p24) cmp.leu p23,p25 = r38,r35
+ (p26) cmp.ltu p23,p25 = r38,r35
+ (p20) st8 [dst] = r36,-8
+ br.ctop.dptk LOCAL(mpaddmul_loop);;
+
+dnl loop epilogue: final store
+ (p20) st8 [dst] = r36,-8
+
+dnl return carry
+ (p24) add ret0 = r35,r0
+ (p26) add ret0 = r35,r0,1
+
+ mov ar.lc = saved_lc
+ mov ar.pfs = saved_pfs
+ br.ret.sptk b0
+C_FUNCTION_END(mpaddmul)
diff --git a/beecrypt/gas/mpopt.m68k.m4 b/beecrypt/gas/mpopt.m68k.m4
new file mode 100644
index 000000000..0cb2d4ca3
--- /dev/null
+++ b/beecrypt/gas/mpopt.m68k.m4
@@ -0,0 +1,158 @@
+dnl mpopt.m68k.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/m68k.m4)
+
+dnl works
+dnl mpadd (m68k): stack arguments are 4(%sp) = word count, 8(%sp) = array
+dnl that is added into (%a0), 12(%sp) = array that is added (%a1).
+dnl Both pointers are moved one past the last longword and the loop walks
+dnl downwards with pre-decrement addressing, chaining the carry through X.
+dnl X is clear on loop entry because subq.l of a non-zero count does not
+dnl borrow, and adda/clr leave X untouched.
+dnl Returns the final carry (0 or 1) in %d0.
+dnl The loop counter is %d1 with dbf, which counts in the low 16 bits only.
+C_FUNCTION_BEGIN(mpadd)
+ move.l 4(%sp),%d0
+ movea.l 8(%sp),%a0
+ movea.l 12(%sp),%a1
+ move.l %d0,%d1
+ lsl.l #2,%d0
+ subq.l #1,%d1
+ adda.l %d0,%a0
+ adda.l %d0,%a1
+dnl clear the whole register: an unsized clr only clears the low word and
+dnl would leave the upper bits of the byte offset in the return value
+ clr.l %d0
+
+ .align 2
+LOCAL(mpadd_loop):
+ addx.l -(%a1),-(%a0)
+ dbf %d1,LOCAL(mpadd_loop)
+
+dnl %d0 = 0 + 0 + X
+ addx.l %d0,%d0
+ rts
+C_FUNCTION_END(mpadd)
+
+dnl works
+dnl mpsub (m68k): stack arguments are 4(%sp) = word count, 8(%sp) = array
+dnl that is subtracted from (%a0), 12(%sp) = array that is subtracted (%a1).
+dnl Both pointers are moved one past the last longword and the loop walks
+dnl downwards with pre-decrement addressing, chaining the borrow through X.
+dnl X is clear on loop entry because subq.l of a non-zero count does not
+dnl borrow, and adda/clr leave X untouched.
+dnl Returns the final borrow (0 or 1) in %d0.
+dnl The loop counter is %d1 with dbf, which counts in the low 16 bits only.
+C_FUNCTION_BEGIN(mpsub)
+ move.l 4(%sp),%d0
+ movea.l 8(%sp),%a0
+ movea.l 12(%sp),%a1
+ move.l %d0,%d1
+ lsl.l #2,%d0
+ subq.l #1,%d1
+ adda.l %d0,%a0
+ adda.l %d0,%a1
+dnl clear the whole register: an unsized clr only clears the low word and
+dnl would leave the upper bits of the byte offset in the return value
+ clr.l %d0
+
+ .align 2
+LOCAL(mpsub_loop):
+ subx.l -(%a1),-(%a0)
+ dbf %d1,LOCAL(mpsub_loop)
+
+dnl %d0 = 0 + 0 + X
+ addx.l %d0,%d0
+ rts
+C_FUNCTION_END(mpsub)
+
+dnl works
+dnl mpsetmul (m68k): after saving %d2-%d5 (16 bytes) the stack arguments are
+dnl 20(%sp) = word count, 24(%sp) = destination array (%a0),
+dnl 28(%sp) = source array (%a1), 32(%sp) = multiplier (%d2).
+dnl The loop walks both arrays downwards from one past the last longword.
+dnl Each iteration stores the low longword of (source * %d2 + carry);
+dnl %d3 is the running carry and %d4 is a constant zero used with addx.
+dnl On exit %d0 still holds the last high longword, i.e. the final carry.
+C_FUNCTION_BEGIN(mpsetmul)
+ movem.l %d2-%d5,-(%sp)
+ move.l 20(%sp),%d0
+ movea.l 24(%sp),%a0
+ movea.l 28(%sp),%a1
+ move.l 32(%sp),%d2
+ move.l %d0,%d5
+ lsl.l #2,%d0
+ subq.l #1,%d5
+ adda.l %d0,%a0
+ adda.l %d0,%a1
+ clr.l %d3
+ clr.l %d4
+
+ .align 2
+LOCAL(mpsetmul_loop):
+ move.l -(%a1),%d1
+dnl %d0:%d1 = %d1 * %d2 (64-bit product)
+ mulu.l %d2,%d0:%d1
+dnl add carry-in to the low half and propagate X into the high half
+ add.l %d3,%d1
+ addx.l %d4,%d0
+ move.l %d1,-(%a0)
+ move.l %d0,%d3
+ dbf %d5,LOCAL(mpsetmul_loop)
+
+ movem.l (%sp)+,%d2-%d5
+ rts
+C_FUNCTION_END(mpsetmul)
+
+dnl works
+dnl mpaddmul (m68k): after saving %d2-%d5 (16 bytes) the stack arguments are
+dnl 20(%sp) = word count, 24(%sp) = array that is added into (%a0),
+dnl 28(%sp) = source array (%a1), 32(%sp) = multiplier (%d2).
+dnl The loop walks both arrays downwards from one past the last longword.
+dnl Each iteration adds (source * %d2 + carry) into the destination longword;
+dnl %d3 is the running carry and %d4 is a constant zero used with addx.
+dnl On exit %d0 still holds the last high longword, i.e. the final carry.
+C_FUNCTION_BEGIN(mpaddmul)
+ movem.l %d2-%d5,-(%sp)
+ move.l 20(%sp),%d0
+ movea.l 24(%sp),%a0
+ movea.l 28(%sp),%a1
+ move.l 32(%sp),%d2
+ move.l %d0,%d5
+ lsl.l #2,%d0
+ subq.l #1,%d5
+ adda.l %d0,%a0
+ adda.l %d0,%a1
+ clr.l %d3
+ clr.l %d4
+
+ .align 2
+LOCAL(mpaddmul_loop):
+ move.l -(%a1),%d1
+dnl %d0:%d1 = %d1 * %d2 (64-bit product)
+ mulu.l %d2,%d0:%d1
+dnl add carry-in, then the destination word, propagating X into %d0 each time
+ add.l %d3,%d1
+ addx.l %d4,%d0
+ add.l -(%a0),%d1
+ addx.l %d4,%d0
+ move.l %d1,(%a0)
+ move.l %d0,%d3
+ dbf %d5,LOCAL(mpaddmul_loop)
+
+ movem.l (%sp)+,%d2-%d5
+ rts
+C_FUNCTION_END(mpaddmul)
+
+
+dnl mpaddsqrtrc (m68k): after saving %d3-%d5 (12 bytes) the stack arguments
+dnl are 16(%sp) = word count, 20(%sp) = result array (%a0, two longwords per
+dnl source word, hence the double adda), 24(%sp) = source array (%a1).
+dnl For each source longword x, walking downwards, adds the 64-bit value
+dnl x*x + carry into the next two result longwords (low first, then high).
+dnl %d3 is the running carry and %d4 is a constant zero used with addx.
+dnl Returns the final carry in %d0.
+C_FUNCTION_BEGIN(mpaddsqrtrc)
+ movem.l %d3-%d5,-(%sp)
+ move.l 16(%sp),%d0
+ movea.l 20(%sp),%a0
+ movea.l 24(%sp),%a1
+ move.l %d0,%d5
+ lsl.l #2,%d0
+ subq.l #1,%d5
+ adda.l %d0,%a0
+ adda.l %d0,%a0
+ adda.l %d0,%a1
+ clr.l %d3
+ clr.l %d4
+
+LOCAL(mpaddsqrtrc_loop):
+ move.l -(%a1),%d1
+dnl square %d1 into %d0 and %d1
+ mulu.l %d1,%d0:%d1
+dnl low half: add carry-in, then the low result word, propagating X into %d0
+ add.l %d3,%d1
+ addx.l %d4,%d0
+ add.l -(%a0),%d1
+ addx.l %d4,%d0
+ move.l %d1,(%a0)
+dnl high half: add the high result word; %d3 = carry out of that addition
+ clr.l %d3
+ add.l -(%a0),%d0
+ addx.l %d4,%d3
+ move.l %d0,0(%a0)
+ dbf %d5,LOCAL(mpaddsqrtrc_loop)
+
+dnl %d0 holds the last stored word at this point; return the carry instead
+ move.l %d3,%d0
+ movem.l (%sp)+,%d3-%d5
+ rts
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mp32opt.powerpc.S b/beecrypt/gas/mpopt.ppc.m4
index b70f07903..3406f3518 100644
--- a/beecrypt/gas/mp32opt.powerpc.S
+++ b/beecrypt/gas/mpopt.ppc.m4
@@ -1,39 +1,28 @@
-/*
- * mp32opt.powerpc.S
- *
- * Assembler optimized multiprecision integer routines for PowerPC
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "mp32opt.powerpc.S"
-
- .text
+dnl mpopt.ppc.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/ppc.m4)
C_FUNCTION_BEGIN(mpaddw)
-LABEL(mpaddw)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -48,14 +37,12 @@ LOCAL(mpaddw_loop):
stw r6,0(r4)
bdnz LOCAL(mpaddw_loop)
LOCAL(mpaddw_skip):
- /* return the carry */
addze r3,r0
blr
-C_FUNCTION_END(mpaddw, LOCAL(mpaddw_size))
+C_FUNCTION_END(mpaddw)
C_FUNCTION_BEGIN(mpsubw)
-LABEL(mpsubw)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -70,15 +57,13 @@ LOCAL(mpsubw_loop):
stwu r6, -4(r4)
bdnz LOCAL(mpsubw_loop)
LOCAL(mpsubw_skip):
- /* return the carry */
subfe r3,r0,r0
neg r3,r3
blr
-C_FUNCTION_END(mpsubw, LOCAL(mpsubw_size))
+C_FUNCTION_END(mpsubw)
C_FUNCTION_BEGIN(mpadd)
-LABEL(mpadd)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -96,14 +81,12 @@ LOCAL(mpadd_loop):
stwu r6,-4(r4)
bdnz LOCAL(mpadd_loop)
LOCAL(mpadd_skip):
- /* return the carry */
addze r3,r0
blr
-C_FUNCTION_END(mpadd, LOCAL(mpadd_size))
+C_FUNCTION_END(mpadd)
C_FUNCTION_BEGIN(mpsub)
-LABEL(mpsub)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -121,15 +104,13 @@ LOCAL(mpsub_loop):
stwu r6,-4(r4)
bdnz LOCAL(mpsub_loop)
LOCAL(mpsub_skip):
- /* return the carry */
subfe r3,r0,r0
neg r3,r3
blr
-C_FUNCTION_END(mpsub, LOCAL(mpsub_size))
+C_FUNCTION_END(mpsub)
C_FUNCTION_BEGIN(mpmultwo)
-LABEL(mpmultwo)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -144,14 +125,12 @@ LOCAL(mpmultwo_loop):
stwu r6,-4(r4)
bdnz LOCAL(mpmultwo_loop)
LOCAL(mpmultwo_skip):
- /* return the carry */
addze r3,r0
blr
-C_FUNCTION_END(mpmultwo, LOCAL(mpmultwo_size))
+C_FUNCTION_END(mpmultwo)
C_FUNCTION_BEGIN(mpsetmul)
-LABEL(mpsetmul)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -166,11 +145,10 @@ LOCAL(mpsetmul_loop):
stwu r8,-4(r4)
bdnz LOCAL(mpsetmul_loop)
blr
-C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size))
+C_FUNCTION_END(mpsetmul)
C_FUNCTION_BEGIN(mpaddmul)
-LABEL(mpaddmul)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -188,11 +166,10 @@ LOCAL(mpaddmul_loop):
stw r9,0(r4)
bdnz LOCAL(mpaddmul_loop)
blr
-C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size))
+C_FUNCTION_END(mpaddmul)
C_FUNCTION_BEGIN(mpaddsqrtrc)
-LABEL(mpaddsqrtrc)
mtctr r3
slwi r0,r3,2
add r4,r4,r0
@@ -215,4 +192,4 @@ LOCAL(mpaddsqrtrc_loop):
stwu r6,-8(r4)
bdnz LOCAL(mpaddsqrtrc_loop)
blr
-C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size))
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mpopt.ppc64.m4 b/beecrypt/gas/mpopt.ppc64.m4
new file mode 100644
index 000000000..8fdbdb0ae
--- /dev/null
+++ b/beecrypt/gas/mpopt.ppc64.m4
@@ -0,0 +1,195 @@
+dnl mpopt.ppc64.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/ppc64.m4)
+
+
+dnl mpaddw (ppc64): r3 = doubleword count, r4 = array, r5 = single word.
+dnl Adds r5 to the last doubleword of the array and ripples the carry
+dnl upwards through the remaining doublewords (walking towards lower
+dnl addresses). CTR holds the count; r0 is a constant zero.
+dnl Returns the final carry (0 or 1) in r3.
+C_FUNCTION_BEGIN(mpaddw)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ li r0,0
+dnl first word: add r5 and start the carry chain
+ ldu r6,-8(r4)
+ addc r6,r6,r5
+ std r6,0(r4)
+ bdz LOCAL(mpaddw_skip)
+LOCAL(mpaddw_loop):
+dnl remaining words: add only the carry
+ ldu r6,-8(r4)
+ adde r6,r0,r6
+ std r6,0(r4)
+ bdnz LOCAL(mpaddw_loop)
+LOCAL(mpaddw_skip):
+dnl r3 = 0 + CA
+ addze r3,r0
+ blr
+C_FUNCTION_END(mpaddw)
+
+
+dnl mpsubw (ppc64): r3 = doubleword count, r4 = array, r5 = single word.
+dnl Subtracts r5 from the last doubleword of the array and ripples the
+dnl borrow upwards through the remaining doublewords (walking towards lower
+dnl addresses). CTR holds the count; r0 is a constant zero.
+dnl Returns the final borrow (0 or 1) in r3.
+C_FUNCTION_BEGIN(mpsubw)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ li r0,0
+dnl first word: subtract r5 and start the borrow chain (CA = no borrow)
+ ld r6,-8(r4)
+ subfc r6,r5,r6
+ stdu r6,-8(r4)
+ bdz LOCAL(mpsubw_skip)
+LOCAL(mpsubw_loop):
+dnl remaining words: subtract only the borrow
+ ld r6,-8(r4)
+ subfe r6,r0,r6
+ stdu r6, -8(r4)
+ bdnz LOCAL(mpsubw_loop)
+LOCAL(mpsubw_skip):
+dnl subfe yields 0 when CA is set and -1 when it is clear; negate to 0 / 1
+ subfe r3,r0,r0
+ neg r3,r3
+ blr
+C_FUNCTION_END(mpsubw)
+
+
+dnl mpadd (ppc64): r3 = doubleword count, r4 = array that is added into,
+dnl r5 = array that is added. Both pointers are moved one past the last
+dnl doubleword and the routine walks downwards, chaining the carry in CA.
+dnl The first word is handled outside the loop with addc to start the chain.
+dnl Returns the final carry (0 or 1) in r3.
+C_FUNCTION_BEGIN(mpadd)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ add r5,r5,r0
+ li r0,0
+ ld r6,-8(r4)
+ ldu r7,-8(r5)
+ addc r6,r7,r6
+ stdu r6,-8(r4)
+ bdz LOCAL(mpadd_skip)
+LOCAL(mpadd_loop):
+ ld r6,-8(r4)
+ ldu r7,-8(r5)
+ adde r6,r7,r6
+ stdu r6,-8(r4)
+ bdnz LOCAL(mpadd_loop)
+LOCAL(mpadd_skip):
+dnl r3 = 0 + CA
+ addze r3,r0
+ blr
+C_FUNCTION_END(mpadd)
+
+
+dnl mpsub (ppc64): r3 = doubleword count, r4 = array that is subtracted
+dnl from, r5 = array that is subtracted. Both pointers are moved one past the
+dnl last doubleword and the routine walks downwards, chaining the borrow in
+dnl CA. The first word is handled outside the loop with subfc to start the chain.
+dnl Returns the final borrow (0 or 1) in r3.
+C_FUNCTION_BEGIN(mpsub)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ add r5,r5,r0
+ li r0,0
+ ld r6,-8(r4)
+ ldu r7,-8(r5)
+ subfc r6,r7,r6
+ stdu r6,-8(r4)
+ bdz LOCAL(mpsub_skip)
+LOCAL(mpsub_loop):
+ ld r6,-8(r4)
+ ldu r7,-8(r5)
+ subfe r6,r7,r6
+ stdu r6,-8(r4)
+ bdnz LOCAL(mpsub_loop)
+LOCAL(mpsub_skip):
+dnl subfe yields 0 when CA is set and -1 when it is clear; negate to 0 / 1
+ subfe r3,r0,r0
+ neg r3,r3
+ blr
+C_FUNCTION_END(mpsub)
+
+
+dnl mpmultwo (ppc64): r3 = doubleword count, r4 = array.
+dnl Doubles the array in place by adding each doubleword to itself, walking
+dnl from the last doubleword towards lower addresses and chaining the carry
+dnl in CA. The first word uses addc to start the chain.
+dnl Returns the final carry (0 or 1) in r3.
+C_FUNCTION_BEGIN(mpmultwo)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ li r0,0
+ ld r6,-8(r4)
+ addc r6,r6,r6
+ stdu r6,-8(r4)
+ bdz LOCAL(mpmultwo_skip)
+LOCAL(mpmultwo_loop):
+ ld r6,-8(r4)
+ adde r6,r6,r6
+ stdu r6,-8(r4)
+ bdnz LOCAL(mpmultwo_loop)
+LOCAL(mpmultwo_skip):
+dnl r3 = 0 + CA
+ addze r3,r0
+ blr
+C_FUNCTION_END(mpmultwo)
+
+
+dnl mpsetmul (ppc64): r3 = doubleword count, r4 = destination array,
+dnl r5 = source array, r6 = multiplier. Both pointers are moved one past the
+dnl last doubleword and the loop walks downwards.
+dnl Each iteration stores the low 64 bits of (source * r6 + carry).
+dnl r3 is reused as the running carry and holds the final carry on return.
+C_FUNCTION_BEGIN(mpsetmul)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ add r5,r5,r0
+ li r3,0
+LOCAL(mpsetmul_loop):
+ ldu r7,-8(r5)
+dnl r8 = low half of the product plus carry-in
+ mulld r8,r7,r6
+ addc r8,r8,r3
+dnl next carry = high half of the product + CA
+ mulhdu r9,r7,r6
+ addze r3,r9
+ stdu r8,-8(r4)
+ bdnz LOCAL(mpsetmul_loop)
+ blr
+C_FUNCTION_END(mpsetmul)
+
+
+dnl mpaddmul (ppc64): r3 = doubleword count, r4 = array that is added into,
+dnl r5 = source array, r6 = multiplier. Both pointers are moved one past the
+dnl last doubleword and the loop walks downwards.
+dnl Each iteration adds (source * r6 + carry) into the destination doubleword.
+dnl r3 is reused as the running carry and holds the final carry on return.
+C_FUNCTION_BEGIN(mpaddmul)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ add r5,r5,r0
+ li r3,0
+LOCAL(mpaddmul_loop):
+ ldu r8,-8(r5)
+ ldu r7,-8(r4)
+dnl r9 = low half of the product plus carry-in
+ mulld r9,r8,r6
+ addc r9,r9,r3
+dnl r3 = high half of the product + CA
+ mulhdu r10,r8,r6
+ addze r3,r10
+dnl add the destination word; fold that carry into r3 as well
+ addc r9,r9,r7
+ addze r3,r3
+ std r9,0(r4)
+ bdnz LOCAL(mpaddmul_loop)
+ blr
+C_FUNCTION_END(mpaddmul)
+
+
+dnl mpaddsqrtrc (ppc64): r3 = doubleword count, r4 = result array (two
+dnl doublewords per source word, hence the scaled count is added twice),
+dnl r5 = source array.
+dnl For each source doubleword x, walking downwards, adds the 128-bit value
+dnl x*x + carry into the result pair at -16(r4) (high) and -8(r4) (low).
+dnl r3 is reused as the running carry and holds the final carry on return.
+C_FUNCTION_BEGIN(mpaddsqrtrc)
+ mtctr r3
+ sldi r0,r3,3
+ add r4,r4,r0
+ add r5,r5,r0
+ add r4,r4,r0
+ li r3,0
+LOCAL(mpaddsqrtrc_loop):
+ ldu r0,-8(r5)
+ ld r6,-16(r4)
+ ld r7,-8(r4)
+dnl r8:r9 = x*x + carry-in
+ mulld r9,r0,r0
+ addc r9,r9,r3
+ mulhdu r8,r0,r0
+ addze r8,r8
+ li r3,0
+dnl 128-bit add into the result pair; r3 = carry out of the high half
+ addc r7,r7,r9
+ adde r6,r6,r8
+ addze r3,r3
+ std r7,-8(r4)
+ stdu r6,-16(r4)
+ bdnz LOCAL(mpaddsqrtrc_loop)
+ blr
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mpopt.sparcv8.m4 b/beecrypt/gas/mpopt.sparcv8.m4
new file mode 100644
index 000000000..f21b35614
--- /dev/null
+++ b/beecrypt/gas/mpopt.sparcv8.m4
@@ -0,0 +1,90 @@
+dnl mpopt.sparcv8.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/sparc.m4)
+
+
+dnl mpsetmul (SPARC v8): %o0 = word count, %o1 = destination array,
+dnl %o2 = source array, %o3 = multiplier.
+dnl %g1 is a byte offset that starts at count*4 and steps down by 4; the
+dnl source pointer is biased by -4 so [%o2+%g1] addresses the current word.
+dnl %o0 is reused as the running carry and holds the final carry on return.
+dnl The loop tests the offset only after the first iteration, so the count
+dnl must be non-zero on entry.
+C_FUNCTION_BEGIN(mpsetmul)
+ sll %o0,2,%g1
+ dec 4,%o2
+ clr %o0
+LOCAL(mpsetmul_loop):
+ ld [%o2+%g1],%g2
+dnl %g2 = low 32 bits of the product, %y (copied to %g3) = high 32 bits
+ umul %o3,%g2,%g2
+ rd %y,%g3
+dnl low word += carry-in; next carry = high word + C
+ addcc %o0,%g2,%g2
+ addx %g0,%g3,%o0
+ deccc 4,%g1
+ bnz LOCAL(mpsetmul_loop)
+dnl delay slot: store with the already decremented offset
+ st %g2,[%o1+%g1]
+ retl
+ nop
+C_FUNCTION_END(mpsetmul)
+
+
+dnl mpaddmul (SPARC v8): %o0 = word count, %o1 = array that is added into,
+dnl %o2 = source array, %o3 = multiplier.
+dnl %g1 is a byte offset that starts at count*4 and steps down by 4. Loads
+dnl use pointers biased by -4; the store sits in the branch delay slot, after
+dnl the offset has been decremented, so it uses the unbiased copy in %o4.
+dnl %o0 is reused as the running carry and holds the final carry on return.
+dnl The loop tests the offset only after the first iteration, so the count
+dnl must be non-zero on entry.
+C_FUNCTION_BEGIN(mpaddmul)
+ sll %o0,2,%g1
+ mov %o1,%o4
+ dec 4,%o1
+ dec 4,%o2
+ clr %o0
+LOCAL(mpaddmul_loop):
+ ld [%o2+%g1],%g2
+ ld [%o1+%g1],%g3
+dnl %g2 = low 32 bits of the product, %g4 = high 32 bits
+ umul %o3,%g2,%g2
+ rd %y,%g4
+dnl low word += carry-in, then += destination word; carries go to the high word
+ addcc %o0,%g2,%g2
+ addx %g0,%g4,%g4
+ addcc %g2,%g3,%g2
+ addx %g0,%g4,%o0
+ deccc 4,%g1
+ bnz LOCAL(mpaddmul_loop)
+dnl delay slot: store with the already decremented offset
+ st %g2,[%o4+%g1]
+ retl
+ nop
+C_FUNCTION_END(mpaddmul)
+
+
+dnl mpaddsqrtrc (SPARC v8): %o0 = word count, %o1 = result array (two words
+dnl per source word), %o2 = source array.
+dnl %o1 is advanced to the last result pair and steps down by 8 per
+dnl iteration; %g1 is the source byte offset, stepping down by 4.
+dnl For each source word x, adds the 64-bit value x*x + carry into the result
+dnl pair loaded with ldd (%o4 = high word, %o5 = low word).
+dnl %o0 is reused as the running carry and holds the final carry on return.
+dnl The loop tests the offset only after the first iteration, so the count
+dnl must be non-zero on entry.
+C_FUNCTION_BEGIN(mpaddsqrtrc)
+ sll %o0,2,%g1
+ add %o1,%g1,%o1
+ dec 4,%o2
+ add %o1,%g1,%o1
+ dec 8,%o1
+ clr %o0
+LOCAL(mpaddsqrtrc_loop):
+ ld [%o2+%g1],%g2
+ ldd [%o1],%o4
+dnl %g3 = low 32 bits of x*x, %g2 = high 32 bits
+ umul %g2,%g2,%g3
+ rd %y,%g2
+dnl add the square into the pair; %o3 = carry out of the high word
+ addcc %o5,%g3,%o5
+ addxcc %o4,%g2,%o4
+ addx %g0,%g0,%o3
+dnl add carry-in into the pair; next carry = %o3 + carry out of the high word
+ addcc %o5,%o0,%o5
+ addxcc %o4,%g0,%o4
+ addx %o3,%g0,%o0
+ std %o4,[%o1]
+ deccc 4,%g1
+ bnz LOCAL(mpaddsqrtrc_loop)
+dnl delay slot: step the result pointer down to the next pair
+ sub %o1,8,%o1
+ retl
+ nop
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mp32opt.sparcv9.S b/beecrypt/gas/mpopt.sparcv8plus.m4
index 582a68641..f021cfa6c 100644
--- a/beecrypt/gas/mp32opt.sparcv9.S
+++ b/beecrypt/gas/mpopt.sparcv8plus.m4
@@ -1,41 +1,28 @@
-/*
- * mp32opt.sparcv9.S
- *
- * Assembler optimized multiprecision integer routines for UltraSparc (64 bits instructions, will run on 32 bit OS)
- *
- * Compile target is GNU Assembler, Sun Solaris Assembler
- *
- * Copyright (c) 1998, 1999, 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "mp32opt.sparcv9.S"
-
- .text
+dnl mpopt.sparcv8plus.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/sparc.m4)
-C_FUNCTION_BEGIN(mpaddw)
-LABEL(mpaddw)
-
- .register %g2,#scratch
+C_FUNCTION_BEGIN(mpaddw)
sll %o0,2,%g1
dec 4,%g1
clr %o0
@@ -53,14 +40,10 @@ LOCAL(mpaddw_loop):
LOCAL(mpaddw_skip):
retl
movcs %icc,1,%o0
-C_FUNCTION_END(mpaddw, LOCAL(mpaddw_size))
+C_FUNCTION_END(mpaddw)
C_FUNCTION_BEGIN(mpsubw)
-LABEL(mpsubw)
-
- .register %g2,#scratch
-
sll %o0,2,%g1
dec 4,%g1
clr %o0
@@ -78,15 +61,10 @@ LOCAL(mpsubw_loop):
LOCAL(mpsubw_skip):
retl
movcs %icc,1,%o0
-C_FUNCTION_END(mpsubw, LOCAL(mpsubw_size))
+C_FUNCTION_END(mpsubw)
C_FUNCTION_BEGIN(mpadd)
-LABEL(mpadd)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
sll %o0,2,%g1
dec 4,%g1
addcc %g0,%g0,%o0
@@ -99,15 +77,10 @@ LOCAL(mpadd_loop):
dec 4,%g1
retl
movcs %icc,1,%o0
-C_FUNCTION_END(mpadd, LOCAL(mpadd_size))
+C_FUNCTION_END(mpadd)
C_FUNCTION_BEGIN(mpsub)
-LABEL(mpsub)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
sll %o0,2,%g1
dec 4,%g1
addcc %g0,%g0,%o0
@@ -120,15 +93,10 @@ LOCAL(mpsub_loop):
dec 4,%g1
retl
movcs %icc,1,%o0
-C_FUNCTION_END(mpsub, LOCAL(mpsub_size))
+C_FUNCTION_END(mpsub)
C_FUNCTION_BEGIN(mpmultwo)
-LABEL(mpmultwo)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
sll %o0,2,%g1
dec 4,%g1
addcc %g0,%g0,%o0
@@ -140,15 +108,10 @@ LOCAL(mpmultwo_loop):
dec 4,%g1
retl
movcs %icc,1,%o0
-C_FUNCTION_END(mpmultwo, LOCAL(mpmultwo_size))
+C_FUNCTION_END(mpmultwo)
C_FUNCTION_BEGIN(mpsetmul)
-LABEL(mpsetmul)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
sll %o0,2,%g1
dec 4,%g1
clr %o0
@@ -162,15 +125,10 @@ LOCAL(mpsetmul_loop):
dec 4,%g1
retl
srlx %o0,32,%o0
-C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size))
+C_FUNCTION_END(mpsetmul)
C_FUNCTION_BEGIN(mpaddmul)
-LABEL(mpaddmul)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
sll %o0,2,%g1
dec 4,%g1
clr %o0
@@ -186,22 +144,16 @@ LOCAL(mpaddmul_loop):
dec 4,%g1
retl
srlx %o0,32,%o0
-C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size))
+C_FUNCTION_END(mpaddmul)
C_FUNCTION_BEGIN(mpaddsqrtrc)
-LABEL(mpaddsqrtrc)
-
- .register %g2,#scratch
- .register %g3,#scratch
-
sll %o0,2,%g1
dec 4,%g1
add %o1,%g1,%o1
add %o1,%g1,%o1
clr %o0
LOCAL(mpaddsqrtrc_loop):
- /* load from o1 into g4 as xuint; simulate xuint carry by doing an xuint comparison; carry if result smaller than initial value */
lduw [%o2+%g1],%g2
ldx [%o1],%g4
mulx %g2,%g2,%g2
@@ -216,4 +168,4 @@ LOCAL(mpaddsqrtrc_loop):
dec 4,%g1
retl
nop
-C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size))
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/mp32opt.i386.S b/beecrypt/gas/mpopt.x86.m4
index 967faa82b..e1097999d 100644
--- a/beecrypt/gas/mp32opt.i386.S
+++ b/beecrypt/gas/mpopt.x86.m4
@@ -1,39 +1,28 @@
-/*
- * mp32opt.i386.S
- *
- * Assembler optimized multiprecision integer routines for Intel 386 and higher
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 1998, 1999, 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "mp32opt.i386.S"
-
- .text
+dnl mpopt.x86.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/x86.m4)
C_FUNCTION_BEGIN(mpzero)
-LABEL(mpzero)
pushl %edi
movl 8(%esp),%ecx
@@ -44,11 +33,10 @@ LABEL(mpzero)
popl %edi
ret
-C_FUNCTION_END(mpzero, LOCAL(mpzero_size))
+C_FUNCTION_END(mpzero)
C_FUNCTION_BEGIN(mpfill)
-LABEL(mpfill)
pushl %edi
movl 8(%esp),%ecx
@@ -59,32 +47,29 @@ LABEL(mpfill)
popl %edi
ret
-C_FUNCTION_END(mpfill, LOCAL(mpfill_size))
+C_FUNCTION_END(mpfill)
C_FUNCTION_BEGIN(mpeven)
-LABEL(mpeven)
movl 4(%esp),%ecx
movl 8(%esp),%eax
movl -4(%eax,%ecx,4),%eax
notl %eax
- andl $1,%eax
+ andl `$'1,%eax
ret
-C_FUNCTION_END(mpeven, LOCAL(mpeven_size))
+C_FUNCTION_END(mpeven)
C_FUNCTION_BEGIN(mpodd)
-LABEL(mpodd)
movl 4(%esp),%ecx
movl 8(%esp),%eax
movl -4(%eax,%ecx,4),%eax
- andl $1,%eax
+ andl `$'1,%eax
ret
-C_FUNCTION_END(mpodd, LOCAL(mpodd_size))
+C_FUNCTION_END(mpodd)
C_FUNCTION_BEGIN(mpaddw)
-LABEL(mpaddw)
pushl %edi
movl 8(%esp),%ecx
@@ -110,11 +95,10 @@ LOCAL(mpaddw_skip):
popl %edi
ret
-C_FUNCTION_END(mpaddw, LOCAL(mpaddw_size))
+C_FUNCTION_END(mpaddw)
C_FUNCTION_BEGIN(mpsubw)
-LABEL(mpsubw)
pushl %edi
movl 8(%esp),%ecx
@@ -139,11 +123,10 @@ LOCAL(mpsubw_skip):
negl %eax
popl %edi
ret
-C_FUNCTION_END(mpsubw, LOCAL(mpsubw_size))
+C_FUNCTION_END(mpsubw)
C_FUNCTION_BEGIN(mpadd)
-LABEL(mpadd)
pushl %edi
pushl %esi
@@ -157,7 +140,9 @@ LABEL(mpadd)
.align 4
LOCAL(mpadd_loop):
movl (%esi,%ecx,4),%eax
- adcl %eax,(%edi,%ecx,4)
+ movl (%edi,%ecx,4),%edx
+ adcl %eax,%edx
+ movl %edx,(%edi,%ecx,4)
decl %ecx
jns LOCAL(mpadd_loop)
@@ -167,11 +152,10 @@ LOCAL(mpadd_loop):
popl %esi
popl %edi
ret
-C_FUNCTION_END(mpadd, LOCAL(mpadd_size))
+C_FUNCTION_END(mpadd)
C_FUNCTION_BEGIN(mpsub)
-LABEL(mpsub)
pushl %edi
pushl %esi
@@ -185,7 +169,9 @@ LABEL(mpsub)
.align 4
LOCAL(mpsub_loop):
movl (%esi,%ecx,4),%eax
- sbbl %eax,(%edi,%ecx,4)
+ movl (%edi,%ecx,4),%edx
+ sbbl %eax,%edx
+ movl %edx,(%edi,%ecx,4)
decl %ecx
jns LOCAL(mpsub_loop)
@@ -194,11 +180,10 @@ LOCAL(mpsub_loop):
popl %esi
popl %edi
ret
-C_FUNCTION_END(mpsub, LOCAL(mpsub_size))
+C_FUNCTION_END(mpsub)
C_FUNCTION_BEGIN(mpdivtwo)
-LABEL(mpdivtwo)
pushl %edi
movl 8(%esp),%ecx
@@ -206,32 +191,33 @@ LABEL(mpdivtwo)
leal (%edi,%ecx,4),%edi
negl %ecx
- clc
+ xorl %eax,%eax
.align 4
LOCAL(mpdivtwo_loop):
- rcrl $1,(%edi,%ecx,4)
+ rcrl `$'1,(%edi,%ecx,4)
inc %ecx
jnz LOCAL(mpdivtwo_loop)
popl %edi
ret
-C_FUNCTION_END(mpdivtwo, LOCAL(mpdivtwo_size))
+C_FUNCTION_END(mpdivtwo)
C_FUNCTION_BEGIN(mpmultwo)
-LABEL(mpmultwo)
pushl %edi
movl 8(%esp),%ecx
movl 12(%esp),%edi
- clc
+ xorl %edx,%edx
decl %ecx
.align 4
LOCAL(mpmultwo_loop):
- rcll $1,(%edi,%ecx,4)
+ movl (%edi,%ecx,4),%eax
+ adcl %eax,%eax
+ movl %eax,(%edi,%ecx,4)
decl %ecx
jns LOCAL(mpmultwo_loop)
@@ -240,13 +226,34 @@ LOCAL(mpmultwo_loop):
popl %edi
ret
-C_FUNCTION_END(mpmultwo, LOCAL(mpmultwo_size))
+C_FUNCTION_END(mpmultwo)
C_FUNCTION_BEGIN(mpsetmul)
-LABEL(mpsetmul)
pushl %edi
pushl %esi
+dnl SSE2 variant, selected when USE_SSE2 is defined: each 32x32->64 bit
+dnl product is formed with pmuludq on the MMX registers, so this branch does
+dnl not save or use ebx and ebp as the other branch does.
+dnl With edi and esi already pushed: 12(%esp) is the word count, 16(%esp) the
+dnl destination array, 20(%esp) the source array, 24(%esp) the 32-bit
+dnl multiplier. Words are handled from index count-1 down to 0 and the last
+dnl carry word is returned in eax.
+ifdef(`USE_SSE2',`
+ movl 12(%esp),%ecx
+ movl 16(%esp),%edi
+ movl 20(%esp),%esi
+ movd 24(%esp),%mm1
+
+dnl mm0 is the 64-bit accumulator: it starts at zero, its low half becomes the
+dnl stored word and its high half is the carry into the next word.
+ pxor %mm0,%mm0
+ decl %ecx
+
+ .align 4
+LOCAL(mpsetmul_loop):
+ movd (%esi,%ecx,4),%mm2
+ pmuludq %mm1,%mm2
+ paddq %mm2,%mm0
+ movd %mm0,(%edi,%ecx,4)
+dnl jns tests the sign flag left by decl; the psrlq in between is an MMX
+dnl operation and leaves EFLAGS alone.
+ decl %ecx
+ psrlq `$'32,%mm0
+ jns LOCAL(mpsetmul_loop)
+
+dnl return the remaining carry and leave MMX state so x87 code can run again
+ movd %mm0,%eax
+ emms
+',`
pushl %ebx
pushl %ebp
@@ -264,7 +271,7 @@ LOCAL(mpsetmul_loop):
movl (%esi,%ecx,4),%eax
mull %ebp
addl %ebx,%eax
- adcl $0,%edx
+ adcl `$'0,%edx
movl %eax,(%edi,%ecx,4)
decl %ecx
jns LOCAL(mpsetmul_loop)
@@ -273,16 +280,40 @@ LOCAL(mpsetmul_loop):
popl %ebp
popl %ebx
+')
popl %esi
popl %edi
ret
-C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size))
+C_FUNCTION_END(mpsetmul)
C_FUNCTION_BEGIN(mpaddmul)
-LABEL(mpaddmul)
pushl %edi
pushl %esi
+dnl SSE2 variant, selected when USE_SSE2 is defined: multiplies each source
+dnl word by the multiplier with pmuludq and adds the product into the
+dnl destination word, propagating the carry in a 64-bit MMX accumulator.
+dnl With edi and esi already pushed: 12(%esp) is the word count, 16(%esp) the
+dnl destination array (read and written), 20(%esp) the source array,
+dnl 24(%esp) the 32-bit multiplier. Words are handled from index count-1 down
+dnl to 0 and the last carry word is returned in eax.
+ifdef(`USE_SSE2',`
+ movl 12(%esp),%ecx
+ movl 16(%esp),%edi
+ movl 20(%esp),%esi
+ movd 24(%esp),%mm1
+
+dnl mm0 is the running carry, zero to begin with
+ pxor %mm0,%mm0
+ decl %ecx
+
+ .align 4
+LOCAL(mpaddmul_loop):
+ movd (%esi,%ecx,4),%mm2
+ movd (%edi,%ecx,4),%mm3
+ pmuludq %mm1,%mm2
+dnl mm3 = destination word + product, then folded into the carry accumulator
+ paddq %mm2,%mm3
+ paddq %mm3,%mm0
+ movd %mm0,(%edi,%ecx,4)
+dnl jns tests the sign flag left by decl; psrlq does not modify EFLAGS.
+dnl The immediate uses the same quoted dollar form as the rest of this file.
+ decl %ecx
+ psrlq `$'32,%mm0
+ jns LOCAL(mpaddmul_loop)
+
+dnl return the remaining carry and leave MMX state so x87 code can run again
+ movd %mm0,%eax
+ emms
+',`
pushl %ebx
pushl %ebp
@@ -311,16 +342,43 @@ LOCAL(mpaddmul_loop):
popl %ebp
popl %ebx
+')
popl %esi
popl %edi
ret
-C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size))
+C_FUNCTION_END(mpaddmul)
C_FUNCTION_BEGIN(mpaddsqrtrc)
-LABEL(mpaddsqrtrc)
pushl %edi
pushl %esi
+dnl SSE2 variant, selected when USE_SSE2 is defined: squares each source word
+dnl with pmuludq and adds the 64-bit square into a pair of destination words.
+dnl With edi and esi already pushed: 12(%esp) is the source word count,
+dnl 16(%esp) the destination array, 20(%esp) the source array. Source word i
+dnl is added into destination words 2i (high half) and 2i+1 (low half), so
+dnl the destination is indexed with scale 8. Words are handled from index
+dnl count-1 down to 0 and the last carry word is returned in eax.
+ifdef(`USE_SSE2',`
+ movl 12(%esp),%ecx
+ movl 16(%esp),%edi
+ movl 20(%esp),%esi
+
+dnl mm0 is the running carry, zero to begin with
+ pxor %mm0,%mm0
+ decl %ecx
+
+ .align 4
+LOCAL(mpaddsqrtrc_loop):
+ movd (%esi,%ecx,4),%mm2
+ pmuludq %mm2,%mm2
+ movd 4(%edi,%ecx,8),%mm3
+ paddq %mm2,%mm3
+ movd 0(%edi,%ecx,8),%mm4
+dnl low destination word: carry + old low word + square, low 32 bits stored
+ paddq %mm3,%mm0
+ movd %mm0,4(%edi,%ecx,8)
+ psrlq `$'32,%mm0
+dnl high destination word: what is left + old high word, low 32 bits stored
+ paddq %mm4,%mm0
+ movd %mm0,0(%edi,%ecx,8)
+dnl jns tests the sign flag left by decl; psrlq does not modify EFLAGS.
+dnl Both shift immediates use the same quoted dollar form as the rest of
+dnl this file.
+ decl %ecx
+ psrlq `$'32,%mm0
+ jns LOCAL(mpaddsqrtrc_loop)
+
+dnl return the remaining carry and leave MMX state so x87 code can run again
+ movd %mm0,%eax
+ emms
+',`
pushl %ebx
movl 16(%esp),%ecx
@@ -346,7 +404,8 @@ LOCAL(mpaddsqrtrc_loop):
movl %ebx,%eax
popl %ebx
+')
popl %esi
popl %edi
ret
-C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size))
+C_FUNCTION_END(mpaddsqrtrc)
diff --git a/beecrypt/gas/ppc.m4 b/beecrypt/gas/ppc.m4
new file mode 100644
index 000000000..f8e9865db
--- /dev/null
+++ b/beecrypt/gas/ppc.m4
@@ -0,0 +1,85 @@
+dnl ppc.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl Platform-specific macro setup for 32-bit PowerPC sources.
+dnl
+dnl When the first three characters of ASM_OS are aix:
+dnl   C_FUNCTION_BEGIN(name) is replaced by a version that emits a csect
+dnl     name[DS] holding the address of .name[PR], TOC[tc0] and 0, and then
+dnl     opens the code csect .name[PR].
+dnl   C_FUNCTION_END is replaced by a version that emits a .tbtag directive.
+dnl   LOAD_ADDRESS(sym, reg) loads reg from the TOC slot Lsym through r2.
+dnl   EXTERNAL_VARIABLE(sym) creates that TOC slot.
+dnl   r0..r31 are defined with .set as the numbers 0..31.
+dnl Otherwise:
+dnl   LOAD_ADDRESS(sym, reg) builds the address with lis/la from the @ha and
+dnl     @l halves of sym, and EXTERNAL_VARIABLE expands to nothing.
+dnl
+dnl NOTE(review): the undefine calls imply C_FUNCTION_BEGIN and C_FUNCTION_END
+dnl already have generic definitions from an earlier include - confirm.
+ifelse(substr(ASM_OS,0,3),aix,`
+undefine(`C_FUNCTION_BEGIN')
+define(C_FUNCTION_BEGIN,`
+ .toc
+ .globl $1[DS]
+ .csect $1[DS]
+ .long .$1[PR], TOC[tc0], 0
+ .toc
+ .globl .$1[PR]
+ .csect .$1[PR]
+')
+undefine(`C_FUNCTION_END')
+define(C_FUNCTION_END,`
+ .tbtag 0x0,0xc,0x0,0x0,0x0,0x0,0x0,0x0
+')
+define(LOAD_ADDRESS,`
+ lwz $2,L$1(r2)
+')
+define(EXTERNAL_VARIABLE,`
+ .toc
+L$1:
+ .tc $1[TC],$1[RW]
+')
+
+ .machine "ppc"
+
+dnl symbolic names for the 32 general purpose register numbers
+ .set r0,0
+ .set r1,1
+ .set r2,2
+ .set r3,3
+ .set r4,4
+ .set r5,5
+ .set r6,6
+ .set r7,7
+ .set r8,8
+ .set r9,9
+ .set r10,10
+ .set r11,11
+ .set r12,12
+ .set r13,13
+ .set r14,14
+ .set r15,15
+ .set r16,16
+ .set r17,17
+ .set r18,18
+ .set r19,19
+ .set r20,20
+ .set r21,21
+ .set r22,22
+ .set r23,23
+ .set r24,24
+ .set r25,25
+ .set r26,26
+ .set r27,27
+ .set r28,28
+ .set r29,29
+ .set r30,30
+ .set r31,31
+',`
+define(LOAD_ADDRESS,`
+ lis $2,$1@ha
+ la $2,$1@l($2)
+')
+define(EXTERNAL_VARIABLE)
+')
diff --git a/beecrypt/gas/ppc64.m4 b/beecrypt/gas/ppc64.m4
new file mode 100644
index 000000000..38bec0e47
--- /dev/null
+++ b/beecrypt/gas/ppc64.m4
@@ -0,0 +1,71 @@
+dnl ppc64.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl Platform-specific macro setup for 64-bit PowerPC sources. Nothing is
+dnl defined here unless the first three characters of ASM_OS are aix.
+dnl
+dnl On aix:
+dnl   C_FUNCTION_BEGIN(name) is replaced by a version that emits a csect
+dnl     name[DS] holding, as .llong values, the address of .name[PR],
+dnl     TOC[tc0] and 0, and then opens the code csect .name[PR].
+dnl   C_FUNCTION_END is replaced by a version that emits a .tbtag directive.
+dnl   r0..r31 are defined with .set as the numbers 0..31.
+dnl
+dnl NOTE(review): the undefine calls imply C_FUNCTION_BEGIN and C_FUNCTION_END
+dnl already have generic definitions from an earlier include - confirm.
+ifelse(substr(ASM_OS,0,3),aix,`
+undefine(`C_FUNCTION_BEGIN')
+define(C_FUNCTION_BEGIN,`
+ .toc
+ .globl $1[DS]
+ .csect $1[DS]
+ .llong .$1[PR], TOC[tc0], 0
+ .toc
+ .globl .$1[PR]
+ .csect .$1[PR]
+')
+undefine(`C_FUNCTION_END')
+define(C_FUNCTION_END,`
+ .tbtag 0x0,0xc,0x0,0x0,0x0,0x0,0x0,0x0
+')
+
+ .machine "ppc64"
+
+dnl symbolic names for the 32 general purpose register numbers
+ .set r0,0
+ .set r1,1
+ .set r2,2
+ .set r3,3
+ .set r4,4
+ .set r5,5
+ .set r6,6
+ .set r7,7
+ .set r8,8
+ .set r9,9
+ .set r10,10
+ .set r11,11
+ .set r12,12
+ .set r13,13
+ .set r14,14
+ .set r15,15
+ .set r16,16
+ .set r17,17
+ .set r18,18
+ .set r19,19
+ .set r20,20
+ .set r21,21
+ .set r22,22
+ .set r23,23
+ .set r24,24
+ .set r25,25
+ .set r26,26
+ .set r27,27
+ .set r28,28
+ .set r29,29
+ .set r30,30
+ .set r31,31
+')
diff --git a/beecrypt/gas/sha1opt.i586.S b/beecrypt/gas/sha1opt.i586.S
deleted file mode 100644
index 31e39822b..000000000
--- a/beecrypt/gas/sha1opt.i586.S
+++ /dev/null
@@ -1,292 +0,0 @@
-/*
- * fips180opt.i586.S
- *
- * Assembler optimized SHA-1 routines for Intel Pentium processors
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2000 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "fips180opt.i586.S"
-
- .text
-
-#define K00 0x5a827999
-#define K20 0x6ed9eba1
-#define K40 0x8f1bbcdc
-#define K60 0xca62c1d6
-
-#define PARAM_H 0
-#define PARAM_DATA 20
-
- .macro subround1 b c d e w
- movl \c,%ecx
- movl \b,%ebx
- movl \d,%edx
- roll $5,%eax
- xorl %edx,%ecx
- addl \e,%eax
- andl %ebx,%ecx
- addl $K00,%eax
- rorl $2,%ebx
- addl \w(%esi,%edi),%eax
- xorl %edx,%ecx
- movl %ebx,\b
- addl %ecx,%eax
- movl %eax,\e
- .endm
-
- .macro subround2 b c d e w
- movl \c,%ecx
- movl \b,%ebx
- roll $5,%eax
- xorl %ebx,%ecx
- addl \e,%eax
- xorl \d,%ecx
- addl $K20,%eax
- rorl $2,%ebx
- addl \w(%esi,%edi),%eax
- movl %ebx,\b
- addl %ecx,%eax
- movl %eax,\e
- .endm
-
- .macro subround3 b c d e w
- movl \c,%ecx
- roll $5,%eax
- movl \b,%ebx
- movl %ecx,%edx
- addl \e,%eax
- orl %ebx,%ecx
- andl %ebx,%edx
- andl \d,%ecx
- addl $K40,%eax
- orl %edx,%ecx
- addl \w(%esi,%edi),%eax
- rorl $2,%ebx
- addl %ecx,%eax
- movl %ebx,\b
- movl %eax,\e
- .endm
-
- .macro subround4 b c d e w
- movl \c,%ecx
- movl \b,%ebx
- roll $5,%eax
- xorl %ebx,%ecx
- addl \e,%eax
- xorl \d,%ecx
- addl $K60,%eax
- rorl $2,%ebx
- addl \w(%esi,%edi),%eax
- movl %ebx,\b
- addl %ecx,%eax
- movl %eax,\e
- .endm
-
-C_FUNCTION_BEGIN(sha1Process)
-LABEL(sha1Process)
- pushl %edi
- pushl %esi
- pushl %ebx
- pushl %ebp
-
- movl 20(%esp),%esi
- subl $20,%esp
- leal PARAM_DATA(%esi),%edi
- movl %esp,%ebp
-
- movl $4,%ecx
-LOCAL(0):
- movl (%esi,%ecx,4),%edx
- movl %edx,(%ebp,%ecx,4)
- decl %ecx
- jns LOCAL(0)
-
- movl $15,%ecx
- xorl %eax,%eax
-
- .p2align 2
-LOCAL(1):
- movl (%edi,%ecx,4),%edx
- bswap %edx
- mov %edx,(%edi,%ecx,4)
- decl %ecx
- jns LOCAL(1)
-
- leal PARAM_DATA(%esi),%edi
- movl $16,%ecx
-
- .p2align 2
-LOCAL(2):
- movl 52(%edi),%eax
- movl 56(%edi),%ebx
- xorl 32(%edi),%eax
- xorl 36(%edi),%ebx
- xorl 8(%edi),%eax
- xorl 12(%edi),%ebx
- xorl (%edi),%eax
- xorl 4(%edi),%ebx
- roll $1,%eax
- roll $1,%ebx
- movl %eax,64(%edi)
- movl %ebx,68(%edi)
- movl 60(%edi),%eax
- movl 64(%edi),%ebx
- xorl 40(%edi),%eax
- xorl 44(%edi),%ebx
- xorl 16(%edi),%eax
- xorl 20(%edi),%ebx
- xorl 8(%edi),%eax
- xorl 12(%edi),%ebx
- roll $1,%eax
- roll $1,%ebx
- movl %eax,72(%edi)
- movl %ebx,76(%edi)
- addl $16,%edi
- decl %ecx
- jnz LOCAL(2)
-
- movl $PARAM_DATA,%edi
-
- movl (%ebp),%eax
-LOCAL(01_20):
- subround1 4(%ebp), 8(%ebp), 12(%ebp), 16(%ebp), 0
- subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround1 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround1 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround1 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
-
-LOCAL(21_40):
- subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
-
-LOCAL(41_60):
- subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
-
-LOCAL(61_80):
- subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- addl $20,%edi
- subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0
- subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4
- subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8
- subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12
- subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16
- /* addl $20,%edi */
-
- movl $4,%ecx
-
- .p2align 2
-LOCAL(3):
- movl (%ebp,%ecx,4),%eax
- addl %eax,(%esi,%ecx,4)
- decl %ecx
- jns LOCAL(3)
-
- addl $20,%esp
- popl %ebp
- popl %ebx
- popl %esi
- popl %edi
- ret
-C_FUNCTION_END(sha1Process, LOCAL(sha1Process_size))
diff --git a/beecrypt/gas/sha1opt.i586.m4 b/beecrypt/gas/sha1opt.i586.m4
new file mode 100644
index 000000000..a8d8ec6b5
--- /dev/null
+++ b/beecrypt/gas/sha1opt.i586.m4
@@ -0,0 +1,280 @@
+dnl sha1opt.i586.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+include(config.m4)
+include(ASM_SRCDIR/x86.m4)
+
+dnl Additive round constants, used by subround1, subround2, subround3 and
+dnl subround4 respectively.
+ .equ K00, 0x5a827999
+ .equ K20, 0x6ed9eba1
+ .equ K40, 0x8f1bbcdc
+ .equ K60, 0xca62c1d6
+
+dnl Byte offsets into the parameter block handed to sha1Process: the five
+dnl state words sit at offset 0 and the word buffer follows at offset 20.
+dnl PARAM_H is not referenced by the code in this file, which uses offset 0
+dnl implicitly.
+ .equ PARAM_H, 0
+ .equ PARAM_DATA, 20
+
+dnl subround1(b, c, d, e, w): one step of rounds 1-20.
+dnl   b, c, d, e are operands (register or memory) holding state words; w is
+dnl   the byte displacement of the schedule word relative to esi+edi.
+dnl   On entry eax holds a. On exit both eax and operand e hold
+dnl     rol(a,5) + e + K00 + word[w] + (((c ^ d) & b) ^ d)
+dnl   and operand b has been rotated right by 2 (the same value stays in ebx,
+dnl   which is why callers may pass %ebx as c on the following step).
+dnl   Operand c is read before ebx is reloaded. Clobbers ebx, ecx, edx.
+define(`subround1',`
+ movl $2,%ecx
+ movl $1,%ebx
+ movl $3,%edx
+ roll `$'5,%eax
+ xorl %edx,%ecx
+ addl $4,%eax
+ andl %ebx,%ecx
+ addl `$'K00,%eax
+ rorl `$'2,%ebx
+ addl $5(%esi,%edi),%eax
+ xorl %edx,%ecx
+ movl %ebx,$1
+ addl %ecx,%eax
+ movl %eax,$4
+')
+
+dnl subround2(b, c, d, e, w): one step of rounds 21-40.
+dnl   b, c, d, e are operands (register or memory) holding state words; w is
+dnl   the byte displacement of the schedule word relative to esi+edi.
+dnl   On entry eax holds a. On exit both eax and operand e hold
+dnl     rol(a,5) + e + K20 + word[w] + (b ^ c ^ d)
+dnl   and operand b has been rotated right by 2 (the same value stays in ebx).
+dnl   Operand c is read before ebx is reloaded. Clobbers ebx, ecx.
+define(`subround2',`
+ movl $2,%ecx
+ movl $1,%ebx
+ roll `$'5,%eax
+ xorl %ebx,%ecx
+ addl $4,%eax
+ xorl $3,%ecx
+ addl `$'K20,%eax
+ rorl `$'2,%ebx
+ addl $5(%esi,%edi),%eax
+ movl %ebx,$1
+ addl %ecx,%eax
+ movl %eax,$4
+')
+
+dnl subround3(b, c, d, e, w): one step of rounds 41-60.
+dnl   b, c, d, e are operands (register or memory) holding state words; w is
+dnl   the byte displacement of the schedule word relative to esi+edi.
+dnl   On entry eax holds a. On exit both eax and operand e hold
+dnl     rol(a,5) + e + K40 + word[w] + (((c | b) & d) | (c & b))
+dnl   and operand b has been rotated right by 2 (the same value stays in ebx).
+dnl   Operand c is read before ebx is reloaded. Clobbers ebx, ecx, edx.
+define(`subround3',`
+ movl $2,%ecx
+ roll `$'5,%eax
+ movl $1,%ebx
+ movl %ecx,%edx
+ addl $4,%eax
+ orl %ebx,%ecx
+ andl %ebx,%edx
+ andl $3,%ecx
+ addl `$'K40,%eax
+ orl %edx,%ecx
+ addl $5(%esi,%edi),%eax
+ rorl `$'2,%ebx
+ addl %ecx,%eax
+ movl %ebx,$1
+ movl %eax,$4
+')
+
+dnl subround4(b, c, d, e, w): one step of rounds 61-80.
+dnl   Same instruction sequence as subround2 but with constant K60.
+dnl   b, c, d, e are operands (register or memory) holding state words; w is
+dnl   the byte displacement of the schedule word relative to esi+edi.
+dnl   On entry eax holds a. On exit both eax and operand e hold
+dnl     rol(a,5) + e + K60 + word[w] + (b ^ c ^ d)
+dnl   and operand b has been rotated right by 2 (the same value stays in ebx).
+dnl   Operand c is read before ebx is reloaded. Clobbers ebx, ecx.
+define(`subround4',`
+ movl $2,%ecx
+ movl $1,%ebx
+ roll `$'5,%eax
+ xorl %ebx,%ecx
+ addl $4,%eax
+ xorl $3,%ecx
+ addl `$'K60,%eax
+ rorl `$'2,%ebx
+ addl $5(%esi,%edi),%eax
+ movl %ebx,$1
+ addl %ecx,%eax
+ movl %eax,$4
+')
+
+dnl sha1Process: runs the 80 subrounds over one block and adds the result
+dnl into the five state words.
+dnl In:   one stack argument, a pointer to the parameter block. The five
+dnl       32-bit state words are at offset 0 and the word buffer starts at
+dnl       offset PARAM_DATA. The buffer must have room for 80 words: the
+dnl       first 16 are the input, the remaining 64 are written here.
+dnl Out:  state words updated in place. The 16 input words are byte-swapped
+dnl       in place as a side effect.
+dnl Regs: edi, esi, ebx, ebp are saved and restored; eax, ecx, edx are
+dnl       clobbered. 20 bytes of stack hold the working copy of the state.
+C_FUNCTION_BEGIN(sha1Process)
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ pushl %ebp
+
+dnl four pushes plus the return address put the argument at 20(%esp)
+ movl 20(%esp),%esi
+ subl `$'20,%esp
+ leal PARAM_DATA(%esi),%edi
+ movl %esp,%ebp
+
+dnl copy the five state words to the stack; ebp addresses the working copy
+ movl `$'4,%ecx
+LOCAL(0):
+ movl (%esi,%ecx,4),%edx
+ movl %edx,(%ebp,%ecx,4)
+ decl %ecx
+ jns LOCAL(0)
+
+dnl NOTE(review): eax is reloaded at the top of loop 2 before it is read, so
+dnl the xorl below looks redundant.
+ movl `$'15,%ecx
+ xorl %eax,%eax
+
+dnl byte-swap the 16 input words in place
+ .align 4
+LOCAL(1):
+ movl (%edi,%ecx,4),%edx
+ bswap %edx
+ mov %edx,(%edi,%ecx,4)
+ decl %ecx
+ jns LOCAL(1)
+
+dnl NOTE(review): edi has not been modified since it was first loaded, so
+dnl this second leal looks redundant.
+ leal PARAM_DATA(%esi),%edi
+ movl `$'16,%ecx
+
+dnl expand the buffer: each pass produces four new words
+dnl   word[t+16] = rol(word[t+13] ^ word[t+8] ^ word[t+2] ^ word[t], 1)
+dnl for t, t+1, t+2, t+3 (the fourth reads the word stored by the first),
+dnl then advances edi by four words. 16 passes give words 16..79.
+ .align 4
+LOCAL(2):
+ movl 52(%edi),%eax
+ movl 56(%edi),%ebx
+ xorl 32(%edi),%eax
+ xorl 36(%edi),%ebx
+ xorl 8(%edi),%eax
+ xorl 12(%edi),%ebx
+ xorl (%edi),%eax
+ xorl 4(%edi),%ebx
+ roll `$'1,%eax
+ roll `$'1,%ebx
+ movl %eax,64(%edi)
+ movl %ebx,68(%edi)
+ movl 60(%edi),%eax
+ movl 64(%edi),%ebx
+ xorl 40(%edi),%eax
+ xorl 44(%edi),%ebx
+ xorl 16(%edi),%eax
+ xorl 20(%edi),%ebx
+ xorl 8(%edi),%eax
+ xorl 12(%edi),%ebx
+ roll `$'1,%eax
+ roll `$'1,%ebx
+ movl %eax,72(%edi)
+ movl %ebx,76(%edi)
+ addl `$'16,%edi
+ decl %ecx
+ jnz LOCAL(2)
+
+dnl From here on edi is a byte offset, not a pointer: the subround macros
+dnl address the schedule as w(%esi,%edi), and edi grows by 20 after every
+dnl five subrounds so that w only needs the values 0..16.
+ movl `$'PARAM_DATA,%edi
+
+dnl eax carries the newest state word from one subround to the next. The
+dnl five stack slots rotate roles every subround, and after the first call
+dnl the c operand is taken from ebx, which the previous subround left holding
+dnl its rotated b.
+ movl (%ebp),%eax
+LOCAL(01_20):
+ subround1( 4(%ebp), 8(%ebp), 12(%ebp), 16(%ebp), 0)
+ subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround1( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround1( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround1( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+
+LOCAL(21_40):
+ subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+
+LOCAL(41_60):
+ subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+
+LOCAL(61_80):
+ subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+ addl `$'20,%edi
+ subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0)
+ subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4)
+ subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8)
+ subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12)
+ subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16)
+
+dnl add the five working words on the stack into the state words
+ movl `$'4,%ecx
+
+ .align 4
+LOCAL(3):
+ movl (%ebp,%ecx,4),%eax
+ addl %eax,(%esi,%ecx,4)
+ decl %ecx
+ jns LOCAL(3)
+
+dnl release the working copy and restore the saved registers
+ addl `$'20,%esp
+ popl %ebp
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+C_FUNCTION_END(sha1Process)
diff --git a/beecrypt/gas/sha1opt.powerpc.S b/beecrypt/gas/sha1opt.powerpc.S
deleted file mode 100644
index f9dc73a9e..000000000
--- a/beecrypt/gas/sha1opt.powerpc.S
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * fips180opt.powerpc.S
- *
- * Assembler optimized SHA-1 routines for PowerPC processors
- *
- * Warning: this code is incomplete and only contains a rough prototype!
- *
- * Compile target is GNU Assembler
- *
- * Copyright (c) 2000, 2001 Virtual Unlimited B.V.
- *
- * Author: Bob Deblier <bob@virtualunlimited.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- */
-
-#include "beecrypt.gas.h"
-
- .file "fips180opt.powerpc.S"
-
- .text
-
-#if DARWIN
-# define reg0 r0
-# define reg3 r3
-# define reg4 r4
-# define reg5 r5
-# define reg6 r6
-# define reg7 r7
-# define reg8 r8
-# define reg9 r9
-# define reg26 r26
-# define reg27 r27
-# define reg28 r28
-# define reg29 r29
-# define reg30 r30
-# define reg31 r31
-#else
-# define reg0 %r0
-# define reg3 %r3
-# define reg4 %r4
-# define reg5 %r5
-# define reg6 %r6
-# define reg7 %r7
-# define reg8 %r8
-# define reg9 %r9
-# define reg26 %r26
-# define reg27 %r27
-# define reg28 %r28
-# define reg29 %r29
-# define reg30 %r30
-# define reg31 %r31
-#endif
-
-#define K00 0x5a827999
-#define K20 0x6ed9eba1
-#define K40 0x8f1bbcdc
-#define K60 0xca62c1d6
-
-#define PARAM_H 0
-#define PARAM_DATA 20
-
-/* sha1Param: param in reg3 */
-
- .macro subround1 a b c d e w
- lwzu reg7,4(\w)
- rotlwi reg5,\a,5
- dbct r0,\w
- xor reg6,\c,\d
- add \e,\e,K00
- and reg6,reg6,\b
- add \e,\e,reg7
- xor reg6,reg6,\d
- add \e,\e,reg5
- rotrwi \b,\b,2
- add \e,\e,reg6
- .endm
-
- .macro subround2 a b c d e w
- lwzu reg7,4(\w)
- rotlwi reg5,\a,5
- dbct r0,\w
- add \e,\e,K20
- xor reg6,\b,\c
- add \e,\e,reg5
- xor reg6,reg6,\d
- add \e,\e,reg7
- rotrwi \b,\b,2
- add \e,\e,reg6
- .endm
-
- .macro subround3 a b c d e w
- lwzu reg7,4(\w)
- rotlwi reg5,\a,5
- dbct r0,\w
- xor reg6,\b,\c
- add \e,\e,reg5
- and reg6,reg6,\d
- add \e,\e,K40
- and reg5,\b,\c
- add \e,\e,reg7
- or reg6,reg6,reg5
- rotrwi \b,\b,2
- add \e,\e,reg6
- .endm
-
- .macro subround4 a b c d e w
- lwzu reg7,4(\w)
- rotlwi reg5,\a,5
- dbct r0,\w
- add \e,\e,K60
- xor reg6,\b,\c
- add \e,\e,reg5
- xor reg6,reg6,\d
- add \e,\e,reg7
- rotrwi \b,\b,2
- add \e,\e,reg6
- .endm
-
-C_FUNCTION_BEGIN(sha1Process)
-/* zero reg0 for general use */
- li reg0,0
-/* for a,b,c,d,e use r26,r27,r28,r29,r30, for w use r31 */
-
-/* we need to save registers before loading them */
- stmw reg26,-24(reg1)
-/* load the frame pointer with parameter data, and hint cache */
- addi reg31,reg3,PARAM_DATA
- dbct reg31
-
-#if !WORDS_BIGENDIAN /* have to provide for PowerPC little-endian mode
- /* loop of 16 entries */
- li reg5,60
- mtctr reg6
-.L00:
- lwbrx reg6,reg31,reg5
- stwx reg6,reg31,reg5
- subi. reg5,reg5,4
- bcge cr0,.L00
- addi reg31,reg3,PARAM_DATA
-#endif
-
-/* do the initial mixing */
- li reg8,64
- addi reg26,reg3,PARAM_DATA+64-4
- addi reg27,reg3,PARAM_DATA+64-3*4-4
- addi reg28,reg3,PARAM_DATA+64-8*4-4
- addi reg29,reg3,PARAM_DATA+64-14*4-4
- addi reg30,reg3,PARAM_DATA+64-16*4-4
- mtctr reg8
-
-.L10:
- lwzu reg5,4(reg27)
- lwzu reg6,4(reg28)
- lwzu reg7,4(reg29)
- lwzu reg8,4(reg30)
- xor reg5,reg5,reg6
- xor reg7,reg7,reg8
- xor reg5,reg5,reg7
- stwu reg5,4(reg26)
- bdnz .L10
-
- lwz reg26,PARAM_H (reg3)
- lwz reg27,PARAM_H+4 (reg3)
- lwz reg28,PARAM_H+8 (reg3)
- lwz reg29,PARAM_H+12(reg3)
- lwz reg30,PARAM_H+16(reg3)
-
- subround1 reg26,reg27,reg28,reg29,reg30,reg31
- subround1 reg30,reg26,reg27,reg28,reg29,reg31
- subround1 reg29,reg30,reg26,reg27,reg28,reg31
- subround1 reg28,reg29,reg30,reg26,reg27,reg31
- subround1 reg27,reg28,reg29,reg30,reg26,reg31
- subround1 reg26,reg27,reg28,reg29,reg30,reg31
- subround1 reg30,reg26,reg27,reg28,reg29,reg31
- subround1 reg29,reg30,reg26,reg27,reg28,reg31
- subround1 reg28,reg29,reg30,reg26,reg27,reg31
- subround1 reg27,reg28,reg29,reg30,reg26,reg31
- subround1 reg26,reg27,reg28,reg29,reg30,reg31
- subround1 reg30,reg26,reg27,reg28,reg29,reg31
- subround1 reg29,reg30,reg26,reg27,reg28,reg31
- subround1 reg28,reg29,reg30,reg26,reg27,reg31
- subround1 reg27,reg28,reg29,reg30,reg26,reg31
- subround1 reg26,reg27,reg28,reg29,reg30,reg31
- subround1 reg30,reg26,reg27,reg28,reg29,reg31
- subround1 reg29,reg30,reg26,reg27,reg28,reg31
- subround1 reg28,reg29,reg30,reg26,reg27,reg31
- subround1 reg27,reg28,reg29,reg30,reg26,reg31
-
- subround2 reg26,reg27,reg28,reg29,reg30,reg31
- subround2 reg30,reg26,reg27,reg28,reg29,reg31
- subround2 reg29,reg30,reg26,reg27,reg28,reg31
- subround2 reg28,reg29,reg30,reg26,reg27,reg31
- subround2 reg27,reg28,reg29,reg30,reg26,reg31
- subround2 reg26,reg27,reg28,reg29,reg30,reg31
- subround2 reg30,reg26,reg27,reg28,reg29,reg31
- subround2 reg29,reg30,reg26,reg27,reg28,reg31
- subround2 reg28,reg29,reg30,reg26,reg27,reg31
- subround2 reg27,reg28,reg29,reg30,reg26,reg31
- subround2 reg26,reg27,reg28,reg29,reg30,reg31
- subround2 reg30,reg26,reg27,reg28,reg29,reg31
- subround2 reg29,reg30,reg26,reg27,reg28,reg31
- subround2 reg28,reg29,reg30,reg26,reg27,reg31
- subround2 reg27,reg28,reg29,reg30,reg26,reg31
- subround2 reg26,reg27,reg28,reg29,reg30,reg31
- subround2 reg30,reg26,reg27,reg28,reg29,reg31
- subround2 reg29,reg30,reg26,reg27,reg28,reg31
- subround2 reg28,reg29,reg30,reg26,reg27,reg31
- subround2 reg27,reg28,reg29,reg30,reg26,reg31
-
- subround3 reg26,reg27,reg28,reg29,reg30,reg31
- subround3 reg30,reg26,reg27,reg28,reg29,reg31
- subround3 reg29,reg30,reg26,reg27,reg28,reg31
- subround3 reg28,reg29,reg30,reg26,reg27,reg31
- subround3 reg27,reg28,reg29,reg30,reg26,reg31
- subround3 reg26,reg27,reg28,reg29,reg30,reg31
- subround3 reg30,reg26,reg27,reg28,reg29,reg31
- subround3 reg29,reg30,reg26,reg27,reg28,reg31
- subround3 reg28,reg29,reg30,reg26,reg27,reg31
- subround3 reg27,reg28,reg29,reg30,reg26,reg31
- subround3 reg26,reg27,reg28,reg29,reg30,reg31
- subround3 reg30,reg26,reg27,reg28,reg29,reg31
- subround3 reg29,reg30,reg26,reg27,reg28,reg31
- subround3 reg28,reg29,reg30,reg26,reg27,reg31
- subround3 reg27,reg28,reg29,reg30,reg26,reg31
- subround3 reg26,reg27,reg28,reg29,reg30,reg31
- subround3 reg30,reg26,reg27,reg28,reg29,reg31
- subround3 reg29,reg30,reg26,reg27,reg28,reg31
- subround3 reg28,reg29,reg30,reg26,reg27,reg31
- subround3 reg27,reg28,reg29,reg30,reg26,reg31
-
- subround4 reg26,reg27,reg28,reg29,reg30,reg31
- subround4 reg30,reg26,reg27,reg28,reg29,reg31
- subround4 reg29,reg30,reg26,reg27,reg28,reg31
- subround4 reg28,reg29,reg30,reg26,reg27,reg31
- subround4 reg27,reg28,reg29,reg30,reg26,reg31
- subround4 reg26,reg27,reg28,reg29,reg30,reg31
- subround4 reg30,reg26,reg27,reg28,reg29,reg31
- subround4 reg29,reg30,reg26,reg27,reg28,reg31
- subround4 reg28,reg29,reg30,reg26,reg27,reg31
- subround4 reg27,reg28,reg29,reg30,reg26,reg31
- subround4 reg26,reg27,reg28,reg29,reg30,reg31
- subround4 reg30,reg26,reg27,reg28,reg29,reg31
- subround4 reg29,reg30,reg26,reg27,reg28,reg31
- subround4 reg28,reg29,reg30,reg26,reg27,reg31
- subround4 reg27,reg28,reg29,reg30,reg26,reg31
- subround4 reg26,reg27,reg28,reg29,reg30,reg31
- subround4 reg30,reg26,reg27,reg28,reg29,reg31
- subround4 reg29,reg30,reg26,reg27,reg28,reg31
- subround4 reg28,reg29,reg30,reg26,reg27,reg31
- subround4 reg27,reg28,reg29,reg30,reg26,reg31
-
-/* then store the five values into registers */
- lwz reg5,PARAM_H (reg3)
- lwz reg6,PARAM_H+4 (reg3)
- lwz reg7,PARAM_H+8 (reg3)
- lwz reg8,PARAM_H+12(reg3)
- lwz reg9,PARAM_H+16(reg3)
- add reg26,reg5,reg26
- add reg27,reg5,reg27
- add reg28,reg5,reg28
- add reg29,reg5,reg29
- add reg30,reg5,reg30
- stw reg26,PARAM_H (reg3)
- stw reg27,PARAM_H+4 (reg3)
- stw reg28,PARAM_H+8 (reg3)
- stw reg29,PARAM_H+12(reg3)
- stw reg30,PARAM_H+16(reg3)
-
-/* finally, restore registers */
- lmw reg26,-24(reg1)
-/* and return */
- blr
-C_FUNCION_END(sha1Process, .Lsha1Process_size)
diff --git a/beecrypt/gas/sparc.m4 b/beecrypt/gas/sparc.m4
new file mode 100644
index 000000000..e735600c3
--- /dev/null
+++ b/beecrypt/gas/sparc.m4
@@ -0,0 +1,30 @@
+dnl sparc.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl Solaris-specific replacement for the C_FUNCTION_BEGIN macro.
+dnl
+dnl If the first seven characters of ASM_OS are "solaris", any existing
+dnl definition of C_FUNCTION_BEGIN is removed and a new one is installed.
+dnl The new macro takes the function name as its first argument and emits,
+dnl in this order:
+dnl   1. TEXTSEG
+dnl   2. GLOBL applied to SYMNAME(name)
+dnl   3. the label SYMNAME(name):
+dnl   4. .register directives tagging %g2 and %g3 as #scratch
+dnl For any other ASM_OS value no macro is changed.
+dnl
+dnl The macro name is passed to define unquoted; this works only because the
+dnl undefine on the preceding line guarantees it is not a macro at that point.
+dnl
+dnl The hash sign is the default m4 comment delimiter, so when the macro body
+dnl is rescanned the text from the hash to the end of each .register line is
+dnl copied to the output without macro expansion (assuming changecom has not
+dnl been used elsewhere - TODO confirm).
+dnl
+dnl NOTE(review): ASM_OS, TEXTSEG, GLOBL and SYMNAME are not defined in this
+dnl file; presumably they come from asmdefs.m4 or the build - confirm.
+dnl NOTE(review): the .register lines are presumably there because the
+dnl Solaris toolchain wants use of %g2/%g3 declared explicitly - confirm.
+ifelse(substr(ASM_OS,0,7),solaris,`
+undefine(`C_FUNCTION_BEGIN')
+define(C_FUNCTION_BEGIN,`
+ TEXTSEG
+ GLOBL SYMNAME($1)
+SYMNAME($1):
+ .register %g2,#scratch
+ .register %g3,#scratch
+')
+')
diff --git a/beecrypt/gas/x86.m4 b/beecrypt/gas/x86.m4
new file mode 100644
index 000000000..131c94df2
--- /dev/null
+++ b/beecrypt/gas/x86.m4
@@ -0,0 +1,23 @@
+dnl x86.m4
+dnl
+dnl Copyright (c) 2003 Bob Deblier
+dnl
+dnl Author: Bob Deblier <bob.deblier@pandora.be>
+dnl
+dnl This library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public
+dnl License as published by the Free Software Foundation; either
+dnl version 2.1 of the License, or (at your option) any later version.
+dnl
+dnl This library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public
+dnl License along with this library; if not, write to the Free Software
+dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+dnl Feature switch for SSE2.
+dnl
+dnl If ASM_ARCH expands to exactly "pentium4", the macro USE_SSE2 is defined.
+dnl Only the name is given to define, so USE_SSE2 expands to the empty string;
+dnl what matters is whether it is defined. For any other ASM_ARCH value
+dnl USE_SSE2 is left untouched.
+dnl
+dnl NOTE(review): ASM_ARCH is not defined in this file and no user of
+dnl USE_SSE2 is visible here; presumably other .m4 sources test it with
+dnl ifdef - confirm.
+ifelse(ASM_ARCH,pentium4,`
+ define(`USE_SSE2')
+')