diff options
author | jbj <devnull@localhost> | 2003-04-28 15:06:30 +0000 |
---|---|---|
committer | jbj <devnull@localhost> | 2003-04-28 15:06:30 +0000 |
commit | a08149369a67ef068bd205164c941768a8e97937 (patch) | |
tree | 2f28c2f88031789014a2fdf0b1a1cdadf0354ce2 /beecrypt | |
parent | 6099f7ee0e858d8831726ff1e430e4f80d63ba62 (diff) | |
download | rpm-a08149369a67ef068bd205164c941768a8e97937.tar.gz rpm-a08149369a67ef068bd205164c941768a8e97937.tar.bz2 rpm-a08149369a67ef068bd205164c941768a8e97937.zip |
beecrypt-3.0.0 merge: bring in latest asm code.
CVS patchset: 6783
CVS date: 2003/04/28 15:06:30
Diffstat (limited to 'beecrypt')
34 files changed, 3221 insertions, 3455 deletions
diff --git a/beecrypt/gas/Makefile.am b/beecrypt/gas/Makefile.am index 8b80be914..4bcbc7cd5 100644 --- a/beecrypt/gas/Makefile.am +++ b/beecrypt/gas/Makefile.am @@ -1,9 +1,9 @@ # # Makefile.am's purpose is to add the GNU Assembler sources to the dist # -# Copyright (c) 2001, 2002 Virtual Unlimited B.V. +# Copyright (c) 2001, 2002, 2003 Virtual Unlimited B.V. # -# Author: Bob Deblier <bob@virtualunlimited.com> +# Author: Bob Deblier <bob.deblier@pandora.be> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -22,4 +22,4 @@ AUTOMAKE_OPTIONS = gnu no-dependencies -EXTRA_DIST = aesopt.i586.S aesopt.powerpc.S blowfishopt.i586.S blowfishopt.powerpc.S mp32opt.arm.S mp32opt.i386.S mp32opt.ia64.S mp32opt.powerpc.S mp32opt.sparcv8.S mp32opt.sparcv9.S mp64opt.ia64.S sha1opt.i586.S sha1opt.ia64.S sha1opt.powerpc.S +EXTRA_DIST = aesopt.i586.m4 aesopt.ppc.m4 alpha.m4 asmdefs.m4 blowfishopt.i586.m4 blowfishopt.ppc.m4 ia64.m4 mpopt.alpha.m4 mpopt.arm.m4 mpopt.ia64.m4 mpopt.m68k.m4 mpopt.ppc.m4 mpopt.ppc64.m4 mpopt.sparcv8.m4 mpopt.sparcv8plus.m4 mpopt.x86.m4 ppc.m4 ppc64.m4 sha1opt.i586.m4 sparc.m4 x86.m4 diff --git a/beecrypt/gas/aesopt.i586.S b/beecrypt/gas/aesopt.i586.S deleted file mode 100644 index deb0853b7..000000000 --- a/beecrypt/gas/aesopt.i586.S +++ /dev/null @@ -1,688 +0,0 @@ -/* - * aesopt.i586.asm - * - * Assembler optimized AES routines for Intel Pentium processors - * - * Compile target is GNU Assembler - * - * Copyright (c) 2002 Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "aesopt.i586.S" - - .text - - .macro sxrk - movl (%esi),%eax - movl 4(%esi),%ebx - movl 8(%esi),%ecx - movl 12(%esi),%edx - bswap %eax - bswap %ebx - bswap %ecx - bswap %edx - xorl (%ebp),%eax - xorl 4(%ebp),%ebx - xorl 8(%ebp),%ecx - xorl 12(%ebp),%edx - movl %eax, (%esp) - movl %ebx, 4(%esp) - movl %ecx, 8(%esp) - movl %edx,12(%esp) - .endm - - .macro etfs offset - movl \offset+ 0(%ebp),%ecx - movl \offset+ 4(%ebp),%edx - - movzbl 3(%esp),%eax - movzbl 7(%esp),%ebx - xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx - - movzbl 6(%esp),%eax - movzbl 10(%esp),%ebx - xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx - - movzbl 9(%esp),%eax - movzbl 13(%esp),%ebx - xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx - - movzbl 12(%esp),%eax - movzbl (%esp),%ebx - xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx - - movl %ecx,16(%esp) - movl %edx,20(%esp) - - movl \offset+ 8(%ebp),%ecx - movl \offset+12(%ebp),%edx - - movzbl 11(%esp),%eax - movzbl 15(%esp),%ebx - xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx - - movzbl 14(%esp),%eax - movzbl 2(%esp),%ebx - xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx - - movzbl 1(%esp),%eax - movzbl 5(%esp),%ebx - xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx - - movzbl 4(%esp),%eax - movzbl 8(%esp),%ebx - xorl 
SYMBOL_NAME(_ae3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx - - movl %ecx,24(%esp) - movl %edx,28(%esp) - .endm - - .macro esft offset - movl \offset+ 0(%ebp),%ecx - movl \offset+ 4(%ebp),%edx - - movzbl 19(%esp),%eax - movzbl 23(%esp),%ebx - xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx - - movzbl 22(%esp),%eax - movzbl 26(%esp),%ebx - xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx - - movzbl 25(%esp),%eax - movzbl 29(%esp),%ebx - xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx - - movzbl 28(%esp),%eax - movzbl 16(%esp),%ebx - xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx - - movl %ecx, (%esp) - movl %edx, 4(%esp) - - movl \offset+ 8(%ebp),%ecx - movl \offset+12(%ebp),%edx - - movzbl 27(%esp),%eax - movzbl 31(%esp),%ebx - xorl SYMBOL_NAME(_ae0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae0)(,%ebx,4),%edx - - movzbl 30(%esp),%eax - movzbl 18(%esp),%ebx - xorl SYMBOL_NAME(_ae1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae1)(,%ebx,4),%edx - - movzbl 17(%esp),%eax - movzbl 21(%esp),%ebx - xorl SYMBOL_NAME(_ae2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae2)(,%ebx,4),%edx - - movzbl 20(%esp),%eax - movzbl 24(%esp),%ebx - xorl SYMBOL_NAME(_ae3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ae3)(,%ebx,4),%edx - - movl %ecx, 8(%esp) - movl %edx,12(%esp) - .endm - - .macro elr - movl 0(%ebp),%ecx - movl 4(%ebp),%edx - - movzbl 19(%esp),%eax - movzbl 23(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff000000,%eax - andl $0xff000000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 22(%esp),%eax - movzbl 26(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff0000,%eax - andl $0xff0000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 25(%esp),%eax - movzbl 29(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff00,%eax - andl $0xff00,%ebx 
- xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 28(%esp),%eax - movzbl 16(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff,%eax - andl $0xff,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movl %ecx, (%esp) - movl %edx, 4(%esp) - - movl 8(%ebp),%ecx - movl 12(%ebp),%edx - - movzbl 27(%esp),%eax - movzbl 31(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff000000,%eax - andl $0xff000000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 30(%esp),%eax - movzbl 18(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff0000,%eax - andl $0xff0000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 17(%esp),%eax - movzbl 21(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff00,%eax - andl $0xff00,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 20(%esp),%eax - movzbl 24(%esp),%ebx - movl SYMBOL_NAME(_ae4)(,%eax,4),%eax - movl SYMBOL_NAME(_ae4)(,%ebx,4),%ebx - andl $0xff,%eax - andl $0xff,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movl %ecx, 8(%esp) - movl %edx,12(%esp) - .endm - - .macro eblock label - sxrk - - etfs 16 - esft 32 - etfs 48 - esft 64 - etfs 80 - esft 96 - etfs 112 - esft 128 - etfs 144 - - movl 256(%ebp),%eax - cmp $10,%eax - je \label - - esft 160 - etfs 176 - - movl 256(%ebp),%eax - cmp $12,%eax - je \label - - esft 192 - etfs 208 - - movl 256(%ebp),%eax - - .align 4 -\label: - sall $4,%eax - addl %eax,%ebp - - elr - .endm - - .macro dtfs offset - movl \offset+0(%ebp),%ecx - movl \offset+4(%ebp),%edx - - movzbl 3(%esp),%eax - movzbl 7(%esp),%ebx - xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx - - movzbl 14(%esp),%eax - movzbl 2(%esp),%ebx - xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx - - movzbl 9(%esp),%eax - movzbl 13(%esp),%ebx - xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx 
- - movzbl 4(%esp),%eax - movzbl 8(%esp),%ebx - xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx - - movl %ecx,16(%esp) - movl %edx,20(%esp) - - movl \offset+ 8(%ebp),%ecx - movl \offset+12(%ebp),%edx - - movzbl 11(%esp),%eax - movzbl 15(%esp),%ebx - xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx - - movzbl 6(%esp),%eax - movzbl 10(%esp),%ebx - xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx - - movzbl 1(%esp),%eax - movzbl 5(%esp),%ebx - xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx - - movzbl 12(%esp),%eax - movzbl (%esp),%ebx - xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx - - movl %ecx,24(%esp) - movl %edx,28(%esp) - .endm - - .macro dsft offset - movl \offset+ 0(%ebp),%ecx - movl \offset+ 4(%ebp),%edx - - movzbl 19(%esp),%eax - movzbl 23(%esp),%ebx - xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx - - movzbl 30(%esp),%eax - movzbl 18(%esp),%ebx - xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx - - movzbl 25(%esp),%eax - movzbl 29(%esp),%ebx - xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx - - movzbl 20(%esp),%eax - movzbl 24(%esp),%ebx - xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx - - movl %ecx, (%esp) - movl %edx, 4(%esp) - - movl \offset+ 8(%ebp),%ecx - movl \offset+12(%ebp),%edx - - movzbl 27(%esp),%eax - movzbl 31(%esp),%ebx - xorl SYMBOL_NAME(_ad0)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad0)(,%ebx,4),%edx - - movzbl 22(%esp),%eax - movzbl 26(%esp),%ebx - xorl SYMBOL_NAME(_ad1)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad1)(,%ebx,4),%edx - - movzbl 17(%esp),%eax - movzbl 21(%esp),%ebx - xorl SYMBOL_NAME(_ad2)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad2)(,%ebx,4),%edx - - movzbl 28(%esp),%eax - movzbl 16(%esp),%ebx - xorl SYMBOL_NAME(_ad3)(,%eax,4),%ecx - xorl SYMBOL_NAME(_ad3)(,%ebx,4),%edx - - movl %ecx, 8(%esp) - 
movl %edx,12(%esp) - .endm - - .macro dlr - movl 0(%ebp),%ecx - movl 4(%ebp),%edx - - movzbl 19(%esp),%eax - movzbl 23(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff000000,%eax - andl $0xff000000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 30(%esp),%eax - movzbl 18(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff0000,%eax - andl $0xff0000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 25(%esp),%eax - movzbl 29(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff00,%eax - andl $0xff00,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 20(%esp),%eax - movzbl 24(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff,%eax - andl $0xff,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movl %ecx, (%esp) - movl %edx, 4(%esp) - - movl 8(%ebp),%ecx - movl 12(%ebp),%edx - - movzbl 27(%esp),%eax - movzbl 31(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff000000,%eax - andl $0xff000000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 22(%esp),%eax - movzbl 26(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff0000,%eax - andl $0xff0000,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 17(%esp),%eax - movzbl 21(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff00,%eax - andl $0xff00,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movzbl 28(%esp),%eax - movzbl 16(%esp),%ebx - movl SYMBOL_NAME(_ad4)(,%eax,4),%eax - movl SYMBOL_NAME(_ad4)(,%ebx,4),%ebx - andl $0xff,%eax - andl $0xff,%ebx - xorl %eax,%ecx - xorl %ebx,%edx - - movl %ecx, 8(%esp) - movl %edx,12(%esp) - .endm - - .macro dblock label - sxrk - - dtfs 16 - dsft 32 - dtfs 48 - dsft 64 - dtfs 80 - dsft 96 - dtfs 112 - dsft 128 - dtfs 144 - - movl 256(%ebp),%eax - cmp 
$10,%eax - je \label - - dsft 160 - dtfs 176 - - movl 256(%ebp),%eax - cmp $12,%eax - je \label - - dsft 192 - dtfs 208 - - movl 256(%ebp),%eax - - .align 4 -\label: - sall $4,%eax - addl %eax,%ebp - - dlr - .endm - -C_FUNCTION_BEGIN(aesEncrypt) -LABEL(aesEncrypt) - pushl %edi - pushl %esi - pushl %ebp - pushl %ebx - - movl 20(%esp),%ebp - movl 24(%esp),%edi - movl 28(%esp),%esi - - subl $32,%esp - - eblock LOCAL(00) - - movl (%esp),%eax - movl 4(%esp),%ebx - movl 8(%esp),%ecx - movl 12(%esp),%edx - bswap %eax - bswap %ebx - bswap %ecx - bswap %edx - movl %eax, (%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx,12(%edi) - - addl $32,%esp - - xorl %eax,%eax - - popl %ebx - popl %ebp - popl %esi - popl %edi - ret -C_FUNCTION_END(aesEncrypt, LOCAL(aesEncrypt_size)) - - -C_FUNCTION_BEGIN(aesDecrypt) -LABEL(aesDecrypt) - pushl %edi - pushl %esi - pushl %ebp - pushl %ebx - - movl 20(%esp),%ebp - movl 24(%esp),%edi - movl 28(%esp),%esi - - subl $32,%esp - - dblock LOCAL(01) - - movl (%esp),%eax - movl 4(%esp),%ebx - movl 8(%esp),%ecx - movl 12(%esp),%edx - bswap %eax - bswap %ebx - bswap %ecx - bswap %edx - movl %eax, (%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx,12(%edi) - - addl $32,%esp - - xorl %eax,%eax - - popl %ebx - popl %ebp - popl %esi - popl %edi - ret -C_FUNCTION_END(aesDecrypt, LOCAL(aesDecrypt_size)) - - -C_FUNCTION_BEGIN(aesECBEncrypt) -LABEL(aesECBEncrypt) - pushl %edi - pushl %esi - pushl %ebp - pushl %ebx - - movl 28(%esp),%edi - movl 32(%esp),%esi - - subl $32,%esp - - .align 4 -LOCAL(02): - movl 52(%esp),%ebp - - eblock LOCAL(03) - - movl (%esp),%eax - movl 4(%esp),%ebx - movl 8(%esp),%ecx - movl 12(%esp),%edx - bswap %eax - bswap %ebx - bswap %ecx - bswap %edx - movl %eax, (%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx,12(%edi) - - addl $16,%esi - addl $16,%edi - - decl 56(%esp) - jnz LOCAL(02) - - addl $32,%esp - - xorl %eax,%eax - - popl %ebx - popl %ebp - popl %esi - popl %edi - ret 
-C_FUNCTION_END(aesECBEncrypt, LOCAL(aesECBEncrypt_size)) - - -C_FUNCTION_BEGIN(aesECBDecrypt) -LABEL(aesECBDecrypt) - pushl %edi - pushl %esi - pushl %ebp - pushl %ebx - - movl 28(%esp),%edi - movl 32(%esp),%esi - - subl $32,%esp - - .align 4 -LOCAL(04): - movl 52(%esp),%ebp - - dblock LOCAL(05) - - movl (%esp),%eax - movl 4(%esp),%ebx - movl 8(%esp),%ecx - movl 12(%esp),%edx - bswap %eax - bswap %ebx - bswap %ecx - bswap %edx - movl %eax, (%edi) - movl %ebx, 4(%edi) - movl %ecx, 8(%edi) - movl %edx,12(%edi) - - addl $16,%esi - addl $16,%edi - - decl 56(%esp) - jnz LOCAL(04) - - addl $32,%esp - - xorl %eax,%eax - - popl %ebx - popl %ebp - popl %esi - popl %edi - ret -C_FUNCTION_END(aesECBDecrypt, LOCAL(aesECBDecrypt_size)) diff --git a/beecrypt/gas/aesopt.i586.m4 b/beecrypt/gas/aesopt.i586.m4 new file mode 100644 index 000000000..e8dbf1da5 --- /dev/null +++ b/beecrypt/gas/aesopt.i586.m4 @@ -0,0 +1,580 @@ +dnl aesopt.i586.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/x86.m4) + +define(`sxrk',` + movl (%esi),%eax + movl 4(%esi),%ebx + movl 8(%esi),%ecx + movl 12(%esi),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + xorl (%ebp),%eax + xorl 4(%ebp),%ebx + xorl 8(%ebp),%ecx + xorl 12(%ebp),%edx + movl %eax, (%esp) + movl %ebx, 4(%esp) + movl %ecx, 8(%esp) + movl %edx,12(%esp) +') + +define(`etfs',` + movl $1+0(%ebp),%ecx + movl $1+4(%ebp),%edx + + movzbl 3(%esp),%eax + movzbl 7(%esp),%ebx + xorl SYMNAME(_ae0)(,%eax,4),%ecx + xorl SYMNAME(_ae0)(,%ebx,4),%edx + + movzbl 6(%esp),%eax + movzbl 10(%esp),%ebx + xorl SYMNAME(_ae1)(,%eax,4),%ecx + xorl SYMNAME(_ae1)(,%ebx,4),%edx + + movzbl 9(%esp),%eax + movzbl 13(%esp),%ebx + xorl SYMNAME(_ae2)(,%eax,4),%ecx + xorl SYMNAME(_ae2)(,%ebx,4),%edx + + movzbl 12(%esp),%eax + movzbl (%esp),%ebx + xorl SYMNAME(_ae3)(,%eax,4),%ecx + xorl SYMNAME(_ae3)(,%ebx,4),%edx + + movl %ecx,16(%esp) + movl %edx,20(%esp) + + movl $1+ 8(%ebp),%ecx + movl $1+12(%ebp),%edx + + movzbl 11(%esp),%eax + movzbl 15(%esp),%ebx + xorl SYMNAME(_ae0)(,%eax,4),%ecx + xorl SYMNAME(_ae0)(,%ebx,4),%edx + + movzbl 14(%esp),%eax + movzbl 2(%esp),%ebx + xorl SYMNAME(_ae1)(,%eax,4),%ecx + xorl SYMNAME(_ae1)(,%ebx,4),%edx + + movzbl 1(%esp),%eax + movzbl 5(%esp),%ebx + xorl SYMNAME(_ae2)(,%eax,4),%ecx + xorl SYMNAME(_ae2)(,%ebx,4),%edx + + movzbl 4(%esp),%eax + movzbl 8(%esp),%ebx + xorl SYMNAME(_ae3)(,%eax,4),%ecx + xorl SYMNAME(_ae3)(,%ebx,4),%edx + + movl %ecx,24(%esp) + movl %edx,28(%esp) +') + +define(`esft',` + movl $1+0(%ebp),%ecx + movl $1+4(%ebp),%edx + + movzbl 19(%esp),%eax + movzbl 23(%esp),%ebx + xorl SYMNAME(_ae0)(,%eax,4),%ecx + xorl SYMNAME(_ae0)(,%ebx,4),%edx + + movzbl 22(%esp),%eax + movzbl 26(%esp),%ebx + xorl SYMNAME(_ae1)(,%eax,4),%ecx 
+ xorl SYMNAME(_ae1)(,%ebx,4),%edx + + movzbl 25(%esp),%eax + movzbl 29(%esp),%ebx + xorl SYMNAME(_ae2)(,%eax,4),%ecx + xorl SYMNAME(_ae2)(,%ebx,4),%edx + + movzbl 28(%esp),%eax + movzbl 16(%esp),%ebx + xorl SYMNAME(_ae3)(,%eax,4),%ecx + xorl SYMNAME(_ae3)(,%ebx,4),%edx + + movl %ecx, (%esp) + movl %edx, 4(%esp) + + movl $1+ 8(%ebp),%ecx + movl $1+12(%ebp),%edx + + movzbl 27(%esp),%eax + movzbl 31(%esp),%ebx + xorl SYMNAME(_ae0)(,%eax,4),%ecx + xorl SYMNAME(_ae0)(,%ebx,4),%edx + + movzbl 30(%esp),%eax + movzbl 18(%esp),%ebx + xorl SYMNAME(_ae1)(,%eax,4),%ecx + xorl SYMNAME(_ae1)(,%ebx,4),%edx + + movzbl 17(%esp),%eax + movzbl 21(%esp),%ebx + xorl SYMNAME(_ae2)(,%eax,4),%ecx + xorl SYMNAME(_ae2)(,%ebx,4),%edx + + movzbl 20(%esp),%eax + movzbl 24(%esp),%ebx + xorl SYMNAME(_ae3)(,%eax,4),%ecx + xorl SYMNAME(_ae3)(,%ebx,4),%edx + + movl %ecx, 8(%esp) + movl %edx,12(%esp) +') + +define(`elr',` + movl 0(%ebp),%ecx + movl 4(%ebp),%edx + + movzbl 19(%esp),%eax + movzbl 23(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff000000,%eax + andl `$'0xff000000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 22(%esp),%eax + movzbl 26(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff0000,%eax + andl `$'0xff0000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 25(%esp),%eax + movzbl 29(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff00,%eax + andl `$'0xff00,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 28(%esp),%eax + movzbl 16(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff,%eax + andl `$'0xff,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movl %ecx, (%esp) + movl %edx, 4(%esp) + + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movzbl 27(%esp),%eax + movzbl 31(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff000000,%eax + andl `$'0xff000000,%ebx + xorl 
%eax,%ecx + xorl %ebx,%edx + + movzbl 30(%esp),%eax + movzbl 18(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff0000,%eax + andl `$'0xff0000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 17(%esp),%eax + movzbl 21(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff00,%eax + andl `$'0xff00,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 20(%esp),%eax + movzbl 24(%esp),%ebx + movl SYMNAME(_ae4)(,%eax,4),%eax + movl SYMNAME(_ae4)(,%ebx,4),%ebx + andl `$'0xff,%eax + andl `$'0xff,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movl %ecx, 8(%esp) + movl %edx,12(%esp) +') + +define(`eblock',` + sxrk + + etfs(16) + esft(32) + etfs(48) + esft(64) + etfs(80) + esft(96) + etfs(112) + esft(128) + etfs(144) + + movl 256(%ebp),%eax + cmp `$'10,%eax + je $1 + + esft(160) + etfs(176) + + movl 256(%ebp),%eax + cmp `$'12,%eax + je $1 + + esft(192) + etfs(208) + + movl 256(%ebp),%eax + + .align 4 +$1: + sall `$'4,%eax + addl %eax,%ebp + + elr +') + +define(`dtfs',` + movl $1+0(%ebp),%ecx + movl $1+4(%ebp),%edx + + movzbl 3(%esp),%eax + movzbl 7(%esp),%ebx + xorl SYMNAME(_ad0)(,%eax,4),%ecx + xorl SYMNAME(_ad0)(,%ebx,4),%edx + + movzbl 14(%esp),%eax + movzbl 2(%esp),%ebx + xorl SYMNAME(_ad1)(,%eax,4),%ecx + xorl SYMNAME(_ad1)(,%ebx,4),%edx + + movzbl 9(%esp),%eax + movzbl 13(%esp),%ebx + xorl SYMNAME(_ad2)(,%eax,4),%ecx + xorl SYMNAME(_ad2)(,%ebx,4),%edx + + movzbl 4(%esp),%eax + movzbl 8(%esp),%ebx + xorl SYMNAME(_ad3)(,%eax,4),%ecx + xorl SYMNAME(_ad3)(,%ebx,4),%edx + + movl %ecx,16(%esp) + movl %edx,20(%esp) + + movl $1+ 8(%ebp),%ecx + movl $1+12(%ebp),%edx + + movzbl 11(%esp),%eax + movzbl 15(%esp),%ebx + xorl SYMNAME(_ad0)(,%eax,4),%ecx + xorl SYMNAME(_ad0)(,%ebx,4),%edx + + movzbl 6(%esp),%eax + movzbl 10(%esp),%ebx + xorl SYMNAME(_ad1)(,%eax,4),%ecx + xorl SYMNAME(_ad1)(,%ebx,4),%edx + + movzbl 1(%esp),%eax + movzbl 5(%esp),%ebx + xorl SYMNAME(_ad2)(,%eax,4),%ecx + xorl 
SYMNAME(_ad2)(,%ebx,4),%edx + + movzbl 12(%esp),%eax + movzbl (%esp),%ebx + xorl SYMNAME(_ad3)(,%eax,4),%ecx + xorl SYMNAME(_ad3)(,%ebx,4),%edx + + movl %ecx,24(%esp) + movl %edx,28(%esp) +') + +define(`dsft',` + movl $1+0(%ebp),%ecx + movl $1+4(%ebp),%edx + + movzbl 19(%esp),%eax + movzbl 23(%esp),%ebx + xorl SYMNAME(_ad0)(,%eax,4),%ecx + xorl SYMNAME(_ad0)(,%ebx,4),%edx + + movzbl 30(%esp),%eax + movzbl 18(%esp),%ebx + xorl SYMNAME(_ad1)(,%eax,4),%ecx + xorl SYMNAME(_ad1)(,%ebx,4),%edx + + movzbl 25(%esp),%eax + movzbl 29(%esp),%ebx + xorl SYMNAME(_ad2)(,%eax,4),%ecx + xorl SYMNAME(_ad2)(,%ebx,4),%edx + + movzbl 20(%esp),%eax + movzbl 24(%esp),%ebx + xorl SYMNAME(_ad3)(,%eax,4),%ecx + xorl SYMNAME(_ad3)(,%ebx,4),%edx + + movl %ecx, (%esp) + movl %edx, 4(%esp) + + movl $1+ 8(%ebp),%ecx + movl $1+12(%ebp),%edx + + movzbl 27(%esp),%eax + movzbl 31(%esp),%ebx + xorl SYMNAME(_ad0)(,%eax,4),%ecx + xorl SYMNAME(_ad0)(,%ebx,4),%edx + + movzbl 22(%esp),%eax + movzbl 26(%esp),%ebx + xorl SYMNAME(_ad1)(,%eax,4),%ecx + xorl SYMNAME(_ad1)(,%ebx,4),%edx + + movzbl 17(%esp),%eax + movzbl 21(%esp),%ebx + xorl SYMNAME(_ad2)(,%eax,4),%ecx + xorl SYMNAME(_ad2)(,%ebx,4),%edx + + movzbl 28(%esp),%eax + movzbl 16(%esp),%ebx + xorl SYMNAME(_ad3)(,%eax,4),%ecx + xorl SYMNAME(_ad3)(,%ebx,4),%edx + + movl %ecx, 8(%esp) + movl %edx,12(%esp) +') + +define(`dlr',` + movl 0(%ebp),%ecx + movl 4(%ebp),%edx + + movzbl 19(%esp),%eax + movzbl 23(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff000000,%eax + andl `$'0xff000000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 30(%esp),%eax + movzbl 18(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff0000,%eax + andl `$'0xff0000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 25(%esp),%eax + movzbl 29(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff00,%eax + andl `$'0xff00,%ebx + xorl %eax,%ecx + xorl %ebx,%edx 
+ + movzbl 20(%esp),%eax + movzbl 24(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff,%eax + andl `$'0xff,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movl %ecx, (%esp) + movl %edx, 4(%esp) + + movl 8(%ebp),%ecx + movl 12(%ebp),%edx + + movzbl 27(%esp),%eax + movzbl 31(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff000000,%eax + andl `$'0xff000000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 22(%esp),%eax + movzbl 26(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff0000,%eax + andl `$'0xff0000,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 17(%esp),%eax + movzbl 21(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff00,%eax + andl `$'0xff00,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movzbl 28(%esp),%eax + movzbl 16(%esp),%ebx + movl SYMNAME(_ad4)(,%eax,4),%eax + movl SYMNAME(_ad4)(,%ebx,4),%ebx + andl `$'0xff,%eax + andl `$'0xff,%ebx + xorl %eax,%ecx + xorl %ebx,%edx + + movl %ecx, 8(%esp) + movl %edx,12(%esp) +') + +define(`dblock',` + sxrk + + dtfs(16) + dsft(32) + dtfs(48) + dsft(64) + dtfs(80) + dsft(96) + dtfs(112) + dsft(128) + dtfs(144) + + movl 256(%ebp),%eax + cmp `$'10,%eax + je $1 + + dsft(160) + dtfs(176) + + movl 256(%ebp),%eax + cmp `$'12,%eax + je $1 + + dsft(192) + dtfs(208) + + movl 256(%ebp),%eax + + .align 4 +$1: + sall `$'4,%eax + addl %eax,%ebp + + dlr +') + +C_FUNCTION_BEGIN(aesEncrypt) + pushl %edi + pushl %esi + pushl %ebp + pushl %ebx + + movl 20(%esp),%ebp + movl 24(%esp),%edi + movl 28(%esp),%esi + + subl `$'32,%esp + + eblock(LOCAL(00)) + + movl (%esp),%eax + movl 4(%esp),%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax, (%edi) + movl %ebx, 4(%edi) + movl %ecx, 8(%edi) + movl %edx,12(%edi) + + addl `$'32,%esp + + xorl %eax,%eax + + popl %ebx + popl %ebp + popl %esi + popl %edi + ret 
+C_FUNCTION_END(aesEncrypt) + + +C_FUNCTION_BEGIN(aesDecrypt) + pushl %edi + pushl %esi + pushl %ebp + pushl %ebx + + movl 20(%esp),%ebp + movl 24(%esp),%edi + movl 28(%esp),%esi + + subl `$'32,%esp + + dblock(LOCAL(01)) + + movl (%esp),%eax + movl 4(%esp),%ebx + movl 8(%esp),%ecx + movl 12(%esp),%edx + bswap %eax + bswap %ebx + bswap %ecx + bswap %edx + movl %eax, (%edi) + movl %ebx, 4(%edi) + movl %ecx, 8(%edi) + movl %edx,12(%edi) + + addl `$'32,%esp + + xorl %eax,%eax + + popl %ebx + popl %ebp + popl %esi + popl %edi + ret +C_FUNCTION_END(aesDecrypt) diff --git a/beecrypt/gas/aesopt.powerpc.S b/beecrypt/gas/aesopt.powerpc.S deleted file mode 100644 index 17e36dc41..000000000 --- a/beecrypt/gas/aesopt.powerpc.S +++ /dev/null @@ -1,683 +0,0 @@ -/* - * aesopt.powerpc.asm - * - * Assembler optimized AES routines for PowerPC processors - * - * Compile target is GNU Assembler - * - * Copyright (c) 2002 Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "aesopt.powerpc.S" - - .text - -/* - * ae0 in r11 - * r7 thru r10 scratch - * four registers for s: r24 r25 r26 r27 - * four registers for t: r28 r29 r30 r31 - */ - -#define s0 r24 -#define s1 r25 -#define s2 r26 -#define s3 r27 -#define t0 r28 -#define t1 r29 -#define t2 r30 -#define t3 r31 - - .macro sxrk rk src - #if WORDS_BIGENDIAN - lwz s0, 0(\src) - lwz s1, 4(\src) - lwz s2, 8(\src) - lwz s3,12(\src) - lwz r7, 0(\rk) - lwz r8, 4(\rk) - lwz r9, 8(\rk) - lwz r10,12(\rk) - #else - # error ppc little-endian not implemented - #endif - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - .endm - - .macro etfs rk offset - lwz t0,\offset+ 0(\rk) - lwz t1,\offset+ 4(\rk) - lwz t2,\offset+ 8(\rk) - lwz t3,\offset+12(\rk) - - rlwinm r7,s0,10,22,29 /* ((s0 >> 24) & 0xff) * 4 */ - rlwinm r8,s1,10,22,29 /* ((s1 >> 24) & 0xff) * 4 */ - rlwinm r9,s2,10,22,29 /* ((s2 >> 24) & 0xff) * 4 */ - rlwinm r10,s3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,1024(r12) - - rlwinm r7,s1,18,22,29 /* ((s1 >> 16) & 0xff) * 4 */ - rlwinm r8,s2,18,22,29 /* ((s2 >> 16) & 0xff) * 4 */ - rlwinm r9,s3,18,22,29 /* ((s3 >> 16) & 0xff) * 4 */ - rlwinm r10,s0,18,22,29 /* ((s0 >> 16) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,1024(r12) - - rlwinm r7,s2,26,22,29 /* ((s2 >> 8) & 0xff) * 4 */ - rlwinm r8,s3,26,22,29 /* ((s3 >> 8) & 0xff) * 4 */ - rlwinm r9,s0,26,22,29 /* ((s0 >> 8) & 0xff) * 4 */ - rlwinm r10,s1,26,22,29 /* ((s1 >> 8) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - 
lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,1024(r12) - - rlwinm r7,s3,2,22,29 /* ((s3 >> 0) & 0xff) * 4 */ - rlwinm r8,s0,2,22,29 /* ((s0 >> 0) & 0xff) * 4 */ - rlwinm r9,s1,2,22,29 /* ((s1 >> 0) & 0xff) * 4 */ - rlwinm r10,s2,2,22,29 /* ((s2 >> 0) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,-3072(r12) - .endm - - .macro esft rk offset - lwz s0,\offset+ 0(\rk) - lwz s1,\offset+ 4(\rk) - lwz s2,\offset+ 8(\rk) - lwz s3,\offset+12(\rk) - - rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */ - rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */ - rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */ - rlwinm r10,t3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,1024(r12) - - rlwinm r7,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */ - rlwinm r8,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */ - rlwinm r9,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */ - rlwinm r10,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,1024(r12) - - rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */ - rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */ - rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */ - rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,1024(r12) - - rlwinm r7,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */ - rlwinm r8,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */ - rlwinm r9,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */ - rlwinm r10,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx 
r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,-3072(r12) - .endm - - .macro elr rk - lwz s0, 0(\rk) - lwz s1, 4(\rk) - lwz s2, 8(\rk) - lwz s3,12(\rk) - - la r12,4096(r12) - - rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */ - rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */ - rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */ - rlwinm r10,t3,10,22,29 /* ((t3 >> 24) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,0,7 /* & 0xff000000 */ - rlwinm r8,r8,0,0,7 /* & 0xff000000 */ - rlwinm r9,r9,0,0,7 /* & 0xff000000 */ - rlwinm r10,r10,0,0,7 /* & 0xff000000 */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - rlwinm r7,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */ - rlwinm r8,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */ - rlwinm r9,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */ - rlwinm r10,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,8,15 /* & 0xff0000 */ - rlwinm r8,r8,0,8,15 /* & 0xff0000 */ - rlwinm r9,r9,0,8,15 /* & 0xff0000 */ - rlwinm r10,r10,0,8,15 /* & 0xff0000 */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */ - rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */ - rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */ - rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,16,23 /* & 0xff00 */ - rlwinm r8,r8,0,16,23 /* & 0xff00 */ - rlwinm r9,r9,0,16,23 /* & 0xff00 */ - rlwinm r10,r10,0,16,23 /* & 0xff00 */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - rlwinm r7,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */ - rlwinm r8,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */ - rlwinm r9,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */ - rlwinm r10,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx 
r10,r10,r12 - rlwinm r7,r7,0,24,31 /* & 0xff */ - rlwinm r8,r8,0,24,31 /* & 0xff */ - rlwinm r9,r9,0,24,31 /* & 0xff */ - rlwinm r10,r10,0,24,31 /* & 0xff */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,-4096(r12) - .endm - - .macro eblock rk src label - sxrk \rk \src - - etfs \rk 16 - esft \rk 32 - etfs \rk 48 - esft \rk 64 - etfs \rk 80 - esft \rk 96 - etfs \rk 112 - esft \rk 128 - etfs \rk 144 - - lwz r11,256(\rk) - cmpwi r11,10 - beq \label - - esft \rk 160 - etfs \rk 176 - - cmpwi r11,12 - beq \label - - esft \rk 192 - etfs \rk 208 - -\label: - slwi r11,r11,4 - add \rk,\rk,r11 - - elr \rk - .endm - - .macro dtfs rk offset - lwz t0,\offset+ 0(\rk) - lwz t1,\offset+ 4(\rk) - lwz t2,\offset+ 8(\rk) - lwz t3,\offset+12(\rk) - - rlwinm r7,s0,10,22,29 /* ((s0 >> 24) & 0xff) * 4 */ - rlwinm r8,s1,10,22,29 /* ((s1 >> 24) & 0xff) * 4 */ - rlwinm r9,s2,10,22,29 /* ((s2 >> 24) & 0xff) * 4 */ - rlwinm r10,s3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,1024(r12) - - rlwinm r7,s3,18,22,29 /* ((s3 >> 16) & 0xff) * 4 */ - rlwinm r8,s0,18,22,29 /* ((s0 >> 16) & 0xff) * 4 */ - rlwinm r9,s1,18,22,29 /* ((s1 >> 16) & 0xff) * 4 */ - rlwinm r10,s2,18,22,29 /* ((s2 >> 16) & 0xff) * 4 */ - -/* start here */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,1024(r12) - - rlwinm r7,s2,26,22,29 /* ((s2 >> 8) & 0xff) * 4 */ - rlwinm r8,s3,26,22,29 /* ((s3 >> 8) & 0xff) * 4 */ - rlwinm r9,s0,26,22,29 /* ((s0 >> 8) & 0xff) * 4 */ - rlwinm r10,s1,26,22,29 /* ((s1 >> 8) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,1024(r12) - - rlwinm r7,s1,2,22,29 /* ((s1 >> 0) & 0xff) * 4 */ - rlwinm r8,s2,2,22,29 /* ((s2 >> 0) & 0xff) * 4 */ - 
rlwinm r9,s3,2,22,29 /* ((s3 >> 0) & 0xff) * 4 */ - rlwinm r10,s0,2,22,29 /* ((s0 >> 0) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor t0,t0,r7 - xor t1,t1,r8 - xor t2,t2,r9 - xor t3,t3,r10 - - la r12,-3072(r12) - .endm - - .macro dsft rk offset - lwz s0,\offset+ 0(\rk) - lwz s1,\offset+ 4(\rk) - lwz s2,\offset+ 8(\rk) - lwz s3,\offset+12(\rk) - - rlwinm r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */ - rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */ - rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */ - rlwinm r10,t3,10,22,29 /* ((s3 >> 24) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,1024(r12) - - rlwinm r7,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */ - rlwinm r8,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */ - rlwinm r9,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */ - rlwinm r10,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,1024(r12) - - rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */ - rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */ - rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */ - rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,1024(r12) - - rlwinm r7,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */ - rlwinm r8,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */ - rlwinm r9,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */ - rlwinm r10,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - la r12,-3072(r12) - .endm - - .macro dlr rk - lwz s0, 0(\rk) - lwz s1, 4(\rk) - lwz s2, 8(\rk) - lwz s3,12(\rk) - - la r12,4096(r12) - - rlwinm 
r7,t0,10,22,29 /* ((t0 >> 24) & 0xff) * 4 */ - rlwinm r8,t1,10,22,29 /* ((t1 >> 24) & 0xff) * 4 */ - rlwinm r9,t2,10,22,29 /* ((t2 >> 24) & 0xff) * 4 */ - rlwinm r10,t3,10,22,29 /* ((t3 >> 24) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,0,7 /* & 0xff000000 */ - rlwinm r8,r8,0,0,7 /* & 0xff000000 */ - rlwinm r9,r9,0,0,7 /* & 0xff000000 */ - rlwinm r10,r10,0,0,7 /* & 0xff000000 */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - rlwinm r7,t3,18,22,29 /* ((t3 >> 16) & 0xff) * 4 */ - rlwinm r8,t0,18,22,29 /* ((t0 >> 16) & 0xff) * 4 */ - rlwinm r9,t1,18,22,29 /* ((t1 >> 16) & 0xff) * 4 */ - rlwinm r10,t2,18,22,29 /* ((t2 >> 16) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,8,15 /* & 0xff0000 */ - rlwinm r8,r8,0,8,15 /* & 0xff0000 */ - rlwinm r9,r9,0,8,15 /* & 0xff0000 */ - rlwinm r10,r10,0,8,15 /* & 0xff0000 */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - rlwinm r7,t2,26,22,29 /* ((t2 >> 8) & 0xff) * 4 */ - rlwinm r8,t3,26,22,29 /* ((t3 >> 8) & 0xff) * 4 */ - rlwinm r9,t0,26,22,29 /* ((t0 >> 8) & 0xff) * 4 */ - rlwinm r10,t1,26,22,29 /* ((t1 >> 8) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,16,23 /* & 0xff00 */ - rlwinm r8,r8,0,16,23 /* & 0xff00 */ - rlwinm r9,r9,0,16,23 /* & 0xff00 */ - rlwinm r10,r10,0,16,23 /* & 0xff00 */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - - rlwinm r7,t1,2,22,29 /* ((t1 >> 0) & 0xff) * 4 */ - rlwinm r8,t2,2,22,29 /* ((t2 >> 0) & 0xff) * 4 */ - rlwinm r9,t3,2,22,29 /* ((t3 >> 0) & 0xff) * 4 */ - rlwinm r10,t0,2,22,29 /* ((t0 >> 0) & 0xff) * 4 */ - lwzx r7,r7,r12 - lwzx r8,r8,r12 - lwzx r9,r9,r12 - lwzx r10,r10,r12 - rlwinm r7,r7,0,24,31 /* & 0xff */ - rlwinm r8,r8,0,24,31 /* & 0xff */ - rlwinm r9,r9,0,24,31 /* & 0xff */ - rlwinm r10,r10,0,24,31 /* & 0xff */ - xor s0,s0,r7 - xor s1,s1,r8 - xor s2,s2,r9 - xor s3,s3,r10 - 
- la r12,-4096(r12) - .endm - - .macro dblock rk src label - sxrk \rk \src - - dtfs \rk 16 - dsft \rk 32 - dtfs \rk 48 - dsft \rk 64 - dtfs \rk 80 - dsft \rk 96 - dtfs \rk 112 - dsft \rk 128 - dtfs \rk 144 - - lwz r11,256(\rk) - cmpwi r11,10 - beq \label - - dsft \rk 160 - dtfs \rk 176 - - cmpwi r11,12 - beq \label - - dsft \rk 192 - dtfs \rk 208 - -\label: - slwi r11,r11,4 - add \rk,\rk,r11 - - dlr \rk - .endm - -C_FUNCTION_BEGIN(aesEncrypt) -LABEL(aesEncrypt) - subi r1,r1,32 - stmw r24,0(r1) - - lis r12,_ae0@ha - la r12,_ae0@l(r12) - - eblock rk=r3 src=r5 label=LOCAL(00) - - #if WORDS_BIGENDIAN - stw s0, 0(r4) - stw s1, 4(r4) - stw s2, 8(r4) - stw s3,12(r4) - #else - # error ppc little-endian mode not supported - #endif - - li r3,0 - lmw r24,0(r1) - addi r1,r1,32 - blr -C_FUNCTION_END(aesEncrypt, LOCAL(aesEncrypt_size)) - - -C_FUNCTION_BEGIN(aesDecrypt) -LABEL(aesDecrypt) - subi r1,r1,32 - stmw r24,0(r1) - - lis r12,_ad0@ha - la r12,_ad0@l(r12) - - dblock rk=r3 src=r5 label=LOCAL(01) - - #if WORDS_BIGENDIAN - stw s0, 0(r4) - stw s1, 4(r4) - stw s2, 8(r4) - stw s3,12(r4) - #else - # error ppc little-endian mode not supported - #endif - - li r3,0 - lmw r24,0(r1) - addi r1,r1,32 - blr -C_FUNCTION_END(aesDecrypt, LOCAL(aesDecrypt_size)) - -C_FUNCTION_BEGIN(aesECBEncrypt) -LABEL(aesECBEncrypt) - subi r1,r1,32 - stmw r24,0(r1) - - mtctr r4 - - lis r12,_ae0@ha - la r12,_ae0@l(r12) - -LOCAL(02): - /* copy r3 into r4 */ - mr r4,r3 - - eblock rk=r4 src=r6 label=LOCAL(03) - - #if WORDS_BIGENDIAN - stw s0, 0(r5) - stw s1, 4(r5) - stw s2, 8(r5) - stw s3,12(r5) - #else - # error ppc little-endian mode not supported - #endif - - addi r5,r5,16 - addi r6,r6,16 - - bdnz LOCAL(02) - - li r3,0 - lmw r24,0(r1) - addi r1,r1,32 - blr -C_FUNCTION_END(aesECBEncrypt, LOCAL(aesECBEncrypt_size)) - - -C_FUNCTION_BEGIN(aesECBDecrypt) -LABEL(aesECBDecrypt) - subi r1,r1,32 - stmw r24,0(r1) - - mtctr r4 - - lis r12,_ad0@ha - la r12,_ad0@l(r12) - -LOCAL(04): - /* copy r3 into r4 */ - mr r4,r3 - - 
dblock rk=r4 src=r6 label=LOCAL(05) - - #if WORDS_BIGENDIAN - stw s0, 0(r5) - stw s1, 4(r5) - stw s2, 8(r5) - stw s3,12(r5) - #else - # error ppc little-endian mode not supported - #endif - - addi r5,r5,16 - addi r6,r6,16 - - bdnz LOCAL(04) - - li r3,0 - lmw r24,0(r1) - addi r1,r1,32 - blr -C_FUNCTION_END(aesECBDecrypt, LOCAL(aesECBDecrypt_size)) diff --git a/beecrypt/gas/aesopt.ppc.m4 b/beecrypt/gas/aesopt.ppc.m4 new file mode 100644 index 000000000..f81f3a50a --- /dev/null +++ b/beecrypt/gas/aesopt.ppc.m4 @@ -0,0 +1,616 @@ +dnl aesopt.ppc.m4 +dnl +dnl NOTE: Only works for big-endian PowerPC! +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/ppc.m4) + +define(`s0',`r24') +define(`s1',`r25') +define(`s2',`r26') +define(`s3',`r27') +define(`t0',`r28') +define(`t1',`r29') +define(`t2',`r30') +define(`t3',`r31') + +dnl sxrk(rk, src): load the four 32-bit words at src+0, +4, +8 and +12 into +dnl s0, s1, s2 and s3 (plain lwz when ASM_BIGENDIAN is yes, byte-reversed lwbrx +dnl otherwise), then xor them with the four words at rk+0 .. rk+12. +dnl Clobbers r7-r10, plus r0 in the byte-reversed branch. +define(`sxrk',` +ifelse(ASM_BIGENDIAN,yes,` + lwz s0, 0($2) + lwz s1, 4($2) + lwz s2, 8($2) + lwz s3,12($2) +',` + li r0,0 + lwbrx s0,$2,r0 + li r0,4 + lwbrx s1,$2,r0 + li r0,8 + lwbrx s2,$2,r0 + li r0,12 + lwbrx s3,$2,r0 +') + lwz r7, 0($1) + lwz r8, 4($1) + lwz r9, 8($1) + lwz r10,12($1) + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 +') + +define(`etfs',` + lwz t0,$2+ 0($1) + lwz t1,$2+ 4($1) + lwz t2,$2+ 8($1) + lwz t3,$2+12($1) + + rlwinm r7,s0,10,22,29 + rlwinm r8,s1,10,22,29 + rlwinm r9,s2,10,22,29 + rlwinm r10,s3,10,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,1024(r12) + + rlwinm r7,s1,18,22,29 + rlwinm r8,s2,18,22,29 + rlwinm r9,s3,18,22,29 + rlwinm r10,s0,18,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,1024(r12) + + rlwinm r7,s2,26,22,29 + rlwinm r8,s3,26,22,29 + rlwinm r9,s0,26,22,29 + rlwinm r10,s1,26,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,1024(r12) + + rlwinm r7,s3,2,22,29 + rlwinm r8,s0,2,22,29 + rlwinm r9,s1,2,22,29 + rlwinm r10,s2,2,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,-3072(r12) +') + +define(`esft',` + lwz s0,$2+ 0($1) + lwz s1,$2+ 4($1) + lwz s2,$2+ 8($1) + lwz s3,$2+12($1) +
rlwinm r7,t0,10,22,29 + rlwinm r8,t1,10,22,29 + rlwinm r9,t2,10,22,29 + rlwinm r10,t3,10,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,1024(r12) + + rlwinm r7,t1,18,22,29 + rlwinm r8,t2,18,22,29 + rlwinm r9,t3,18,22,29 + rlwinm r10,t0,18,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,1024(r12) + + rlwinm r7,t2,26,22,29 + rlwinm r8,t3,26,22,29 + rlwinm r9,t0,26,22,29 + rlwinm r10,t1,26,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,1024(r12) + + rlwinm r7,t3,2,22,29 + rlwinm r8,t0,2,22,29 + rlwinm r9,t1,2,22,29 + rlwinm r10,t2,2,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,-3072(r12) +') + +define(`elr',` + lwz s0, 0($1) + lwz s1, 4($1) + lwz s2, 8($1) + lwz s3,12($1) + + la r12,4096(r12) + + rlwinm r7,t0,10,22,29 + rlwinm r8,t1,10,22,29 + rlwinm r9,t2,10,22,29 + rlwinm r10,t3,10,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,0,7 + rlwinm r8,r8,0,0,7 + rlwinm r9,r9,0,0,7 + rlwinm r10,r10,0,0,7 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + rlwinm r7,t1,18,22,29 + rlwinm r8,t2,18,22,29 + rlwinm r9,t3,18,22,29 + rlwinm r10,t0,18,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,8,15 + rlwinm r8,r8,0,8,15 + rlwinm r9,r9,0,8,15 + rlwinm r10,r10,0,8,15 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + rlwinm r7,t2,26,22,29 + rlwinm r8,t3,26,22,29 + rlwinm r9,t0,26,22,29 + rlwinm r10,t1,26,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,16,23 + rlwinm r8,r8,0,16,23 + rlwinm r9,r9,0,16,23 + rlwinm 
r10,r10,0,16,23 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + rlwinm r7,t3,2,22,29 + rlwinm r8,t0,2,22,29 + rlwinm r9,t1,2,22,29 + rlwinm r10,t2,2,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,24,31 + rlwinm r8,r8,0,24,31 + rlwinm r9,r9,0,24,31 + rlwinm r10,r10,0,24,31 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,-4096(r12) +') + +define(`eblock',` + sxrk($1,$2) + + etfs($1,16) + esft($1,32) + etfs($1,48) + esft($1,64) + etfs($1,80) + esft($1,96) + etfs($1,112) + esft($1,128) + etfs($1,144) + + lwz r11,256($1) + cmpwi r11,10 + beq $3 + + esft($1,160) + etfs($1,176) + + cmpwi r11,12 + beq $3 + + esft($1,192) + etfs($1,208) + +$3: + slwi r11,r11,4 + add $1,$1,r11 + + elr($1) +') + +define(`dtfs',` + lwz t0,$2+ 0($1) + lwz t1,$2+ 4($1) + lwz t2,$2+ 8($1) + lwz t3,$2+12($1) + + rlwinm r7,s0,10,22,29 + rlwinm r8,s1,10,22,29 + rlwinm r9,s2,10,22,29 + rlwinm r10,s3,10,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,1024(r12) + + rlwinm r7,s3,18,22,29 + rlwinm r8,s0,18,22,29 + rlwinm r9,s1,18,22,29 + rlwinm r10,s2,18,22,29 + + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,1024(r12) + + rlwinm r7,s2,26,22,29 + rlwinm r8,s3,26,22,29 + rlwinm r9,s0,26,22,29 + rlwinm r10,s1,26,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,1024(r12) + + rlwinm r7,s1,2,22,29 + rlwinm r8,s2,2,22,29 + rlwinm r9,s3,2,22,29 + rlwinm r10,s0,2,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor t0,t0,r7 + xor t1,t1,r8 + xor t2,t2,r9 + xor t3,t3,r10 + + la r12,-3072(r12) +') + +define(`dsft',` + lwz s0,$2+ 0($1) + lwz s1,$2+ 4($1) + lwz s2,$2+ 8($1) + lwz s3,$2+12($1) + + rlwinm 
r7,t0,10,22,29 + rlwinm r8,t1,10,22,29 + rlwinm r9,t2,10,22,29 + rlwinm r10,t3,10,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,1024(r12) + + rlwinm r7,t3,18,22,29 + rlwinm r8,t0,18,22,29 + rlwinm r9,t1,18,22,29 + rlwinm r10,t2,18,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,1024(r12) + + rlwinm r7,t2,26,22,29 + rlwinm r8,t3,26,22,29 + rlwinm r9,t0,26,22,29 + rlwinm r10,t1,26,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,1024(r12) + + rlwinm r7,t1,2,22,29 + rlwinm r8,t2,2,22,29 + rlwinm r9,t3,2,22,29 + rlwinm r10,t0,2,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,-3072(r12) +') + +define(`dlr',` + lwz s0, 0($1) + lwz s1, 4($1) + lwz s2, 8($1) + lwz s3,12($1) + + la r12,4096(r12) + + rlwinm r7,t0,10,22,29 + rlwinm r8,t1,10,22,29 + rlwinm r9,t2,10,22,29 + rlwinm r10,t3,10,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,0,7 + rlwinm r8,r8,0,0,7 + rlwinm r9,r9,0,0,7 + rlwinm r10,r10,0,0,7 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + rlwinm r7,t3,18,22,29 + rlwinm r8,t0,18,22,29 + rlwinm r9,t1,18,22,29 + rlwinm r10,t2,18,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,8,15 + rlwinm r8,r8,0,8,15 + rlwinm r9,r9,0,8,15 + rlwinm r10,r10,0,8,15 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + rlwinm r7,t2,26,22,29 + rlwinm r8,t3,26,22,29 + rlwinm r9,t0,26,22,29 + rlwinm r10,t1,26,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,16,23 + rlwinm r8,r8,0,16,23 + rlwinm r9,r9,0,16,23 + rlwinm r10,r10,0,16,23 + 
xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + rlwinm r7,t1,2,22,29 + rlwinm r8,t2,2,22,29 + rlwinm r9,t3,2,22,29 + rlwinm r10,t0,2,22,29 + lwzx r7,r7,r12 + lwzx r8,r8,r12 + lwzx r9,r9,r12 + lwzx r10,r10,r12 + rlwinm r7,r7,0,24,31 + rlwinm r8,r8,0,24,31 + rlwinm r9,r9,0,24,31 + rlwinm r10,r10,0,24,31 + xor s0,s0,r7 + xor s1,s1,r8 + xor s2,s2,r9 + xor s3,s3,r10 + + la r12,-4096(r12) +') + +define(`dblock',` + sxrk($1,$2) + + dtfs($1,16) + dsft($1,32) + dtfs($1,48) + dsft($1,64) + dtfs($1,80) + dsft($1,96) + dtfs($1,112) + dsft($1,128) + dtfs($1,144) + + lwz r11,256($1) + cmpwi r11,10 + beq $3 + + dsft($1,160) + dtfs($1,176) + + cmpwi r11,12 + beq $3 + + dsft($1,192) + dtfs($1,208) + +$3: + slwi r11,r11,4 + add $1,$1,r11 + + dlr($1) +') + +EXTERNAL_VARIABLE(_ae0) +EXTERNAL_VARIABLE(_ad0) + +C_FUNCTION_BEGIN(aesEncrypt) + subi r1,r1,32 + stmw r24,0(r1) + + LOAD_ADDRESS(_ae0,r12) + + eblock(r3,r5,LOCAL(00)) + +ifelse(ASM_BIGENDIAN,yes,` + stw s0, 0(r4) + stw s1, 4(r4) + stw s2, 8(r4) + stw s3,12(r4) +',` + li r0,0 + stwbrx s0,r4,r0 + li r0,4 + stwbrx s1,r4,r0 + li r0,8 + stwbrx s2,r4,r0 + li r0,12 + stwbrx s3,r4,r0 +') + + li r3,0 + lmw r24,0(r1) + addi r1,r1,32 + blr +C_FUNCTION_END(aesEncrypt) + + +C_FUNCTION_BEGIN(aesDecrypt) + subi r1,r1,32 + stmw r24,0(r1) + + LOAD_ADDRESS(_ad0,r12) + + dblock(r3,r5,LOCAL(01)) + +ifelse(ASM_BIGENDIAN,yes,` + stw s0, 0(r4) + stw s1, 4(r4) + stw s2, 8(r4) + stw s3,12(r4) +',` + li r0,0 + stwbrx s0,r4,r0 + li r0,4 + stwbrx s1,r4,r0 + li r0,8 + stwbrx s2,r4,r0 + li r0,12 + stwbrx s3,r4,r0 +') + + li r3,0 + lmw r24,0(r1) + addi r1,r1,32 + blr +C_FUNCTION_END(aesDecrypt) diff --git a/beecrypt/gas/alpha.m4 b/beecrypt/gas/alpha.m4 new file mode 100644 index 000000000..49366dae0 --- /dev/null +++ b/beecrypt/gas/alpha.m4 @@ -0,0 +1,34 @@ +dnl alpha.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl 
modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +undefine(`C_FUNCTION_BEGIN') +define(C_FUNCTION_BEGIN,` + .text + .align 5 + .globl $1 + .ent $1 +$1: + .frame `$'sp, 0, `$'26 + .prologue 0 +') +undefine(`C_FUNCTION_END') +define(C_FUNCTION_END,` + .end $1 +') diff --git a/beecrypt/gas/asmdefs.m4 b/beecrypt/gas/asmdefs.m4 new file mode 100644 index 000000000..f8341c95f --- /dev/null +++ b/beecrypt/gas/asmdefs.m4 @@ -0,0 +1,41 @@ +dnl asmdefs.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +ifelse(substr(ASM_OS,0,5),linux,` +define(USE_SIZE_DIRECTIVE,yes) +') + +define(SYMNAME,`GSYM_PREFIX`$1'') +define(LOCAL,`LSYM_PREFIX`$1'') + +define(C_FUNCTION_BEGIN,` + TEXTSEG + GLOBL SYMNAME($1) +SYMNAME($1): +') + +ifelse(USE_SIZE_DIRECTIVE,yes,` +define(C_FUNCTION_END,` +LOCAL($1)_size: + .size SYMNAME($1), LOCAL($1)_size - SYMNAME($1) +') +',` +define(C_FUNCTION_END,`') +') diff --git a/beecrypt/gas/blowfishopt.i586.S b/beecrypt/gas/blowfishopt.i586.S deleted file mode 100644 index 69b89bb72..000000000 --- a/beecrypt/gas/blowfishopt.i586.S +++ /dev/null @@ -1,178 +0,0 @@ -/* - * blowfishopt.i586.S - * - * Assembler optimized blowfish routines for Intel Pentium processors - * - * Compile target is GNU Assembler - * - * Copyright (c) 2000, 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "blowfishopt.i586.S" - - .text - - .macro etworounds p # bp in %esi, xl and xr in %ecx and %edx, %eax and %ebx clear - xorl 0+\p(%esi),%ecx - roll $16,%ecx - movb %ch,%al - movb %cl,%bl - roll $16,%ecx - movl 0x000+72(%esi,%eax,4),%edi - addl 0x400+72(%esi,%ebx,4),%edi - movb %ch,%al - movb %cl,%bl - xorl 0x800+72(%esi,%eax,4),%edi - addl 0xC00+72(%esi,%ebx,4),%edi - xorl %edi,%edx - xorl 4+\p(%esi),%edx - roll $16,%edx - movb %dh,%al - movb %dl,%bl - roll $16,%edx - movl 0x000+72(%esi,%eax,4),%edi - addl 0x400+72(%esi,%ebx,4),%edi - movb %dh,%al - movb %dl,%bl - xorl 0x800+72(%esi,%eax,4),%edi - addl 0xC00+72(%esi,%ebx,4),%edi - xorl %edi,%ecx - .endm - - .macro dtworounds p # bp in %esi, xl and xr in %ecx and %edx, %eax and %ebx clear - xorl 4+\p(%esi),%ecx - roll $16,%ecx - movb %ch,%al - movb %cl,%bl - roll $16,%ecx - movl 0x000+72(%esi,%eax,4),%edi - addl 0x400+72(%esi,%ebx,4),%edi - movb %ch,%al - movb %cl,%bl - xorl 0x800+72(%esi,%eax,4),%edi - addl 0xC00+72(%esi,%ebx,4),%edi - xorl %edi,%edx - xorl 0+\p(%esi),%edx - roll $16,%edx - movb %dh,%al - movb %dl,%bl - roll $16,%edx - movl 0x000+72(%esi,%eax,4),%edi - addl 0x400+72(%esi,%ebx,4),%edi - movb %dh,%al - movb %dl,%bl - xorl 0x800+72(%esi,%eax,4),%edi - addl 0xC00+72(%esi,%ebx,4),%edi - xorl %edi,%ecx - .endm - -C_FUNCTION_BEGIN(blowfishEncrypt) -LABEL(blowfishEncrypt) - /* parameter one is the blowfish parameters; need to extract bp and set it up in esi */ - pushl %edi - pushl %esi - pushl %ebx - - movl 16(%esp),%esi # esi now contains bp - movl 24(%esp),%edi # edi now contains src - - xorl %eax,%eax - xorl %ebx,%ebx - - movl 0(%edi),%ecx - movl 4(%edi),%edx - - bswap %ecx - bswap %edx - - etworounds p= 0 - etworounds p= 8 - etworounds 
p=16 - etworounds p=24 - etworounds p=32 - etworounds p=40 - etworounds p=48 - etworounds p=56 - - movl 20(%esp),%edi # edi now contains dst - xorl 64(%esi),%ecx - xorl 68(%esi),%edx - - bswap %ecx - bswap %edx - - movl %ecx,4(%edi) - movl %edx,0(%edi) - - xorl %eax,%eax - popl %ebx - popl %esi - popl %edi - ret -C_FUNCTION_END(blowfishEncrypt, LOCAL(blowfishEncrypt_size)) - - -C_FUNCTION_BEGIN(blowfishDecrypt) -LABEL(blowfishDecrypt) - /* parameter one is the blowfish parameters; need to extract bp and set it up in ebp */ - pushl %edi - pushl %esi - pushl %ebx - - movl 16(%esp),%esi # esi now contains bp - movl 24(%esp),%edi # edi now contains dst - - xorl %eax,%eax - xorl %ebx,%ebx - - movl 0(%edi),%ecx - movl 4(%edi),%edx - - bswap %ecx - bswap %edx - - dtworounds p=64 - dtworounds p=56 - dtworounds p=48 - dtworounds p=40 - dtworounds p=32 - dtworounds p=24 - dtworounds p=16 - dtworounds p= 8 - - movl 20(%esp),%edi # edi now contains dst - xorl 4(%esi),%ecx - xorl 0(%esi),%edx - - bswap %ecx - bswap %edx - - movl %ecx,4(%edi) - movl %edx,0(%edi) - - xorl %eax,%eax - - popl %ebx - popl %esi - popl %edi - ret -C_FUNCTION_END(blowfishDecrypt, LOCAL(blowfishDecrypt_size)) diff --git a/beecrypt/gas/blowfishopt.i586.m4 b/beecrypt/gas/blowfishopt.i586.m4 new file mode 100644 index 000000000..4233738fc --- /dev/null +++ b/beecrypt/gas/blowfishopt.i586.m4 @@ -0,0 +1,162 @@ +dnl blowfishopt.i586.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/x86.m4) + +dnl etworounds(off): during this macro we assume: +dnl bp in %esi, xl and xr in %ecx and %edx +dnl The word at off+0 from %esi is xored into %ecx first, the word at off+4 +dnl into %edx second. %eax and %ebx do not have to be clear on entry: every +dnl use is preceded by a movzx that rewrites the whole register. +dnl Clobbers %eax, %ebx and %edi. + +define(`etworounds',` + xorl $1+0(%esi),%ecx + roll `$'16,%ecx + movzx %ch,%eax + movzx %cl,%ebx + roll `$'16,%ecx + movl 0x000+72(%esi,%eax,4),%edi + addl 0x400+72(%esi,%ebx,4),%edi + movzx %ch,%eax + movzx %cl,%ebx + xorl 0x800+72(%esi,%eax,4),%edi + addl 0xC00+72(%esi,%ebx,4),%edi + xorl %edi,%edx + xorl $1+4(%esi),%edx + roll `$'16,%edx + movzx %dh,%eax + movzx %dl,%ebx + roll `$'16,%edx + movl 0x000+72(%esi,%eax,4),%edi + addl 0x400+72(%esi,%ebx,4),%edi + movzx %dh,%eax + movzx %dl,%ebx + xorl 0x800+72(%esi,%eax,4),%edi + addl 0xC00+72(%esi,%ebx,4),%edi + xorl %edi,%ecx +') + +dnl dtworounds(off): bp in %esi, xl and xr in %ecx and %edx +dnl The word at off+4 from %esi is xored into %ecx first, the word at off+0 +dnl into %edx second. %eax and %ebx do not have to be clear on entry (movzx +dnl rewrites them in full). Clobbers %eax, %ebx and %edi. +define(`dtworounds',` + xorl $1+4(%esi),%ecx + roll `$'16,%ecx + movzx %ch,%eax + movzx %cl,%ebx + roll `$'16,%ecx + movl 0x000+72(%esi,%eax,4),%edi + addl 0x400+72(%esi,%ebx,4),%edi + movzx %ch,%eax + movzx %cl,%ebx + xorl 0x800+72(%esi,%eax,4),%edi + addl 0xC00+72(%esi,%ebx,4),%edi + xorl %edi,%edx + xorl $1+0(%esi),%edx + roll `$'16,%edx + movzx %dh,%eax + movzx %dl,%ebx + roll `$'16,%edx + movl 0x000+72(%esi,%eax,4),%edi + addl 0x400+72(%esi,%ebx,4),%edi + movzx %dh,%eax + movzx %dl,%ebx + xorl 0x800+72(%esi,%eax,4),%edi + addl 0xC00+72(%esi,%ebx,4),%edi + xorl %edi,%ecx +') + +C_FUNCTION_BEGIN(blowfishEncrypt) + pushl %edi + pushl %esi + pushl %ebx + + movl 16(%esp),%esi + movl 24(%esp),%edi + + movl 0(%edi),%ecx + movl 4(%edi),%edx + + bswap %ecx + bswap %edx + + etworounds(0) + etworounds(8) + etworounds(16) + etworounds(24) + etworounds(32) + etworounds(40) + etworounds(48) +
etworounds(56) + + movl 20(%esp),%edi + xorl 64(%esi),%ecx + xorl 68(%esi),%edx + + bswap %ecx + bswap %edx + + movl %ecx,4(%edi) + movl %edx,0(%edi) + + xorl %eax,%eax + popl %ebx + popl %esi + popl %edi + ret +C_FUNCTION_END(blowfishEncrypt) + + +C_FUNCTION_BEGIN(blowfishDecrypt) + pushl %edi + pushl %esi + pushl %ebx + + movl 16(%esp),%esi + movl 24(%esp),%edi + + movl 0(%edi),%ecx + movl 4(%edi),%edx + + bswap %ecx + bswap %edx + + dtworounds(64) + dtworounds(56) + dtworounds(48) + dtworounds(40) + dtworounds(32) + dtworounds(24) + dtworounds(16) + dtworounds(8) + + movl 20(%esp),%edi + xorl 4(%esi),%ecx + xorl 0(%esi),%edx + + bswap %ecx + bswap %edx + + movl %ecx,4(%edi) + movl %edx,0(%edi) + + xorl %eax,%eax + + popl %ebx + popl %esi + popl %edi + ret +C_FUNCTION_END(blowfishDecrypt) diff --git a/beecrypt/gas/blowfishopt.powerpc.S b/beecrypt/gas/blowfishopt.powerpc.S deleted file mode 100644 index 28076f061..000000000 --- a/beecrypt/gas/blowfishopt.powerpc.S +++ /dev/null @@ -1,242 +0,0 @@ -/* - * blowfishopt.powerpc.asm - * - * Assembler optimized Blowfish routines for PowerPC processors - * - * Compile target is GNU Assembler - * - * Copyright (c) 2002 Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "blowfish.powerpc.S" - - .text - - .macro round xl xr offset - lwz r9,\offset(r3) - xor \xl,\xl,r9 - rlwinm r9,\xl,10,22,29 - rlwinm r10,\xl,18,22,29 - lwzx r9,r9,r28 - lwzx r10,r10,r29 - rlwinm r11,\xl,26,22,29 - add r9,r9,r10 - lwzx r11,r11,r30 - rlwinm r12,\xl,2,22,29 - xor r9,r9,r11 - lwzx r12,r12,r31 - add r9,r9,r12 - xor \xr,\xr,r9 - .endm - - .macro eblock - round xl=r7 xr=r8 offset=0 - round xl=r8 xr=r7 offset=4 - round xl=r7 xr=r8 offset=8 - round xl=r8 xr=r7 offset=12 - round xl=r7 xr=r8 offset=16 - round xl=r8 xr=r7 offset=20 - round xl=r7 xr=r8 offset=24 - round xl=r8 xr=r7 offset=28 - round xl=r7 xr=r8 offset=32 - round xl=r8 xr=r7 offset=36 - round xl=r7 xr=r8 offset=40 - round xl=r8 xr=r7 offset=44 - round xl=r7 xr=r8 offset=48 - round xl=r8 xr=r7 offset=52 - round xl=r7 xr=r8 offset=56 - round xl=r8 xr=r7 offset=60 - - lwz r9,64(r3) - lwz r10,68(r3) - xor r7,r7,r9 - xor r8,r8,r10 - .endm - - .macro dblock - round xl=r7 xr=r8 offset=68 - round xl=r8 xr=r7 offset=64 - round xl=r7 xr=r8 offset=60 - round xl=r8 xr=r7 offset=56 - round xl=r7 xr=r8 offset=52 - round xl=r8 xr=r7 offset=48 - round xl=r7 xr=r8 offset=44 - round xl=r8 xr=r7 offset=40 - round xl=r7 xr=r8 offset=36 - round xl=r8 xr=r7 offset=32 - round xl=r7 xr=r8 offset=28 - round xl=r8 xr=r7 offset=24 - round xl=r7 xr=r8 offset=20 - round xl=r8 xr=r7 offset=16 - round xl=r7 xr=r8 offset=12 - round xl=r8 xr=r7 offset=8 - - lwz r9,4(r3) - lwz r10,0(r3) - xor r7,r7,r9 - xor r8,r8,r10 - .endm - -C_FUNCTION_BEGIN(blowfishEncrypt) -LABEL(blowfishEncrypt) - la r1,-16(r1) - stmw r28,0(r1) - - la r28,72(r3) - la r29,1096(r3) - la r30,2120(r3) - la r31,3144(r3) - - #if WORDS_BIGENDIAN - lwz r7,0(r5) - lwz r8,4(r5) - #else - # error ppc 
little-endian mode not supported - #endif - - eblock - - #if WORDS_BIGENDIAN - stw r7,4(r4) - stw r8,0(r4) - #else - # error ppc little-endian mode not supported - #endif - - li r3,0 - lmw r28,0(r1) - la r1,16(r1) - blr -C_FUNCTION_END(blowfishEncrypt, LOCAL(blowfishEncrypt_size)) - - -C_FUNCTION_BEGIN(blowfishDecrypt) -LABEL(blowfishDecrypt) - la r1,-16(r1) - stmw r28,0(r1) - - la r28,72(r3) - la r29,1096(r3) - la r30,2120(r3) - la r31,3144(r3) - - #if WORDS_BIGENDIAN - lwz r7,0(r5) - lwz r8,4(r5) - #else - # error ppc little-endian mode not supported - #endif - - dblock - - #if WORDS_BIGENDIAN - stw r7,4(r4) - stw r8,0(r4) - #else - # error ppc little-endian mode not supported - #endif - - li r3,0 - lmw r28,0(r1) - la r1,16(r1) - blr -C_FUNCTION_END(blowfishDecrypt, LOCAL(blowfishDecrypt_size)) - - -C_FUNCTION_BEGIN(blowfishECBEncrypt) -LABEL(blowfishECBEncrypt) - la r1,-16(r1) - stmw r28,0(r1) - - mtctr r4 - - la r28,72(r3) - la r29,1096(r3) - la r30,2120(r3) - la r31,3144(r3) - -LOCAL(00): - #if WORDS_BIGENDIAN - lwz r7,0(r6) - lwz r8,4(r6) - #else - # error ppc little-endian mode not supported - #endif - - eblock - - #if WORDS_BIGENDIAN - stw r7,4(r5) - stw r8,0(r5) - #else - # error ppc little-endian mode not supported - #endif - - la r5,8(r5) - la r6,8(r6) - - bdnz LOCAL(00) - - li r3,0 - lmw r28,0(r1) - la r1,16(r1) - blr -C_FUNCTION_END(blowfishECBEncrypt, LOCAL(blowfishECBEncrypt_size)) - - -C_FUNCTION_BEGIN(blowfishECBDecrypt) -LABEL(blowfishECBDecrypt) - la r1,-16(r1) - stmw r28,0(r1) - - mtctr r4 - - la r28,72(r3) - la r29,1096(r3) - la r30,2120(r3) - la r31,3144(r3) - -LOCAL(01): - #if WORDS_BIGENDIAN - lwz r7,0(r6) - lwz r8,4(r6) - #else - # error ppc little-endian mode not supported - #endif - - dblock - - #if WORDS_BIGENDIAN - stw r7,4(r5) - stw r8,0(r5) - #else - # error ppc little-endian mode not supported - #endif - - la r5,8(r5) - la r6,8(r6) - - bdnz LOCAL(01) - - li r3,0 - lmw r28,0(r1) - la r1,16(r1) - blr -C_FUNCTION_END(blowfishECBDecrypt, 
LOCAL(blowfishECBDecrypt_size)) diff --git a/beecrypt/gas/blowfishopt.ppc.m4 b/beecrypt/gas/blowfishopt.ppc.m4 new file mode 100644 index 000000000..74214aa9c --- /dev/null +++ b/beecrypt/gas/blowfishopt.ppc.m4 @@ -0,0 +1,161 @@ +dnl blowfishopt.ppc.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/ppc.m4) + +define(`round',` + lwz r9,$3(r3) + xor $1,$1,r9 + rlwinm r9,$1,10,22,29 + rlwinm r10,$1,18,22,29 + lwzx r9,r9,r28 + lwzx r10,r10,r29 + rlwinm r11,$1,26,22,29 + add r9,r9,r10 + lwzx r11,r11,r30 + rlwinm r12,$1,2,22,29 + xor r9,r9,r11 + lwzx r12,r12,r31 + add r9,r9,r12 + xor $2,$2,r9 +') + +define(`eblock',` + round(r7,r8,0) + round(r8,r7,4) + round(r7,r8,8) + round(r8,r7,12) + round(r7,r8,16) + round(r8,r7,20) + round(r7,r8,24) + round(r8,r7,28) + round(r7,r8,32) + round(r8,r7,36) + round(r7,r8,40) + round(r8,r7,44) + round(r7,r8,48) + round(r8,r7,52) + round(r7,r8,56) + round(r8,r7,60) + lwz r9,64(r3) + lwz r10,68(r3) + xor r7,r7,r9 + xor r8,r8,r10 +') + +define(`dblock',` + round(r7,r8,68) + round(r8,r7,64) + round(r7,r8,60) + round(r8,r7,56) + round(r7,r8,52) + round(r8,r7,48) + round(r7,r8,44) + round(r8,r7,40) + 
round(r7,r8,36)
+	round(r8,r7,32)
+	round(r7,r8,28)
+	round(r8,r7,24)
+	round(r7,r8,20)
+	round(r8,r7,16)
+	round(r7,r8,12)
+	round(r8,r7,8)
+	lwz r9,4(r3)
+	lwz r10,0(r3)
+	xor r7,r7,r9
+	xor r8,r8,r10
+')
+
+
+C_FUNCTION_BEGIN(blowfishEncrypt)
+	la r1,-16(r1)	# make room on the stack for r28-r31
+	stmw r28,0(r1)	# save r28-r31
+
+	la r28,72(r3)	# r28-r31: four lookup tables at r3+72, spaced 1024 bytes apart
+	la r29,1096(r3)
+	la r30,2120(r3)
+	la r31,3144(r3)
+
+ifelse(ASM_BIGENDIAN,yes,`
+	lwz r7,0(r5)
+	lwz r8,4(r5)
+',`
+	li r0,0
+	lwbrx r7,r5,r0	# r7 = byte-reversed word at r5+0
+	li r0,4
+	lwbrx r8,r5,r0	# r8 = byte-reversed word at r5+4 (same register pair as the lwz branch)
+')
+
+	eblock
+
+ifelse(ASM_BIGENDIAN,yes,`
+	stw r7,4(r4)
+	stw r8,0(r4)
+',`
+	li r0,4
+	stwbrx r7,r4,r0	# byte-reversed r7 to r4+4
+	li r0,0
+	stwbrx r8,r4,r0	# byte-reversed r8 to r4+0 (same register pair as the stw branch)
+')
+
+	li r3,0	# return value 0
+	lmw r28,0(r1)	# restore r28-r31
+	la r1,16(r1)
+	blr
+C_FUNCTION_END(blowfishEncrypt)
+
+
+C_FUNCTION_BEGIN(blowfishDecrypt)
+	la r1,-16(r1)	# make room on the stack for r28-r31
+	stmw r28,0(r1)	# save r28-r31
+
+	la r28,72(r3)	# r28-r31: four lookup tables at r3+72, spaced 1024 bytes apart
+	la r29,1096(r3)
+	la r30,2120(r3)
+	la r31,3144(r3)
+
+ifelse(ASM_BIGENDIAN,yes,`
+	lwz r7,0(r5)
+	lwz r8,4(r5)
+',`
+	li r0,0
+	lwbrx r7,r5,r0	# r7 = byte-reversed word at r5+0
+	li r0,4
+	lwbrx r8,r5,r0	# r8 = byte-reversed word at r5+4 (same register pair as the lwz branch)
+')
+
+	dblock
+
+ifelse(ASM_BIGENDIAN,yes,`
+	stw r7,4(r4)
+	stw r8,0(r4)
+',`
+	li r0,4
+	stwbrx r7,r4,r0	# byte-reversed r7 to r4+4
+	li r0,0
+	stwbrx r8,r4,r0	# byte-reversed r8 to r4+0 (same register pair as the stw branch)
+')
+
+	li r3,0	# return value 0
+	lmw r28,0(r1)	# restore r28-r31
+	la r1,16(r1)
+	blr
+C_FUNCTION_END(blowfishDecrypt)
diff --git a/beecrypt/gas/sha1opt.ia64.S b/beecrypt/gas/fips180opt.ia64.S index 77a2a975a..a9c6edaa8 100644 --- a/beecrypt/gas/sha1opt.ia64.S +++ b/beecrypt/gas/fips180opt.ia64.S @@ -27,7 +27,7 @@ * */ -#include "beecrypt.gas.h" +#include "config.gas.h" #define saved_pfs r14 #define saved_lc r15 @@ -38,13 +38,13 @@ .text -#define K00 0x5a827999 -#define K20 0x6ed9eba1 -#define K40 0x8f1bbcdc -#define K60 0xca62c1d6 + .equ K00, 0x5a827999 + .equ K20, 0x6ed9eba1 + .equ K40, 0x8f1bbcdc + .equ K60, 0xca62c1d6 -#define PARAM_H 0 -#define PARAM_DATA 20 + .equ PARAM_H, 0 + .equ PARAM_DATA, 20 /* for optimization, I have to see how I can parallellize the code diff --git a/beecrypt/gas/ia64.m4 b/beecrypt/gas/ia64.m4 new file mode 100644 index 000000000..1ac0898d3 --- /dev/null +++ b/beecrypt/gas/ia64.m4 @@ -0,0 +1,35 @@ 
+dnl ia64.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +define(`saved_pfs',`r14') +define(`saved_lc',`r15') + +ifelse(substr(ASM_OS,0,4),hpux,` +undefine(`C_FUNCTION_BEGIN') +define(C_FUNCTION_BEGIN,` + TEXTSEG + GLOBL SYMNAME($1)# + .proc SYMNAME($1)# +SYMNAME($1): +') +',` + .explicit +') diff --git a/beecrypt/gas/m68k.m4 b/beecrypt/gas/m68k.m4 new file mode 100644 index 000000000..1c6bb6e1e --- /dev/null +++ b/beecrypt/gas/m68k.m4 @@ -0,0 +1,34 @@ +ifelse(REGISTERS_NEED_PERCENT,yes,` +define(d0,%d0) +define(d1,%d1) +define(d2,%d2) +define(d3,%d3) +define(d4,%d4) +define(d5,%d5) +define(d6,%d6) +define(d7,%d7) +define(a0,%a0) +define(a1,%a1) +define(a2,%a2) +define(a3,%a3) +define(a4,%a4) +define(a5,%a5) +define(a6,%a6) +define(a7,%a7) +define(sp,%sp) +') +ifelse(INSTRUCTIONS_NEED_DOT_SIZE_QUALIF,yes,` +define(addal,adda.l) +define(addl,add.l) +define(addql,addq.l) +define(addxl,addx.l) +define(clrl,clr.l) +define(lsll,lsl.l) +define(movel,move.l) +define(moveml,movem.l) +define(moveal,movea.l) +define(umull,umul.l) +define(subl,sub.l) +define(subql,subq.l) +define(subxl,subx.l) +') diff --git a/beecrypt/gas/mp32opt.arm.S b/beecrypt/gas/mp32opt.arm.S 
deleted file mode 100644 index 5908047ba..000000000 --- a/beecrypt/gas/mp32opt.arm.S +++ /dev/null @@ -1,180 +0,0 @@ -/* - * mp32opt.arm.S - * - * Assembler optimized multiprecision integer routines for ARM processors - * - * Compile target is GNU Assembler - * - * Copyright (c) 2001, 2002 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "mp32opt.arm.S" - - .text - -/* ARM uses up to four registers for parameter passing */ - -#if 0 -C_FUNCTION_BEGIN(mp32addw) -LABEL(mp32addw) - /* r0 is xsize and must be at least one, r1 = xdata, r2 = y */ - stmfd sp!, {r4, lr} - add r1, r1, r0, asl #2 - mov r3, #0 -LOCAL(mp32addw_loop): - ldr r4, [r1, #-4] - adds r4, r4, r2 - adc r2, r3, r3 - str r4, [r1, #-4]! - subs r0, r0, #1 - bne LOCAL(mp32addw_loop) - mov r0, r2 - ldmfd sp!, {r4, pc} -C_FUNCTION_END(mp32addw, LOCAL(mp32addw_size)) - - -C_FUNCTION_BEGIN(mp32subw) -LABEL(mp32subw) - /* r0 is xsize and must be at least one, r1 = xdata, r2 = y */ - stmfd sp!, {r4, lr} - add r1, r1, r0, asl #2 - mov r3, #0 -LOCAL(mp32subw_loop): - ldr r4, [r1, #-4] - subs r4, r4, r2 - adc r2, r3, r3 - str r4, [r1, #-4]! 
- subs r0, r0, #1 - bne LOCAL(mp32subw_loop) - mov r0, r2 - ldmfd sp!, {r4, pc} -C_FUNCTION_END(mp32subw, LOCAL(mp32subw_size)) - - -C_FUNCTION_BEGIN(mp32add) -LABEL(mp32add) - /* r0 is size, r1 = xdata, r2 = ydata */ - stmfd sp!, {r4, r5, lr} - /* copy cpsr to r5 and clear the carry bit */ - mrs r5, cpsr - bic r5, r5, #0x20000000 - /* adjust the addresses */ - add r1, r1, r0, asl #2 - add r2, r2, r0, asl #2 -LOCAL(mp32add_loop): - /* restore the carry bit */ - msr cpsr_c, r5 - ldr r3, [r1, #-4]! - ldr r4, [r2, #-4]! - adcs r3, r3, r2 - str r3, [r1, #0] - /* save the carry bit */ - mrs r5, cpsr - subs r0, r0, #1 - bne LOCAL(mp32add_loop) - - /* restore the carry bit */ - msr cpsr_c, r5 - - /* set the result to the proper value */ - adc r0, r0, r0 - ldmfd sp!, {r4, r5, pc} -C_FUNCTION_END(mp32add, LOCAL(mp32add_size)) -#endif - - -C_FUNCTION_BEGIN(mp32setmul) -LABEL(mp32setmul) - stmfd sp!, {r4, r5, lr} - /* adjust the addresses */ - add r1, r1, r0, asl #2 - add r2, r2, r0, asl #2 - /* r3 is the multiplicand; r4 load from memory, r5 is scratch, ip is carry */ - mov ip, #0 -LOCAL(mp32setmul_loop): - ldr r4, [r2, #-4]! - mov r5, #0 - umlal ip, r5, r3, r4 - str ip, [r1, #-4]! - mov ip, r5 - subs r0, r0, #1 - bne LOCAL(mp32setmul_loop) - /* return carry */ - mov r0, ip - ldmfd sp!, {r4, r5, pc} -C_FUNCTION_END(mp32setmul, LOCAL(mp32setmul_size)) - - -C_FUNCTION_BEGIN(mp32addmul) -LABEL(mp32addmul) - stmfd sp!, {r4, r5, r6, lr} - /* adjust the addresses */ - add r1, r1, r0, asl #2 - add r2, r2, r0, asl #2 - /* r3 is the multiplicand; r4 & r5 load from memory, r6 is scratch, ip is carry */ - mov ip, #0 -LOCAL(mp32addmul_loop): - ldr r4, [r2, #-4]! - ldr r5, [r1, #-4] - mov r6, #0 - umlal ip, r6, r3, r4 - adds r5, r5, ip - adc ip, r6, #0 - str r5, [r1, #-4]! 
- subs r0, r0, #1 - bne LOCAL(mp32addmul_loop) - /* return carry */ - mov r0, ip - ldmfd sp!, {r4, r5, r6, pc} -C_FUNCTION_END(mp32addmul, LOCAL(mp32addmul_size)) - - -#if 0 -/* this routine needs fixing; it causes a core dump for some reason */ -/* unfortunately the system I test this on has no debugger */ -C_FUNCTION_BEGIN(mp32addsqrtrc) -LABEL(mp32addsqrtrc): - stmfd sp!, {r4, r5, r6, lr} - /* adjust the addresses */ - add r1, r1, r0, asl #2 - add r2, r2, r0, asl #2 - /* r3 is a zero register, ip is the carry */ - mov r3, #0 - mov ip, #0 -LOCAL(mp32addsqrtrc_loop): - ldr r4, [r2, #-4]! - mov r6, #0 - umlal ip, r6, r4, r4 - ldr r5, [r1, #-4] /* lo word */ - ldr r4, [r1, #-8] /* hi word */ - adds r5, r5, ip - adcs r4, r4, r6 - str r5, [r1, #-4] - str r4, [r1, #-8]! - adc ip, r3, #0 /* set carry */ - subs r0, r0, #1 - bne LOCAL(mp32addsqrtrc_loop) - /* return carry */ - mov r0, ip - ldmfd sp!, {r4, r5, r6, pc} -C_FUNCTION_END(mp32addsqrtrc, LOCAL(mp32addsqrtrc_size)) -#endif diff --git a/beecrypt/gas/mp32opt.ia64.S b/beecrypt/gas/mp32opt.ia64.S deleted file mode 100644 index 520ff4b52..000000000 --- a/beecrypt/gas/mp32opt.ia64.S +++ /dev/null @@ -1,260 +0,0 @@ -/* - * mp32opt.ia64.S - * - * Assembler optimized multiprecision integer routines for ia64 (Intel Itanium) - * - * Compile target is GNU Assembler - * - * Copyright (c) 2000, 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -/* - * I will need to implement 64 bit multiprecision assembler-optimized routines - * before this platform can be tested adequately. The current 32 bit ones suffer - * from loading into integer registers, conversion to floating point, doing the xma - * and converting back to integer; 64 bit values can be loaded directly into - * floating point registers, which should shave off a lot of cycles. - */ - -#include "beecrypt.gas.h" - -#define saved_pfs r14 -#define saved_lc r15 - -#define size r16 -#define dst r17 -#define src r18 - - .file "mp32opt.ia64.S" - - .text - - .explicit - - .align 32 - .global mpzero# - .proc mpzero# - -mpzero: - alloc saved_pfs = ar.pfs,2,0,0,0 - mov saved_lc = ar.lc - sub size = in0,r0,1;; - mov src = in1 - mov ar.lc = size;; -.L00: - st4 [src] = r0,4 - br.ctop.sptk .L00 - ;; - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 - .endp mpzero# - - - .align 32 - .global mpcopy# - .proc mpcopy# - -mpcopy: - alloc saved_pfs = ar.pfs,3,5,0,8 - mov saved_lc = ar.lc - sub size = in0,r0,1 - mov dst = in1 - mov src = in2;; - mov ar.lc = size - mov ar.ec = 2 - mov pr.rot = (1 << 16);; -.L01: - (p17) st4 [dst] = r33,4 - (p16) ld4 r32 = [src],4;; - br.ctop.sptk .L01;; - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 - .endp mpcopy# - - - .if 0 - .align 32 - .global mpz - .type mpz,@function - -mpz: - alloc r14 = ar.pfs,2,6,0,8 - mov r15 = ar.lc - sub r16= in0,r0,1 - mov r17 = in1 - mov r18 = in2;; - mov r8 = 1 - mov pr.rot = 1 << 16 - mov ar.ec = 2 - mov ar.lc = r16;; -.L02: - (p16) ld4 r32 = [r18],4 - (p18) cmp.eq p0,p32 = r34,r0 - (p33) mov r8 = r0 - (p33) br.exit - .endif - - - .align 32 - .global mpadd# - .proc mpadd# - -mpadd: - alloc r14 = ar.pfs,3,0,0,0 - mov r15 = ar.lc - # adjust size 
by -1 - sub r16 = in0,r0,1 - # clear carry - mov r8 = r0;; - # load addresses - shladd r17 = r16,2,in1 - shladd r18 = r16,2,in2 - # load loop count - mov ar.lc = r16;; -.L20: - ld4 r20 = [r18],-4 - ld4 r19 = [r17] - tbit.z p1,p2 = r8,32;; - (p1) add r8 = r19,r20 - (p2) add r8 = r19,r20,1;; - st4 [r17] = r8,-4 - br.cloop.sptk .L20;; - extr.u r8 = r8,32,1 - mov ar.lc = r15 - mov ar.pfs = r14 - br.ret.sptk b0 - .endp mpadd# - - - .align 32 - .global mpsub# - .proc mpsub# - -mpsub: - alloc r14 = ar.pfs,3,0,0,0 - mov r15 = ar.lc - # adjust size by -1 - sub r16 = in0,r0,1 - # clear carry - mov r8 = r0;; - # load addresses - shladd r17 = r16,2,in1 - shladd r18 = r16,2,in2 - # load loop count - mov ar.lc = r16;; -.L30: - ld4 r20 = [r18],-4 - ld4 r19 = [r17] - tbit.z p1,p2 = r8,32;; - (p1) sub r8 = r19,r20 - (p2) sub r8 = r19,r20,1;; - st4 [r17] = r8,-4 - br.cloop.sptk .L30;; - extr.u r8 = r8,32,1 - mov ar.lc = r15 - mov ar.pfs = r14 - br.ret.sptk b0 - .endp mpsub# - - - .if 0 - - .align 32 - .global mpsetmul# - .proc mpsetmul# - -mpsetmul: - alloc r14 = ar.pfs,4,0,0,0 - mov r15 = ar.lc - # load mul - setf.sig f96 = in3 - # adjust size by -1 - sub r16 = in0,r0,1 - # clear carry - mov r8 = r0;; - # adjust addresses - shladd r17 = r16,2,in1 - shladd r18 = r16,2,in2 - # load loop count - mov ar.lc = r16;; -.L40: - ld4 r19 = [r18],-4;; - setf.sig f98 = r8 - setf.sig f97 = r19;; - # multiplication can only be done in f registers, but we do have a multiply-add - xma.l f98 = f96,f97,f98;; - getf.sig r8 = f98;; - st4 [r17] = r8,-4 - shr.u r8 = r8,32 - br.cloop.sptk .L40;; - mov ar.lc = r15 - mov ar.pfs = r14 - br.ret.sptk b0 - .endp mpsetmul# - - - .align 32 - .global mpaddmul# - .proc mpaddmul# - -mpaddmul: - alloc saved_pfs = ar.pfs,4,0,0,0 - mov saved_lc = ar.lc - # load mul - setf.sig f96 = in3 - # adjust size by -1 - sub size = in0,r0,1 - # clear carry - mov r8 = r0;; - # adjust addresses - shladd dst = size,2,in1 - shladd src = size,2,in2 - # load loop count - mov ar.lc = 
r16;; -.L50: - ld4 r19 = [dst] - ld4 r20 = [dst],-4;; - setf.sig f98 = r8 - setf.sig f97 = r20;; - # multiplication can only be done in f registers, but we do have a multiply-add - xma.l f98 = f96,f97,f98;; - getf.sig r8 = f98;; - add r8 = r8,r19;; - st4 [r17] = r8,-4 - shr.u r8 = r8,32 - br.cloop.sptk .L50;; - mov ar.lc = r15 - mov ar.pfs = r14 - br.ret.sptk b0 - .endp mpaddmul# - - .endif - - - .if 0 - .align 16 - .global mpaddsqrtrc# - .proc mpaddsqrtrc# - -mpaddsqrtrc: - .endp mpaddsqrtrc# - .endif diff --git a/beecrypt/gas/mp32opt.sparcv8.S b/beecrypt/gas/mp32opt.sparcv8.S deleted file mode 100644 index 09a94f0d7..000000000 --- a/beecrypt/gas/mp32opt.sparcv8.S +++ /dev/null @@ -1,114 +0,0 @@ -/* - * mp32opt.sparcv8.S - * - * Assembler optimized multiprecision integer routines for Sparc v8 - * - * Compile target is GNU Assembler, Sun Solaris Assembler - * - * Copyright (c) 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "mp32opt.sparcv8.S" - - .text - -C_FUNCTION_BEGIN(mpsetmul) -LABEL(mpsetmul) - - .register %g2,#scratch - - sll %o0,2,%g1 - dec 4,%o2 - clr %o0 -LOCAL(mpsetmul_loop): - ld [%o2+%g1],%g2 - umul %o3,%g2,%g2 - rd %y,%g3 - addcc %o0,%g2,%g2 - addx %g0,%g3,%o0 - deccc 4,%g1 - bnz LOCAL(mpsetmul_loop) - st %g2,[%o1+%g1] - retl - nop -C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size)) - - -C_FUNCTION_BEGIN(mpaddmul) -LABEL(mpaddmul) - - .register %g2,#scratch - - sll %o0,2,%g1 - mov %o1,%o4 - dec 4,%o1 - dec 4,%o2 - clr %o0 -LOCAL(mpaddmul_loop): - ld [%o2+%g1],%g2 - ld [%o1+%g1],%g3 - umul %o3,%g2,%g2 - rd %y,%g4 - addcc %o0,%g2,%g2 - addx %g0,%g4,%g4 - addcc %g2,%g3,%g2 - addx %g0,%g4,%o0 - deccc 4,%g1 - bnz LOCAL(mpaddmul_loop) - st %g2,[%o4+%g1] - retl - nop -C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size)) - - -C_FUNCTION_BEGIN(mpaddsqrtrc) -LABEL(mpaddsqrtrc) - - .register %g2,#scratch - .register %g3,#scratch - - sll %o0,2,%g1 - add %o1,%g1,%o1 - dec 4,%o2 - add %o1,%g1,%o1 - dec 8,%o1 - clr %o0 -LOCAL(mpaddsqrtrc_loop): - ld [%o2+%g1],%g2 - ldd [%o1],%o4 - umul %g2,%g2,%g3 - rd %y,%g2 - /* first addition */ - addcc %o5,%g3,%o5 - addxcc %o4,%g2,%o4 - addx %g0,%g0,%o3 - /* second addition */ - addcc %o5,%o0,%o5 - addxcc %o4,%g0,%o4 - addx %o3,%g0,%o0 - std %o4,[%o1] - deccc 4,%g1 - bnz LOCAL(mpaddsqrtrc_loop) - sub %o1,8,%o1 - retl - nop -C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size)) diff --git a/beecrypt/gas/mp64opt.ia64.S b/beecrypt/gas/mp64opt.ia64.S deleted file mode 100644 index 581bf4d30..000000000 --- a/beecrypt/gas/mp64opt.ia64.S +++ /dev/null @@ -1,322 +0,0 @@ -/* - * mp64opt.ia64.S - * - * Assembler optimized multiprecision integer routines for ia64 (Intel Itanium) - * - * Compile target is 
GNU Assembler - * - * Copyright (c) 2000, 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - -#define saved_pfs r14 -#define saved_lc r15 - -#define size r16 -#define dst r17 -#define src r18 -#define alt r19 - - .text - - .explicit - -/* functions to add, in order of priority: - * mp64addsqrtrc - * mp64neg -> can vectorize - * mp64multwo -> can vectorize - * mp32divtwo -> .. - * mp64fill -> easy - * mp64z -> vectorizable with br.wtop - * mp64nz -> vectorizable with br.wtop - * mp64eq -> .. - * mp64eqx -> .. - * mp64ne -> .. - * mp64nex -> .. - * mp64gt -> .. - * mp64gtx -> .. - * mp64lt -> .. - * mp64ltx -> .. - * mp64ge -> substitute with mp64lt with swap of parameters - * mp64gex -> .. mp64ltx - * mp64le -> .. mp64gt - * mp64lex -> .. mp64gtx - * mp64isone -> vectorizable with br.wtop - * mp64istwo -> .. - * mp64leone -> .. - * mp64size -> .. 
- -/* mp64zero works */ -C_FUNCTION_BEGIN(mp64zero) - alloc saved_pfs = ar.pfs,2,0,0,0 - mov saved_lc = ar.lc - sub size = in0,r0,1;; - mov src = in1 - mov ar.lc = size;; - -.Lmp64zero_loop: - st8 [src] = r0,8 - br.ctop.sptk .Lmp64zero_loop;; - - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64zero) - - -/* mp64copy works */ -C_FUNCTION_BEGIN(mp64copy) - alloc saved_pfs = ar.pfs,3,5,0,8 - mov saved_lc = ar.lc - sub size = in0,r0,1;; - mov dst = in1 - mov src = in2 - /* prepare loop */ - mov ar.lc = size - mov ar.ec = 2 - mov pr.rot = (1 << 16);; - -.Lmp64copy_loop: - (p17) st8 [dst] = r33,-8 - (p16) ld8 r32 = [src],-8;; - br.ctop.sptk .Lmp64copy_loop;; - - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64copy) - - -#if 0 -/* mp64z is in development */ -C_FUNCTION_BEGIN(mp64z) - alloc saved_pfs = ar.pfs,2,6,0,8 - mov saved_lc = ar.lc - sub size = in0,r0,1;; - - mov ret0 = 1 - mov src = in1 - - mov ar.lc = size - mov ar.ec = 2 - mov pr.rot = ((1 << 16) | (1 << 20));; - -.Lmp64z_loop: - (p16) ld8 r32 = [src],8 - (p17) cmp.ne p1,p0 = r33,r0 - (p1) br.exit.dpnt .Lmp64z_exit;; - br.ctop.dptk .Lmp64z_loop;; -.Lmp64z_exit: - (p1) mov ret0 = r0 - - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64z) -#endif - - -/* mp64add works */ -C_FUNCTION_BEGIN(mp64add) - alloc saved_pfs = ar.pfs,3,5,0,8 - mov saved_lc = ar.lc - sub size = in0,r0,1;; - - /* adjust addresses */ - shladd dst = size,3,in1 - shladd src = size,3,in2 - shladd alt = size,3,in1 - - /* prepare modulo-scheduled loop */ - mov ar.lc = size - mov ar.ec = 3 - mov pr.rot = ((1 << 16) | (1 << 19));; - -.Lmp64add_loop: - (p16) ld8 r32 = [src],-8 - (p16) ld8 r35 = [alt],-8 - (p20) add r36 = r33,r36 /* no carry add */ - (p22) add r36 = r33,r36,1 /* carry add */ - ;; - (p20) cmp.leu p19,p21 = r33,r36 /* no previous carry */ - (p22) cmp.ltu p19,p21 = r33,r36 /* previous carry */ - (p18) st8 [dst] = r37,-8 - br.ctop.dptk 
.Lmp64add_loop;; - - /* return carry */ - (p21) add ret0 = r0,r0 - (p23) add ret0 = r0,r0,1 - ;; - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64add) - - -/* mp64sub is in development */ -C_FUNCTION_BEGIN(mp64sub) - alloc saved_pfs = ar.pfs,3,5,0,8 - mov saved_lc = ar.lc - sub size = in0,r0,1;; - - /* adjust addresses */ - shladd dst = size,3,in1 - shladd src = size,3,in2 - shladd alt = size,3,in1 - - /* prepare modulo-scheduled loop */ - mov ar.lc = size - mov ar.ec = 3 - mov pr.rot = ((1 << 16) | (1 << 19));; - -.Lmp64sub_loop: - (p16) ld8 r32 = [src],-8 - (p16) ld8 r35 = [alt],-8 - (p20) sub r36 = r33,r36 /* no carry sub */ - (p22) sub r36 = r33,r36,1 /* carry sub */ - ;; - (p20) cmp.geu p19,p21 = r33,r36 /* no previous carry */ - (p22) cmp.gtu p19,p21 = r33,r36 /* previous carry */ - (p18) st8 [dst] = r37,-8 - br.ctop.dptk .Lmp64sub_loop;; - - /* return carry */ - (p21) add ret0 = r0,r0 - (p23) add ret0 = r0,r0,1 - ;; - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64sub) - - -/* mp64setmul works */ -C_FUNCTION_BEGIN(mp64setmul) - alloc saved_pfs = ar.pfs,4,4,0,8 - mov saved_lc = ar.lc - - setf.sig f6 = in3 /* the multiplier */ - setf.sig f7 = r0 /* the carry */ - sub size = in0,r0,1;; - - /* adjust addresses */ - shladd dst = size,3,in1 - shladd src = size,3,in2 - - /* prepare modulo-scheduled loop */ - mov ar.lc = size - mov ar.ec = 3 - mov pr.rot = (1 << 16);; - -.Lmp64setmul_loop: - (p16) ldf8 f36 = [src],-8 - (p18) stf8 [dst] = f33,-8 - (p17) xma.lu f32 = f6,f37,f7 - (p17) xma.hu f7 = f6,f37,f7;; - br.ctop.dptk .Lmp64setmul_loop;; - - /* return carry */ - getf.sig ret0 = f7;; - - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64setmul) - - -/* mp64addmul needs fixing */ -C_FUNCTION_BEGIN(mp64addmul) - alloc saved_pfs = ar.pfs,4,12,0,16 - mov saved_lc = ar.lc - - sub size = in0,r0,1;; - setf.sig f6 = in3 /* the multiplier */ - - /* adjust addresses */ - 
shladd dst = size,3,in1 - shladd src = size,3,in2 - shladd alt = size,3,in1;; - - /* prepare the rotate-in carry */ - mov r32 = r0 - - /* prepare modulo-scheduled loop */ - mov ar.lc = size - mov ar.ec = 5 - mov pr.rot = ((1 << 16) | (1 << 21)); - -.Lmp64addmul_loop: - (p18) getf.sig r33 = f34 /* hi 64 bit word */ - (p24) add r38 = r35,r38 - (p17) xma.lu f37 = f6,f41,f45 - (p18) getf.sig r37 = f38 /* lo 64 bit word */ - (p26) add r38 = r35,r38,1 - (p17) xma.hu f33 = f6,f41,f45 - (p16) ldf8 f40 = [src],-8 - (p16) ldf8 f44 = [alt],-8 - ;; - /* set carry from this operation */ - (p24) cmp.leu p23,p25 = r35,r38 - (p26) cmp.ltu p23,p25 = r35,r38 - (p20) st8 [dst] = r39,-8 - br.ctop.dptk .Lmp64addmul_loop;; - - /* return carry */ - (p25) add ret0 = r36,r0 - (p27) add ret0 = r36,r0,1 - - mov ar.lc = saved_lc - mov ar.pfs = saved_pfs - br.ret.sptk b0 -C_FUNCTION_END(mp64addmul) - -/* mp64addsqrtrc will be a little more challenging */ - -/* the primary loop will look like this: - -.Lmp64addsqrtrc_loop: - /* stage 1 */ - (p16) ldf8 to_square - (p16) ld8 lo_to_add - (p16) ld8 hi_to_add - /* stage 2 */ - (p17) xma.lu to_square,to_square,carry - (p17) xma.hu to_square,to_square,carry - /* stage 3 */ - (p18) getf lo xma - (p18) getf hi xma - /* stage 4 */ - (p?) add lo no carry - (p?) add lo carry - /* stage 5 */ - (p?+1) add hi no carry - (p?+1) add hi carry - ;; - /* also stage 4 */ - (p?) cmp lo for carry - (p?) 
cmp lo for carry - /* also stage 5 */ - (p?+1) cmp hi for carry - (p?+1) cmp hi for carry - st8 lo - st8 hi - br.ctop -*/ diff --git a/beecrypt/gas/mpopt.alpha.m4 b/beecrypt/gas/mpopt.alpha.m4 new file mode 100644 index 000000000..55d4b5266 --- /dev/null +++ b/beecrypt/gas/mpopt.alpha.m4 @@ -0,0 +1,159 @@ +dnl mpopt.alpha.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/alpha.m4) + + +C_FUNCTION_BEGIN(mpadd) + subq `$'16,1,`$'16 + s8addq `$'16,0,`$'1 + addq `$'17,`$'1,`$'17 + addq `$'18,`$'1,`$'18 + mov `$31',`$'0 + + .align 4 +LOCAL(mpadd_loop): + ldq `$'1,0(`$'17) + ldq `$'2,0(`$'18) + addq `$'1,`$'0,`$'3 + cmpult `$'3,`$'1,`$'0 + addq `$3',`$'2,`$'1 + cmpult `$'1,`$'3,`$'2 + stq `$'1,0(`$'17) + or `$'2,`$'0,`$'0 + subq `$'16,1,`$'16 + subq `$'17,8,`$'17 + subq `$'18,8,`$'18 + bge `$'16,LOCAL(mpadd_loop) + ret `$'31,(`$'26),1 +C_FUNCTION_END(mpadd) + + +C_FUNCTION_BEGIN(mpsub) + subq `$'16,1,`$'16 + s8addq `$'16,0,`$'1 + addq `$'17,`$'1,`$'17 + addq `$'18,`$'1,`$'18 + mov `$31',`$'0 + + .align 4 +LOCAL(mpsub_loop): + ldq `$'1,0(`$'17) + ldq `$'2,0(`$'18) + subq `$'1,`$'0,`$'3 + cmpult `$'1,`$'3,`$'0 + subq `$'3,`$'2,`$'1 + 
cmpult `$'3,`$'1,`$'2
+	stq `$'1,0(`$'17)
+	or `$'2,`$'0,`$'0
+	subq `$'16,1,`$'16
+	subq `$'17,8,`$'17
+	subq `$'18,8,`$'18
+	bge `$'16,LOCAL(mpsub_loop)
+	ret `$'31,(`$'26),1
+C_FUNCTION_END(mpsub)
+
+
+C_FUNCTION_BEGIN(mpsetmul)
+	subq `$'16,1,`$'16
+	s8addq `$'16,0,`$'1
+	addq `$'17,`$'1,`$'17
+	addq `$'18,`$'1,`$'18
+	mov `$31',`$'0
+
+	.align 4
+LOCAL(mpsetmul_loop):
+	ldq `$1',0(`$'18)
+	mulq `$'19,`$'1,`$'2
+	umulh `$'19,`$'1,`$'3
+	addq `$'2,`$'0,`$'2
+	cmpult `$'2,`$'0,`$'0
+	stq `$'2,0(`$'17)
+	addq `$'3,`$'0,`$'0
+	subq `$'16,1,`$'16
+	subq `$'17,8,`$'17
+	subq `$'18,8,`$'18
+	bge `$'16,LOCAL(mpsetmul_loop)
+	ret `$'31,(`$'26),1
+C_FUNCTION_END(mpsetmul)
+
+
+C_FUNCTION_BEGIN(mpaddmul)
+	subq `$'16,1,`$'16
+	s8addq `$'16,0,`$'1
+	addq `$'17,`$'1,`$'17
+	addq `$'18,`$'1,`$'18
+	mov `$31',`$'0
+
+	.align 4
+LOCAL(mpaddmul_loop):
+	ldq `$'1,0(`$'17)
+	ldq `$'2,0(`$'18)
+	mulq `$'19,`$'2,`$'3
+	umulh `$'19,`$'2,`$'4
+	addq `$'3,`$'0,`$'3
+	cmpult `$'3,`$'0,`$'0
+	addq `$'4,`$'0,`$'4
+	addq `$'3,`$'1,`$'3
+	cmpult `$'3,`$'1,`$'0
+	addq `$'4,`$'0,`$'0
+	stq `$'3,0(`$'17)
+	subq `$'16,1,`$'16
+	subq `$'17,8,`$'17
+	subq `$'18,8,`$'18
+	bge `$'16,LOCAL(mpaddmul_loop)
+	ret `$'31,(`$'26),1
+C_FUNCTION_END(mpaddmul)
+
+
+C_FUNCTION_BEGIN(mpaddsqrtrc)
+	subq `$'16,1,`$'16	# loop index = size - 1
+	s8addq `$'16,0,`$'1	# byte offset of the last source word
+	addq `$'17,`$'1,`$'17	# result pointer advances by twice that offset:
+	addq `$'17,`$'1,`$'17	# each source word updates two result words
+	addq `$'18,`$'1,`$'18	# source pointer at its last word
+	mov `$31',`$'0	# carry = 0
+
+	.align 4
+LOCAL(mpaddsqrtrc_loop):
+	ldq `$'1,0(`$'18)
+	mulq `$1',`$1',`$'2	# low 64 bits of the square
+	umulh `$1',`$1',`$'1	# high 64 bits of the square
+	addq `$'2,`$'0,`$'3
+	cmpult `$3',`$'2,`$'0
+	ldq `$'2,8(`$'17)
+	addq `$'1,`$'0,`$'1
+	addq `$'3,`$'2,`$'4
+	cmpult `$'4,`$'3,`$'0
+	ldq `$'3,0(`$'17)
+	addq `$'1,`$'0,`$'2
+	cmpult `$2',`$'1,`$'0
+	stq `$'4,8(`$'17)
+	addq `$'2,`$'3,`$'1
+	cmpult `$'1,`$'2,`$2'
+	stq `$'1,0(`$'17)
+	addq `$'2,`$'0,`$'0	# carry into the next iteration
+	subq `$'16,1,`$'16
+	subq `$'17,16,`$'17
+	subq `$'18,8,`$'18
+	bge `$'16,LOCAL(mpaddsqrtrc_loop)	# repeat while the loop index is still >= 0
+	ret `$'31,(`$'26),1
+C_FUNCTION_END(mpaddsqrtrc)
diff --git 
a/beecrypt/gas/mpopt.arm.m4 b/beecrypt/gas/mpopt.arm.m4 new file mode 100644 index 000000000..a7dc677ee --- /dev/null +++ b/beecrypt/gas/mpopt.arm.m4 @@ -0,0 +1,83 @@ +dnl mpopt.arm.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) + + +C_FUNCTION_BEGIN(mpsetmul) + stmfd sp!, {r4, r5, lr} + add r1, r1, r0, asl #2 + add r2, r2, r0, asl #2 + mov ip, #0 +LOCAL(mpsetmul_loop): + ldr r4, [r2, #-4]! + mov r5, #0 + umlal ip, r5, r3, r4 + str ip, [r1, #-4]! + mov ip, r5 + subs r0, r0, #1 + bne LOCAL(mpsetmul_loop) + mov r0, ip + ldmfd sp!, {r4, r5, pc} +C_FUNCTION_END(mpsetmul) + + +C_FUNCTION_BEGIN(mpaddmul) + stmfd sp!, {r4, r5, r6, lr} + add r1, r1, r0, asl #2 + add r2, r2, r0, asl #2 + mov ip, #0 +LOCAL(mpaddmul_loop): + ldr r4, [r2, #-4]! + ldr r5, [r1, #-4] + mov r6, #0 + umlal ip, r6, r3, r4 + adds r5, r5, ip + adc ip, r6, #0 + str r5, [r1, #-4]! + subs r0, r0, #1 + bne LOCAL(mpaddmul_loop) + mov r0, ip + ldmfd sp!, {r4, r5, r6, pc} +C_FUNCTION_END(mpaddmul) + + +C_FUNCTION_BEGIN(mpaddsqrtrc) + stmfd sp!, {r4, r5, r6, lr} + add r1, r1, r0, asl #3 + add r2, r2, r0, asl #2 + mov r3, #0 + mov ip, #0 +LOCAL(mpaddsqrtrc_loop): + ldr r4, [r2, #-4]! 
+ mov r6, #0 + umlal ip, r6, r4, r4 + ldr r5, [r1, #-4] + ldr r4, [r1, #-8] + adds r5, r5, ip + adcs r4, r4, r6 + str r5, [r1, #-4] + str r4, [r1, #-8]! + adc ip, r3, #0 + subs r0, r0, #1 + bne LOCAL(mpaddsqrtrc_loop) + mov r0, ip + ldmfd sp!, {r4, r5, r6, pc} +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/mpopt.ia64.m4 b/beecrypt/gas/mpopt.ia64.m4 new file mode 100644 index 000000000..8486fe2e5 --- /dev/null +++ b/beecrypt/gas/mpopt.ia64.m4 @@ -0,0 +1,187 @@ +dnl mpopt.ia64.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/ia64.m4) + +define(`size',`r16') +define(`dst',`r17') +define(`src',`r18') +define(`alt',`r19') + + +C_FUNCTION_BEGIN(mpadd) + alloc saved_pfs = ar.pfs,3,5,0,8 + mov saved_lc = ar.lc + sub size = in0,r0,1;; + +dnl adjust addresses + shladd dst = size,3,in1 + shladd src = size,3,in2 + shladd alt = size,3,in1 + +dnl prepare modulo-scheduled loop + mov ar.lc = size + mov ar.ec = 2 + mov pr.rot = ((1 << 16) | (1 << 19));; + +LOCAL(mpadd_loop): + (p16) ld8 r32 = [alt],-8 + (p16) ld8 r35 = [src],-8 + (p20) add r36 = r33,r36 + (p22) add r36 = r33,r36,1 + ;; + (p20) cmp.leu p19,p21 = r33,r36 + (p22) cmp.ltu p19,p21 = r33,r36 + (p18) st8 [dst] = r37,-8 + br.ctop.dptk LOCAL(mpadd_loop);; + +dnl loop epilogue: final store + (p18) st8 [dst] = r37,-8 + +dnl return carry + (p20) add ret0 = r0,r0 + (p22) add ret0 = r0,r0,1 + ;; + mov ar.lc = saved_lc + mov ar.pfs = saved_pfs + br.ret.sptk b0 +C_FUNCTION_END(mpadd) + + +C_FUNCTION_BEGIN(mpsub) + alloc saved_pfs = ar.pfs,3,5,0,8 + mov saved_lc = ar.lc + sub size = in0,r0,1;; + +dnl adjust addresses + shladd dst = size,3,in1 + shladd src = size,3,in2 + shladd alt = size,3,in1 + +dnl prepare modulo-scheduled loop + mov ar.lc = size + mov ar.ec = 2 + mov pr.rot = ((1 << 16) | (1 << 19));; + +LOCAL(mpsub_loop): + (p16) ld8 r32 = [alt],-8 + (p16) ld8 r35 = [src],-8 + (p20) sub r36 = r33,r36 + (p22) sub r36 = r33,r36,1 + ;; + (p20) cmp.geu p19,p21 = r33,r36 + (p22) cmp.gtu p19,p21 = r33,r36 + (p18) st8 [dst] = r37,-8 + br.ctop.dptk LOCAL(mpsub_loop);; + +dnl loop epilogue: final store + (p18) st8 [dst] = r37,-8 + +dnl return carry + (p20) add ret0 = r0,r0 + (p22) add ret0 = r0,r0,1 + ;; + mov ar.lc = saved_lc + mov ar.pfs = saved_pfs + br.ret.sptk b0 +C_FUNCTION_END(mpsub) + 
+ +C_FUNCTION_BEGIN(mpsetmul) + alloc saved_pfs = ar.pfs,4,4,0,8 + mov saved_lc = ar.lc + + setf.sig f6 = in3 + setf.sig f7 = r0 + sub size = in0,r0,1;; + +dnl adjust addresses + shladd dst = size,3,in1 + shladd src = size,3,in2 + +dnl prepare modulo-scheduled loop + mov ar.lc = size + mov ar.ec = 3 + mov pr.rot = (1 << 16);; + +LOCAL(mpsetmul_loop): + (p16) ldf8 f32 = [src],-8 + (p18) stf8 [dst] = f35,-8 + (p17) xma.lu f34 = f6,f33,f7 + (p17) xma.hu f7 = f6,f33,f7;; + br.ctop.dptk LOCAL(mpsetmul_loop);; + +dnl return carry + getf.sig ret0 = f7;; + + mov ar.lc = saved_lc + mov ar.pfs = saved_pfs + br.ret.sptk b0 +C_FUNCTION_END(mpsetmul) + + +C_FUNCTION_BEGIN(mpaddmul) + alloc saved_pfs = ar.pfs,4,4,0,8 + mov saved_lc = ar.lc + + setf.sig f6 = in3 + sub size = in0,r0,1;; + +dnl adjust addresses + shladd dst = size,3,in1 + shladd src = size,3,in2 + shladd alt = size,3,in1;; + +dnl prepare the rotate-in carry + mov r32 = r0 + +dnl prepare modulo-scheduled loop + mov ar.lc = size + mov ar.ec = 4 + mov pr.rot = ((1 << 16) | (1 << 21)); + +LOCAL(mpaddmul_loop): + (p18) getf.sig r37 = f35 + (p24) add r35 = r38,r35 + (p17) xma.lu f34 = f6,f33,f37 + (p18) getf.sig r33 = f39 + (p26) add r35 = r38,r35,1 + (p17) xma.hu f38 = f6,f33,f37 + (p16) ldf8 f32 = [src],-8 + (p16) ldf8 f36 = [alt],-8 + ;; +dnl set carry from this operation + (p24) cmp.leu p23,p25 = r38,r35 + (p26) cmp.ltu p23,p25 = r38,r35 + (p20) st8 [dst] = r36,-8 + br.ctop.dptk LOCAL(mpaddmul_loop);; + +dnl loop epilogue: final store + (p20) st8 [dst] = r36,-8 + +dnl return carry + (p24) add ret0 = r35,r0 + (p26) add ret0 = r35,r0,1 + + mov ar.lc = saved_lc + mov ar.pfs = saved_pfs + br.ret.sptk b0 +C_FUNCTION_END(mpaddmul) diff --git a/beecrypt/gas/mpopt.m68k.m4 b/beecrypt/gas/mpopt.m68k.m4 new file mode 100644 index 000000000..0cb2d4ca3 --- /dev/null +++ b/beecrypt/gas/mpopt.m68k.m4 @@ -0,0 +1,158 @@ +dnl mpopt.m68k.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> 
+dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/m68k.m4) + +dnl works +C_FUNCTION_BEGIN(mpadd) + move.l 4(%sp),%d0 + movea.l 8(%sp),%a0 + movea.l 12(%sp),%a1 + move.l %d0,%d1 + lsl.l #2,%d0 + subq.l #1,%d1 + adda.l %d0,%a0 + adda.l %d0,%a1 + clr %d0 + + .align 2 +LOCAL(mpadd_loop): + addx.l -(%a1),-(%a0) + dbf %d1,LOCAL(mpadd_loop) + + addx.l %d0,%d0 + rts +C_FUNCTION_END(mpadd) + +dnl works +C_FUNCTION_BEGIN(mpsub) + move.l 4(%sp),%d0 + movea.l 8(%sp),%a0 + movea.l 12(%sp),%a1 + move.l %d0,%d1 + lsl.l #2,%d0 + subq.l #1,%d1 + adda.l %d0,%a0 + adda.l %d0,%a1 + clr %d0 + + .align 2 +LOCAL(mpsub_loop): + subx.l -(%a1),-(%a0) + dbf %d1,LOCAL(mpsub_loop) + + addx.l %d0,%d0 + rts +C_FUNCTION_END(mpsub) + +dnl works +C_FUNCTION_BEGIN(mpsetmul) + movem.l %d2-%d5,-(%sp) + move.l 20(%sp),%d0 + movea.l 24(%sp),%a0 + movea.l 28(%sp),%a1 + move.l 32(%sp),%d2 + move.l %d0,%d5 + lsl.l #2,%d0 + subq.l #1,%d5 + adda.l %d0,%a0 + adda.l %d0,%a1 + clr.l %d3 + clr.l %d4 + + .align 2 +LOCAL(mpsetmul_loop): + move.l -(%a1),%d1 + mulu.l %d2,%d0:%d1 + add.l %d3,%d1 + addx.l %d4,%d0 + move.l %d1,-(%a0) + move.l %d0,%d3 + dbf %d5,LOCAL(mpsetmul_loop) + + movem.l (%sp)+,%d2-%d5 + rts +C_FUNCTION_END(mpsetmul) + +dnl works 
+C_FUNCTION_BEGIN(mpaddmul) + movem.l %d2-%d5,-(%sp) + move.l 20(%sp),%d0 + movea.l 24(%sp),%a0 + movea.l 28(%sp),%a1 + move.l 32(%sp),%d2 + move.l %d0,%d5 + lsl.l #2,%d0 + subq.l #1,%d5 + adda.l %d0,%a0 + adda.l %d0,%a1 + clr.l %d3 + clr.l %d4 + + .align 2 +LOCAL(mpaddmul_loop): + move.l -(%a1),%d1 + mulu.l %d2,%d0:%d1 + add.l %d3,%d1 + addx.l %d4,%d0 + add.l -(%a0),%d1 + addx.l %d4,%d0 + move.l %d1,(%a0) + move.l %d0,%d3 + dbf %d5,LOCAL(mpaddmul_loop) + + movem.l (%sp)+,%d2-%d5 + rts +C_FUNCTION_END(mpaddmul) + + +C_FUNCTION_BEGIN(mpaddsqrtrc) + movem.l %d3-%d5,-(%sp) + move.l 16(%sp),%d0 + movea.l 20(%sp),%a0 + movea.l 24(%sp),%a1 + move.l %d0,%d5 + lsl.l #2,%d0 + subq.l #1,%d5 + adda.l %d0,%a0 + adda.l %d0,%a0 + adda.l %d0,%a1 + clr.l %d3 + clr.l %d4 + +LOCAL(mpaddsqrtrc_loop): + move.l -(%a1),%d1 +dnl square %d1 into %d0 and %d1 + mulu.l %d1,%d0:%d1 + add.l %d3,%d1 + addx.l %d4,%d0 + add.l -(%a0),%d1 + addx.l %d4,%d0 + move.l %d1,(%a0) + clr.l %d3 + add.l -(%a0),%d0 + addx.l %d4,%d3 + move.l %d0,0(%a0) + dbf %d5,LOCAL(mpaddsqrtrc_loop) + + movem.l (%sp)+,%d3-%d5 + rts +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/mp32opt.powerpc.S b/beecrypt/gas/mpopt.ppc.m4 index b70f07903..3406f3518 100644 --- a/beecrypt/gas/mp32opt.powerpc.S +++ b/beecrypt/gas/mpopt.ppc.m4 @@ -1,39 +1,28 @@ -/* - * mp32opt.powerpc.S - * - * Assembler optimized multiprecision integer routines for PowerPC - * - * Compile target is GNU Assembler - * - * Copyright (c) 2000, 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "mp32opt.powerpc.S" - - .text +dnl mpopt.ppc.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/ppc.m4) C_FUNCTION_BEGIN(mpaddw) -LABEL(mpaddw) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -48,14 +37,12 @@ LOCAL(mpaddw_loop): stw r6,0(r4) bdnz LOCAL(mpaddw_loop) LOCAL(mpaddw_skip): - /* return the carry */ addze r3,r0 blr -C_FUNCTION_END(mpaddw, LOCAL(mpaddw_size)) +C_FUNCTION_END(mpaddw) C_FUNCTION_BEGIN(mpsubw) -LABEL(mpsubw) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -70,15 +57,13 @@ LOCAL(mpsubw_loop): stwu r6, -4(r4) bdnz LOCAL(mpsubw_loop) LOCAL(mpsubw_skip): - /* return the carry */ subfe r3,r0,r0 neg r3,r3 blr -C_FUNCTION_END(mpsubw, LOCAL(mpsubw_size)) +C_FUNCTION_END(mpsubw) C_FUNCTION_BEGIN(mpadd) -LABEL(mpadd) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -96,14 +81,12 @@ LOCAL(mpadd_loop): stwu r6,-4(r4) bdnz LOCAL(mpadd_loop) LOCAL(mpadd_skip): - /* return the carry */ addze r3,r0 blr -C_FUNCTION_END(mpadd, LOCAL(mpadd_size)) +C_FUNCTION_END(mpadd) C_FUNCTION_BEGIN(mpsub) -LABEL(mpsub) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -121,15 +104,13 @@ LOCAL(mpsub_loop): stwu r6,-4(r4) bdnz LOCAL(mpsub_loop) LOCAL(mpsub_skip): - /* return the carry */ subfe r3,r0,r0 neg r3,r3 blr -C_FUNCTION_END(mpsub, LOCAL(mpsub_size)) +C_FUNCTION_END(mpsub) C_FUNCTION_BEGIN(mpmultwo) -LABEL(mpmultwo) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -144,14 +125,12 @@ LOCAL(mpmultwo_loop): stwu r6,-4(r4) bdnz LOCAL(mpmultwo_loop) LOCAL(mpmultwo_skip): - /* return the carry */ addze r3,r0 blr -C_FUNCTION_END(mpmultwo, LOCAL(mpmultwo_size)) +C_FUNCTION_END(mpmultwo) C_FUNCTION_BEGIN(mpsetmul) -LABEL(mpsetmul) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -166,11 +145,10 @@ LOCAL(mpsetmul_loop): stwu r8,-4(r4) bdnz LOCAL(mpsetmul_loop) blr -C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size)) +C_FUNCTION_END(mpsetmul) C_FUNCTION_BEGIN(mpaddmul)
-LABEL(mpaddmul) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -188,11 +166,10 @@ LOCAL(mpaddmul_loop): stw r9,0(r4) bdnz LOCAL(mpaddmul_loop) blr -C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size)) +C_FUNCTION_END(mpaddmul) C_FUNCTION_BEGIN(mpaddsqrtrc) -LABEL(mpaddsqrtrc) mtctr r3 slwi r0,r3,2 add r4,r4,r0 @@ -215,4 +192,4 @@ LOCAL(mpaddsqrtrc_loop): stwu r6,-8(r4) bdnz LOCAL(mpaddsqrtrc_loop) blr -C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size)) +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/mpopt.ppc64.m4 b/beecrypt/gas/mpopt.ppc64.m4 new file mode 100644 index 000000000..8fdbdb0ae --- /dev/null +++ b/beecrypt/gas/mpopt.ppc64.m4 @@ -0,0 +1,195 @@ +dnl mpopt.ppc64.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/ppc64.m4) + + +C_FUNCTION_BEGIN(mpaddw) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + li r0,0 + ldu r6,-8(r4) + addc r6,r6,r5 + std r6,0(r4) + bdz LOCAL(mpaddw_skip) +LOCAL(mpaddw_loop): + ldu r6,-8(r4) + adde r6,r0,r6 + std r6,0(r4) + bdnz LOCAL(mpaddw_loop) +LOCAL(mpaddw_skip): + addze r3,r0 + blr +C_FUNCTION_END(mpaddw) + + +C_FUNCTION_BEGIN(mpsubw) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + li r0,0 + ld r6,-8(r4) + subfc r6,r5,r6 + stdu r6,-8(r4) + bdz LOCAL(mpsubw_skip) +LOCAL(mpsubw_loop): + ld r6,-8(r4) + subfe r6,r0,r6 + stdu r6, -8(r4) + bdnz LOCAL(mpsubw_loop) +LOCAL(mpsubw_skip): + subfe r3,r0,r0 + neg r3,r3 + blr +C_FUNCTION_END(mpsubw) + + +C_FUNCTION_BEGIN(mpadd) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + add r5,r5,r0 + li r0,0 + ld r6,-8(r4) + ldu r7,-8(r5) + addc r6,r7,r6 + stdu r6,-8(r4) + bdz LOCAL(mpadd_skip) +LOCAL(mpadd_loop): + ld r6,-8(r4) + ldu r7,-8(r5) + adde r6,r7,r6 + stdu r6,-8(r4) + bdnz LOCAL(mpadd_loop) +LOCAL(mpadd_skip): + addze r3,r0 + blr +C_FUNCTION_END(mpadd) + + +C_FUNCTION_BEGIN(mpsub) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + add r5,r5,r0 + li r0,0 + ld r6,-8(r4) + ldu r7,-8(r5) + subfc r6,r7,r6 + stdu r6,-8(r4) + bdz LOCAL(mpsub_skip) +LOCAL(mpsub_loop): + ld r6,-8(r4) + ldu r7,-8(r5) + subfe r6,r7,r6 + stdu r6,-8(r4) + bdnz LOCAL(mpsub_loop) +LOCAL(mpsub_skip): + subfe r3,r0,r0 + neg r3,r3 + blr +C_FUNCTION_END(mpsub) + + +C_FUNCTION_BEGIN(mpmultwo) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + li r0,0 + ld r6,-8(r4) + addc r6,r6,r6 + stdu r6,-8(r4) + bdz LOCAL(mpmultwo_skip) +LOCAL(mpmultwo_loop): + ld r6,-8(r4) + adde r6,r6,r6 + stdu r6,-8(r4) + bdnz LOCAL(mpmultwo_loop) +LOCAL(mpmultwo_skip): + addze r3,r0 + blr +C_FUNCTION_END(mpmultwo) + + 
+C_FUNCTION_BEGIN(mpsetmul) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + add r5,r5,r0 + li r3,0 +LOCAL(mpsetmul_loop): + ldu r7,-8(r5) + mulld r8,r7,r6 + addc r8,r8,r3 + mulhdu r9,r7,r6 + addze r3,r9 + stdu r8,-8(r4) + bdnz LOCAL(mpsetmul_loop) + blr +C_FUNCTION_END(mpsetmul) + + +C_FUNCTION_BEGIN(mpaddmul) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + add r5,r5,r0 + li r3,0 +LOCAL(mpaddmul_loop): + ldu r8,-8(r5) + ldu r7,-8(r4) + mulld r9,r8,r6 + addc r9,r9,r3 + mulhdu r10,r8,r6 + addze r3,r10 + addc r9,r9,r7 + addze r3,r3 + std r9,0(r4) + bdnz LOCAL(mpaddmul_loop) + blr +C_FUNCTION_END(mpaddmul) + + +C_FUNCTION_BEGIN(mpaddsqrtrc) + mtctr r3 + sldi r0,r3,3 + add r4,r4,r0 + add r5,r5,r0 + add r4,r4,r0 + li r3,0 +LOCAL(mpaddsqrtrc_loop): + ldu r0,-8(r5) + ld r6,-16(r4) + ld r7,-8(r4) + mulld r9,r0,r0 + addc r9,r9,r3 + mulhdu r8,r0,r0 + addze r8,r8 + li r3,0 + addc r7,r7,r9 + adde r6,r6,r8 + addze r3,r3 + std r7,-8(r4) + stdu r6,-16(r4) + bdnz LOCAL(mpaddsqrtrc_loop) + blr +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/mpopt.sparcv8.m4 b/beecrypt/gas/mpopt.sparcv8.m4 new file mode 100644 index 000000000..f21b35614 --- /dev/null +++ b/beecrypt/gas/mpopt.sparcv8.m4 @@ -0,0 +1,90 @@ +dnl mpopt.sparcv8.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/sparc.m4) + + +C_FUNCTION_BEGIN(mpsetmul) + sll %o0,2,%g1 + dec 4,%o2 + clr %o0 +LOCAL(mpsetmul_loop): + ld [%o2+%g1],%g2 + umul %o3,%g2,%g2 + rd %y,%g3 + addcc %o0,%g2,%g2 + addx %g0,%g3,%o0 + deccc 4,%g1 + bnz LOCAL(mpsetmul_loop) + st %g2,[%o1+%g1] + retl + nop +C_FUNCTION_END(mpsetmul) + + +C_FUNCTION_BEGIN(mpaddmul) + sll %o0,2,%g1 + mov %o1,%o4 + dec 4,%o1 + dec 4,%o2 + clr %o0 +LOCAL(mpaddmul_loop): + ld [%o2+%g1],%g2 + ld [%o1+%g1],%g3 + umul %o3,%g2,%g2 + rd %y,%g4 + addcc %o0,%g2,%g2 + addx %g0,%g4,%g4 + addcc %g2,%g3,%g2 + addx %g0,%g4,%o0 + deccc 4,%g1 + bnz LOCAL(mpaddmul_loop) + st %g2,[%o4+%g1] + retl + nop +C_FUNCTION_END(mpaddmul) + + +C_FUNCTION_BEGIN(mpaddsqrtrc) + sll %o0,2,%g1 + add %o1,%g1,%o1 + dec 4,%o2 + add %o1,%g1,%o1 + dec 8,%o1 + clr %o0 +LOCAL(mpaddsqrtrc_loop): + ld [%o2+%g1],%g2 + ldd [%o1],%o4 + umul %g2,%g2,%g3 + rd %y,%g2 + addcc %o5,%g3,%o5 + addxcc %o4,%g2,%o4 + addx %g0,%g0,%o3 + addcc %o5,%o0,%o5 + addxcc %o4,%g0,%o4 + addx %o3,%g0,%o0 + std %o4,[%o1] + deccc 4,%g1 + bnz LOCAL(mpaddsqrtrc_loop) + sub %o1,8,%o1 + retl + nop +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/mp32opt.sparcv9.S b/beecrypt/gas/mpopt.sparcv8plus.m4 index 582a68641..f021cfa6c 100644 --- a/beecrypt/gas/mp32opt.sparcv9.S +++ b/beecrypt/gas/mpopt.sparcv8plus.m4 @@ -1,41 +1,28 @@ -/* - * mp32opt.sparcv9.S - * - * Assembler optimized multiprecision integer routines for UltraSparc (64 bits instructions, will run on 32 bit OS) - * - * Compile target is GNU Assembler, Sun Solaris Assembler - * - * Copyright (c) 1998, 1999, 2000, 2001 Virtual Unlimited B.V. 
- * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "mp32opt.sparcv9.S" - - .text +dnl mpopt.sparcv8plus.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/sparc.m4) -C_FUNCTION_BEGIN(mpaddw) -LABEL(mpaddw) - - .register %g2,#scratch +C_FUNCTION_BEGIN(mpaddw) sll %o0,2,%g1 dec 4,%g1 clr %o0 @@ -53,14 +40,10 @@ LOCAL(mpaddw_loop): LOCAL(mpaddw_skip): retl movcs %icc,1,%o0 -C_FUNCTION_END(mpaddw, LOCAL(mpaddw_size)) +C_FUNCTION_END(mpaddw) C_FUNCTION_BEGIN(mpsubw) -LABEL(mpsubw) - - .register %g2,#scratch - sll %o0,2,%g1 dec 4,%g1 clr %o0 @@ -78,15 +61,10 @@ LOCAL(mpsubw_loop): LOCAL(mpsubw_skip): retl movcs %icc,1,%o0 -C_FUNCTION_END(mpsubw, LOCAL(mpsubw_size)) +C_FUNCTION_END(mpsubw) C_FUNCTION_BEGIN(mpadd) -LABEL(mpadd) - - .register %g2,#scratch - .register %g3,#scratch - sll %o0,2,%g1 dec 4,%g1 addcc %g0,%g0,%o0 @@ -99,15 +77,10 @@ LOCAL(mpadd_loop): dec 4,%g1 retl movcs %icc,1,%o0 -C_FUNCTION_END(mpadd, LOCAL(mpadd_size)) +C_FUNCTION_END(mpadd) C_FUNCTION_BEGIN(mpsub) -LABEL(mpsub) - - .register %g2,#scratch - .register %g3,#scratch - sll %o0,2,%g1 dec 4,%g1 addcc %g0,%g0,%o0 @@ -120,15 +93,10 @@ LOCAL(mpsub_loop): dec 4,%g1 retl movcs %icc,1,%o0 -C_FUNCTION_END(mpsub, LOCAL(mpsub_size)) +C_FUNCTION_END(mpsub) C_FUNCTION_BEGIN(mpmultwo) -LABEL(mpmultwo) - - .register %g2,#scratch - .register %g3,#scratch - sll %o0,2,%g1 dec 4,%g1 addcc %g0,%g0,%o0 @@ -140,15 +108,10 @@ LOCAL(mpmultwo_loop): dec 4,%g1 retl movcs %icc,1,%o0 -C_FUNCTION_END(mpmultwo, LOCAL(mpmultwo_size)) +C_FUNCTION_END(mpmultwo) C_FUNCTION_BEGIN(mpsetmul) -LABEL(mpsetmul) - - .register %g2,#scratch - .register %g3,#scratch - sll %o0,2,%g1 dec 4,%g1 clr %o0 @@ -162,15 +125,10 @@ LOCAL(mpsetmul_loop): dec 4,%g1 retl srlx %o0,32,%o0 -C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size)) +C_FUNCTION_END(mpsetmul) C_FUNCTION_BEGIN(mpaddmul) -LABEL(mpaddmul) - - .register %g2,#scratch - .register 
%g3,#scratch - sll %o0,2,%g1 dec 4,%g1 clr %o0 @@ -186,22 +144,16 @@ LOCAL(mpaddmul_loop): dec 4,%g1 retl srlx %o0,32,%o0 -C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size)) +C_FUNCTION_END(mpaddmul) C_FUNCTION_BEGIN(mpaddsqrtrc) -LABEL(mpaddsqrtrc) - - .register %g2,#scratch - .register %g3,#scratch - sll %o0,2,%g1 dec 4,%g1 add %o1,%g1,%o1 add %o1,%g1,%o1 clr %o0 LOCAL(mpaddsqrtrc_loop): - /* load from o1 into g4 as xuint; simulate xuint carry by doing an xuint comparison; carry if result smaller than initial value */ lduw [%o2+%g1],%g2 ldx [%o1],%g4 mulx %g2,%g2,%g2 @@ -216,4 +168,4 @@ LOCAL(mpaddsqrtrc_loop): dec 4,%g1 retl nop -C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size)) +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/mp32opt.i386.S b/beecrypt/gas/mpopt.x86.m4 index 967faa82b..e1097999d 100644 --- a/beecrypt/gas/mp32opt.i386.S +++ b/beecrypt/gas/mpopt.x86.m4 @@ -1,39 +1,28 @@ -/* - * mp32opt.i386.S - * - * Assembler optimized multiprecision integer routines for Intel 386 and higher - * - * Compile target is GNU Assembler - * - * Copyright (c) 1998, 1999, 2000, 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "mp32opt.i386.S" - - .text +dnl mpopt.x86.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/x86.m4) C_FUNCTION_BEGIN(mpzero) -LABEL(mpzero) pushl %edi movl 8(%esp),%ecx @@ -44,11 +33,10 @@ LABEL(mpzero) popl %edi ret -C_FUNCTION_END(mpzero, LOCAL(mpzero_size)) +C_FUNCTION_END(mpzero) C_FUNCTION_BEGIN(mpfill) -LABEL(mpfill) pushl %edi movl 8(%esp),%ecx @@ -59,32 +47,29 @@ LABEL(mpfill) popl %edi ret -C_FUNCTION_END(mpfill, LOCAL(mpfill_size)) +C_FUNCTION_END(mpfill) C_FUNCTION_BEGIN(mpeven) -LABEL(mpeven) movl 4(%esp),%ecx movl 8(%esp),%eax movl -4(%eax,%ecx,4),%eax notl %eax - andl $1,%eax + andl `$'1,%eax ret -C_FUNCTION_END(mpeven, LOCAL(mpeven_size)) +C_FUNCTION_END(mpeven) C_FUNCTION_BEGIN(mpodd) -LABEL(mpodd) movl 4(%esp),%ecx movl 8(%esp),%eax movl -4(%eax,%ecx,4),%eax - andl $1,%eax + andl `$'1,%eax ret -C_FUNCTION_END(mpodd, LOCAL(mpodd_size)) 
+C_FUNCTION_END(mpodd) C_FUNCTION_BEGIN(mpaddw) -LABEL(mpaddw) pushl %edi movl 8(%esp),%ecx @@ -110,11 +95,10 @@ LOCAL(mpaddw_skip): popl %edi ret -C_FUNCTION_END(mpaddw, LOCAL(mpaddw_size)) +C_FUNCTION_END(mpaddw) C_FUNCTION_BEGIN(mpsubw) -LABEL(mpsubw) pushl %edi movl 8(%esp),%ecx @@ -139,11 +123,10 @@ LOCAL(mpsubw_skip): negl %eax popl %edi ret -C_FUNCTION_END(mpsubw, LOCAL(mpsubw_size)) +C_FUNCTION_END(mpsubw) C_FUNCTION_BEGIN(mpadd) -LABEL(mpadd) pushl %edi pushl %esi @@ -157,7 +140,9 @@ LABEL(mpadd) .align 4 LOCAL(mpadd_loop): movl (%esi,%ecx,4),%eax - adcl %eax,(%edi,%ecx,4) + movl (%edi,%ecx,4),%edx + adcl %eax,%edx + movl %edx,(%edi,%ecx,4) decl %ecx jns LOCAL(mpadd_loop) @@ -167,11 +152,10 @@ LOCAL(mpadd_loop): popl %esi popl %edi ret -C_FUNCTION_END(mpadd, LOCAL(mpadd_size)) +C_FUNCTION_END(mpadd) C_FUNCTION_BEGIN(mpsub) -LABEL(mpsub) pushl %edi pushl %esi @@ -185,7 +169,9 @@ LABEL(mpsub) .align 4 LOCAL(mpsub_loop): movl (%esi,%ecx,4),%eax - sbbl %eax,(%edi,%ecx,4) + movl (%edi,%ecx,4),%edx + sbbl %eax,%edx + movl %edx,(%edi,%ecx,4) decl %ecx jns LOCAL(mpsub_loop) @@ -194,11 +180,10 @@ LOCAL(mpsub_loop): popl %esi popl %edi ret -C_FUNCTION_END(mpsub, LOCAL(mpsub_size)) +C_FUNCTION_END(mpsub) C_FUNCTION_BEGIN(mpdivtwo) -LABEL(mpdivtwo) pushl %edi movl 8(%esp),%ecx @@ -206,32 +191,33 @@ LABEL(mpdivtwo) leal (%edi,%ecx,4),%edi negl %ecx - clc + xorl %eax,%eax .align 4 LOCAL(mpdivtwo_loop): - rcrl $1,(%edi,%ecx,4) + rcrl `$'1,(%edi,%ecx,4) inc %ecx jnz LOCAL(mpdivtwo_loop) popl %edi ret -C_FUNCTION_END(mpdivtwo, LOCAL(mpdivtwo_size)) +C_FUNCTION_END(mpdivtwo) C_FUNCTION_BEGIN(mpmultwo) -LABEL(mpmultwo) pushl %edi movl 8(%esp),%ecx movl 12(%esp),%edi - clc + xorl %edx,%edx decl %ecx .align 4 LOCAL(mpmultwo_loop): - rcll $1,(%edi,%ecx,4) + movl (%edi,%ecx,4),%eax + adcl %eax,%eax + movl %eax,(%edi,%ecx,4) decl %ecx jns LOCAL(mpmultwo_loop) @@ -240,13 +226,34 @@ LOCAL(mpmultwo_loop): popl %edi ret -C_FUNCTION_END(mpmultwo, LOCAL(mpmultwo_size)) 
+C_FUNCTION_END(mpmultwo) C_FUNCTION_BEGIN(mpsetmul) -LABEL(mpsetmul) pushl %edi pushl %esi +ifdef(`USE_SSE2',` + movl 12(%esp),%ecx + movl 16(%esp),%edi + movl 20(%esp),%esi + movd 24(%esp),%mm1 + + pxor %mm0,%mm0 + decl %ecx + + .align 4 +LOCAL(mpsetmul_loop): + movd (%esi,%ecx,4),%mm2 + pmuludq %mm1,%mm2 + paddq %mm2,%mm0 + movd %mm0,(%edi,%ecx,4) + decl %ecx + psrlq `$'32,%mm0 + jns LOCAL(mpsetmul_loop) + + movd %mm0,%eax + emms +',` pushl %ebx pushl %ebp @@ -264,7 +271,7 @@ LOCAL(mpsetmul_loop): movl (%esi,%ecx,4),%eax mull %ebp addl %ebx,%eax - adcl $0,%edx + adcl `$'0,%edx movl %eax,(%edi,%ecx,4) decl %ecx jns LOCAL(mpsetmul_loop) @@ -273,16 +280,40 @@ LOCAL(mpsetmul_loop): popl %ebp popl %ebx +') popl %esi popl %edi ret -C_FUNCTION_END(mpsetmul, LOCAL(mpsetmul_size)) +C_FUNCTION_END(mpsetmul) C_FUNCTION_BEGIN(mpaddmul) -LABEL(mpaddmul) pushl %edi pushl %esi +ifdef(`USE_SSE2',` + movl 12(%esp),%ecx + movl 16(%esp),%edi + movl 20(%esp),%esi + movd 24(%esp),%mm1 + + pxor %mm0,%mm0 + decl %ecx + + .align 4 +LOCAL(mpaddmul_loop): + movd (%esi,%ecx,4),%mm2 + movd (%edi,%ecx,4),%mm3 + pmuludq %mm1,%mm2 + paddq %mm2,%mm3 + paddq %mm3,%mm0 + movd %mm0,(%edi,%ecx,4) + decl %ecx + psrlq $32,%mm0 + jns LOCAL(mpaddmul_loop) + + movd %mm0,%eax + emms +',` pushl %ebx pushl %ebp @@ -311,16 +342,43 @@ LOCAL(mpaddmul_loop): popl %ebp popl %ebx +') popl %esi popl %edi ret -C_FUNCTION_END(mpaddmul, LOCAL(mpaddmul_size)) +C_FUNCTION_END(mpaddmul) C_FUNCTION_BEGIN(mpaddsqrtrc) -LABEL(mpaddsqrtrc) pushl %edi pushl %esi +ifdef(`USE_SSE2',` + movl 12(%esp),%ecx + movl 16(%esp),%edi + movl 20(%esp),%esi + + pxor %mm0,%mm0 + decl %ecx + + .align 4 +LOCAL(mpaddsqrtrc_loop): + movd (%esi,%ecx,4),%mm2 + pmuludq %mm2,%mm2 + movd 4(%edi,%ecx,8),%mm3 + paddq %mm2,%mm3 + movd 0(%edi,%ecx,8),%mm4 + paddq %mm3,%mm0 + movd %mm0,4(%edi,%ecx,8) + psrlq $32,%mm0 + paddq %mm4,%mm0 + movd %mm0,0(%edi,%ecx,8) + decl %ecx + psrlq $32,%mm0 + jns LOCAL(mpaddsqrtrc_loop) + + movd %mm0,%eax + emms +',` 
pushl %ebx movl 16(%esp),%ecx @@ -346,7 +404,8 @@ LOCAL(mpaddsqrtrc_loop): movl %ebx,%eax popl %ebx +') popl %esi popl %edi ret -C_FUNCTION_END(mpaddsqrtrc, LOCAL(mpaddsqrtrc_size)) +C_FUNCTION_END(mpaddsqrtrc) diff --git a/beecrypt/gas/ppc.m4 b/beecrypt/gas/ppc.m4 new file mode 100644 index 000000000..f8e9865db --- /dev/null +++ b/beecrypt/gas/ppc.m4 @@ -0,0 +1,85 @@ +dnl ppc.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +ifelse(substr(ASM_OS,0,3),aix,` +undefine(`C_FUNCTION_BEGIN') +define(C_FUNCTION_BEGIN,` + .toc + .globl $1[DS] + .csect $1[DS] + .long .$1[PR], TOC[tc0], 0 + .toc + .globl .$1[PR] + .csect .$1[PR] +') +undefine(`C_FUNCTION_END') +define(C_FUNCTION_END,` + .tbtag 0x0,0xc,0x0,0x0,0x0,0x0,0x0,0x0 +') +define(LOAD_ADDRESS,` + lwz $2,L$1(r2) +') +define(EXTERNAL_VARIABLE,` + .toc +L$1: + .tc $1[TC],$1[RW] +') + + .machine "ppc" + + .set r0,0 + .set r1,1 + .set r2,2 + .set r3,3 + .set r4,4 + .set r5,5 + .set r6,6 + .set r7,7 + .set r8,8 + .set r9,9 + .set r10,10 + .set r11,11 + .set r12,12 + .set r13,13 + .set r14,14 + .set r15,15 + .set r16,16 + .set r17,17 + .set r18,18 + .set r19,19 + .set r20,20 + .set r21,21 + .set r22,22 + .set r23,23 + .set r24,24 + .set r25,25 + .set r26,26 + .set r27,27 + .set r28,28 + .set r29,29 + .set r30,30 + .set r31,31 +',` +define(LOAD_ADDRESS,` + lis $2,$1@ha + la $2,$1@l($2) +') +define(EXTERNAL_VARIABLE) +') diff --git a/beecrypt/gas/ppc64.m4 b/beecrypt/gas/ppc64.m4 new file mode 100644 index 000000000..38bec0e47 --- /dev/null +++ b/beecrypt/gas/ppc64.m4 @@ -0,0 +1,71 @@ +dnl ppc64.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +ifelse(substr(ASM_OS,0,3),aix,` +undefine(`C_FUNCTION_BEGIN') +define(C_FUNCTION_BEGIN,` + .toc + .globl $1[DS] + .csect $1[DS] + .llong .$1[PR], TOC[tc0], 0 + .toc + .globl .$1[PR] + .csect .$1[PR] +') +undefine(`C_FUNCTION_END') +define(C_FUNCTION_END,` + .tbtag 0x0,0xc,0x0,0x0,0x0,0x0,0x0,0x0 +') + + .machine "ppc64" + + .set r0,0 + .set r1,1 + .set r2,2 + .set r3,3 + .set r4,4 + .set r5,5 + .set r6,6 + .set r7,7 + .set r8,8 + .set r9,9 + .set r10,10 + .set r11,11 + .set r12,12 + .set r13,13 + .set r14,14 + .set r15,15 + .set r16,16 + .set r17,17 + .set r18,18 + .set r19,19 + .set r20,20 + .set r21,21 + .set r22,22 + .set r23,23 + .set r24,24 + .set r25,25 + .set r26,26 + .set r27,27 + .set r28,28 + .set r29,29 + .set r30,30 + .set r31,31 +') diff --git a/beecrypt/gas/sha1opt.i586.S b/beecrypt/gas/sha1opt.i586.S deleted file mode 100644 index 31e39822b..000000000 --- a/beecrypt/gas/sha1opt.i586.S +++ /dev/null @@ -1,292 +0,0 @@ -/* - * fips180opt.i586.S - * - * Assembler optimized SHA-1 routines for Intel Pentium processors - * - * Compile target is GNU Assembler - * - * Copyright (c) 2000 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "fips180opt.i586.S" - - .text - -#define K00 0x5a827999 -#define K20 0x6ed9eba1 -#define K40 0x8f1bbcdc -#define K60 0xca62c1d6 - -#define PARAM_H 0 -#define PARAM_DATA 20 - - .macro subround1 b c d e w - movl \c,%ecx - movl \b,%ebx - movl \d,%edx - roll $5,%eax - xorl %edx,%ecx - addl \e,%eax - andl %ebx,%ecx - addl $K00,%eax - rorl $2,%ebx - addl \w(%esi,%edi),%eax - xorl %edx,%ecx - movl %ebx,\b - addl %ecx,%eax - movl %eax,\e - .endm - - .macro subround2 b c d e w - movl \c,%ecx - movl \b,%ebx - roll $5,%eax - xorl %ebx,%ecx - addl \e,%eax - xorl \d,%ecx - addl $K20,%eax - rorl $2,%ebx - addl \w(%esi,%edi),%eax - movl %ebx,\b - addl %ecx,%eax - movl %eax,\e - .endm - - .macro subround3 b c d e w - movl \c,%ecx - roll $5,%eax - movl \b,%ebx - movl %ecx,%edx - addl \e,%eax - orl %ebx,%ecx - andl %ebx,%edx - andl \d,%ecx - addl $K40,%eax - orl %edx,%ecx - addl \w(%esi,%edi),%eax - rorl $2,%ebx - addl %ecx,%eax - movl %ebx,\b - movl %eax,\e - .endm - - .macro subround4 b c d e w - movl \c,%ecx - movl \b,%ebx - roll $5,%eax - xorl %ebx,%ecx - addl \e,%eax - xorl \d,%ecx - addl $K60,%eax - rorl $2,%ebx - addl \w(%esi,%edi),%eax - movl %ebx,\b - addl %ecx,%eax - movl %eax,\e - .endm - -C_FUNCTION_BEGIN(sha1Process) -LABEL(sha1Process) - pushl %edi - pushl %esi - pushl %ebx - pushl %ebp - - movl 20(%esp),%esi - subl $20,%esp - leal PARAM_DATA(%esi),%edi - movl %esp,%ebp - - movl $4,%ecx -LOCAL(0): - movl (%esi,%ecx,4),%edx - movl %edx,(%ebp,%ecx,4) - decl %ecx - jns LOCAL(0) - - movl $15,%ecx - xorl %eax,%eax - - .p2align 2 -LOCAL(1): - movl (%edi,%ecx,4),%edx - bswap %edx - mov %edx,(%edi,%ecx,4) - decl %ecx - jns LOCAL(1) - - leal PARAM_DATA(%esi),%edi - movl $16,%ecx - - .p2align 2 -LOCAL(2): - movl 
52(%edi),%eax - movl 56(%edi),%ebx - xorl 32(%edi),%eax - xorl 36(%edi),%ebx - xorl 8(%edi),%eax - xorl 12(%edi),%ebx - xorl (%edi),%eax - xorl 4(%edi),%ebx - roll $1,%eax - roll $1,%ebx - movl %eax,64(%edi) - movl %ebx,68(%edi) - movl 60(%edi),%eax - movl 64(%edi),%ebx - xorl 40(%edi),%eax - xorl 44(%edi),%ebx - xorl 16(%edi),%eax - xorl 20(%edi),%ebx - xorl 8(%edi),%eax - xorl 12(%edi),%ebx - roll $1,%eax - roll $1,%ebx - movl %eax,72(%edi) - movl %ebx,76(%edi) - addl $16,%edi - decl %ecx - jnz LOCAL(2) - - movl $PARAM_DATA,%edi - - movl (%ebp),%eax -LOCAL(01_20): - subround1 4(%ebp), 8(%ebp), 12(%ebp), 16(%ebp), 0 - subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround1 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround1 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround1 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround1 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround1 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround1 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround1 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - -LOCAL(21_40): - subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - 
subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround2 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround2 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround2 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround2 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround2 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - -LOCAL(41_60): - subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround3 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround3 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround3 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround3 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround3 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - -LOCAL(61_80): - subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround4 8(%ebp), %ebx , 16(%ebp), 
(%ebp), 16 - addl $20,%edi - subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - addl $20,%edi - subround4 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0 - subround4 (%ebp), %ebx , 8(%ebp), 12(%ebp), 4 - subround4 16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8 - subround4 12(%ebp), %ebx , (%ebp), 4(%ebp), 12 - subround4 8(%ebp), %ebx , 16(%ebp), (%ebp), 16 - /* addl $20,%edi */ - - movl $4,%ecx - - .p2align 2 -LOCAL(3): - movl (%ebp,%ecx,4),%eax - addl %eax,(%esi,%ecx,4) - decl %ecx - jns LOCAL(3) - - addl $20,%esp - popl %ebp - popl %ebx - popl %esi - popl %edi - ret -C_FUNCTION_END(sha1Process, LOCAL(sha1Process_size)) diff --git a/beecrypt/gas/sha1opt.i586.m4 b/beecrypt/gas/sha1opt.i586.m4 new file mode 100644 index 000000000..a8d8ec6b5 --- /dev/null +++ b/beecrypt/gas/sha1opt.i586.m4 @@ -0,0 +1,280 @@ +dnl sha1opt.i586.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include(config.m4) +include(ASM_SRCDIR/x86.m4) + + .equ K00, 0x5a827999 + .equ K20, 0x6ed9eba1 + .equ K40, 0x8f1bbcdc + .equ K60, 0xca62c1d6 + + .equ PARAM_H, 0 + .equ PARAM_DATA, 20 + +define(`subround1',` + movl $2,%ecx + movl $1,%ebx + movl $3,%edx + roll `$'5,%eax + xorl %edx,%ecx + addl $4,%eax + andl %ebx,%ecx + addl `$'K00,%eax + rorl `$'2,%ebx + addl $5(%esi,%edi),%eax + xorl %edx,%ecx + movl %ebx,$1 + addl %ecx,%eax + movl %eax,$4 +') + +define(`subround2',` + movl $2,%ecx + movl $1,%ebx + roll `$'5,%eax + xorl %ebx,%ecx + addl $4,%eax + xorl $3,%ecx + addl `$'K20,%eax + rorl `$'2,%ebx + addl $5(%esi,%edi),%eax + movl %ebx,$1 + addl %ecx,%eax + movl %eax,$4 +') + +define(`subround3',` + movl $2,%ecx + roll `$'5,%eax + movl $1,%ebx + movl %ecx,%edx + addl $4,%eax + orl %ebx,%ecx + andl %ebx,%edx + andl $3,%ecx + addl `$'K40,%eax + orl %edx,%ecx + addl $5(%esi,%edi),%eax + rorl `$'2,%ebx + addl %ecx,%eax + movl %ebx,$1 + movl %eax,$4 +') + +define(`subround4',` + movl $2,%ecx + movl $1,%ebx + roll `$'5,%eax + xorl %ebx,%ecx + addl $4,%eax + xorl $3,%ecx + addl `$'K60,%eax + rorl `$'2,%ebx + addl $5(%esi,%edi),%eax + movl %ebx,$1 + addl %ecx,%eax + movl %eax,$4 +') + +C_FUNCTION_BEGIN(sha1Process) + pushl %edi + pushl %esi + pushl %ebx + pushl %ebp + + movl 20(%esp),%esi + subl `$'20,%esp + leal PARAM_DATA(%esi),%edi + movl %esp,%ebp + + movl `$'4,%ecx +LOCAL(0): + movl (%esi,%ecx,4),%edx + movl %edx,(%ebp,%ecx,4) + decl %ecx + jns LOCAL(0) + + movl `$'15,%ecx + xorl %eax,%eax + + .align 4 +LOCAL(1): + movl (%edi,%ecx,4),%edx + bswap %edx + mov %edx,(%edi,%ecx,4) + decl %ecx + jns LOCAL(1) + + leal PARAM_DATA(%esi),%edi + movl `$'16,%ecx + + .align 4 +LOCAL(2): + movl 52(%edi),%eax + movl 56(%edi),%ebx + xorl 32(%edi),%eax + xorl 
36(%edi),%ebx + xorl 8(%edi),%eax + xorl 12(%edi),%ebx + xorl (%edi),%eax + xorl 4(%edi),%ebx + roll `$'1,%eax + roll `$'1,%ebx + movl %eax,64(%edi) + movl %ebx,68(%edi) + movl 60(%edi),%eax + movl 64(%edi),%ebx + xorl 40(%edi),%eax + xorl 44(%edi),%ebx + xorl 16(%edi),%eax + xorl 20(%edi),%ebx + xorl 8(%edi),%eax + xorl 12(%edi),%ebx + roll `$'1,%eax + roll `$'1,%ebx + movl %eax,72(%edi) + movl %ebx,76(%edi) + addl `$'16,%edi + decl %ecx + jnz LOCAL(2) + + movl `$'PARAM_DATA,%edi + + movl (%ebp),%eax +LOCAL(01_20): + subround1( 4(%ebp), 8(%ebp), 12(%ebp), 16(%ebp), 0) + subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround1( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround1( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround1( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround1( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround1(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround1(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround1( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + +LOCAL(21_40): + subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + 
subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround2( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround2( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround2(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround2(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround2( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + +LOCAL(41_60): + subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround3( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround3( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround3(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround3(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround3( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + +LOCAL(61_80): + subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + 
subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + addl `$'20,%edi + subround4( 4(%ebp), %ebx , 12(%ebp), 16(%ebp), 0) + subround4( (%ebp), %ebx , 8(%ebp), 12(%ebp), 4) + subround4(16(%ebp), %ebx , 4(%ebp), 8(%ebp), 8) + subround4(12(%ebp), %ebx , (%ebp), 4(%ebp), 12) + subround4( 8(%ebp), %ebx , 16(%ebp), (%ebp), 16) + + movl `$'4,%ecx + + .align 4 +LOCAL(3): + movl (%ebp,%ecx,4),%eax + addl %eax,(%esi,%ecx,4) + decl %ecx + jns LOCAL(3) + + addl `$'20,%esp + popl %ebp + popl %ebx + popl %esi + popl %edi + ret +C_FUNCTION_END(sha1Process) diff --git a/beecrypt/gas/sha1opt.powerpc.S b/beecrypt/gas/sha1opt.powerpc.S deleted file mode 100644 index f9dc73a9e..000000000 --- a/beecrypt/gas/sha1opt.powerpc.S +++ /dev/null @@ -1,287 +0,0 @@ -/* - * fips180opt.powerpc.S - * - * Assembler optimized SHA-1 routines for PowerPC processors - * - * Warning: this code is incomplete and only contains a rough prototype! - * - * Compile target is GNU Assembler - * - * Copyright (c) 2000, 2001 Virtual Unlimited B.V. - * - * Author: Bob Deblier <bob@virtualunlimited.com> - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include "beecrypt.gas.h" - - .file "fips180opt.powerpc.S" - - .text - -#if DARWIN -# define reg0 r0 -# define reg3 r3 -# define reg4 r4 -# define reg5 r5 -# define reg6 r6 -# define reg7 r7 -# define reg8 r8 -# define reg9 r9 -# define reg26 r26 -# define reg27 r27 -# define reg28 r28 -# define reg29 r29 -# define reg30 r30 -# define reg31 r31 -#else -# define reg0 %r0 -# define reg3 %r3 -# define reg4 %r4 -# define reg5 %r5 -# define reg6 %r6 -# define reg7 %r7 -# define reg8 %r8 -# define reg9 %r9 -# define reg26 %r26 -# define reg27 %r27 -# define reg28 %r28 -# define reg29 %r29 -# define reg30 %r30 -# define reg31 %r31 -#endif - -#define K00 0x5a827999 -#define K20 0x6ed9eba1 -#define K40 0x8f1bbcdc -#define K60 0xca62c1d6 - -#define PARAM_H 0 -#define PARAM_DATA 20 - -/* sha1Param: param in reg3 */ - - .macro subround1 a b c d e w - lwzu reg7,4(\w) - rotlwi reg5,\a,5 - dbct r0,\w - xor reg6,\c,\d - add \e,\e,K00 - and reg6,reg6,\b - add \e,\e,reg7 - xor reg6,reg6,\d - add \e,\e,reg5 - rotrwi \b,\b,2 - add \e,\e,reg6 - .endm - - .macro subround2 a b c d e w - lwzu reg7,4(\w) - rotlwi reg5,\a,5 - dbct r0,\w - add \e,\e,K20 - xor reg6,\b,\c - add \e,\e,reg5 - xor reg6,reg6,\d - add \e,\e,reg7 - rotrwi \b,\b,2 - add \e,\e,reg6 - .endm - - .macro subround3 a b c d e w - lwzu reg7,4(\w) - rotlwi reg5,\a,5 - dbct r0,\w - xor reg6,\b,\c - add \e,\e,reg5 - and reg6,reg6,\d - add \e,\e,K40 - and reg5,\b,\c - add \e,\e,reg7 - or reg6,reg6,reg5 - rotrwi \b,\b,2 - add \e,\e,reg6 - .endm - 
- .macro subround4 a b c d e w - lwzu reg7,4(\w) - rotlwi reg5,\a,5 - dbct r0,\w - add \e,\e,K60 - xor reg6,\b,\c - add \e,\e,reg5 - xor reg6,reg6,\d - add \e,\e,reg7 - rotrwi \b,\b,2 - add \e,\e,reg6 - .endm - -C_FUNCTION_BEGIN(sha1Process) -/* zero reg0 for general use */ - li reg0,0 -/* for a,b,c,d,e use r26,r27,r28,r29,r30, for w use r31 */ - -/* we need to save registers before loading them */ - stmw reg26,-24(reg1) -/* load the frame pointer with parameter data, and hint cache */ - addi reg31,reg3,PARAM_DATA - dbct reg31 - -#if !WORDS_BIGENDIAN /* have to provide for PowerPC little-endian mode - /* loop of 16 entries */ - li reg5,60 - mtctr reg6 -.L00: - lwbrx reg6,reg31,reg5 - stwx reg6,reg31,reg5 - subi. reg5,reg5,4 - bcge cr0,.L00 - addi reg31,reg3,PARAM_DATA -#endif - -/* do the initial mixing */ - li reg8,64 - addi reg26,reg3,PARAM_DATA+64-4 - addi reg27,reg3,PARAM_DATA+64-3*4-4 - addi reg28,reg3,PARAM_DATA+64-8*4-4 - addi reg29,reg3,PARAM_DATA+64-14*4-4 - addi reg30,reg3,PARAM_DATA+64-16*4-4 - mtctr reg8 - -.L10: - lwzu reg5,4(reg27) - lwzu reg6,4(reg28) - lwzu reg7,4(reg29) - lwzu reg8,4(reg30) - xor reg5,reg5,reg6 - xor reg7,reg7,reg8 - xor reg5,reg5,reg7 - stwu reg5,4(reg26) - bdnz .L10 - - lwz reg26,PARAM_H (reg3) - lwz reg27,PARAM_H+4 (reg3) - lwz reg28,PARAM_H+8 (reg3) - lwz reg29,PARAM_H+12(reg3) - lwz reg30,PARAM_H+16(reg3) - - subround1 reg26,reg27,reg28,reg29,reg30,reg31 - subround1 reg30,reg26,reg27,reg28,reg29,reg31 - subround1 reg29,reg30,reg26,reg27,reg28,reg31 - subround1 reg28,reg29,reg30,reg26,reg27,reg31 - subround1 reg27,reg28,reg29,reg30,reg26,reg31 - subround1 reg26,reg27,reg28,reg29,reg30,reg31 - subround1 reg30,reg26,reg27,reg28,reg29,reg31 - subround1 reg29,reg30,reg26,reg27,reg28,reg31 - subround1 reg28,reg29,reg30,reg26,reg27,reg31 - subround1 reg27,reg28,reg29,reg30,reg26,reg31 - subround1 reg26,reg27,reg28,reg29,reg30,reg31 - subround1 reg30,reg26,reg27,reg28,reg29,reg31 - subround1 reg29,reg30,reg26,reg27,reg28,reg31 - 
subround1 reg28,reg29,reg30,reg26,reg27,reg31 - subround1 reg27,reg28,reg29,reg30,reg26,reg31 - subround1 reg26,reg27,reg28,reg29,reg30,reg31 - subround1 reg30,reg26,reg27,reg28,reg29,reg31 - subround1 reg29,reg30,reg26,reg27,reg28,reg31 - subround1 reg28,reg29,reg30,reg26,reg27,reg31 - subround1 reg27,reg28,reg29,reg30,reg26,reg31 - - subround2 reg26,reg27,reg28,reg29,reg30,reg31 - subround2 reg30,reg26,reg27,reg28,reg29,reg31 - subround2 reg29,reg30,reg26,reg27,reg28,reg31 - subround2 reg28,reg29,reg30,reg26,reg27,reg31 - subround2 reg27,reg28,reg29,reg30,reg26,reg31 - subround2 reg26,reg27,reg28,reg29,reg30,reg31 - subround2 reg30,reg26,reg27,reg28,reg29,reg31 - subround2 reg29,reg30,reg26,reg27,reg28,reg31 - subround2 reg28,reg29,reg30,reg26,reg27,reg31 - subround2 reg27,reg28,reg29,reg30,reg26,reg31 - subround2 reg26,reg27,reg28,reg29,reg30,reg31 - subround2 reg30,reg26,reg27,reg28,reg29,reg31 - subround2 reg29,reg30,reg26,reg27,reg28,reg31 - subround2 reg28,reg29,reg30,reg26,reg27,reg31 - subround2 reg27,reg28,reg29,reg30,reg26,reg31 - subround2 reg26,reg27,reg28,reg29,reg30,reg31 - subround2 reg30,reg26,reg27,reg28,reg29,reg31 - subround2 reg29,reg30,reg26,reg27,reg28,reg31 - subround2 reg28,reg29,reg30,reg26,reg27,reg31 - subround2 reg27,reg28,reg29,reg30,reg26,reg31 - - subround3 reg26,reg27,reg28,reg29,reg30,reg31 - subround3 reg30,reg26,reg27,reg28,reg29,reg31 - subround3 reg29,reg30,reg26,reg27,reg28,reg31 - subround3 reg28,reg29,reg30,reg26,reg27,reg31 - subround3 reg27,reg28,reg29,reg30,reg26,reg31 - subround3 reg26,reg27,reg28,reg29,reg30,reg31 - subround3 reg30,reg26,reg27,reg28,reg29,reg31 - subround3 reg29,reg30,reg26,reg27,reg28,reg31 - subround3 reg28,reg29,reg30,reg26,reg27,reg31 - subround3 reg27,reg28,reg29,reg30,reg26,reg31 - subround3 reg26,reg27,reg28,reg29,reg30,reg31 - subround3 reg30,reg26,reg27,reg28,reg29,reg31 - subround3 reg29,reg30,reg26,reg27,reg28,reg31 - subround3 reg28,reg29,reg30,reg26,reg27,reg31 - subround3 
reg27,reg28,reg29,reg30,reg26,reg31 - subround3 reg26,reg27,reg28,reg29,reg30,reg31 - subround3 reg30,reg26,reg27,reg28,reg29,reg31 - subround3 reg29,reg30,reg26,reg27,reg28,reg31 - subround3 reg28,reg29,reg30,reg26,reg27,reg31 - subround3 reg27,reg28,reg29,reg30,reg26,reg31 - - subround4 reg26,reg27,reg28,reg29,reg30,reg31 - subround4 reg30,reg26,reg27,reg28,reg29,reg31 - subround4 reg29,reg30,reg26,reg27,reg28,reg31 - subround4 reg28,reg29,reg30,reg26,reg27,reg31 - subround4 reg27,reg28,reg29,reg30,reg26,reg31 - subround4 reg26,reg27,reg28,reg29,reg30,reg31 - subround4 reg30,reg26,reg27,reg28,reg29,reg31 - subround4 reg29,reg30,reg26,reg27,reg28,reg31 - subround4 reg28,reg29,reg30,reg26,reg27,reg31 - subround4 reg27,reg28,reg29,reg30,reg26,reg31 - subround4 reg26,reg27,reg28,reg29,reg30,reg31 - subround4 reg30,reg26,reg27,reg28,reg29,reg31 - subround4 reg29,reg30,reg26,reg27,reg28,reg31 - subround4 reg28,reg29,reg30,reg26,reg27,reg31 - subround4 reg27,reg28,reg29,reg30,reg26,reg31 - subround4 reg26,reg27,reg28,reg29,reg30,reg31 - subround4 reg30,reg26,reg27,reg28,reg29,reg31 - subround4 reg29,reg30,reg26,reg27,reg28,reg31 - subround4 reg28,reg29,reg30,reg26,reg27,reg31 - subround4 reg27,reg28,reg29,reg30,reg26,reg31 - -/* then store the five values into registers */ - lwz reg5,PARAM_H (reg3) - lwz reg6,PARAM_H+4 (reg3) - lwz reg7,PARAM_H+8 (reg3) - lwz reg8,PARAM_H+12(reg3) - lwz reg9,PARAM_H+16(reg3) - add reg26,reg5,reg26 - add reg27,reg5,reg27 - add reg28,reg5,reg28 - add reg29,reg5,reg29 - add reg30,reg5,reg30 - stw reg26,PARAM_H (reg3) - stw reg27,PARAM_H+4 (reg3) - stw reg28,PARAM_H+8 (reg3) - stw reg29,PARAM_H+12(reg3) - stw reg30,PARAM_H+16(reg3) - -/* finally, restore registers */ - lmw reg26,-24(reg1) -/* and return */ - blr -C_FUNCION_END(sha1Process, .Lsha1Process_size) diff --git a/beecrypt/gas/sparc.m4 b/beecrypt/gas/sparc.m4 new file mode 100644 index 000000000..e735600c3 --- /dev/null +++ b/beecrypt/gas/sparc.m4 @@ -0,0 +1,30 @@ +dnl sparc.m4 +dnl 
+dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. +dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +ifelse(substr(ASM_OS,0,7),solaris,` +undefine(`C_FUNCTION_BEGIN') +define(C_FUNCTION_BEGIN,` + TEXTSEG + GLOBL SYMNAME($1) +SYMNAME($1): + .register %g2,#scratch + .register %g3,#scratch +') +') diff --git a/beecrypt/gas/x86.m4 b/beecrypt/gas/x86.m4 new file mode 100644 index 000000000..131c94df2 --- /dev/null +++ b/beecrypt/gas/x86.m4 @@ -0,0 +1,23 @@ +dnl x86.m4 +dnl +dnl Copyright (c) 2003 Bob Deblier +dnl +dnl Author: Bob Deblier <bob.deblier@pandora.be> +dnl +dnl This library is free software; you can redistribute it and/or +dnl modify it under the terms of the GNU Lesser General Public +dnl License as published by the Free Software Foundation; either +dnl version 2.1 of the License, or (at your option) any later version. +dnl +dnl This library is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +dnl Lesser General Public License for more details. 
+dnl +dnl You should have received a copy of the GNU Lesser General Public +dnl License along with this library; if not, write to the Free Software +dnl Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +ifelse(ASM_ARCH,pentium4,` + define(`USE_SSE2') +') |