path: root/crypto/bn
Diffstat (limited to 'crypto/bn')
-rw-r--r--  crypto/bn/asm/armv4-gf2m.pl    |  10
-rw-r--r--  crypto/bn/asm/ia64.S           |   4
-rw-r--r--  crypto/bn/asm/ppc64-mont.pl    | 174
-rwxr-xr-x  crypto/bn/asm/rsaz-x86_64.pl   |   2
-rw-r--r--  crypto/bn/asm/s390x-gf2m.pl    |   6
-rwxr-xr-x  crypto/bn/asm/s390x.S          | 109
-rw-r--r--  crypto/bn/asm/x86-gf2m.pl      |  16
-rw-r--r--  crypto/bn/asm/x86_64-gcc.c     |   2
-rw-r--r--  crypto/bn/asm/x86_64-gf2m.pl   |  16
-rwxr-xr-x  crypto/bn/asm/x86_64-mont.pl   |   5
-rwxr-xr-x  crypto/bn/asm/x86_64-mont5.pl  |  27
-rw-r--r--  crypto/bn/bn_exp.c             |   7
-rw-r--r--  crypto/bn/bn_gcd.c             |   2
-rw-r--r--  crypto/bn/bn_gf2m.c            |  11
-rw-r--r--  crypto/bn/bn_mont.c            |   9
-rw-r--r--  crypto/bn/bn_recp.c            |   4
-rw-r--r--  crypto/bn/bn_x931p.c           |   7
-rw-r--r--  crypto/bn/bntest.c             |  74
-rw-r--r--  crypto/bn/rsaz_exp.h           |  68
19 files changed, 356 insertions(+), 197 deletions(-)
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
index 8f529c9..72381a7 100644
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -27,7 +27,7 @@
# referred below, which improves ECDH and ECDSA verify benchmarks
# by 18-40%.
#
-# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -136,7 +136,7 @@ ___
################
# void bn_GF2m_mul_2x2(BN_ULONG *r,
# BN_ULONG a1,BN_ULONG a0,
-# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
+# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{
$code.=<<___;
.global bn_GF2m_mul_2x2
@@ -159,7 +159,7 @@ $code.=<<___;
mov $mask,#7<<2
sub sp,sp,#32 @ allocate tab[8]
- bl mul_1x1_ialu @ a1·b1
+ bl mul_1x1_ialu @ a1·b1
str $lo,[$ret,#8]
str $hi,[$ret,#12]
@@ -169,13 +169,13 @@ $code.=<<___;
eor r2,r2,$a
eor $b,$b,r3
eor $a,$a,r2
- bl mul_1x1_ialu @ a0·b0
+ bl mul_1x1_ialu @ a0·b0
str $lo,[$ret]
str $hi,[$ret,#4]
eor $a,$a,r2
eor $b,$b,r3
- bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
+ bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
___
@r=map("r$_",(6..9));
$code.=<<___;
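
[Editor's note] The three mul_1x1_ialu calls above implement one level of Karatsuba over GF(2): addition in GF(2)[x] is XOR, so the 2x2 product needs only three 1x1 carry-less multiplications. The same identity reappears in the s390x-gf2m.pl, x86-gf2m.pl and x86_64-gf2m.pl hunks below. A portable sketch of the recombination, with hypothetical helper names and 64-bit limbs for illustration (the ARM code works on 32-bit BN_ULONGs):

    #include <stdint.h>

    /* carry-less 64x64->128 multiply, bit-by-bit reference version
     * (stand-in for mul_1x1_ialu) */
    static void gf2_mul_1x1(uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
    {
        uint64_t h = 0, l = 0;
        for (int i = 0; i < 64; i++)
            if ((b >> i) & 1) {
                l ^= a << i;
                h ^= i ? a >> (64 - i) : 0;
            }
        *hi = h;
        *lo = l;
    }

    /* r[3..0] = (a1:a0) * (b1:b0) in GF(2)[x], three products instead of four */
    static void gf2_mul_2x2(uint64_t r[4], uint64_t a1, uint64_t a0,
                            uint64_t b1, uint64_t b0)
    {
        uint64_t hh_h, hh_l, ll_h, ll_l, m_h, m_l;

        gf2_mul_1x1(a1, b1, &hh_h, &hh_l);          /* a1*b1 */
        gf2_mul_1x1(a0, b0, &ll_h, &ll_l);          /* a0*b0 */
        gf2_mul_1x1(a1 ^ a0, b1 ^ b0, &m_h, &m_l);  /* (a1+a0)*(b1+b0) */

        m_h ^= hh_h ^ ll_h;      /* middle term: subtracting == XOR in GF(2) */
        m_l ^= hh_l ^ ll_l;

        r[0] = ll_l;
        r[1] = ll_h ^ m_l;
        r[2] = hh_l ^ m_h;
        r[3] = hh_h;
    }
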
diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S
index 951abc5..a9a42ab 100644
--- a/crypto/bn/asm/ia64.S
+++ b/crypto/bn/asm/ia64.S
@@ -422,7 +422,7 @@ bn_mul_add_words:
// This loop spins in 3*(n+10) ticks on Itanium and in 2*(n+10) on
// Itanium 2. Yes, unlike previous versions it scales:-) Previous
-// version was peforming *all* additions in IALU and was starving
+// version was performing *all* additions in IALU and was starving
// for those even on Itanium 2. In this version one addition is
// moved to FPU and is folded with multiplication. This is at cost
// of propogating the result from previous call to this subroutine
@@ -568,7 +568,7 @@ bn_sqr_comba8:
// I've estimated this routine to run in ~120 ticks, but in reality
// (i.e. according to ar.itc) it takes ~160 ticks. Are those extra
// cycles consumed for instructions fetch? Or did I misinterpret some
-// clause in Itanium µ-architecture manual? Comments are welcomed and
+// clause in Itanium µ-architecture manual? Comments are welcomed and
// highly appreciated.
//
// On Itanium 2 it takes ~190 ticks. This is because of stalls on
diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl
index 68e3733..9e3c12d 100644
--- a/crypto/bn/asm/ppc64-mont.pl
+++ b/crypto/bn/asm/ppc64-mont.pl
@@ -94,6 +94,8 @@ if ($flavour =~ /32/) {
$POP= "ld";
} else { die "nonsense $flavour"; }
+$LITTLE_ENDIAN = ($flavour=~/le$/) ? 4 : 0;
+
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -294,12 +296,12 @@ $code.=<<___ if ($SIZE_T==8);
extrdi $t0,$a0,32,32 ; lwz $t0,4($ap)
extrdi $t1,$a0,32,0 ; lwz $t1,0($ap)
- lwz $t2,12($ap) ; load a[1] as 32-bit word pair
- lwz $t3,8($ap)
- lwz $t4,4($np) ; load n[0] as 32-bit word pair
- lwz $t5,0($np)
- lwz $t6,12($np) ; load n[1] as 32-bit word pair
- lwz $t7,8($np)
+ lwz $t2,`12^$LITTLE_ENDIAN`($ap) ; load a[1] as 32-bit word pair
+ lwz $t3,`8^$LITTLE_ENDIAN`($ap)
+ lwz $t4,`4^$LITTLE_ENDIAN`($np) ; load n[0] as 32-bit word pair
+ lwz $t5,`0^$LITTLE_ENDIAN`($np)
+ lwz $t6,`12^$LITTLE_ENDIAN`($np) ; load n[1] as 32-bit word pair
+ lwz $t7,`8^$LITTLE_ENDIAN`($np)
___
$code.=<<___ if ($SIZE_T==4);
lwz $a0,0($ap) ; pull ap[0,1] value
@@ -463,14 +465,14 @@ $code.=<<___;
L1st:
___
$code.=<<___ if ($SIZE_T==8);
- lwz $t0,4($ap) ; load a[j] as 32-bit word pair
- lwz $t1,0($ap)
- lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
- lwz $t3,8($ap)
- lwz $t4,4($np) ; load n[j] as 32-bit word pair
- lwz $t5,0($np)
- lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
- lwz $t7,8($np)
+ lwz $t0,`4^$LITTLE_ENDIAN`($ap) ; load a[j] as 32-bit word pair
+ lwz $t1,`0^$LITTLE_ENDIAN`($ap)
+ lwz $t2,`12^$LITTLE_ENDIAN`($ap) ; load a[j+1] as 32-bit word pair
+ lwz $t3,`8^$LITTLE_ENDIAN`($ap)
+ lwz $t4,`4^$LITTLE_ENDIAN`($np) ; load n[j] as 32-bit word pair
+ lwz $t5,`0^$LITTLE_ENDIAN`($np)
+ lwz $t6,`12^$LITTLE_ENDIAN`($np) ; load n[j+1] as 32-bit word pair
+ lwz $t7,`8^$LITTLE_ENDIAN`($np)
___
$code.=<<___ if ($SIZE_T==4);
lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs
@@ -505,14 +507,14 @@ $code.=<<___;
___
} else {
$code.=<<___;
- lwz $t1,`$FRAME+0`($sp)
- lwz $t0,`$FRAME+4`($sp)
- lwz $t3,`$FRAME+8`($sp)
- lwz $t2,`$FRAME+12`($sp)
- lwz $t5,`$FRAME+16`($sp)
- lwz $t4,`$FRAME+20`($sp)
- lwz $t7,`$FRAME+24`($sp)
- lwz $t6,`$FRAME+28`($sp)
+ lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp)
+ lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp)
+ lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp)
+ lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp)
+ lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp)
+ lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp)
+ lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp)
+ lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp)
___
}
$code.=<<___;
@@ -651,8 +653,8 @@ $code.=<<___;
fmadd $T1a,$N1,$na,$T1a
fmadd $T1b,$N1,$nb,$T1b
- lwz $t3,`$FRAME+32`($sp) ; permuted $t1
- lwz $t2,`$FRAME+36`($sp) ; permuted $t0
+ lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1
+ lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0
addc $t4,$t4,$carry
adde $t5,$t5,$c1
srwi $carry,$t4,16
@@ -673,8 +675,8 @@ $code.=<<___;
fmadd $T1a,$N0,$nc,$T1a
fmadd $T1b,$N0,$nd,$T1b
- lwz $t7,`$FRAME+40`($sp) ; permuted $t3
- lwz $t6,`$FRAME+44`($sp) ; permuted $t2
+ lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3
+ lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2
addc $t2,$t2,$carry
adde $t3,$t3,$c1
srwi $carry,$t2,16
@@ -686,8 +688,8 @@ $code.=<<___;
insrwi $carry,$t3,16,0
fmadd $T3a,$N2,$nc,$T3a
fmadd $T3b,$N2,$nd,$T3b
- lwz $t1,`$FRAME+48`($sp) ; permuted $t5
- lwz $t0,`$FRAME+52`($sp) ; permuted $t4
+ lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5
+ lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4
addc $t6,$t6,$carry
adde $t7,$t7,$c1
srwi $carry,$t6,16
@@ -699,8 +701,8 @@ $code.=<<___;
fctid $T0a,$T0a
fctid $T0b,$T0b
- lwz $t5,`$FRAME+56`($sp) ; permuted $t7
- lwz $t4,`$FRAME+60`($sp) ; permuted $t6
+ lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7
+ lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6
addc $t0,$t0,$carry
adde $t1,$t1,$c1
srwi $carry,$t0,16
@@ -787,14 +789,14 @@ $code.=<<___;
___
} else {
$code.=<<___;
- lwz $t1,`$FRAME+0`($sp)
- lwz $t0,`$FRAME+4`($sp)
- lwz $t3,`$FRAME+8`($sp)
- lwz $t2,`$FRAME+12`($sp)
- lwz $t5,`$FRAME+16`($sp)
- lwz $t4,`$FRAME+20`($sp)
- lwz $t7,`$FRAME+24`($sp)
- lwz $t6,`$FRAME+28`($sp)
+ lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp)
+ lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp)
+ lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp)
+ lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp)
+ lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp)
+ lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp)
+ lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp)
+ lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp)
stfd $dota,`$FRAME+64`($sp)
stfd $dotb,`$FRAME+72`($sp)
@@ -823,14 +825,14 @@ $code.=<<___;
stw $t0,12($tp) ; tp[j-1]
stw $t4,8($tp)
- lwz $t3,`$FRAME+32`($sp) ; permuted $t1
- lwz $t2,`$FRAME+36`($sp) ; permuted $t0
- lwz $t7,`$FRAME+40`($sp) ; permuted $t3
- lwz $t6,`$FRAME+44`($sp) ; permuted $t2
- lwz $t1,`$FRAME+48`($sp) ; permuted $t5
- lwz $t0,`$FRAME+52`($sp) ; permuted $t4
- lwz $t5,`$FRAME+56`($sp) ; permuted $t7
- lwz $t4,`$FRAME+60`($sp) ; permuted $t6
+ lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1
+ lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0
+ lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3
+ lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2
+ lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5
+ lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4
+ lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7
+ lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6
addc $t2,$t2,$carry
adde $t3,$t3,$c1
@@ -857,10 +859,10 @@ $code.=<<___;
stw $t2,20($tp) ; tp[j]
stwu $t0,16($tp)
- lwz $t7,`$FRAME+64`($sp)
- lwz $t6,`$FRAME+68`($sp)
- lwz $t5,`$FRAME+72`($sp)
- lwz $t4,`$FRAME+76`($sp)
+ lwz $t7,`$FRAME+64^$LITTLE_ENDIAN`($sp)
+ lwz $t6,`$FRAME+68^$LITTLE_ENDIAN`($sp)
+ lwz $t5,`$FRAME+72^$LITTLE_ENDIAN`($sp)
+ lwz $t4,`$FRAME+76^$LITTLE_ENDIAN`($sp)
addc $t6,$t6,$carry
adde $t7,$t7,$c1
@@ -1165,23 +1167,23 @@ ___
$code.=<<___;
fmadd $T1a,$N1,$na,$T1a
fmadd $T1b,$N1,$nb,$T1b
- lwz $t1,`$FRAME+0`($sp)
- lwz $t0,`$FRAME+4`($sp)
+ lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp)
+ lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp)
fmadd $T2a,$N2,$na,$T2a
fmadd $T2b,$N2,$nb,$T2b
- lwz $t3,`$FRAME+8`($sp)
- lwz $t2,`$FRAME+12`($sp)
+ lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp)
+ lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp)
fmadd $T3a,$N3,$na,$T3a
fmadd $T3b,$N3,$nb,$T3b
- lwz $t5,`$FRAME+16`($sp)
- lwz $t4,`$FRAME+20`($sp)
+ lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp)
+ lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp)
addc $t0,$t0,$carry
adde $t1,$t1,$c1
srwi $carry,$t0,16
fmadd $T0a,$N0,$na,$T0a
fmadd $T0b,$N0,$nb,$T0b
- lwz $t7,`$FRAME+24`($sp)
- lwz $t6,`$FRAME+28`($sp)
+ lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp)
+ lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp)
srwi $c1,$t1,16
insrwi $carry,$t1,16,0
@@ -1218,8 +1220,8 @@ $code.=<<___;
fctid $T1a,$T1a
addc $t0,$t0,$t2
adde $t4,$t4,$t3
- lwz $t3,`$FRAME+32`($sp) ; permuted $t1
- lwz $t2,`$FRAME+36`($sp) ; permuted $t0
+ lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1
+ lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0
fctid $T1b,$T1b
addze $carry,$carry
addze $c1,$c1
@@ -1229,19 +1231,19 @@ $code.=<<___;
addc $t2,$t2,$carry
adde $t3,$t3,$c1
srwi $carry,$t2,16
- lwz $t7,`$FRAME+40`($sp) ; permuted $t3
- lwz $t6,`$FRAME+44`($sp) ; permuted $t2
+ lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3
+ lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2
fctid $T2b,$T2b
srwi $c1,$t3,16
insrwi $carry,$t3,16,0
- lwz $t1,`$FRAME+48`($sp) ; permuted $t5
- lwz $t0,`$FRAME+52`($sp) ; permuted $t4
+ lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5
+ lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4
fctid $T3a,$T3a
addc $t6,$t6,$carry
adde $t7,$t7,$c1
srwi $carry,$t6,16
- lwz $t5,`$FRAME+56`($sp) ; permuted $t7
- lwz $t4,`$FRAME+60`($sp) ; permuted $t6
+ lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7
+ lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6
fctid $T3b,$T3b
insrwi $t2,$t6,16,0 ; 64..95 bits
@@ -1354,14 +1356,14 @@ $code.=<<___;
___
} else {
$code.=<<___;
- lwz $t1,`$FRAME+0`($sp)
- lwz $t0,`$FRAME+4`($sp)
- lwz $t3,`$FRAME+8`($sp)
- lwz $t2,`$FRAME+12`($sp)
- lwz $t5,`$FRAME+16`($sp)
- lwz $t4,`$FRAME+20`($sp)
- lwz $t7,`$FRAME+24`($sp)
- lwz $t6,`$FRAME+28`($sp)
+ lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp)
+ lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp)
+ lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp)
+ lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp)
+ lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp)
+ lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp)
+ lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp)
+ lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp)
stfd $dota,`$FRAME+64`($sp)
stfd $dotb,`$FRAME+72`($sp)
@@ -1397,14 +1399,14 @@ $code.=<<___;
stw $t0,4($tp) ; tp[j-1]
stw $t4,0($tp)
- lwz $t3,`$FRAME+32`($sp) ; permuted $t1
- lwz $t2,`$FRAME+36`($sp) ; permuted $t0
- lwz $t7,`$FRAME+40`($sp) ; permuted $t3
- lwz $t6,`$FRAME+44`($sp) ; permuted $t2
- lwz $t1,`$FRAME+48`($sp) ; permuted $t5
- lwz $t0,`$FRAME+52`($sp) ; permuted $t4
- lwz $t5,`$FRAME+56`($sp) ; permuted $t7
- lwz $t4,`$FRAME+60`($sp) ; permuted $t6
+ lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1
+ lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0
+ lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3
+ lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2
+ lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5
+ lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4
+ lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7
+ lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6
addc $t2,$t2,$carry
adde $t3,$t3,$c1
@@ -1433,12 +1435,12 @@ $code.=<<___;
addc $t2,$t2,$t6
adde $t0,$t0,$t7
- lwz $t7,`$FRAME+64`($sp)
- lwz $t6,`$FRAME+68`($sp)
+ lwz $t7,`$FRAME+64^$LITTLE_ENDIAN`($sp)
+ lwz $t6,`$FRAME+68^$LITTLE_ENDIAN`($sp)
addze $carry,$carry
addze $c1,$c1
- lwz $t5,`$FRAME+72`($sp)
- lwz $t4,`$FRAME+76`($sp)
+ lwz $t5,`$FRAME+72^$LITTLE_ENDIAN`($sp)
+ lwz $t4,`$FRAME+76^$LITTLE_ENDIAN`($sp)
addc $t6,$t6,$carry
adde $t7,$t7,$c1
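
[Editor's note] The recurring `^$LITTLE_ENDIAN` in the offsets above is the substance of this file's change: each lwz picks one 32-bit half of a 64-bit limb, and the two halves swap byte offsets between big- and little-endian PowerPC, so XOR-ing each offset with 4 retargets every load without touching the rest of the address arithmetic. A minimal C illustration of the layout assumption:

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t limb = 0x1122334455667788ULL;  /* hi=0x11223344 lo=0x55667788 */
        unsigned char mem[8];
        uint32_t lo;
        int is_le, lo_off;

        memcpy(mem, &limb, sizeof(limb));
        is_le = (mem[0] == 0x88);        /* first byte is the LSB on LE */
        lo_off = 4 ^ (is_le ? 4 : 0);    /* 4 on BE, 0 on LE: the asm's XOR */

        memcpy(&lo, mem + lo_off, sizeof(lo));
        printf("low half 0x%08x found at byte offset %d\n", (unsigned)lo, lo_off);
        return 0;
    }

On either endianness this prints the same low half, 0x55667788, at the offset the XOR selects.
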
diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl
index 3bd45db..12b571c 100755
--- a/crypto/bn/asm/rsaz-x86_64.pl
+++ b/crypto/bn/asm/rsaz-x86_64.pl
@@ -113,7 +113,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$addx = ($1>=12);
}
-if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) {
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) {
my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10
$addx = ($ver>=3.03);
}
diff --git a/crypto/bn/asm/s390x-gf2m.pl b/crypto/bn/asm/s390x-gf2m.pl
index cd9f13e..9d18d40 100644
--- a/crypto/bn/asm/s390x-gf2m.pl
+++ b/crypto/bn/asm/s390x-gf2m.pl
@@ -172,19 +172,19 @@ ___
if ($SIZE_T==8) {
my @r=map("%r$_",(6..9));
$code.=<<___;
- bras $ra,_mul_1x1 # a1·b1
+ bras $ra,_mul_1x1 # a1·b1
stmg $lo,$hi,16($rp)
lg $a,`$stdframe+128+4*$SIZE_T`($sp)
lg $b,`$stdframe+128+6*$SIZE_T`($sp)
- bras $ra,_mul_1x1 # a0·b0
+ bras $ra,_mul_1x1 # a0·b0
stmg $lo,$hi,0($rp)
lg $a,`$stdframe+128+3*$SIZE_T`($sp)
lg $b,`$stdframe+128+5*$SIZE_T`($sp)
xg $a,`$stdframe+128+4*$SIZE_T`($sp)
xg $b,`$stdframe+128+6*$SIZE_T`($sp)
- bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
+ bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
lmg @r[0],@r[3],0($rp)
xgr $lo,$hi
diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S
index 43fcb79..f5eebe4 100755
--- a/crypto/bn/asm/s390x.S
+++ b/crypto/bn/asm/s390x.S
@@ -18,71 +18,106 @@
.align 4
bn_mul_add_words:
lghi zero,0 // zero = 0
- la %r1,0(%r2) // put rp aside
- lghi %r2,0 // i=0;
+ la %r1,0(%r2) // put rp aside [to give way to]
+ lghi %r2,0 // return value
ltgfr %r4,%r4
bler %r14 // if (len<=0) return 0;
- stmg %r6,%r10,48(%r15)
- lghi %r10,3
- lghi %r8,0 // carry = 0
- nr %r10,%r4 // len%4
+ stmg %r6,%r13,48(%r15)
+ lghi %r2,3
+ lghi %r12,0 // carry = 0
+ slgr %r1,%r3 // rp-=ap
+ nr %r2,%r4 // len%4
sra %r4,2 // cnt=len/4
jz .Loop1_madd // carry is incidentally cleared if branch taken
algr zero,zero // clear carry
-.Loop4_madd:
- lg %r7,0(%r2,%r3) // ap[i]
+ lg %r7,0(%r3) // ap[0]
+ lg %r9,8(%r3) // ap[1]
mlgr %r6,%r5 // *=w
- alcgr %r7,%r8 // +=carry
- alcgr %r6,zero
- alg %r7,0(%r2,%r1) // +=rp[i]
- stg %r7,0(%r2,%r1) // rp[i]=
+ brct %r4,.Loop4_madd
+ j .Loop4_madd_tail
- lg %r9,8(%r2,%r3)
+.Loop4_madd:
mlgr %r8,%r5
+ lg %r11,16(%r3) // ap[i+2]
+ alcgr %r7,%r12 // +=carry
+ alcgr %r6,zero
+ alg %r7,0(%r3,%r1) // +=rp[i]
+ stg %r7,0(%r3,%r1) // rp[i]=
+
+ mlgr %r10,%r5
+ lg %r13,24(%r3)
alcgr %r9,%r6
alcgr %r8,zero
- alg %r9,8(%r2,%r1)
- stg %r9,8(%r2,%r1)
+ alg %r9,8(%r3,%r1)
+ stg %r9,8(%r3,%r1)
+
+ mlgr %r12,%r5
+ lg %r7,32(%r3)
+ alcgr %r11,%r8
+ alcgr %r10,zero
+ alg %r11,16(%r3,%r1)
+ stg %r11,16(%r3,%r1)
- lg %r7,16(%r2,%r3)
mlgr %r6,%r5
- alcgr %r7,%r8
- alcgr %r6,zero
- alg %r7,16(%r2,%r1)
- stg %r7,16(%r2,%r1)
+ lg %r9,40(%r3)
+ alcgr %r13,%r10
+ alcgr %r12,zero
+ alg %r13,24(%r3,%r1)
+ stg %r13,24(%r3,%r1)
- lg %r9,24(%r2,%r3)
+ la %r3,32(%r3) // i+=4
+ brct %r4,.Loop4_madd
+
+.Loop4_madd_tail:
mlgr %r8,%r5
+ lg %r11,16(%r3)
+ alcgr %r7,%r12 // +=carry
+ alcgr %r6,zero
+ alg %r7,0(%r3,%r1) // +=rp[i]
+ stg %r7,0(%r3,%r1) // rp[i]=
+
+ mlgr %r10,%r5
+ lg %r13,24(%r3)
alcgr %r9,%r6
alcgr %r8,zero
- alg %r9,24(%r2,%r1)
- stg %r9,24(%r2,%r1)
+ alg %r9,8(%r3,%r1)
+ stg %r9,8(%r3,%r1)
- la %r2,32(%r2) // i+=4
- brct %r4,.Loop4_madd
+ mlgr %r12,%r5
+ alcgr %r11,%r8
+ alcgr %r10,zero
+ alg %r11,16(%r3,%r1)
+ stg %r11,16(%r3,%r1)
- la %r10,1(%r10) // see if len%4 is zero ...
- brct %r10,.Loop1_madd // without touching condition code:-)
+ alcgr %r13,%r10
+ alcgr %r12,zero
+ alg %r13,24(%r3,%r1)
+ stg %r13,24(%r3,%r1)
+
+ la %r3,32(%r3) // i+=4
+
+ la %r2,1(%r2) // see if len%4 is zero ...
+ brct %r2,.Loop1_madd // without touching condition code:-)
.Lend_madd:
- alcgr %r8,zero // collect carry bit
- lgr %r2,%r8
- lmg %r6,%r10,48(%r15)
+ lgr %r2,zero // return value
+ alcgr %r2,%r12 // collect even carry bit
+ lmg %r6,%r13,48(%r15)
br %r14
.Loop1_madd:
- lg %r7,0(%r2,%r3) // ap[i]
+ lg %r7,0(%r3) // ap[i]
mlgr %r6,%r5 // *=w
- alcgr %r7,%r8 // +=carry
+ alcgr %r7,%r12 // +=carry
alcgr %r6,zero
- alg %r7,0(%r2,%r1) // +=rp[i]
- stg %r7,0(%r2,%r1) // rp[i]=
+ alg %r7,0(%r3,%r1) // +=rp[i]
+ stg %r7,0(%r3,%r1) // rp[i]=
- lgr %r8,%r6
- la %r2,8(%r2) // i++
- brct %r10,.Loop1_madd
+ lgr %r12,%r6
+ la %r3,8(%r3) // i++
+ brct %r2,.Loop1_madd
j .Lend_madd
.size bn_mul_add_words,.-bn_mul_add_words
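
[Editor's note] The rewritten loop is a four-way unrolled, software-pipelined form of the same scalar recurrence; the wider register set (%r6..%r13) lets each mlgr start before the previous limb's additions retire. What the routine computes, as a reference sketch (assumes a compiler with unsigned __int128; the sum cannot overflow 128 bits):

    #include <stdint.h>

    typedef uint64_t BN_ULONG;   /* 64-bit limbs, as on s390x */

    /* rp[i] = low64(rp[i] + ap[i]*w + carry), returning the final carry */
    BN_ULONG bn_mul_add_words_ref(BN_ULONG *rp, const BN_ULONG *ap,
                                  int num, BN_ULONG w)
    {
        BN_ULONG carry = 0;

        while (num-- > 0) {
            unsigned __int128 t = (unsigned __int128)*ap++ * w + *rp + carry;
            *rp++ = (BN_ULONG)t;
            carry = (BN_ULONG)(t >> 64);
        }
        return carry;
    }
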
diff --git a/crypto/bn/asm/x86-gf2m.pl b/crypto/bn/asm/x86-gf2m.pl
index 808a1e5..b579530 100644
--- a/crypto/bn/asm/x86-gf2m.pl
+++ b/crypto/bn/asm/x86-gf2m.pl
@@ -14,7 +14,7 @@
# the time being... Except that it has three code paths: pure integer
# code suitable for any x86 CPU, MMX code suitable for PIII and later
# and PCLMULQDQ suitable for Westmere and later. Improvement varies
-# from one benchmark and µ-arch to another. Below are interval values
+# from one benchmark and µ-arch to another. Below are interval values
# for 163- and 571-bit ECDH benchmarks relative to compiler-generated
# code:
#
@@ -226,22 +226,22 @@ if ($sse2) {
&push ("edi");
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
- &call ("_mul_1x1_mmx"); # a1·b1
+ &call ("_mul_1x1_mmx"); # a1·b1
&movq ("mm7",$R);
&mov ($a,&wparam(2));
&mov ($b,&wparam(4));
- &call ("_mul_1x1_mmx"); # a0·b0
+ &call ("_mul_1x1_mmx"); # a0·b0
&movq ("mm6",$R);
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
&xor ($a,&wparam(2));
&xor ($b,&wparam(4));
- &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
+ &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
&pxor ($R,"mm7");
&mov ($a,&wparam(0));
- &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
+ &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
&movq ($A,$R);
&psllq ($R,32);
@@ -266,13 +266,13 @@ if ($sse2) {
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
- &call ("_mul_1x1_ialu"); # a1·b1
+ &call ("_mul_1x1_ialu"); # a1·b1
&mov (&DWP(8,"esp"),$lo);
&mov (&DWP(12,"esp"),$hi);
&mov ($a,&wparam(2));
&mov ($b,&wparam(4));
- &call ("_mul_1x1_ialu"); # a0·b0
+ &call ("_mul_1x1_ialu"); # a0·b0
&mov (&DWP(0,"esp"),$lo);
&mov (&DWP(4,"esp"),$hi);
@@ -280,7 +280,7 @@ if ($sse2) {
&mov ($b,&wparam(3));
&xor ($a,&wparam(2));
&xor ($b,&wparam(4));
- &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
+ &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
&mov ("ebp",&wparam(0));
@r=("ebx","ecx","edi","esi");
diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c
index d548886..d77dc43 100644
--- a/crypto/bn/asm/x86_64-gcc.c
+++ b/crypto/bn/asm/x86_64-gcc.c
@@ -65,7 +65,7 @@
# undef mul_add
/*-
- * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
+ * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
* "g"(0) let the compiler to decide where does it
* want to keep the value of zero;
*/
diff --git a/crypto/bn/asm/x86_64-gf2m.pl b/crypto/bn/asm/x86_64-gf2m.pl
index 226c66c..42bbec2 100644
--- a/crypto/bn/asm/x86_64-gf2m.pl
+++ b/crypto/bn/asm/x86_64-gf2m.pl
@@ -13,7 +13,7 @@
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: code suitable
# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
-# later. Improvement varies from one benchmark and µ-arch to another.
+# later. Improvement varies from one benchmark and µ-arch to another.
# Vanilla code path is at most 20% faster than compiler-generated code
# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
@@ -184,13 +184,13 @@ ___
$code.=<<___;
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
- pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
+ pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
- pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
- pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
+ pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
+ pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
xorps %xmm0,%xmm4
- xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
+ xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
movdqa %xmm4,%xmm5
pslldq \$8,%xmm4
psrldq \$8,%xmm5
@@ -225,13 +225,13 @@ $code.=<<___;
mov \$0xf,$mask
mov $a1,$a
mov $b1,$b
- call _mul_1x1 # a1·b1
+ call _mul_1x1 # a1·b1
mov $lo,16(%rsp)
mov $hi,24(%rsp)
mov 48(%rsp),$a
mov 64(%rsp),$b
- call _mul_1x1 # a0·b0
+ call _mul_1x1 # a0·b0
mov $lo,0(%rsp)
mov $hi,8(%rsp)
@@ -239,7 +239,7 @@ $code.=<<___;
mov 56(%rsp),$b
xor 48(%rsp),$a
xor 64(%rsp),$b
- call _mul_1x1 # (a0+a1)·(b0+b1)
+ call _mul_1x1 # (a0+a1)·(b0+b1)
___
@r=("%rbx","%rcx","%rdi","%rsi");
$code.=<<___;
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index 2989b58..725833d 100755
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -68,6 +68,11 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$addx = ($1>=12);
}
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) {
+ my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10
+ $addx = ($ver>=3.03);
+}
+
# int bn_mul_mont(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 820de3d..64e668f 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -53,6 +53,11 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$addx = ($1>=12);
}
+if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) {
+ my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10
+ $addx = ($ver>=3.03);
+}
+
# int bn_mul_mont_gather5(
$rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
@@ -1779,6 +1784,15 @@ sqr8x_reduction:
.align 32
.L8x_tail_done:
add (%rdx),%r8 # can this overflow?
+ adc \$0,%r9
+ adc \$0,%r10
+ adc \$0,%r11
+ adc \$0,%r12
+ adc \$0,%r13
+ adc \$0,%r14
+ adc \$0,%r15 # can't overflow, because we
+ # started with "overhung" part
+ # of multiplication
xor %rax,%rax
neg $carry
@@ -3125,6 +3139,15 @@ sqrx8x_reduction:
.align 32
.Lsqrx8x_tail_done:
add 24+8(%rsp),%r8 # can this overflow?
+ adc \$0,%r9
+ adc \$0,%r10
+ adc \$0,%r11
+ adc \$0,%r12
+ adc \$0,%r13
+ adc \$0,%r14
+ adc \$0,%r15 # can't overflow, because we
+ # started with "overhung" part
+ # of multiplication
mov $carry,%rax # xor %rax,%rax
sub 16+8(%rsp),$carry # mov 16(%rsp),%cf
@@ -3168,13 +3191,11 @@ my ($rptr,$nptr)=("%rdx","%rbp");
my @ri=map("%r$_",(10..13));
my @ni=map("%r$_",(14..15));
$code.=<<___;
- xor %rbx,%rbx
+ xor %ebx,%ebx
sub %r15,%rsi # compare top-most words
adc %rbx,%rbx
mov %rcx,%r10 # -$num
- .byte 0x67
or %rbx,%rax
- .byte 0x67
mov %rcx,%r9 # -$num
xor \$1,%rax
sar \$3+2,%rcx # cf=0
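
[Editor's note] The two `adc \$0` chains added above are the core of this fix: when the addition into %r8 carries out, that carry must ripple through %r9..%r15 rather than be dropped (the bntest.c change below adds a regression case that trips the old behaviour). A scalar equivalent of the chain:

    #include <stdint.h>

    /* add x into an 8-limb accumulator and let any carry out of limb 0
     * ripple upward -- the job of the added "adc $0" instructions */
    static void add_with_ripple(uint64_t acc[8], uint64_t x)
    {
        unsigned carry;
        int i;

        acc[0] += x;
        carry = acc[0] < x;           /* unsigned wrap == carry out */
        for (i = 1; i < 8 && carry; i++) {
            acc[i] += 1;              /* adc $0, %r{9..15} */
            carry = acc[i] == 0;      /* carries again only on wrap to 0 */
        }
    }
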
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 24afdd6..50cf323 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -662,12 +662,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
bn_check_top(p);
bn_check_top(m);
- top = m->top;
-
- if (!(m->d[0] & 1)) {
+ if (!BN_is_odd(m)) {
BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS);
return (0);
}
+
+ top = m->top;
+
bits = BN_num_bits(p);
if (bits == 0) {
ret = BN_one(rr);
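
[Editor's note] The reorder matters for a zero modulus: a zero BIGNUM has top == 0 and its limb array need not hold a valid word, so evaluating m->d[0] before rejecting the modulus could read invalid data, while BN_is_odd() tests top first. A sketch of the distinction (MINI_BN is a stand-in for the two fields involved, not the real struct):

    /* illustration only: just the BIGNUM fields this fix touches */
    typedef struct {
        unsigned long *d;   /* limb array; may hold nothing when top == 0 */
        int top;            /* valid limb count; 0 represents the value 0 */
    } MINI_BN;

    /* safe: guarded by top, like BN_is_odd() */
    static int mini_is_odd(const MINI_BN *m)
    {
        return m->top > 0 && (m->d[0] & 1);
    }

    /* unsafe on a zero BIGNUM: the pre-fix test read d[0] unconditionally */
    static int mini_is_odd_unsafe(const MINI_BN *m)
    {
        return m->d[0] & 1;
    }
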
diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c
index 97c55ab..ce59fe7 100644
--- a/crypto/bn/bn_gcd.c
+++ b/crypto/bn/bn_gcd.c
@@ -583,6 +583,7 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
* BN_div_no_branch will be called eventually.
*/
pB = &local_B;
+ local_B.flags = 0;
BN_with_flags(pB, B, BN_FLG_CONSTTIME);
if (!BN_nnmod(B, pB, A, ctx))
goto err;
@@ -610,6 +611,7 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in,
* BN_div_no_branch will be called eventually.
*/
pA = &local_A;
+ local_A.flags = 0;
BN_with_flags(pA, A, BN_FLG_CONSTTIME);
/* (D, M) := (A/B, A%B) ... */
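
[Editor's note] local_B and local_A are stack BIGNUMs, and BN_with_flags() folds the destination's existing flags word into the new value, so an uninitialized flags field would be read unless it is set first -- hence the two added assignments. The safe pattern, sketched (assumes the pre-1.1 layout of this tree, where BIGNUM fields are directly accessible):

    #include <openssl/bn.h>

    /* give callers a constant-time view of b without copying it */
    static void consttime_view(BIGNUM *view, BIGNUM *b)
    {
        view->flags = 0;                           /* define flags before use */
        BN_with_flags(view, b, BN_FLG_CONSTTIME);  /* view now aliases b */
    }

BN_with_flags preserves BN_FLG_MALLOCED from the destination, which is exactly why the destination's flags must already hold a defined value.
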
diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c
index cfa1c7c..2c61da1 100644
--- a/crypto/bn/bn_gf2m.c
+++ b/crypto/bn/bn_gf2m.c
@@ -575,7 +575,7 @@ int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[],
bn_check_top(a);
BN_CTX_start(ctx);
if ((s = BN_CTX_get(ctx)) == NULL)
- return 0;
+ goto err;
if (!bn_wexpand(s, 2 * a->top))
goto err;
@@ -699,18 +699,21 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
int top = p->top;
BN_ULONG *udp, *bdp, *vdp, *cdp;
- bn_wexpand(u, top);
+ if (!bn_wexpand(u, top))
+ goto err;
udp = u->d;
for (i = u->top; i < top; i++)
udp[i] = 0;
u->top = top;
- bn_wexpand(b, top);
+ if (!bn_wexpand(b, top))
+ goto err;
bdp = b->d;
bdp[0] = 1;
for (i = 1; i < top; i++)
bdp[i] = 0;
b->top = top;
- bn_wexpand(c, top);
+ if (!bn_wexpand(c, top))
+ goto err;
cdp = c->d;
for (i = 0; i < top; i++)
cdp[i] = 0;
diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c
index aadd5db..be95bd5 100644
--- a/crypto/bn/bn_mont.c
+++ b/crypto/bn/bn_mont.c
@@ -361,9 +361,9 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont)
if (mont == NULL)
return;
- BN_free(&(mont->RR));
- BN_free(&(mont->N));
- BN_free(&(mont->Ni));
+ BN_clear_free(&(mont->RR));
+ BN_clear_free(&(mont->N));
+ BN_clear_free(&(mont->Ni));
if (mont->flags & BN_FLG_MALLOCED)
OPENSSL_free(mont);
}
@@ -373,6 +373,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
int ret = 0;
BIGNUM *Ri, *R;
+ if (BN_is_zero(mod))
+ return 0;
+
BN_CTX_start(ctx);
if ((Ri = BN_CTX_get(ctx)) == NULL)
goto err;
diff --git a/crypto/bn/bn_recp.c b/crypto/bn/bn_recp.c
index 6826f93..7497ac6 100644
--- a/crypto/bn/bn_recp.c
+++ b/crypto/bn/bn_recp.c
@@ -152,8 +152,10 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
if (BN_ucmp(m, &(recp->N)) < 0) {
BN_zero(d);
- if (!BN_copy(r, m))
+ if (!BN_copy(r, m)) {
+ BN_CTX_end(ctx);
return 0;
+ }
BN_CTX_end(ctx);
return (1);
}
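
[Editor's note] The fix restores the invariant that every path out of the function after BN_CTX_start() passes through BN_CTX_end(); an unbalanced frame leaves the shared context's stack pushed for the next caller. The bn_x931p.c hunk below applies the same rule via a shared err: label. The pattern, sketched:

    #include <openssl/bn.h>

    /* every exit after BN_CTX_start() funnels through BN_CTX_end() */
    static int balanced_ctx_example(BN_CTX *ctx, const BIGNUM *m)
    {
        BIGNUM *t;
        int ret = 0;

        BN_CTX_start(ctx);
        t = BN_CTX_get(ctx);
        if (t == NULL)
            goto end;
        if (!BN_copy(t, m))
            goto end;
        ret = 1;
     end:
        BN_CTX_end(ctx);    /* runs on success and failure alike */
        return ret;
    }
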
diff --git a/crypto/bn/bn_x931p.c b/crypto/bn/bn_x931p.c
index 6d76b12..efa48bd 100644
--- a/crypto/bn/bn_x931p.c
+++ b/crypto/bn/bn_x931p.c
@@ -213,14 +213,14 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
* exceeded.
*/
if (!BN_rand(Xp, nbits, 1, 0))
- return 0;
+ goto err;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
for (i = 0; i < 1000; i++) {
if (!BN_rand(Xq, nbits, 1, 0))
- return 0;
+ goto err;
/* Check that |Xp - Xq| > 2^(nbits - 100) */
BN_sub(t, Xp, Xq);
if (BN_num_bits(t) > (nbits - 100))
@@ -234,6 +234,9 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
return 0;
+ err:
+ BN_CTX_end(ctx);
+ return 0;
}
/*
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
index 470d5da..1e35988 100644
--- a/crypto/bn/bntest.c
+++ b/crypto/bn/bntest.c
@@ -441,6 +441,14 @@ int test_div(BIO *bp, BN_CTX *ctx)
BN_init(&d);
BN_init(&e);
+ BN_one(&a);
+ BN_zero(&b);
+
+ if (BN_div(&d, &c, &a, &b, ctx)) {
+ fprintf(stderr, "Division by zero succeeded!\n");
+ return 0;
+ }
+
for (i = 0; i < num0 + num1; i++) {
if (i < num1) {
BN_bntest_rand(&a, 400, 0, 0);
@@ -516,9 +524,9 @@ int test_div_word(BIO *bp)
do {
BN_bntest_rand(&a, 512, -1, 0);
BN_bntest_rand(&b, BN_BITS2, -1, 0);
- s = b.d[0];
- } while (!s);
+ } while (BN_is_zero(&b));
+ s = b.d[0];
BN_copy(&b, &a);
r = BN_div_word(&b, s);
@@ -781,6 +789,18 @@ int test_mont(BIO *bp, BN_CTX *ctx)
if (mont == NULL)
return 0;
+ BN_zero(&n);
+ if (BN_MONT_CTX_set(mont, &n, ctx)) {
+ fprintf(stderr, "BN_MONT_CTX_set succeeded for zero modulus!\n");
+ return 0;
+ }
+
+ BN_set_word(&n, 16);
+ if (BN_MONT_CTX_set(mont, &n, ctx)) {
+ fprintf(stderr, "BN_MONT_CTX_set succeeded for even modulus!\n");
+ return 0;
+ }
+
BN_bntest_rand(&a, 100, 0, 0);
BN_bntest_rand(&b, 100, 0, 0);
for (i = 0; i < num2; i++) {
@@ -887,6 +907,14 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx)
d = BN_new();
e = BN_new();
+ BN_one(a);
+ BN_one(b);
+ BN_zero(c);
+ if (BN_mod_mul(e, a, b, c, ctx)) {
+ fprintf(stderr, "BN_mod_mul with zero modulus succeeded!\n");
+ return 0;
+ }
+
for (j = 0; j < 3; j++) {
BN_bntest_rand(c, 1024, 0, 0);
for (i = 0; i < num0; i++) {
@@ -952,6 +980,14 @@ int test_mod_exp(BIO *bp, BN_CTX *ctx)
d = BN_new();
e = BN_new();
+ BN_one(a);
+ BN_one(b);
+ BN_zero(c);
+ if (BN_mod_exp(d, a, b, c, ctx)) {
+ fprintf(stderr, "BN_mod_exp with zero modulus succeeded!\n");
+ return 0;
+ }
+
BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */
for (i = 0; i < num2; i++) {
BN_bntest_rand(a, 20 + i * 5, 0, 0);
@@ -980,6 +1016,24 @@ int test_mod_exp(BIO *bp, BN_CTX *ctx)
return 0;
}
}
+
+ /* Regression test for carry propagation bug in sqr8x_reduction */
+ BN_hex2bn(&a, "050505050505");
+ BN_hex2bn(&b, "02");
+ BN_hex2bn(&c,
+ "4141414141414141414141274141414141414141414141414141414141414141"
+ "4141414141414141414141414141414141414141414141414141414141414141"
+ "4141414141414141414141800000000000000000000000000000000000000000"
+ "0000000000000000000000000000000000000000000000000000000000000000"
+ "0000000000000000000000000000000000000000000000000000000000000000"
+ "0000000000000000000000000000000000000000000000000000000001");
+ BN_mod_exp(d, a, b, c, ctx);
+ BN_mul(e, a, a, ctx);
+ if (BN_cmp(d, e)) {
+ fprintf(stderr, "BN_mod_exp and BN_mul produce different results!\n");
+ return 0;
+ }
+
BN_free(a);
BN_free(b);
BN_free(c);
@@ -999,6 +1053,22 @@ int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx)
d = BN_new();
e = BN_new();
+ BN_one(a);
+ BN_one(b);
+ BN_zero(c);
+ if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) {
+ fprintf(stderr, "BN_mod_exp_mont_consttime with zero modulus "
+ "succeeded\n");
+ return 0;
+ }
+
+ BN_set_word(c, 16);
+ if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) {
+ fprintf(stderr, "BN_mod_exp_mont_consttime with even modulus "
+ "succeeded\n");
+ return 0;
+ }
+
BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */
for (i = 0; i < num2; i++) {
BN_bntest_rand(a, 20 + i * 5, 0, 0);
diff --git a/crypto/bn/rsaz_exp.h b/crypto/bn/rsaz_exp.h
index 33361de..229e181 100644
--- a/crypto/bn/rsaz_exp.h
+++ b/crypto/bn/rsaz_exp.h
@@ -1,32 +1,44 @@
-/******************************************************************************
-* Copyright(c) 2012, Intel Corp.
-* Developers and authors:
-* Shay Gueron (1, 2), and Vlad Krasnov (1)
-* (1) Intel Corporation, Israel Development Center, Haifa, Israel
-* (2) University of Haifa, Israel
+/*****************************************************************************
+* *
+* Copyright (c) 2012, Intel Corporation *
+* *
+* All rights reserved. *
+* *
+* Redistribution and use in source and binary forms, with or without *
+* modification, are permitted provided that the following conditions are *
+* met: *
+* *
+* * Redistributions of source code must retain the above copyright *
+* notice, this list of conditions and the following disclaimer. *
+* *
+* * Redistributions in binary form must reproduce the above copyright *
+* notice, this list of conditions and the following disclaimer in the *
+* documentation and/or other materials provided with the *
+* distribution. *
+* *
+* * Neither the name of the Intel Corporation nor the names of its *
+* contributors may be used to endorse or promote products derived from *
+* this software without specific prior written permission. *
+* *
+* *
+* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY *
+* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
+* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
+* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR *
+* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
+* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
+* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
+* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
+* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
+* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
+* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
+* *
******************************************************************************
-* LICENSE:
-* This submission to OpenSSL is to be made available under the OpenSSL
-* license, and only to the OpenSSL project, in order to allow integration
-* into the publicly distributed code.
-* The use of this code, or portions of this code, or concepts embedded in
-* this code, or modification of this code and/or algorithm(s) in it, or the
-* use of this code for any other purpose than stated above, requires special
-* licensing.
-******************************************************************************
-* DISCLAIMER:
-* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS
-* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT
-* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
-* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-* POSSIBILITY OF SUCH DAMAGE.
-******************************************************************************/
+* Developers and authors: *
+* Shay Gueron (1, 2), and Vlad Krasnov (1) *
+* (1) Intel Corporation, Israel Development Center, Haifa, Israel *
+* (2) University of Haifa, Israel *
+*****************************************************************************/
#ifndef RSAZ_EXP_H
# define RSAZ_EXP_H