diff options
author | sangwan.kwon <sangwan.kwon@samsung.com> | 2015-12-18 14:13:27 +0900 |
---|---|---|
committer | sangwan.kwon <sangwan.kwon@samsung.com> | 2015-12-18 14:14:57 +0900 |
commit | 6a424b1e0ec46f307697ffe971a3e46b3129f693 (patch) | |
tree | 063e9bd46b09b57192b9af486e0649a139c40712 /crypto/bn | |
parent | 7bb2e75e597abc44122a538b5935153bf1ecb9ec (diff) | |
parent | 2b3ef38d58c1bb0abff4bf611177fc76e78325fa (diff) | |
download | openssl-6a424b1e0ec46f307697ffe971a3e46b3129f693.tar.gz openssl-6a424b1e0ec46f307697ffe971a3e46b3129f693.tar.bz2 openssl-6a424b1e0ec46f307697ffe971a3e46b3129f693.zip |
Upgrade Upstream version 1.0.2eHEADsubmit/tizen/20151228.015607tizen
Change-Id: If6afd73ecd5ef4548b9389eca6e53946aac3b9f2
Diffstat (limited to 'crypto/bn')
-rw-r--r-- | crypto/bn/asm/armv4-gf2m.pl | 10 | ||||
-rw-r--r-- | crypto/bn/asm/ia64.S | 4 | ||||
-rw-r--r-- | crypto/bn/asm/ppc64-mont.pl | 174 | ||||
-rwxr-xr-x | crypto/bn/asm/rsaz-x86_64.pl | 2 | ||||
-rw-r--r-- | crypto/bn/asm/s390x-gf2m.pl | 6 | ||||
-rwxr-xr-x | crypto/bn/asm/s390x.S | 109 | ||||
-rw-r--r-- | crypto/bn/asm/x86-gf2m.pl | 16 | ||||
-rw-r--r-- | crypto/bn/asm/x86_64-gcc.c | 2 | ||||
-rw-r--r-- | crypto/bn/asm/x86_64-gf2m.pl | 16 | ||||
-rwxr-xr-x | crypto/bn/asm/x86_64-mont.pl | 5 | ||||
-rwxr-xr-x | crypto/bn/asm/x86_64-mont5.pl | 27 | ||||
-rw-r--r-- | crypto/bn/bn_exp.c | 7 | ||||
-rw-r--r-- | crypto/bn/bn_gcd.c | 2 | ||||
-rw-r--r-- | crypto/bn/bn_gf2m.c | 11 | ||||
-rw-r--r-- | crypto/bn/bn_mont.c | 9 | ||||
-rw-r--r-- | crypto/bn/bn_recp.c | 4 | ||||
-rw-r--r-- | crypto/bn/bn_x931p.c | 7 | ||||
-rw-r--r-- | crypto/bn/bntest.c | 74 | ||||
-rw-r--r-- | crypto/bn/rsaz_exp.h | 68 |
19 files changed, 356 insertions, 197 deletions
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl index 8f529c9..72381a7 100644 --- a/crypto/bn/asm/armv4-gf2m.pl +++ b/crypto/bn/asm/armv4-gf2m.pl @@ -27,7 +27,7 @@ # referred below, which improves ECDH and ECDSA verify benchmarks # by 18-40%. # -# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software +# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. # # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf @@ -136,7 +136,7 @@ ___ ################ # void bn_GF2m_mul_2x2(BN_ULONG *r, # BN_ULONG a1,BN_ULONG a0, -# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 +# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0 { $code.=<<___; .global bn_GF2m_mul_2x2 @@ -159,7 +159,7 @@ $code.=<<___; mov $mask,#7<<2 sub sp,sp,#32 @ allocate tab[8] - bl mul_1x1_ialu @ a1·b1 + bl mul_1x1_ialu @ a1·b1 str $lo,[$ret,#8] str $hi,[$ret,#12] @@ -169,13 +169,13 @@ $code.=<<___; eor r2,r2,$a eor $b,$b,r3 eor $a,$a,r2 - bl mul_1x1_ialu @ a0·b0 + bl mul_1x1_ialu @ a0·b0 str $lo,[$ret] str $hi,[$ret,#4] eor $a,$a,r2 eor $b,$b,r3 - bl mul_1x1_ialu @ (a1+a0)·(b1+b0) + bl mul_1x1_ialu @ (a1+a0)·(b1+b0) ___ @r=map("r$_",(6..9)); $code.=<<___; diff --git a/crypto/bn/asm/ia64.S b/crypto/bn/asm/ia64.S index 951abc5..a9a42ab 100644 --- a/crypto/bn/asm/ia64.S +++ b/crypto/bn/asm/ia64.S @@ -422,7 +422,7 @@ bn_mul_add_words: // This loop spins in 3*(n+10) ticks on Itanium and in 2*(n+10) on // Itanium 2. Yes, unlike previous versions it scales:-) Previous -// version was peforming *all* additions in IALU and was starving +// version was performing *all* additions in IALU and was starving // for those even on Itanium 2. In this version one addition is // moved to FPU and is folded with multiplication. This is at cost // of propogating the result from previous call to this subroutine @@ -568,7 +568,7 @@ bn_sqr_comba8: // I've estimated this routine to run in ~120 ticks, but in reality // (i.e. according to ar.itc) it takes ~160 ticks. Are those extra // cycles consumed for instructions fetch? Or did I misinterpret some -// clause in Itanium µ-architecture manual? Comments are welcomed and +// clause in Itanium µ-architecture manual? Comments are welcomed and // highly appreciated. // // On Itanium 2 it takes ~190 ticks. This is because of stalls on diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl index 68e3733..9e3c12d 100644 --- a/crypto/bn/asm/ppc64-mont.pl +++ b/crypto/bn/asm/ppc64-mont.pl @@ -94,6 +94,8 @@ if ($flavour =~ /32/) { $POP= "ld"; } else { die "nonsense $flavour"; } +$LITTLE_ENDIAN = ($flavour=~/le$/) ? 4 : 0; + $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or @@ -294,12 +296,12 @@ $code.=<<___ if ($SIZE_T==8); extrdi $t0,$a0,32,32 ; lwz $t0,4($ap) extrdi $t1,$a0,32,0 ; lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[1] as 32-bit word pair - lwz $t3,8($ap) - lwz $t4,4($np) ; load n[0] as 32-bit word pair - lwz $t5,0($np) - lwz $t6,12($np) ; load n[1] as 32-bit word pair - lwz $t7,8($np) + lwz $t2,`12^$LITTLE_ENDIAN`($ap) ; load a[1] as 32-bit word pair + lwz $t3,`8^$LITTLE_ENDIAN`($ap) + lwz $t4,`4^$LITTLE_ENDIAN`($np) ; load n[0] as 32-bit word pair + lwz $t5,`0^$LITTLE_ENDIAN`($np) + lwz $t6,`12^$LITTLE_ENDIAN`($np) ; load n[1] as 32-bit word pair + lwz $t7,`8^$LITTLE_ENDIAN`($np) ___ $code.=<<___ if ($SIZE_T==4); lwz $a0,0($ap) ; pull ap[0,1] value @@ -463,14 +465,14 @@ $code.=<<___; L1st: ___ $code.=<<___ if ($SIZE_T==8); - lwz $t0,4($ap) ; load a[j] as 32-bit word pair - lwz $t1,0($ap) - lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair - lwz $t3,8($ap) - lwz $t4,4($np) ; load n[j] as 32-bit word pair - lwz $t5,0($np) - lwz $t6,12($np) ; load n[j+1] as 32-bit word pair - lwz $t7,8($np) + lwz $t0,`4^$LITTLE_ENDIAN`($ap) ; load a[j] as 32-bit word pair + lwz $t1,`0^$LITTLE_ENDIAN`($ap) + lwz $t2,`12^$LITTLE_ENDIAN`($ap) ; load a[j+1] as 32-bit word pair + lwz $t3,`8^$LITTLE_ENDIAN`($ap) + lwz $t4,`4^$LITTLE_ENDIAN`($np) ; load n[j] as 32-bit word pair + lwz $t5,`0^$LITTLE_ENDIAN`($np) + lwz $t6,`12^$LITTLE_ENDIAN`($np) ; load n[j+1] as 32-bit word pair + lwz $t7,`8^$LITTLE_ENDIAN`($np) ___ $code.=<<___ if ($SIZE_T==4); lwz $t0,0($ap) ; load a[j..j+3] as 32-bit word pairs @@ -505,14 +507,14 @@ $code.=<<___; ___ } else { $code.=<<___; - lwz $t1,`$FRAME+0`($sp) - lwz $t0,`$FRAME+4`($sp) - lwz $t3,`$FRAME+8`($sp) - lwz $t2,`$FRAME+12`($sp) - lwz $t5,`$FRAME+16`($sp) - lwz $t4,`$FRAME+20`($sp) - lwz $t7,`$FRAME+24`($sp) - lwz $t6,`$FRAME+28`($sp) + lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp) + lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp) + lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp) + lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp) + lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp) + lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp) + lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp) + lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp) ___ } $code.=<<___; @@ -651,8 +653,8 @@ $code.=<<___; fmadd $T1a,$N1,$na,$T1a fmadd $T1b,$N1,$nb,$T1b - lwz $t3,`$FRAME+32`($sp) ; permuted $t1 - lwz $t2,`$FRAME+36`($sp) ; permuted $t0 + lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0 addc $t4,$t4,$carry adde $t5,$t5,$c1 srwi $carry,$t4,16 @@ -673,8 +675,8 @@ $code.=<<___; fmadd $T1a,$N0,$nc,$T1a fmadd $T1b,$N0,$nd,$T1b - lwz $t7,`$FRAME+40`($sp) ; permuted $t3 - lwz $t6,`$FRAME+44`($sp) ; permuted $t2 + lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2 addc $t2,$t2,$carry adde $t3,$t3,$c1 srwi $carry,$t2,16 @@ -686,8 +688,8 @@ $code.=<<___; insrwi $carry,$t3,16,0 fmadd $T3a,$N2,$nc,$T3a fmadd $T3b,$N2,$nd,$T3b - lwz $t1,`$FRAME+48`($sp) ; permuted $t5 - lwz $t0,`$FRAME+52`($sp) ; permuted $t4 + lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4 addc $t6,$t6,$carry adde $t7,$t7,$c1 srwi $carry,$t6,16 @@ -699,8 +701,8 @@ $code.=<<___; fctid $T0a,$T0a fctid $T0b,$T0b - lwz $t5,`$FRAME+56`($sp) ; permuted $t7 - lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6 addc $t0,$t0,$carry adde $t1,$t1,$c1 srwi $carry,$t0,16 @@ -787,14 +789,14 @@ $code.=<<___; ___ } else { $code.=<<___; - lwz $t1,`$FRAME+0`($sp) - lwz $t0,`$FRAME+4`($sp) - lwz $t3,`$FRAME+8`($sp) - lwz $t2,`$FRAME+12`($sp) - lwz $t5,`$FRAME+16`($sp) - lwz $t4,`$FRAME+20`($sp) - lwz $t7,`$FRAME+24`($sp) - lwz $t6,`$FRAME+28`($sp) + lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp) + lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp) + lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp) + lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp) + lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp) + lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp) + lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp) + lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp) stfd $dota,`$FRAME+64`($sp) stfd $dotb,`$FRAME+72`($sp) @@ -823,14 +825,14 @@ $code.=<<___; stw $t0,12($tp) ; tp[j-1] stw $t4,8($tp) - lwz $t3,`$FRAME+32`($sp) ; permuted $t1 - lwz $t2,`$FRAME+36`($sp) ; permuted $t0 - lwz $t7,`$FRAME+40`($sp) ; permuted $t3 - lwz $t6,`$FRAME+44`($sp) ; permuted $t2 - lwz $t1,`$FRAME+48`($sp) ; permuted $t5 - lwz $t0,`$FRAME+52`($sp) ; permuted $t4 - lwz $t5,`$FRAME+56`($sp) ; permuted $t7 - lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0 + lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2 + lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4 + lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6 addc $t2,$t2,$carry adde $t3,$t3,$c1 @@ -857,10 +859,10 @@ $code.=<<___; stw $t2,20($tp) ; tp[j] stwu $t0,16($tp) - lwz $t7,`$FRAME+64`($sp) - lwz $t6,`$FRAME+68`($sp) - lwz $t5,`$FRAME+72`($sp) - lwz $t4,`$FRAME+76`($sp) + lwz $t7,`$FRAME+64^$LITTLE_ENDIAN`($sp) + lwz $t6,`$FRAME+68^$LITTLE_ENDIAN`($sp) + lwz $t5,`$FRAME+72^$LITTLE_ENDIAN`($sp) + lwz $t4,`$FRAME+76^$LITTLE_ENDIAN`($sp) addc $t6,$t6,$carry adde $t7,$t7,$c1 @@ -1165,23 +1167,23 @@ ___ $code.=<<___; fmadd $T1a,$N1,$na,$T1a fmadd $T1b,$N1,$nb,$T1b - lwz $t1,`$FRAME+0`($sp) - lwz $t0,`$FRAME+4`($sp) + lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp) + lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp) fmadd $T2a,$N2,$na,$T2a fmadd $T2b,$N2,$nb,$T2b - lwz $t3,`$FRAME+8`($sp) - lwz $t2,`$FRAME+12`($sp) + lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp) + lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp) fmadd $T3a,$N3,$na,$T3a fmadd $T3b,$N3,$nb,$T3b - lwz $t5,`$FRAME+16`($sp) - lwz $t4,`$FRAME+20`($sp) + lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp) + lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp) addc $t0,$t0,$carry adde $t1,$t1,$c1 srwi $carry,$t0,16 fmadd $T0a,$N0,$na,$T0a fmadd $T0b,$N0,$nb,$T0b - lwz $t7,`$FRAME+24`($sp) - lwz $t6,`$FRAME+28`($sp) + lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp) + lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp) srwi $c1,$t1,16 insrwi $carry,$t1,16,0 @@ -1218,8 +1220,8 @@ $code.=<<___; fctid $T1a,$T1a addc $t0,$t0,$t2 adde $t4,$t4,$t3 - lwz $t3,`$FRAME+32`($sp) ; permuted $t1 - lwz $t2,`$FRAME+36`($sp) ; permuted $t0 + lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0 fctid $T1b,$T1b addze $carry,$carry addze $c1,$c1 @@ -1229,19 +1231,19 @@ $code.=<<___; addc $t2,$t2,$carry adde $t3,$t3,$c1 srwi $carry,$t2,16 - lwz $t7,`$FRAME+40`($sp) ; permuted $t3 - lwz $t6,`$FRAME+44`($sp) ; permuted $t2 + lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2 fctid $T2b,$T2b srwi $c1,$t3,16 insrwi $carry,$t3,16,0 - lwz $t1,`$FRAME+48`($sp) ; permuted $t5 - lwz $t0,`$FRAME+52`($sp) ; permuted $t4 + lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4 fctid $T3a,$T3a addc $t6,$t6,$carry adde $t7,$t7,$c1 srwi $carry,$t6,16 - lwz $t5,`$FRAME+56`($sp) ; permuted $t7 - lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6 fctid $T3b,$T3b insrwi $t2,$t6,16,0 ; 64..95 bits @@ -1354,14 +1356,14 @@ $code.=<<___; ___ } else { $code.=<<___; - lwz $t1,`$FRAME+0`($sp) - lwz $t0,`$FRAME+4`($sp) - lwz $t3,`$FRAME+8`($sp) - lwz $t2,`$FRAME+12`($sp) - lwz $t5,`$FRAME+16`($sp) - lwz $t4,`$FRAME+20`($sp) - lwz $t7,`$FRAME+24`($sp) - lwz $t6,`$FRAME+28`($sp) + lwz $t1,`$FRAME+0^$LITTLE_ENDIAN`($sp) + lwz $t0,`$FRAME+4^$LITTLE_ENDIAN`($sp) + lwz $t3,`$FRAME+8^$LITTLE_ENDIAN`($sp) + lwz $t2,`$FRAME+12^$LITTLE_ENDIAN`($sp) + lwz $t5,`$FRAME+16^$LITTLE_ENDIAN`($sp) + lwz $t4,`$FRAME+20^$LITTLE_ENDIAN`($sp) + lwz $t7,`$FRAME+24^$LITTLE_ENDIAN`($sp) + lwz $t6,`$FRAME+28^$LITTLE_ENDIAN`($sp) stfd $dota,`$FRAME+64`($sp) stfd $dotb,`$FRAME+72`($sp) @@ -1397,14 +1399,14 @@ $code.=<<___; stw $t0,4($tp) ; tp[j-1] stw $t4,0($tp) - lwz $t3,`$FRAME+32`($sp) ; permuted $t1 - lwz $t2,`$FRAME+36`($sp) ; permuted $t0 - lwz $t7,`$FRAME+40`($sp) ; permuted $t3 - lwz $t6,`$FRAME+44`($sp) ; permuted $t2 - lwz $t1,`$FRAME+48`($sp) ; permuted $t5 - lwz $t0,`$FRAME+52`($sp) ; permuted $t4 - lwz $t5,`$FRAME+56`($sp) ; permuted $t7 - lwz $t4,`$FRAME+60`($sp) ; permuted $t6 + lwz $t3,`$FRAME+32^$LITTLE_ENDIAN`($sp) ; permuted $t1 + lwz $t2,`$FRAME+36^$LITTLE_ENDIAN`($sp) ; permuted $t0 + lwz $t7,`$FRAME+40^$LITTLE_ENDIAN`($sp) ; permuted $t3 + lwz $t6,`$FRAME+44^$LITTLE_ENDIAN`($sp) ; permuted $t2 + lwz $t1,`$FRAME+48^$LITTLE_ENDIAN`($sp) ; permuted $t5 + lwz $t0,`$FRAME+52^$LITTLE_ENDIAN`($sp) ; permuted $t4 + lwz $t5,`$FRAME+56^$LITTLE_ENDIAN`($sp) ; permuted $t7 + lwz $t4,`$FRAME+60^$LITTLE_ENDIAN`($sp) ; permuted $t6 addc $t2,$t2,$carry adde $t3,$t3,$c1 @@ -1433,12 +1435,12 @@ $code.=<<___; addc $t2,$t2,$t6 adde $t0,$t0,$t7 - lwz $t7,`$FRAME+64`($sp) - lwz $t6,`$FRAME+68`($sp) + lwz $t7,`$FRAME+64^$LITTLE_ENDIAN`($sp) + lwz $t6,`$FRAME+68^$LITTLE_ENDIAN`($sp) addze $carry,$carry addze $c1,$c1 - lwz $t5,`$FRAME+72`($sp) - lwz $t4,`$FRAME+76`($sp) + lwz $t5,`$FRAME+72^$LITTLE_ENDIAN`($sp) + lwz $t4,`$FRAME+76^$LITTLE_ENDIAN`($sp) addc $t6,$t6,$carry adde $t7,$t7,$c1 diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl index 3bd45db..12b571c 100755 --- a/crypto/bn/asm/rsaz-x86_64.pl +++ b/crypto/bn/asm/rsaz-x86_64.pl @@ -113,7 +113,7 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } -if (!$addx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9])\.([0-9]+)/) { +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) { my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 $addx = ($ver>=3.03); } diff --git a/crypto/bn/asm/s390x-gf2m.pl b/crypto/bn/asm/s390x-gf2m.pl index cd9f13e..9d18d40 100644 --- a/crypto/bn/asm/s390x-gf2m.pl +++ b/crypto/bn/asm/s390x-gf2m.pl @@ -172,19 +172,19 @@ ___ if ($SIZE_T==8) { my @r=map("%r$_",(6..9)); $code.=<<___; - bras $ra,_mul_1x1 # a1·b1 + bras $ra,_mul_1x1 # a1·b1 stmg $lo,$hi,16($rp) lg $a,`$stdframe+128+4*$SIZE_T`($sp) lg $b,`$stdframe+128+6*$SIZE_T`($sp) - bras $ra,_mul_1x1 # a0·b0 + bras $ra,_mul_1x1 # a0·b0 stmg $lo,$hi,0($rp) lg $a,`$stdframe+128+3*$SIZE_T`($sp) lg $b,`$stdframe+128+5*$SIZE_T`($sp) xg $a,`$stdframe+128+4*$SIZE_T`($sp) xg $b,`$stdframe+128+6*$SIZE_T`($sp) - bras $ra,_mul_1x1 # (a0+a1)·(b0+b1) + bras $ra,_mul_1x1 # (a0+a1)·(b0+b1) lmg @r[0],@r[3],0($rp) xgr $lo,$hi diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S index 43fcb79..f5eebe4 100755 --- a/crypto/bn/asm/s390x.S +++ b/crypto/bn/asm/s390x.S @@ -18,71 +18,106 @@ .align 4 bn_mul_add_words: lghi zero,0 // zero = 0 - la %r1,0(%r2) // put rp aside - lghi %r2,0 // i=0; + la %r1,0(%r2) // put rp aside [to give way to] + lghi %r2,0 // return value ltgfr %r4,%r4 bler %r14 // if (len<=0) return 0; - stmg %r6,%r10,48(%r15) - lghi %r10,3 - lghi %r8,0 // carry = 0 - nr %r10,%r4 // len%4 + stmg %r6,%r13,48(%r15) + lghi %r2,3 + lghi %r12,0 // carry = 0 + slgr %r1,%r3 // rp-=ap + nr %r2,%r4 // len%4 sra %r4,2 // cnt=len/4 jz .Loop1_madd // carry is incidentally cleared if branch taken algr zero,zero // clear carry -.Loop4_madd: - lg %r7,0(%r2,%r3) // ap[i] + lg %r7,0(%r3) // ap[0] + lg %r9,8(%r3) // ap[1] mlgr %r6,%r5 // *=w - alcgr %r7,%r8 // +=carry - alcgr %r6,zero - alg %r7,0(%r2,%r1) // +=rp[i] - stg %r7,0(%r2,%r1) // rp[i]= + brct %r4,.Loop4_madd + j .Loop4_madd_tail - lg %r9,8(%r2,%r3) +.Loop4_madd: mlgr %r8,%r5 + lg %r11,16(%r3) // ap[i+2] + alcgr %r7,%r12 // +=carry + alcgr %r6,zero + alg %r7,0(%r3,%r1) // +=rp[i] + stg %r7,0(%r3,%r1) // rp[i]= + + mlgr %r10,%r5 + lg %r13,24(%r3) alcgr %r9,%r6 alcgr %r8,zero - alg %r9,8(%r2,%r1) - stg %r9,8(%r2,%r1) + alg %r9,8(%r3,%r1) + stg %r9,8(%r3,%r1) + + mlgr %r12,%r5 + lg %r7,32(%r3) + alcgr %r11,%r8 + alcgr %r10,zero + alg %r11,16(%r3,%r1) + stg %r11,16(%r3,%r1) - lg %r7,16(%r2,%r3) mlgr %r6,%r5 - alcgr %r7,%r8 - alcgr %r6,zero - alg %r7,16(%r2,%r1) - stg %r7,16(%r2,%r1) + lg %r9,40(%r3) + alcgr %r13,%r10 + alcgr %r12,zero + alg %r13,24(%r3,%r1) + stg %r13,24(%r3,%r1) - lg %r9,24(%r2,%r3) + la %r3,32(%r3) // i+=4 + brct %r4,.Loop4_madd + +.Loop4_madd_tail: mlgr %r8,%r5 + lg %r11,16(%r3) + alcgr %r7,%r12 // +=carry + alcgr %r6,zero + alg %r7,0(%r3,%r1) // +=rp[i] + stg %r7,0(%r3,%r1) // rp[i]= + + mlgr %r10,%r5 + lg %r13,24(%r3) alcgr %r9,%r6 alcgr %r8,zero - alg %r9,24(%r2,%r1) - stg %r9,24(%r2,%r1) + alg %r9,8(%r3,%r1) + stg %r9,8(%r3,%r1) - la %r2,32(%r2) // i+=4 - brct %r4,.Loop4_madd + mlgr %r12,%r5 + alcgr %r11,%r8 + alcgr %r10,zero + alg %r11,16(%r3,%r1) + stg %r11,16(%r3,%r1) - la %r10,1(%r10) // see if len%4 is zero ... - brct %r10,.Loop1_madd // without touching condition code:-) + alcgr %r13,%r10 + alcgr %r12,zero + alg %r13,24(%r3,%r1) + stg %r13,24(%r3,%r1) + + la %r3,32(%r3) // i+=4 + + la %r2,1(%r2) // see if len%4 is zero ... + brct %r2,.Loop1_madd // without touching condition code:-) .Lend_madd: - alcgr %r8,zero // collect carry bit - lgr %r2,%r8 - lmg %r6,%r10,48(%r15) + lgr %r2,zero // return value + alcgr %r2,%r12 // collect even carry bit + lmg %r6,%r13,48(%r15) br %r14 .Loop1_madd: - lg %r7,0(%r2,%r3) // ap[i] + lg %r7,0(%r3) // ap[i] mlgr %r6,%r5 // *=w - alcgr %r7,%r8 // +=carry + alcgr %r7,%r12 // +=carry alcgr %r6,zero - alg %r7,0(%r2,%r1) // +=rp[i] - stg %r7,0(%r2,%r1) // rp[i]= + alg %r7,0(%r3,%r1) // +=rp[i] + stg %r7,0(%r3,%r1) // rp[i]= - lgr %r8,%r6 - la %r2,8(%r2) // i++ - brct %r10,.Loop1_madd + lgr %r12,%r6 + la %r3,8(%r3) // i++ + brct %r2,.Loop1_madd j .Lend_madd .size bn_mul_add_words,.-bn_mul_add_words diff --git a/crypto/bn/asm/x86-gf2m.pl b/crypto/bn/asm/x86-gf2m.pl index 808a1e5..b579530 100644 --- a/crypto/bn/asm/x86-gf2m.pl +++ b/crypto/bn/asm/x86-gf2m.pl @@ -14,7 +14,7 @@ # the time being... Except that it has three code paths: pure integer # code suitable for any x86 CPU, MMX code suitable for PIII and later # and PCLMULQDQ suitable for Westmere and later. Improvement varies -# from one benchmark and µ-arch to another. Below are interval values +# from one benchmark and µ-arch to another. Below are interval values # for 163- and 571-bit ECDH benchmarks relative to compiler-generated # code: # @@ -226,22 +226,22 @@ if ($sse2) { &push ("edi"); &mov ($a,&wparam(1)); &mov ($b,&wparam(3)); - &call ("_mul_1x1_mmx"); # a1·b1 + &call ("_mul_1x1_mmx"); # a1·b1 &movq ("mm7",$R); &mov ($a,&wparam(2)); &mov ($b,&wparam(4)); - &call ("_mul_1x1_mmx"); # a0·b0 + &call ("_mul_1x1_mmx"); # a0·b0 &movq ("mm6",$R); &mov ($a,&wparam(1)); &mov ($b,&wparam(3)); &xor ($a,&wparam(2)); &xor ($b,&wparam(4)); - &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1) + &call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1) &pxor ($R,"mm7"); &mov ($a,&wparam(0)); - &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0 + &pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0 &movq ($A,$R); &psllq ($R,32); @@ -266,13 +266,13 @@ if ($sse2) { &mov ($a,&wparam(1)); &mov ($b,&wparam(3)); - &call ("_mul_1x1_ialu"); # a1·b1 + &call ("_mul_1x1_ialu"); # a1·b1 &mov (&DWP(8,"esp"),$lo); &mov (&DWP(12,"esp"),$hi); &mov ($a,&wparam(2)); &mov ($b,&wparam(4)); - &call ("_mul_1x1_ialu"); # a0·b0 + &call ("_mul_1x1_ialu"); # a0·b0 &mov (&DWP(0,"esp"),$lo); &mov (&DWP(4,"esp"),$hi); @@ -280,7 +280,7 @@ if ($sse2) { &mov ($b,&wparam(3)); &xor ($a,&wparam(2)); &xor ($b,&wparam(4)); - &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1) + &call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1) &mov ("ebp",&wparam(0)); @r=("ebx","ecx","edi","esi"); diff --git a/crypto/bn/asm/x86_64-gcc.c b/crypto/bn/asm/x86_64-gcc.c index d548886..d77dc43 100644 --- a/crypto/bn/asm/x86_64-gcc.c +++ b/crypto/bn/asm/x86_64-gcc.c @@ -65,7 +65,7 @@ # undef mul_add /*- - * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; + * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; * "g"(0) let the compiler to decide where does it * want to keep the value of zero; */ diff --git a/crypto/bn/asm/x86_64-gf2m.pl b/crypto/bn/asm/x86_64-gf2m.pl index 226c66c..42bbec2 100644 --- a/crypto/bn/asm/x86_64-gf2m.pl +++ b/crypto/bn/asm/x86_64-gf2m.pl @@ -13,7 +13,7 @@ # in bn_gf2m.c. It's kind of low-hanging mechanical port from C for # the time being... Except that it has two code paths: code suitable # for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and -# later. Improvement varies from one benchmark and µ-arch to another. +# later. Improvement varies from one benchmark and µ-arch to another. # Vanilla code path is at most 20% faster than compiler-generated code # [not very impressive], while PCLMULQDQ - whole 85%-160% better on # 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that @@ -184,13 +184,13 @@ ___ $code.=<<___; movdqa %xmm0,%xmm4 movdqa %xmm1,%xmm5 - pclmulqdq \$0,%xmm1,%xmm0 # a1·b1 + pclmulqdq \$0,%xmm1,%xmm0 # a1·b1 pxor %xmm2,%xmm4 pxor %xmm3,%xmm5 - pclmulqdq \$0,%xmm3,%xmm2 # a0·b0 - pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1) + pclmulqdq \$0,%xmm3,%xmm2 # a0·b0 + pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1) xorps %xmm0,%xmm4 - xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1 + xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1 movdqa %xmm4,%xmm5 pslldq \$8,%xmm4 psrldq \$8,%xmm5 @@ -225,13 +225,13 @@ $code.=<<___; mov \$0xf,$mask mov $a1,$a mov $b1,$b - call _mul_1x1 # a1·b1 + call _mul_1x1 # a1·b1 mov $lo,16(%rsp) mov $hi,24(%rsp) mov 48(%rsp),$a mov 64(%rsp),$b - call _mul_1x1 # a0·b0 + call _mul_1x1 # a0·b0 mov $lo,0(%rsp) mov $hi,8(%rsp) @@ -239,7 +239,7 @@ $code.=<<___; mov 56(%rsp),$b xor 48(%rsp),$a xor 64(%rsp),$b - call _mul_1x1 # (a0+a1)·(b0+b1) + call _mul_1x1 # (a0+a1)·(b0+b1) ___ @r=("%rbx","%rcx","%rdi","%rsi"); $code.=<<___; diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index 2989b58..725833d 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -68,6 +68,11 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) { + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 + $addx = ($ver>=3.03); +} + # int bn_mul_mont( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index 820de3d..64e668f 100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -53,6 +53,11 @@ if (!$addx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) && $addx = ($1>=12); } +if (!$addx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9])\.([0-9]+)/) { + my $ver = $2 + $3/100.0; # 3.1->3.01, 3.10->3.10 + $addx = ($ver>=3.03); +} + # int bn_mul_mont_gather5( $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, @@ -1779,6 +1784,15 @@ sqr8x_reduction: .align 32 .L8x_tail_done: add (%rdx),%r8 # can this overflow? + adc \$0,%r9 + adc \$0,%r10 + adc \$0,%r11 + adc \$0,%r12 + adc \$0,%r13 + adc \$0,%r14 + adc \$0,%r15 # can't overflow, because we + # started with "overhung" part + # of multiplication xor %rax,%rax neg $carry @@ -3125,6 +3139,15 @@ sqrx8x_reduction: .align 32 .Lsqrx8x_tail_done: add 24+8(%rsp),%r8 # can this overflow? + adc \$0,%r9 + adc \$0,%r10 + adc \$0,%r11 + adc \$0,%r12 + adc \$0,%r13 + adc \$0,%r14 + adc \$0,%r15 # can't overflow, because we + # started with "overhung" part + # of multiplication mov $carry,%rax # xor %rax,%rax sub 16+8(%rsp),$carry # mov 16(%rsp),%cf @@ -3168,13 +3191,11 @@ my ($rptr,$nptr)=("%rdx","%rbp"); my @ri=map("%r$_",(10..13)); my @ni=map("%r$_",(14..15)); $code.=<<___; - xor %rbx,%rbx + xor %ebx,%ebx sub %r15,%rsi # compare top-most words adc %rbx,%rbx mov %rcx,%r10 # -$num - .byte 0x67 or %rbx,%rax - .byte 0x67 mov %rcx,%r9 # -$num xor \$1,%rax sar \$3+2,%rcx # cf=0 diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c index 24afdd6..50cf323 100644 --- a/crypto/bn/bn_exp.c +++ b/crypto/bn/bn_exp.c @@ -662,12 +662,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, bn_check_top(p); bn_check_top(m); - top = m->top; - - if (!(m->d[0] & 1)) { + if (!BN_is_odd(m)) { BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME, BN_R_CALLED_WITH_EVEN_MODULUS); return (0); } + + top = m->top; + bits = BN_num_bits(p); if (bits == 0) { ret = BN_one(rr); diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c index 97c55ab..ce59fe7 100644 --- a/crypto/bn/bn_gcd.c +++ b/crypto/bn/bn_gcd.c @@ -583,6 +583,7 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, * BN_div_no_branch will be called eventually. */ pB = &local_B; + local_B.flags = 0; BN_with_flags(pB, B, BN_FLG_CONSTTIME); if (!BN_nnmod(B, pB, A, ctx)) goto err; @@ -610,6 +611,7 @@ static BIGNUM *BN_mod_inverse_no_branch(BIGNUM *in, * BN_div_no_branch will be called eventually. */ pA = &local_A; + local_A.flags = 0; BN_with_flags(pA, A, BN_FLG_CONSTTIME); /* (D, M) := (A/B, A%B) ... */ diff --git a/crypto/bn/bn_gf2m.c b/crypto/bn/bn_gf2m.c index cfa1c7c..2c61da1 100644 --- a/crypto/bn/bn_gf2m.c +++ b/crypto/bn/bn_gf2m.c @@ -575,7 +575,7 @@ int BN_GF2m_mod_sqr_arr(BIGNUM *r, const BIGNUM *a, const int p[], bn_check_top(a); BN_CTX_start(ctx); if ((s = BN_CTX_get(ctx)) == NULL) - return 0; + goto err; if (!bn_wexpand(s, 2 * a->top)) goto err; @@ -699,18 +699,21 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) int top = p->top; BN_ULONG *udp, *bdp, *vdp, *cdp; - bn_wexpand(u, top); + if (!bn_wexpand(u, top)) + goto err; udp = u->d; for (i = u->top; i < top; i++) udp[i] = 0; u->top = top; - bn_wexpand(b, top); + if (!bn_wexpand(b, top)) + goto err; bdp = b->d; bdp[0] = 1; for (i = 1; i < top; i++) bdp[i] = 0; b->top = top; - bn_wexpand(c, top); + if (!bn_wexpand(c, top)) + goto err; cdp = c->d; for (i = 0; i < top; i++) cdp[i] = 0; diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c index aadd5db..be95bd5 100644 --- a/crypto/bn/bn_mont.c +++ b/crypto/bn/bn_mont.c @@ -361,9 +361,9 @@ void BN_MONT_CTX_free(BN_MONT_CTX *mont) if (mont == NULL) return; - BN_free(&(mont->RR)); - BN_free(&(mont->N)); - BN_free(&(mont->Ni)); + BN_clear_free(&(mont->RR)); + BN_clear_free(&(mont->N)); + BN_clear_free(&(mont->Ni)); if (mont->flags & BN_FLG_MALLOCED) OPENSSL_free(mont); } @@ -373,6 +373,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) int ret = 0; BIGNUM *Ri, *R; + if (BN_is_zero(mod)) + return 0; + BN_CTX_start(ctx); if ((Ri = BN_CTX_get(ctx)) == NULL) goto err; diff --git a/crypto/bn/bn_recp.c b/crypto/bn/bn_recp.c index 6826f93..7497ac6 100644 --- a/crypto/bn/bn_recp.c +++ b/crypto/bn/bn_recp.c @@ -152,8 +152,10 @@ int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, if (BN_ucmp(m, &(recp->N)) < 0) { BN_zero(d); - if (!BN_copy(r, m)) + if (!BN_copy(r, m)) { + BN_CTX_end(ctx); return 0; + } BN_CTX_end(ctx); return (1); } diff --git a/crypto/bn/bn_x931p.c b/crypto/bn/bn_x931p.c index 6d76b12..efa48bd 100644 --- a/crypto/bn/bn_x931p.c +++ b/crypto/bn/bn_x931p.c @@ -213,14 +213,14 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) * exceeded. */ if (!BN_rand(Xp, nbits, 1, 0)) - return 0; + goto err; BN_CTX_start(ctx); t = BN_CTX_get(ctx); for (i = 0; i < 1000; i++) { if (!BN_rand(Xq, nbits, 1, 0)) - return 0; + goto err; /* Check that |Xp - Xq| > 2^(nbits - 100) */ BN_sub(t, Xp, Xq); if (BN_num_bits(t) > (nbits - 100)) @@ -234,6 +234,9 @@ int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx) return 0; + err: + BN_CTX_end(ctx); + return 0; } /* diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c index 470d5da..1e35988 100644 --- a/crypto/bn/bntest.c +++ b/crypto/bn/bntest.c @@ -441,6 +441,14 @@ int test_div(BIO *bp, BN_CTX *ctx) BN_init(&d); BN_init(&e); + BN_one(&a); + BN_zero(&b); + + if (BN_div(&d, &c, &a, &b, ctx)) { + fprintf(stderr, "Division by zero succeeded!\n"); + return 0; + } + for (i = 0; i < num0 + num1; i++) { if (i < num1) { BN_bntest_rand(&a, 400, 0, 0); @@ -516,9 +524,9 @@ int test_div_word(BIO *bp) do { BN_bntest_rand(&a, 512, -1, 0); BN_bntest_rand(&b, BN_BITS2, -1, 0); - s = b.d[0]; - } while (!s); + } while (BN_is_zero(&b)); + s = b.d[0]; BN_copy(&b, &a); r = BN_div_word(&b, s); @@ -781,6 +789,18 @@ int test_mont(BIO *bp, BN_CTX *ctx) if (mont == NULL) return 0; + BN_zero(&n); + if (BN_MONT_CTX_set(mont, &n, ctx)) { + fprintf(stderr, "BN_MONT_CTX_set succeeded for zero modulus!\n"); + return 0; + } + + BN_set_word(&n, 16); + if (BN_MONT_CTX_set(mont, &n, ctx)) { + fprintf(stderr, "BN_MONT_CTX_set succeeded for even modulus!\n"); + return 0; + } + BN_bntest_rand(&a, 100, 0, 0); BN_bntest_rand(&b, 100, 0, 0); for (i = 0; i < num2; i++) { @@ -887,6 +907,14 @@ int test_mod_mul(BIO *bp, BN_CTX *ctx) d = BN_new(); e = BN_new(); + BN_one(a); + BN_one(b); + BN_zero(c); + if (BN_mod_mul(e, a, b, c, ctx)) { + fprintf(stderr, "BN_mod_mul with zero modulus succeeded!\n"); + return 0; + } + for (j = 0; j < 3; j++) { BN_bntest_rand(c, 1024, 0, 0); for (i = 0; i < num0; i++) { @@ -952,6 +980,14 @@ int test_mod_exp(BIO *bp, BN_CTX *ctx) d = BN_new(); e = BN_new(); + BN_one(a); + BN_one(b); + BN_zero(c); + if (BN_mod_exp(d, a, b, c, ctx)) { + fprintf(stderr, "BN_mod_exp with zero modulus succeeded!\n"); + return 0; + } + BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */ for (i = 0; i < num2; i++) { BN_bntest_rand(a, 20 + i * 5, 0, 0); @@ -980,6 +1016,24 @@ int test_mod_exp(BIO *bp, BN_CTX *ctx) return 0; } } + + /* Regression test for carry propagation bug in sqr8x_reduction */ + BN_hex2bn(&a, "050505050505"); + BN_hex2bn(&b, "02"); + BN_hex2bn(&c, + "4141414141414141414141274141414141414141414141414141414141414141" + "4141414141414141414141414141414141414141414141414141414141414141" + "4141414141414141414141800000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000001"); + BN_mod_exp(d, a, b, c, ctx); + BN_mul(e, a, a, ctx); + if (BN_cmp(d, e)) { + fprintf(stderr, "BN_mod_exp and BN_mul produce different results!\n"); + return 0; + } + BN_free(a); BN_free(b); BN_free(c); @@ -999,6 +1053,22 @@ int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx) d = BN_new(); e = BN_new(); + BN_one(a); + BN_one(b); + BN_zero(c); + if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) { + fprintf(stderr, "BN_mod_exp_mont_consttime with zero modulus " + "succeeded\n"); + return 0; + } + + BN_set_word(c, 16); + if (BN_mod_exp_mont_consttime(d, a, b, c, ctx, NULL)) { + fprintf(stderr, "BN_mod_exp_mont_consttime with even modulus " + "succeeded\n"); + return 0; + } + BN_bntest_rand(c, 30, 0, 1); /* must be odd for montgomery */ for (i = 0; i < num2; i++) { BN_bntest_rand(a, 20 + i * 5, 0, 0); diff --git a/crypto/bn/rsaz_exp.h b/crypto/bn/rsaz_exp.h index 33361de..229e181 100644 --- a/crypto/bn/rsaz_exp.h +++ b/crypto/bn/rsaz_exp.h @@ -1,32 +1,44 @@ -/****************************************************************************** -* Copyright(c) 2012, Intel Corp. -* Developers and authors: -* Shay Gueron (1, 2), and Vlad Krasnov (1) -* (1) Intel Corporation, Israel Development Center, Haifa, Israel -* (2) University of Haifa, Israel +/***************************************************************************** +* * +* Copyright (c) 2012, Intel Corporation * +* * +* All rights reserved. * +* * +* Redistribution and use in source and binary forms, with or without * +* modification, are permitted provided that the following conditions are * +* met: * +* * +* * Redistributions of source code must retain the above copyright * +* notice, this list of conditions and the following disclaimer. * +* * +* * Redistributions in binary form must reproduce the above copyright * +* notice, this list of conditions and the following disclaimer in the * +* documentation and/or other materials provided with the * +* distribution. * +* * +* * Neither the name of the Intel Corporation nor the names of its * +* contributors may be used to endorse or promote products derived from * +* this software without specific prior written permission. * +* * +* * +* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY * +* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * +* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR * +* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * +* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * +* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * +* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * +* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * +* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * +* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * +* * ****************************************************************************** -* LICENSE: -* This submission to OpenSSL is to be made available under the OpenSSL -* license, and only to the OpenSSL project, in order to allow integration -* into the publicly distributed code. -* The use of this code, or portions of this code, or concepts embedded in -* this code, or modification of this code and/or algorithm(s) in it, or the -* use of this code for any other purpose than stated above, requires special -* licensing. -****************************************************************************** -* DISCLAIMER: -* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS -* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT -* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, -* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -* POSSIBILITY OF SUCH DAMAGE. -******************************************************************************/ +* Developers and authors: * +* Shay Gueron (1, 2), and Vlad Krasnov (1) * +* (1) Intel Corporation, Israel Development Center, Haifa, Israel * +* (2) University of Haifa, Israel * +*****************************************************************************/ #ifndef RSAZ_EXP_H # define RSAZ_EXP_H |