From 89e12654312dddbbdbf17b5adc95b22cb672f947 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Wed, 17 Oct 2007 23:18:57 +0800 Subject: [CRYPTO] aes: Move common defines into a header file These three defines are used in all AES related hardware. Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 7 +------ arch/x86/crypto/aes_32.c | 4 +--- arch/x86/crypto/aes_64.c | 6 +----- 3 files changed, 3 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 512669691ad..812511bbb54 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -16,17 +16,12 @@ * */ +#include #include #include #include #include "crypt_s390.h" -#define AES_MIN_KEY_SIZE 16 -#define AES_MAX_KEY_SIZE 32 - -/* data block size for all key lengths */ -#define AES_BLOCK_SIZE 16 - #define AES_KEYLEN_128 1 #define AES_KEYLEN_192 2 #define AES_KEYLEN_256 4 diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c index 49aad9397f1..9b0ab50394b 100644 --- a/arch/x86/crypto/aes_32.c +++ b/arch/x86/crypto/aes_32.c @@ -38,6 +38,7 @@ */ #include +#include #include #include #include @@ -48,9 +49,6 @@ asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -#define AES_MIN_KEY_SIZE 16 -#define AES_MAX_KEY_SIZE 32 -#define AES_BLOCK_SIZE 16 #define AES_KS_LENGTH 4 * AES_BLOCK_SIZE #define RC_LENGTH 29 diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c index 5cdb13ea5cc..0b38a4cd2ce 100644 --- a/arch/x86/crypto/aes_64.c +++ b/arch/x86/crypto/aes_64.c @@ -54,6 +54,7 @@ */ #include +#include #include #include #include @@ -61,11 +62,6 @@ #include #include -#define AES_MIN_KEY_SIZE 16 -#define AES_MAX_KEY_SIZE 32 - -#define AES_BLOCK_SIZE 16 - /* * #define byte(x, nr) ((unsigned char)((x) >> (nr*8))) */ -- cgit v1.2.3 From 81190b321548bb0bf2d6e1f172695275b0fd1363 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Thu, 8 Nov 2007 21:25:04 +0800 Subject: [CRYPTO] aes-x86-64: Remove setkey The setkey() function can be shared with the generic algorithm. Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-x86_64-asm_64.S | 68 ++++----- arch/x86/crypto/aes_64.c | 282 +----------------------------------- 2 files changed, 37 insertions(+), 313 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 26b40de4d0b..a120f526c3d 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -8,10 +8,10 @@ * including this sentence is retained in full. 
*/ -.extern aes_ft_tab -.extern aes_it_tab -.extern aes_fl_tab -.extern aes_il_tab +.extern crypto_ft_tab +.extern crypto_it_tab +.extern crypto_fl_tab +.extern crypto_il_tab .text @@ -56,13 +56,13 @@ .align 8; \ FUNC: movq r1,r2; \ movq r3,r4; \ - leaq BASE+KEY+52(r8),r9; \ + leaq BASE+KEY+48+4(r8),r9; \ movq r10,r11; \ movl (r7),r5 ## E; \ movl 4(r7),r1 ## E; \ movl 8(r7),r6 ## E; \ movl 12(r7),r7 ## E; \ - movl BASE(r8),r10 ## E; \ + movl BASE+0(r8),r10 ## E; \ xorl -48(r9),r5 ## E; \ xorl -44(r9),r1 ## E; \ xorl -40(r9),r6 ## E; \ @@ -154,37 +154,37 @@ FUNC: movq r1,r2; \ /* void aes_enc_blk(stuct crypto_tfm *tfm, u8 *out, const u8 *in) */ entry(aes_enc_blk,0,enc128,enc192) - encrypt_round(aes_ft_tab,-96) - encrypt_round(aes_ft_tab,-80) -enc192: encrypt_round(aes_ft_tab,-64) - encrypt_round(aes_ft_tab,-48) -enc128: encrypt_round(aes_ft_tab,-32) - encrypt_round(aes_ft_tab,-16) - encrypt_round(aes_ft_tab, 0) - encrypt_round(aes_ft_tab, 16) - encrypt_round(aes_ft_tab, 32) - encrypt_round(aes_ft_tab, 48) - encrypt_round(aes_ft_tab, 64) - encrypt_round(aes_ft_tab, 80) - encrypt_round(aes_ft_tab, 96) - encrypt_final(aes_fl_tab,112) + encrypt_round(crypto_ft_tab,-96) + encrypt_round(crypto_ft_tab,-80) +enc192: encrypt_round(crypto_ft_tab,-64) + encrypt_round(crypto_ft_tab,-48) +enc128: encrypt_round(crypto_ft_tab,-32) + encrypt_round(crypto_ft_tab,-16) + encrypt_round(crypto_ft_tab, 0) + encrypt_round(crypto_ft_tab, 16) + encrypt_round(crypto_ft_tab, 32) + encrypt_round(crypto_ft_tab, 48) + encrypt_round(crypto_ft_tab, 64) + encrypt_round(crypto_ft_tab, 80) + encrypt_round(crypto_ft_tab, 96) + encrypt_final(crypto_fl_tab,112) return /* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */ entry(aes_dec_blk,240,dec128,dec192) - decrypt_round(aes_it_tab,-96) - decrypt_round(aes_it_tab,-80) -dec192: decrypt_round(aes_it_tab,-64) - decrypt_round(aes_it_tab,-48) -dec128: decrypt_round(aes_it_tab,-32) - decrypt_round(aes_it_tab,-16) - decrypt_round(aes_it_tab, 0) - decrypt_round(aes_it_tab, 16) - decrypt_round(aes_it_tab, 32) - decrypt_round(aes_it_tab, 48) - decrypt_round(aes_it_tab, 64) - decrypt_round(aes_it_tab, 80) - decrypt_round(aes_it_tab, 96) - decrypt_final(aes_il_tab,112) + decrypt_round(crypto_it_tab,-96) + decrypt_round(crypto_it_tab,-80) +dec192: decrypt_round(crypto_it_tab,-64) + decrypt_round(crypto_it_tab,-48) +dec128: decrypt_round(crypto_it_tab,-32) + decrypt_round(crypto_it_tab,-16) + decrypt_round(crypto_it_tab, 0) + decrypt_round(crypto_it_tab, 16) + decrypt_round(crypto_it_tab, 32) + decrypt_round(crypto_it_tab, 48) + decrypt_round(crypto_it_tab, 64) + decrypt_round(crypto_it_tab, 80) + decrypt_round(crypto_it_tab, 96) + decrypt_final(crypto_il_tab,112) return diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c index 0b38a4cd2ce..d7a41a97dd3 100644 --- a/arch/x86/crypto/aes_64.c +++ b/arch/x86/crypto/aes_64.c @@ -1,284 +1,9 @@ /* - * Cryptographic API. + * Glue Code for AES Cipher Algorithm * - * AES Cipher Algorithm. - * - * Based on Brian Gladman's code. - * - * Linux developers: - * Alexander Kjeldaas - * Herbert Valerio Riedel - * Kyle McMartin - * Adam J. Richter (conversion to 2.5 API). - * Andreas Steinmetz (adapted to x86_64 assembler) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * --------------------------------------------------------------------------- - * Copyright (c) 2002, Dr Brian Gladman , Worcester, UK. - * All rights reserved. - * - * LICENSE TERMS - * - * The free distribution and use of this software in both source and binary - * form is allowed (with or without changes) provided that: - * - * 1. distributions of this source code include the above copyright - * notice, this list of conditions and the following disclaimer; - * - * 2. distributions in binary form include the above copyright - * notice, this list of conditions and the following disclaimer - * in the documentation and/or other associated materials; - * - * 3. the copyright holder's name is not used to endorse products - * built using this software without specific written permission. - * - * ALTERNATIVELY, provided that this notice is retained in full, this product - * may be distributed under the terms of the GNU General Public License (GPL), - * in which case the provisions of the GPL apply INSTEAD OF those given above. - * - * DISCLAIMER - * - * This software is provided 'as is' with no explicit or implied warranties - * in respect of its properties, including, but not limited to, correctness - * and/or fitness for purpose. - * --------------------------------------------------------------------------- */ -/* Some changes from the Gladman version: - s/RIJNDAEL(e_key)/E_KEY/g - s/RIJNDAEL(d_key)/D_KEY/g -*/ - -#include #include -#include -#include -#include -#include -#include -#include - -/* - * #define byte(x, nr) ((unsigned char)((x) >> (nr*8))) - */ -static inline u8 byte(const u32 x, const unsigned n) -{ - return x >> (n << 3); -} - -struct aes_ctx -{ - u32 key_length; - u32 buf[120]; -}; - -#define E_KEY (&ctx->buf[0]) -#define D_KEY (&ctx->buf[60]) - -static u8 pow_tab[256] __initdata; -static u8 log_tab[256] __initdata; -static u8 sbx_tab[256] __initdata; -static u8 isb_tab[256] __initdata; -static u32 rco_tab[10]; -u32 aes_ft_tab[4][256]; -u32 aes_it_tab[4][256]; - -u32 aes_fl_tab[4][256]; -u32 aes_il_tab[4][256]; - -static inline u8 f_mult(u8 a, u8 b) -{ - u8 aa = log_tab[a], cc = aa + log_tab[b]; - - return pow_tab[cc + (cc < aa ? 1 : 0)]; -} - -#define ff_mult(a, b) (a && b ? f_mult(a, b) : 0) - -#define ls_box(x) \ - (aes_fl_tab[0][byte(x, 0)] ^ \ - aes_fl_tab[1][byte(x, 1)] ^ \ - aes_fl_tab[2][byte(x, 2)] ^ \ - aes_fl_tab[3][byte(x, 3)]) - -static void __init gen_tabs(void) -{ - u32 i, t; - u8 p, q; - - /* log and power tables for GF(2**8) finite field with - 0x011b as modular polynomial - the simplest primitive - root is 0x03, used here to generate the tables */ - - for (i = 0, p = 1; i < 256; ++i) { - pow_tab[i] = (u8)p; - log_tab[p] = (u8)i; - - p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0); - } - - log_tab[1] = 0; - - for (i = 0, p = 1; i < 10; ++i) { - rco_tab[i] = p; - - p = (p << 1) ^ (p & 0x80 ? 0x01b : 0); - } - - for (i = 0; i < 256; ++i) { - p = (i ? 
pow_tab[255 - log_tab[i]] : 0); - q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2)); - p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2)); - sbx_tab[i] = p; - isb_tab[p] = (u8)i; - } - - for (i = 0; i < 256; ++i) { - p = sbx_tab[i]; - - t = p; - aes_fl_tab[0][i] = t; - aes_fl_tab[1][i] = rol32(t, 8); - aes_fl_tab[2][i] = rol32(t, 16); - aes_fl_tab[3][i] = rol32(t, 24); - - t = ((u32)ff_mult(2, p)) | - ((u32)p << 8) | - ((u32)p << 16) | ((u32)ff_mult(3, p) << 24); - - aes_ft_tab[0][i] = t; - aes_ft_tab[1][i] = rol32(t, 8); - aes_ft_tab[2][i] = rol32(t, 16); - aes_ft_tab[3][i] = rol32(t, 24); - - p = isb_tab[i]; - - t = p; - aes_il_tab[0][i] = t; - aes_il_tab[1][i] = rol32(t, 8); - aes_il_tab[2][i] = rol32(t, 16); - aes_il_tab[3][i] = rol32(t, 24); - - t = ((u32)ff_mult(14, p)) | - ((u32)ff_mult(9, p) << 8) | - ((u32)ff_mult(13, p) << 16) | - ((u32)ff_mult(11, p) << 24); - - aes_it_tab[0][i] = t; - aes_it_tab[1][i] = rol32(t, 8); - aes_it_tab[2][i] = rol32(t, 16); - aes_it_tab[3][i] = rol32(t, 24); - } -} - -#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b) - -#define imix_col(y, x) \ - u = star_x(x); \ - v = star_x(u); \ - w = star_x(v); \ - t = w ^ (x); \ - (y) = u ^ v ^ w; \ - (y) ^= ror32(u ^ t, 8) ^ \ - ror32(v ^ t, 16) ^ \ - ror32(t, 24) - -/* initialise the key schedule from the user supplied key */ - -#define loop4(i) \ -{ \ - t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \ - t ^= E_KEY[4 * i]; E_KEY[4 * i + 4] = t; \ - t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t; \ - t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t; \ - t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t; \ -} - -#define loop6(i) \ -{ \ - t = ror32(t, 8); t = ls_box(t) ^ rco_tab[i]; \ - t ^= E_KEY[6 * i]; E_KEY[6 * i + 6] = t; \ - t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t; \ - t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t; \ - t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t; \ - t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t; \ - t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t; \ -} - -#define loop8(i) \ -{ \ - t = ror32(t, 8); ; t = ls_box(t) ^ rco_tab[i]; \ - t ^= E_KEY[8 * i]; E_KEY[8 * i + 8] = t; \ - t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t; \ - t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t; \ - t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t; \ - t = E_KEY[8 * i + 4] ^ ls_box(t); \ - E_KEY[8 * i + 12] = t; \ - t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t; \ - t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t; \ - t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t; \ -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - struct aes_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *key = (const __le32 *)in_key; - u32 *flags = &tfm->crt_flags; - u32 i, j, t, u, v, w; - - if (key_len % 8) { - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - ctx->key_length = key_len; - - D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]); - D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]); - D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]); - D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]); - - switch (key_len) { - case 16: - t = E_KEY[3]; - for (i = 0; i < 10; ++i) - loop4(i); - break; - - case 24: - E_KEY[4] = le32_to_cpu(key[4]); - t = E_KEY[5] = le32_to_cpu(key[5]); - for (i = 0; i < 8; ++i) - loop6 (i); - break; - - case 32: - E_KEY[4] = le32_to_cpu(key[4]); - E_KEY[5] = le32_to_cpu(key[5]); - E_KEY[6] = le32_to_cpu(key[6]); - t = E_KEY[7] = le32_to_cpu(key[7]); - for (i = 0; i < 7; ++i) - loop8(i); - break; - } - - D_KEY[0] = E_KEY[key_len + 24]; - D_KEY[1] = 
E_KEY[key_len + 25]; - D_KEY[2] = E_KEY[key_len + 26]; - D_KEY[3] = E_KEY[key_len + 27]; - - for (i = 4; i < key_len + 24; ++i) { - j = key_len + 24 - (i & ~3) + (i & 3); - imix_col(D_KEY[j], E_KEY[i]); - } - - return 0; -} asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); @@ -299,14 +24,14 @@ static struct crypto_alg aes_alg = { .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx), + .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, + .cia_setkey = crypto_aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt } @@ -315,7 +40,6 @@ static struct crypto_alg aes_alg = { static int __init aes_init(void) { - gen_tabs(); return crypto_register_alg(&aes_alg); } -- cgit v1.2.3 From 5157dea8139cf0edc4834d528531e642c0d27e37 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Sat, 10 Nov 2007 19:07:16 +0800 Subject: [CRYPTO] aes-i586: Remove setkey The setkey() function can be shared with the generic algorithm. Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-i586-asm_32.S | 89 ++++---- arch/x86/crypto/aes_32.c | 461 +------------------------------------- 2 files changed, 46 insertions(+), 504 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S index f942f0c8f63..1093bede3e0 100644 --- a/arch/x86/crypto/aes-i586-asm_32.S +++ b/arch/x86/crypto/aes-i586-asm_32.S @@ -46,9 +46,9 @@ #define in_blk 16 /* offsets in crypto_tfm structure */ -#define ekey (crypto_tfm_ctx_offset + 0) -#define nrnd (crypto_tfm_ctx_offset + 256) -#define dkey (crypto_tfm_ctx_offset + 260) +#define klen (crypto_tfm_ctx_offset + 0) +#define ekey (crypto_tfm_ctx_offset + 4) +#define dkey (crypto_tfm_ctx_offset + 244) // register mapping for encrypt and decrypt subroutines @@ -221,8 +221,8 @@ .global aes_enc_blk -.extern ft_tab -.extern fl_tab +.extern crypto_ft_tab +.extern crypto_fl_tab .align 4 @@ -236,7 +236,7 @@ aes_enc_blk: 1: push %ebx mov in_blk+4(%esp),%r2 push %esi - mov nrnd(%ebp),%r3 // number of rounds + mov klen(%ebp),%r3 // key size push %edi #if ekey != 0 lea ekey(%ebp),%ebp // key pointer @@ -255,26 +255,26 @@ aes_enc_blk: sub $8,%esp // space for register saves on stack add $16,%ebp // increment to next round key - cmp $12,%r3 + cmp $24,%r3 jb 4f // 10 rounds for 128-bit key lea 32(%ebp),%ebp je 3f // 12 rounds for 192-bit key lea 32(%ebp),%ebp -2: fwd_rnd1( -64(%ebp) ,ft_tab) // 14 rounds for 256-bit key - fwd_rnd2( -48(%ebp) ,ft_tab) -3: fwd_rnd1( -32(%ebp) ,ft_tab) // 12 rounds for 192-bit key - fwd_rnd2( -16(%ebp) ,ft_tab) -4: fwd_rnd1( (%ebp) ,ft_tab) // 10 rounds for 128-bit key - fwd_rnd2( +16(%ebp) ,ft_tab) - fwd_rnd1( +32(%ebp) ,ft_tab) - fwd_rnd2( +48(%ebp) ,ft_tab) - fwd_rnd1( +64(%ebp) ,ft_tab) - fwd_rnd2( +80(%ebp) ,ft_tab) - fwd_rnd1( +96(%ebp) ,ft_tab) - fwd_rnd2(+112(%ebp) ,ft_tab) - fwd_rnd1(+128(%ebp) ,ft_tab) - fwd_rnd2(+144(%ebp) ,fl_tab) // last round uses a different table +2: fwd_rnd1( -64(%ebp), crypto_ft_tab) // 14 rounds for 256-bit key + fwd_rnd2( -48(%ebp), crypto_ft_tab) +3: fwd_rnd1( -32(%ebp), crypto_ft_tab) // 12 rounds for 192-bit key + fwd_rnd2( -16(%ebp), crypto_ft_tab) +4: fwd_rnd1( (%ebp), 
crypto_ft_tab) // 10 rounds for 128-bit key + fwd_rnd2( +16(%ebp), crypto_ft_tab) + fwd_rnd1( +32(%ebp), crypto_ft_tab) + fwd_rnd2( +48(%ebp), crypto_ft_tab) + fwd_rnd1( +64(%ebp), crypto_ft_tab) + fwd_rnd2( +80(%ebp), crypto_ft_tab) + fwd_rnd1( +96(%ebp), crypto_ft_tab) + fwd_rnd2(+112(%ebp), crypto_ft_tab) + fwd_rnd1(+128(%ebp), crypto_ft_tab) + fwd_rnd2(+144(%ebp), crypto_fl_tab) // last round uses a different table // move final values to the output array. CAUTION: the // order of these assigns rely on the register mappings @@ -297,8 +297,8 @@ aes_enc_blk: .global aes_dec_blk -.extern it_tab -.extern il_tab +.extern crypto_it_tab +.extern crypto_il_tab .align 4 @@ -312,14 +312,11 @@ aes_dec_blk: 1: push %ebx mov in_blk+4(%esp),%r2 push %esi - mov nrnd(%ebp),%r3 // number of rounds + mov klen(%ebp),%r3 // key size push %edi #if dkey != 0 lea dkey(%ebp),%ebp // key pointer #endif - mov %r3,%r0 - shl $4,%r0 - add %r0,%ebp // input four columns and xor in first round key @@ -333,27 +330,27 @@ aes_dec_blk: xor 12(%ebp),%r5 sub $8,%esp // space for register saves on stack - sub $16,%ebp // increment to next round key - cmp $12,%r3 + add $16,%ebp // increment to next round key + cmp $24,%r3 jb 4f // 10 rounds for 128-bit key - lea -32(%ebp),%ebp + lea 32(%ebp),%ebp je 3f // 12 rounds for 192-bit key - lea -32(%ebp),%ebp - -2: inv_rnd1( +64(%ebp), it_tab) // 14 rounds for 256-bit key - inv_rnd2( +48(%ebp), it_tab) -3: inv_rnd1( +32(%ebp), it_tab) // 12 rounds for 192-bit key - inv_rnd2( +16(%ebp), it_tab) -4: inv_rnd1( (%ebp), it_tab) // 10 rounds for 128-bit key - inv_rnd2( -16(%ebp), it_tab) - inv_rnd1( -32(%ebp), it_tab) - inv_rnd2( -48(%ebp), it_tab) - inv_rnd1( -64(%ebp), it_tab) - inv_rnd2( -80(%ebp), it_tab) - inv_rnd1( -96(%ebp), it_tab) - inv_rnd2(-112(%ebp), it_tab) - inv_rnd1(-128(%ebp), it_tab) - inv_rnd2(-144(%ebp), il_tab) // last round uses a different table + lea 32(%ebp),%ebp + +2: inv_rnd1( -64(%ebp), crypto_it_tab) // 14 rounds for 256-bit key + inv_rnd2( -48(%ebp), crypto_it_tab) +3: inv_rnd1( -32(%ebp), crypto_it_tab) // 12 rounds for 192-bit key + inv_rnd2( -16(%ebp), crypto_it_tab) +4: inv_rnd1( (%ebp), crypto_it_tab) // 10 rounds for 128-bit key + inv_rnd2( +16(%ebp), crypto_it_tab) + inv_rnd1( +32(%ebp), crypto_it_tab) + inv_rnd2( +48(%ebp), crypto_it_tab) + inv_rnd1( +64(%ebp), crypto_it_tab) + inv_rnd2( +80(%ebp), crypto_it_tab) + inv_rnd1( +96(%ebp), crypto_it_tab) + inv_rnd2(+112(%ebp), crypto_it_tab) + inv_rnd1(+128(%ebp), crypto_it_tab) + inv_rnd2(+144(%ebp), crypto_il_tab) // last round uses a different table // move final values to the output array. CAUTION: the // order of these assigns rely on the register mappings diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c index 9b0ab50394b..8556d9561c2 100644 --- a/arch/x86/crypto/aes_32.c +++ b/arch/x86/crypto/aes_32.c @@ -1,468 +1,14 @@ -/* - * +/* * Glue Code for optimized 586 assembler version of AES - * - * Copyright (c) 2002, Dr Brian Gladman <>, Worcester, UK. - * All rights reserved. - * - * LICENSE TERMS - * - * The free distribution and use of this software in both source and binary - * form is allowed (with or without changes) provided that: - * - * 1. distributions of this source code include the above copyright - * notice, this list of conditions and the following disclaimer; - * - * 2. distributions in binary form include the above copyright - * notice, this list of conditions and the following disclaimer - * in the documentation and/or other associated materials; - * - * 3. 
the copyright holder's name is not used to endorse products - * built using this software without specific written permission. - * - * ALTERNATIVELY, provided that this notice is retained in full, this product - * may be distributed under the terms of the GNU General Public License (GPL), - * in which case the provisions of the GPL apply INSTEAD OF those given above. - * - * DISCLAIMER - * - * This software is provided 'as is' with no explicit or implied warranties - * in respect of its properties, including, but not limited to, correctness - * and/or fitness for purpose. - * - * Copyright (c) 2003, Adam J. Richter (conversion to - * 2.5 API). - * Copyright (c) 2003, 2004 Fruhwirth Clemens - * Copyright (c) 2004 Red Hat, Inc., James Morris - * */ -#include #include -#include #include -#include -#include #include -#include asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -#define AES_KS_LENGTH 4 * AES_BLOCK_SIZE -#define RC_LENGTH 29 - -struct aes_ctx { - u32 ekey[AES_KS_LENGTH]; - u32 rounds; - u32 dkey[AES_KS_LENGTH]; -}; - -#define WPOLY 0x011b -#define bytes2word(b0, b1, b2, b3) \ - (((u32)(b3) << 24) | ((u32)(b2) << 16) | ((u32)(b1) << 8) | (b0)) - -/* define the finite field multiplies required for Rijndael */ -#define f2(x) ((x) ? pow[log[x] + 0x19] : 0) -#define f3(x) ((x) ? pow[log[x] + 0x01] : 0) -#define f9(x) ((x) ? pow[log[x] + 0xc7] : 0) -#define fb(x) ((x) ? pow[log[x] + 0x68] : 0) -#define fd(x) ((x) ? pow[log[x] + 0xee] : 0) -#define fe(x) ((x) ? pow[log[x] + 0xdf] : 0) -#define fi(x) ((x) ? pow[255 - log[x]]: 0) - -static inline u32 upr(u32 x, int n) -{ - return (x << 8 * n) | (x >> (32 - 8 * n)); -} - -static inline u8 bval(u32 x, int n) -{ - return x >> 8 * n; -} - -/* The forward and inverse affine transformations used in the S-box */ -#define fwd_affine(x) \ - (w = (u32)x, w ^= (w<<1)^(w<<2)^(w<<3)^(w<<4), 0x63^(u8)(w^(w>>8))) - -#define inv_affine(x) \ - (w = (u32)x, w = (w<<1)^(w<<3)^(w<<6), 0x05^(u8)(w^(w>>8))) - -static u32 rcon_tab[RC_LENGTH]; - -u32 ft_tab[4][256]; -u32 fl_tab[4][256]; -static u32 im_tab[4][256]; -u32 il_tab[4][256]; -u32 it_tab[4][256]; - -static void gen_tabs(void) -{ - u32 i, w; - u8 pow[512], log[256]; - - /* - * log and power tables for GF(2^8) finite field with - * WPOLY as modular polynomial - the simplest primitive - * root is 0x03, used here to generate the tables. - */ - i = 0; w = 1; - - do { - pow[i] = (u8)w; - pow[i + 255] = (u8)w; - log[w] = (u8)i++; - w ^= (w << 1) ^ (w & 0x80 ? 
WPOLY : 0); - } while (w != 1); - - for(i = 0, w = 1; i < RC_LENGTH; ++i) { - rcon_tab[i] = bytes2word(w, 0, 0, 0); - w = f2(w); - } - - for(i = 0; i < 256; ++i) { - u8 b; - - b = fwd_affine(fi((u8)i)); - w = bytes2word(f2(b), b, b, f3(b)); - - /* tables for a normal encryption round */ - ft_tab[0][i] = w; - ft_tab[1][i] = upr(w, 1); - ft_tab[2][i] = upr(w, 2); - ft_tab[3][i] = upr(w, 3); - w = bytes2word(b, 0, 0, 0); - - /* - * tables for last encryption round - * (may also be used in the key schedule) - */ - fl_tab[0][i] = w; - fl_tab[1][i] = upr(w, 1); - fl_tab[2][i] = upr(w, 2); - fl_tab[3][i] = upr(w, 3); - - b = fi(inv_affine((u8)i)); - w = bytes2word(fe(b), f9(b), fd(b), fb(b)); - - /* tables for the inverse mix column operation */ - im_tab[0][b] = w; - im_tab[1][b] = upr(w, 1); - im_tab[2][b] = upr(w, 2); - im_tab[3][b] = upr(w, 3); - - /* tables for a normal decryption round */ - it_tab[0][i] = w; - it_tab[1][i] = upr(w,1); - it_tab[2][i] = upr(w,2); - it_tab[3][i] = upr(w,3); - - w = bytes2word(b, 0, 0, 0); - - /* tables for last decryption round */ - il_tab[0][i] = w; - il_tab[1][i] = upr(w,1); - il_tab[2][i] = upr(w,2); - il_tab[3][i] = upr(w,3); - } -} - -#define four_tables(x,tab,vf,rf,c) \ -( tab[0][bval(vf(x,0,c),rf(0,c))] ^ \ - tab[1][bval(vf(x,1,c),rf(1,c))] ^ \ - tab[2][bval(vf(x,2,c),rf(2,c))] ^ \ - tab[3][bval(vf(x,3,c),rf(3,c))] \ -) - -#define vf1(x,r,c) (x) -#define rf1(r,c) (r) -#define rf2(r,c) ((r-c)&3) - -#define inv_mcol(x) four_tables(x,im_tab,vf1,rf1,0) -#define ls_box(x,c) four_tables(x,fl_tab,vf1,rf2,c) - -#define ff(x) inv_mcol(x) - -#define ke4(k,i) \ -{ \ - k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ - k[4*(i)+5] = ss[1] ^= ss[0]; \ - k[4*(i)+6] = ss[2] ^= ss[1]; \ - k[4*(i)+7] = ss[3] ^= ss[2]; \ -} - -#define kel4(k,i) \ -{ \ - k[4*(i)+4] = ss[0] ^= ls_box(ss[3],3) ^ rcon_tab[i]; \ - k[4*(i)+5] = ss[1] ^= ss[0]; \ - k[4*(i)+6] = ss[2] ^= ss[1]; k[4*(i)+7] = ss[3] ^= ss[2]; \ -} - -#define ke6(k,i) \ -{ \ - k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 7] = ss[1] ^= ss[0]; \ - k[6*(i)+ 8] = ss[2] ^= ss[1]; \ - k[6*(i)+ 9] = ss[3] ^= ss[2]; \ - k[6*(i)+10] = ss[4] ^= ss[3]; \ - k[6*(i)+11] = ss[5] ^= ss[4]; \ -} - -#define kel6(k,i) \ -{ \ - k[6*(i)+ 6] = ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 7] = ss[1] ^= ss[0]; \ - k[6*(i)+ 8] = ss[2] ^= ss[1]; \ - k[6*(i)+ 9] = ss[3] ^= ss[2]; \ -} - -#define ke8(k,i) \ -{ \ - k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 9] = ss[1] ^= ss[0]; \ - k[8*(i)+10] = ss[2] ^= ss[1]; \ - k[8*(i)+11] = ss[3] ^= ss[2]; \ - k[8*(i)+12] = ss[4] ^= ls_box(ss[3],0); \ - k[8*(i)+13] = ss[5] ^= ss[4]; \ - k[8*(i)+14] = ss[6] ^= ss[5]; \ - k[8*(i)+15] = ss[7] ^= ss[6]; \ -} - -#define kel8(k,i) \ -{ \ - k[8*(i)+ 8] = ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 9] = ss[1] ^= ss[0]; \ - k[8*(i)+10] = ss[2] ^= ss[1]; \ - k[8*(i)+11] = ss[3] ^= ss[2]; \ -} - -#define kdf4(k,i) \ -{ \ - ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \ - ss[1] = ss[1] ^ ss[3]; \ - ss[2] = ss[2] ^ ss[3]; \ - ss[3] = ss[3]; \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ - ss[i % 4] ^= ss[4]; \ - ss[4] ^= k[4*(i)]; \ - k[4*(i)+4] = ff(ss[4]); \ - ss[4] ^= k[4*(i)+1]; \ - k[4*(i)+5] = ff(ss[4]); \ - ss[4] ^= k[4*(i)+2]; \ - k[4*(i)+6] = ff(ss[4]); \ - ss[4] ^= k[4*(i)+3]; \ - k[4*(i)+7] = ff(ss[4]); \ -} - -#define kd4(k,i) \ -{ \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ - ss[i % 4] ^= ss[4]; \ - ss[4] = ff(ss[4]); \ - k[4*(i)+4] = ss[4] ^= k[4*(i)]; \ - k[4*(i)+5] = ss[4] ^= 
k[4*(i)+1]; \ - k[4*(i)+6] = ss[4] ^= k[4*(i)+2]; \ - k[4*(i)+7] = ss[4] ^= k[4*(i)+3]; \ -} - -#define kdl4(k,i) \ -{ \ - ss[4] = ls_box(ss[(i+3) % 4], 3) ^ rcon_tab[i]; \ - ss[i % 4] ^= ss[4]; \ - k[4*(i)+4] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \ - k[4*(i)+5] = ss[1] ^ ss[3]; \ - k[4*(i)+6] = ss[0]; \ - k[4*(i)+7] = ss[1]; \ -} - -#define kdf6(k,i) \ -{ \ - ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 6] = ff(ss[0]); \ - ss[1] ^= ss[0]; \ - k[6*(i)+ 7] = ff(ss[1]); \ - ss[2] ^= ss[1]; \ - k[6*(i)+ 8] = ff(ss[2]); \ - ss[3] ^= ss[2]; \ - k[6*(i)+ 9] = ff(ss[3]); \ - ss[4] ^= ss[3]; \ - k[6*(i)+10] = ff(ss[4]); \ - ss[5] ^= ss[4]; \ - k[6*(i)+11] = ff(ss[5]); \ -} - -#define kd6(k,i) \ -{ \ - ss[6] = ls_box(ss[5],3) ^ rcon_tab[i]; \ - ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \ - k[6*(i)+ 6] = ss[6] ^= k[6*(i)]; \ - ss[1] ^= ss[0]; \ - k[6*(i)+ 7] = ss[6] ^= k[6*(i)+ 1]; \ - ss[2] ^= ss[1]; \ - k[6*(i)+ 8] = ss[6] ^= k[6*(i)+ 2]; \ - ss[3] ^= ss[2]; \ - k[6*(i)+ 9] = ss[6] ^= k[6*(i)+ 3]; \ - ss[4] ^= ss[3]; \ - k[6*(i)+10] = ss[6] ^= k[6*(i)+ 4]; \ - ss[5] ^= ss[4]; \ - k[6*(i)+11] = ss[6] ^= k[6*(i)+ 5]; \ -} - -#define kdl6(k,i) \ -{ \ - ss[0] ^= ls_box(ss[5],3) ^ rcon_tab[i]; \ - k[6*(i)+ 6] = ss[0]; \ - ss[1] ^= ss[0]; \ - k[6*(i)+ 7] = ss[1]; \ - ss[2] ^= ss[1]; \ - k[6*(i)+ 8] = ss[2]; \ - ss[3] ^= ss[2]; \ - k[6*(i)+ 9] = ss[3]; \ -} - -#define kdf8(k,i) \ -{ \ - ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 8] = ff(ss[0]); \ - ss[1] ^= ss[0]; \ - k[8*(i)+ 9] = ff(ss[1]); \ - ss[2] ^= ss[1]; \ - k[8*(i)+10] = ff(ss[2]); \ - ss[3] ^= ss[2]; \ - k[8*(i)+11] = ff(ss[3]); \ - ss[4] ^= ls_box(ss[3],0); \ - k[8*(i)+12] = ff(ss[4]); \ - ss[5] ^= ss[4]; \ - k[8*(i)+13] = ff(ss[5]); \ - ss[6] ^= ss[5]; \ - k[8*(i)+14] = ff(ss[6]); \ - ss[7] ^= ss[6]; \ - k[8*(i)+15] = ff(ss[7]); \ -} - -#define kd8(k,i) \ -{ \ - u32 __g = ls_box(ss[7],3) ^ rcon_tab[i]; \ - ss[0] ^= __g; \ - __g = ff(__g); \ - k[8*(i)+ 8] = __g ^= k[8*(i)]; \ - ss[1] ^= ss[0]; \ - k[8*(i)+ 9] = __g ^= k[8*(i)+ 1]; \ - ss[2] ^= ss[1]; \ - k[8*(i)+10] = __g ^= k[8*(i)+ 2]; \ - ss[3] ^= ss[2]; \ - k[8*(i)+11] = __g ^= k[8*(i)+ 3]; \ - __g = ls_box(ss[3],0); \ - ss[4] ^= __g; \ - __g = ff(__g); \ - k[8*(i)+12] = __g ^= k[8*(i)+ 4]; \ - ss[5] ^= ss[4]; \ - k[8*(i)+13] = __g ^= k[8*(i)+ 5]; \ - ss[6] ^= ss[5]; \ - k[8*(i)+14] = __g ^= k[8*(i)+ 6]; \ - ss[7] ^= ss[6]; \ - k[8*(i)+15] = __g ^= k[8*(i)+ 7]; \ -} - -#define kdl8(k,i) \ -{ \ - ss[0] ^= ls_box(ss[7],3) ^ rcon_tab[i]; \ - k[8*(i)+ 8] = ss[0]; \ - ss[1] ^= ss[0]; \ - k[8*(i)+ 9] = ss[1]; \ - ss[2] ^= ss[1]; \ - k[8*(i)+10] = ss[2]; \ - ss[3] ^= ss[2]; \ - k[8*(i)+11] = ss[3]; \ -} - -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) -{ - int i; - u32 ss[8]; - struct aes_ctx *ctx = crypto_tfm_ctx(tfm); - const __le32 *key = (const __le32 *)in_key; - u32 *flags = &tfm->crt_flags; - - /* encryption schedule */ - - ctx->ekey[0] = ss[0] = le32_to_cpu(key[0]); - ctx->ekey[1] = ss[1] = le32_to_cpu(key[1]); - ctx->ekey[2] = ss[2] = le32_to_cpu(key[2]); - ctx->ekey[3] = ss[3] = le32_to_cpu(key[3]); - - switch(key_len) { - case 16: - for (i = 0; i < 9; i++) - ke4(ctx->ekey, i); - kel4(ctx->ekey, 9); - ctx->rounds = 10; - break; - - case 24: - ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); - ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); - for (i = 0; i < 7; i++) - ke6(ctx->ekey, i); - kel6(ctx->ekey, 7); - ctx->rounds = 12; - break; - - case 32: - ctx->ekey[4] = ss[4] = le32_to_cpu(key[4]); - ctx->ekey[5] = ss[5] = le32_to_cpu(key[5]); - 
ctx->ekey[6] = ss[6] = le32_to_cpu(key[6]); - ctx->ekey[7] = ss[7] = le32_to_cpu(key[7]); - for (i = 0; i < 6; i++) - ke8(ctx->ekey, i); - kel8(ctx->ekey, 6); - ctx->rounds = 14; - break; - - default: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - - /* decryption schedule */ - - ctx->dkey[0] = ss[0] = le32_to_cpu(key[0]); - ctx->dkey[1] = ss[1] = le32_to_cpu(key[1]); - ctx->dkey[2] = ss[2] = le32_to_cpu(key[2]); - ctx->dkey[3] = ss[3] = le32_to_cpu(key[3]); - - switch (key_len) { - case 16: - kdf4(ctx->dkey, 0); - for (i = 1; i < 9; i++) - kd4(ctx->dkey, i); - kdl4(ctx->dkey, 9); - break; - - case 24: - ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); - ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); - kdf6(ctx->dkey, 0); - for (i = 1; i < 7; i++) - kd6(ctx->dkey, i); - kdl6(ctx->dkey, 7); - break; - - case 32: - ctx->dkey[4] = ff(ss[4] = le32_to_cpu(key[4])); - ctx->dkey[5] = ff(ss[5] = le32_to_cpu(key[5])); - ctx->dkey[6] = ff(ss[6] = le32_to_cpu(key[6])); - ctx->dkey[7] = ff(ss[7] = le32_to_cpu(key[7])); - kdf8(ctx->dkey, 0); - for (i = 1; i < 6; i++) - kd8(ctx->dkey, i); - kdl8(ctx->dkey, 6); - break; - } - return 0; -} - static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { aes_enc_blk(tfm, dst, src); @@ -479,14 +25,14 @@ static struct crypto_alg aes_alg = { .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx), + .cra_ctxsize = sizeof(struct crypto_aes_ctx), .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = aes_set_key, + .cia_setkey = crypto_aes_set_key, .cia_encrypt = aes_encrypt, .cia_decrypt = aes_decrypt } @@ -495,7 +41,6 @@ static struct crypto_alg aes_alg = { static int __init aes_init(void) { - gen_tabs(); return crypto_register_alg(&aes_alg); } -- cgit v1.2.3 From 06e1a8f0505426a97292174a959560fd86ea0a3d Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Fri, 30 Nov 2007 00:15:11 +1100 Subject: [CRYPTO] aes-asm: Merge common glue code The 32 bit and 64 bit glue code now use the same piece of code. This patch unifies them. 
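The merge is possible because both assembler implementations now assume the same context layout behind crypto_tfm_ctx() and defer key expansion to the generic crypto_aes_set_key(). A minimal sketch of that shared layout, with field sizes inferred from the klen/ekey/dkey offsets (0, 4 and 244) used by the i586 assembler above; treat the exact declaration as an assumption rather than a verbatim copy of the common AES header:

struct crypto_aes_ctx {
	u32 key_length;		/* klen: 16, 24 or 32; the asm compares it against $24 */
	u32 key_enc[60];	/* ekey: 240 bytes, room for 15 four-word round keys */
	u32 key_dec[60];	/* dkey: hence the 4 + 240 = 244 offset */
};

int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
		       unsigned int key_len);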
Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 4 ++-- arch/x86/crypto/aes_32.c | 58 ---------------------------------------------- arch/x86/crypto/aes_64.c | 56 -------------------------------------------- arch/x86/crypto/aes_glue.c | 57 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 116 deletions(-) delete mode 100644 arch/x86/crypto/aes_32.c delete mode 100644 arch/x86/crypto/aes_64.c create mode 100644 arch/x86/crypto/aes_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 46bb609e244..b8fbb43df6d 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -8,8 +8,8 @@ obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o -aes-i586-y := aes-i586-asm_32.o aes_32.o +aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_32.o -aes-x86_64-y := aes-x86_64-asm_64.o aes_64.o +aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o diff --git a/arch/x86/crypto/aes_32.c b/arch/x86/crypto/aes_32.c deleted file mode 100644 index 8556d9561c2..00000000000 --- a/arch/x86/crypto/aes_32.c +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Glue Code for optimized 586 assembler version of AES - */ - -#include -#include -#include - -asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_enc_blk(tfm, dst, src); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_dec_blk(tfm, dst, src); -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = crypto_aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, i586 asm optimized"); -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_AUTHOR("Fruhwirth Clemens, James Morris, Brian Gladman, Adam Richter"); -MODULE_ALIAS("aes"); diff --git a/arch/x86/crypto/aes_64.c b/arch/x86/crypto/aes_64.c deleted file mode 100644 index d7a41a97dd3..00000000000 --- a/arch/x86/crypto/aes_64.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Glue Code for AES Cipher Algorithm - * - */ - -#include - -asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); -asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); - -static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_enc_blk(tfm, dst, src); -} - -static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - aes_dec_blk(tfm, dst, src); -} - -static struct crypto_alg aes_alg = { - .cra_name = "aes", - .cra_driver_name = "aes-x86_64", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, 
- .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct crypto_aes_ctx), - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = crypto_aes_set_key, - .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt - } - } -}; - -static int __init aes_init(void) -{ - return crypto_register_alg(&aes_alg); -} - -static void __exit aes_fini(void) -{ - crypto_unregister_alg(&aes_alg); -} - -module_init(aes_init); -module_exit(aes_fini); - -MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("aes"); diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c new file mode 100644 index 00000000000..71f45782711 --- /dev/null +++ b/arch/x86/crypto/aes_glue.c @@ -0,0 +1,57 @@ +/* + * Glue Code for the asm optimized version of the AES Cipher Algorithm + * + */ + +#include + +asmlinkage void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); +asmlinkage void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in); + +static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_enc_blk(tfm, dst, src); +} + +static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + aes_dec_blk(tfm, dst, src); +} + +static struct crypto_alg aes_alg = { + .cra_name = "aes", + .cra_driver_name = "aes-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_aes_ctx), + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = AES_MIN_KEY_SIZE, + .cia_max_keysize = AES_MAX_KEY_SIZE, + .cia_setkey = crypto_aes_set_key, + .cia_encrypt = aes_encrypt, + .cia_decrypt = aes_decrypt + } + } +}; + +static int __init aes_init(void) +{ + return crypto_register_alg(&aes_alg); +} + +static void __exit aes_fini(void) +{ + crypto_unregister_alg(&aes_alg); +} + +module_init(aes_init); +module_exit(aes_fini); + +MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("aes"); +MODULE_ALIAS("aes-asm"); -- cgit v1.2.3 From b0c3e75d857f3785a4b274e26b1c0b2327580dda Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Sat, 1 Dec 2007 12:47:37 +1100 Subject: [CRYPTO] aes_s390: Add fallback driver Some CPUs support only 128 bit keys in HW. This patch adds SW fallback support for the other key sizes that may be required. The generic algorithm (and the block mode) must be available in case of a fallback. Signed-off-by: Sebastian Siewior Signed-off-by: Jan Glauber Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 226 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 205 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 812511bbb54..85246112ab5 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -6,6 +6,7 @@ * s390 Version: * Copyright IBM Corp. 
2005,2007 * Author(s): Jan Glauber (jang@de.ibm.com) + * Sebastian Siewior (sebastian@breakpoint.cc> SW-Fallback * * Derived from "crypto/aes_generic.c" * @@ -18,6 +19,7 @@ #include #include +#include #include #include #include "crypt_s390.h" @@ -34,45 +36,89 @@ struct s390_aes_ctx { long enc; long dec; int key_len; + union { + struct crypto_blkcipher *blk; + struct crypto_cipher *cip; + } fallback; }; -static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, - unsigned int key_len) +/* + * Check if the key_len is supported by the HW. + * Returns 0 if it is, a positive number if it is not and software fallback is + * required or a negative number in case the key size is not valid + */ +static int need_fallback(unsigned int key_len) { - struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); - u32 *flags = &tfm->crt_flags; - switch (key_len) { case 16: if (!(keylen_flag & AES_KEYLEN_128)) - goto fail; + return 1; break; case 24: if (!(keylen_flag & AES_KEYLEN_192)) - goto fail; - + return 1; break; case 32: if (!(keylen_flag & AES_KEYLEN_256)) - goto fail; + return 1; break; default: - goto fail; + return -1; break; } + return 0; +} + +static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_cipher_setkey(sctx->fallback.cip, in_key, key_len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, + unsigned int key_len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + u32 *flags = &tfm->crt_flags; + int ret; + + ret = need_fallback(key_len); + if (ret < 0) { + *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; + return -EINVAL; + } sctx->key_len = key_len; - memcpy(sctx->key, in_key, key_len); - return 0; -fail: - *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; + if (!ret) { + memcpy(sctx->key, in_key, key_len); + return 0; + } + + return setkey_fallback_cip(tfm, in_key, key_len); } static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + if (unlikely(need_fallback(sctx->key_len))) { + crypto_cipher_encrypt_one(sctx->fallback.cip, out, in); + return; + } + switch (sctx->key_len) { case 16: crypt_s390_km(KM_AES_128_ENCRYPT, &sctx->key, out, in, @@ -93,6 +139,11 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) { const struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + if (unlikely(need_fallback(sctx->key_len))) { + crypto_cipher_decrypt_one(sctx->fallback.cip, out, in); + return; + } + switch (sctx->key_len) { case 16: crypt_s390_km(KM_AES_128_DECRYPT, &sctx->key, out, in, @@ -109,6 +160,29 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) } } +static int fallback_init_cip(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + sctx->fallback.cip = crypto_alloc_cipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(sctx->fallback.cip)) { + printk(KERN_ERR "Error allocating fallback algo %s\n", name); + return PTR_ERR(sctx->fallback.blk); + } + + return 0; +} + +static void fallback_exit_cip(struct crypto_tfm *tfm) +{ + struct s390_aes_ctx *sctx 
= crypto_tfm_ctx(tfm); + + crypto_free_cipher(sctx->fallback.cip); + sctx->fallback.cip = NULL; +} static struct crypto_alg aes_alg = { .cra_name = "aes", @@ -120,6 +194,8 @@ static struct crypto_alg aes_alg = { .cra_ctxsize = sizeof(struct s390_aes_ctx), .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), + .cra_init = fallback_init_cip, + .cra_exit = fallback_exit_cip, .cra_u = { .cipher = { .cia_min_keysize = AES_MIN_KEY_SIZE, @@ -131,10 +207,76 @@ static struct crypto_alg aes_alg = { } }; +static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key, + unsigned int len) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + unsigned int ret; + + sctx->fallback.blk->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; + sctx->fallback.blk->base.crt_flags |= (tfm->crt_flags & + CRYPTO_TFM_REQ_MASK); + + ret = crypto_blkcipher_setkey(sctx->fallback.blk, key, len); + if (ret) { + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= (sctx->fallback.blk->base.crt_flags & + CRYPTO_TFM_RES_MASK); + } + return ret; +} + +static int fallback_blk_dec(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + unsigned int ret; + struct crypto_blkcipher *tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + + memcpy(crypto_blkcipher_crt(sctx->fallback.blk)->iv, desc->info, + AES_BLOCK_SIZE); + + tfm = desc->tfm; + desc->tfm = sctx->fallback.blk; + + ret = crypto_blkcipher_decrypt(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + +static int fallback_blk_enc(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + unsigned int ret; + struct crypto_blkcipher *tfm; + struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); + + memcpy(crypto_blkcipher_crt(sctx->fallback.blk)->iv, desc->info, + AES_BLOCK_SIZE); + + tfm = desc->tfm; + desc->tfm = sctx->fallback.blk; + + ret = crypto_blkcipher_encrypt(desc, dst, src, nbytes); + + desc->tfm = tfm; + return ret; +} + static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + ret = need_fallback(key_len); + if (ret > 0) { + sctx->key_len = key_len; + return setkey_fallback_blk(tfm, in_key, key_len); + } switch (key_len) { case 16: @@ -183,6 +325,9 @@ static int ecb_aes_encrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_enc(desc, dst, src, nbytes); + blkcipher_walk_init(&walk, dst, src, nbytes); return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk); } @@ -194,10 +339,37 @@ static int ecb_aes_decrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_dec(desc, dst, src, nbytes); + blkcipher_walk_init(&walk, dst, src, nbytes); return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk); } +static int fallback_init_blk(struct crypto_tfm *tfm) +{ + const char *name = tfm->__crt_alg->cra_name; + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + sctx->fallback.blk = crypto_alloc_blkcipher(name, 0, + CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(sctx->fallback.blk)) { + printk(KERN_ERR "Error allocating fallback algo %s\n", name); + return PTR_ERR(sctx->fallback.blk); + } + + return 0; +} + +static void 
fallback_exit_blk(struct crypto_tfm *tfm) +{ + struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + + crypto_free_blkcipher(sctx->fallback.blk); + sctx->fallback.blk = NULL; +} + static struct crypto_alg ecb_aes_alg = { .cra_name = "ecb(aes)", .cra_driver_name = "ecb-aes-s390", @@ -209,6 +381,8 @@ static struct crypto_alg ecb_aes_alg = { .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(ecb_aes_alg.cra_list), + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, .cra_u = { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, @@ -224,6 +398,13 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm); + int ret; + + ret = need_fallback(key_len); + if (ret > 0) { + sctx->key_len = key_len; + return setkey_fallback_blk(tfm, in_key, key_len); + } switch (key_len) { case 16: @@ -278,6 +459,9 @@ static int cbc_aes_encrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_enc(desc, dst, src, nbytes); + blkcipher_walk_init(&walk, dst, src, nbytes); return cbc_aes_crypt(desc, sctx->enc, sctx->iv, &walk); } @@ -289,6 +473,9 @@ static int cbc_aes_decrypt(struct blkcipher_desc *desc, struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); struct blkcipher_walk walk; + if (unlikely(need_fallback(sctx->key_len))) + return fallback_blk_dec(desc, dst, src, nbytes); + blkcipher_walk_init(&walk, dst, src, nbytes); return cbc_aes_crypt(desc, sctx->dec, sctx->iv, &walk); } @@ -304,6 +491,8 @@ static struct crypto_alg cbc_aes_alg = { .cra_type = &crypto_blkcipher_type, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(cbc_aes_alg.cra_list), + .cra_init = fallback_init_blk, + .cra_exit = fallback_exit_blk, .cra_u = { .blkcipher = { .min_keysize = AES_MIN_KEY_SIZE, @@ -331,14 +520,10 @@ static int __init aes_init(void) return -EOPNOTSUPP; /* z9 109 and z9 BC/EC only support 128 bit key length */ - if (keylen_flag == AES_KEYLEN_128) { - aes_alg.cra_u.cipher.cia_max_keysize = AES_MIN_KEY_SIZE; - ecb_aes_alg.cra_u.blkcipher.max_keysize = AES_MIN_KEY_SIZE; - cbc_aes_alg.cra_u.blkcipher.max_keysize = AES_MIN_KEY_SIZE; + if (keylen_flag == AES_KEYLEN_128) printk(KERN_INFO "aes_s390: hardware acceleration only available for" "128 bit keys\n"); - } ret = crypto_register_alg(&aes_alg); if (ret) @@ -377,4 +562,3 @@ MODULE_ALIAS("aes"); MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm"); MODULE_LICENSE("GPL"); - -- cgit v1.2.3 From 2d74d405fc5ea78b20a4a2efd24201db424e07b1 Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Mon, 10 Dec 2007 15:49:41 +0800 Subject: [CRYPTO] s390-aes: Use correct encrypt/decrypt function in fallback crypto_blkcipher_decrypt is wrong because it does not care about the IV. 
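For context: the two blkcipher entry points differ only in where the IV comes from. Roughly, the inline wrappers of this era look as follows (a paraphrase of the include/linux/crypto.h helpers; a sketch, not a verbatim copy):

static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc,
					   struct scatterlist *dst,
					   struct scatterlist *src,
					   unsigned int nbytes)
{
	/* points desc->info at the tfm's own internal IV buffer */
	desc->info = crypto_blkcipher_crt(desc->tfm)->iv;
	return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
}

static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc,
					      struct scatterlist *dst,
					      struct scatterlist *src,
					      unsigned int nbytes)
{
	/* uses desc->info, i.e. the IV the caller passed in */
	return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes);
}

The plain variants redirect desc->info to the fallback tfm's private IV buffer, so the IV updates made by the chaining mode never reach the caller; the _iv variants operate on the caller-supplied IV directly, which is also why the memcpy() workaround removed below becomes unnecessary.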
Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu --- arch/s390/crypto/aes_s390.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index 85246112ab5..46c97058ebe 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -234,13 +234,10 @@ static int fallback_blk_dec(struct blkcipher_desc *desc, struct crypto_blkcipher *tfm; struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - memcpy(crypto_blkcipher_crt(sctx->fallback.blk)->iv, desc->info, - AES_BLOCK_SIZE); - tfm = desc->tfm; desc->tfm = sctx->fallback.blk; - ret = crypto_blkcipher_decrypt(desc, dst, src, nbytes); + ret = crypto_blkcipher_decrypt_iv(desc, dst, src, nbytes); desc->tfm = tfm; return ret; @@ -254,13 +251,10 @@ static int fallback_blk_enc(struct blkcipher_desc *desc, struct crypto_blkcipher *tfm; struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm); - memcpy(crypto_blkcipher_crt(sctx->fallback.blk)->iv, desc->info, - AES_BLOCK_SIZE); - tfm = desc->tfm; desc->tfm = sctx->fallback.blk; - ret = crypto_blkcipher_encrypt(desc, dst, src, nbytes); + ret = crypto_blkcipher_encrypt_iv(desc, dst, src, nbytes); desc->tfm = tfm; return ret; -- cgit v1.2.3 From 974e4b752ee623854c5dc2bbfc7c7725029ce173 Mon Sep 17 00:00:00 2001 From: Tan Swee Heng Date: Mon, 10 Dec 2007 15:52:56 +0800 Subject: [CRYPTO] salsa20_i586: Salsa20 stream cipher algorithm (i586 version) This patch contains the salsa20-i586 implementation. The original assembly code came from . I have reformatted it (added indents) so that it matches the other algorithms in arch/x86/crypto. Signed-off-by: Tan Swee Heng Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/salsa20-i586-asm_32.S | 1114 +++++++++++++++++++++++++++++++++ arch/x86/crypto/salsa20_glue.c | 127 ++++ 3 files changed, 1243 insertions(+) create mode 100644 arch/x86/crypto/salsa20-i586-asm_32.S create mode 100644 arch/x86/crypto/salsa20_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index b8fbb43df6d..25cc8441046 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -4,12 +4,14 @@ obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o +obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_32.o +salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S new file mode 100644 index 00000000000..72eb306680b --- /dev/null +++ b/arch/x86/crypto/salsa20-i586-asm_32.S @@ -0,0 +1,1114 @@ +# salsa20_pm.s version 20051229 +# D. J. Bernstein +# Public domain. 
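# (annotation, not part of the original file: a road map for the listing
#  below. The 16-word Salsa20 state x0..x15 lives at 100(%esp)..160(%esp),
#  the saved input words j0..j15 at 164(%esp)..224(%esp). Each pass of
#  ._mainloop performs four rounds built from add/rotate/xor steps with
#  the Salsa20 rotation constants 7, 9, 13 and 18; the round counter
#  starts at 20 and drops by 4 per pass, giving the usual 20 rounds.
#  Each output word is then (x + j) XORed with the message word, and
#  blocks shorter than 64 bytes are bounced through the temporary
#  buffer at 0(%esp).)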
+ +# enter ECRYPT_encrypt_bytes +.text +.p2align 5 +.globl ECRYPT_encrypt_bytes +ECRYPT_encrypt_bytes: + mov %esp,%eax + and $31,%eax + add $256,%eax + sub %eax,%esp + # eax_stack = eax + movl %eax,80(%esp) + # ebx_stack = ebx + movl %ebx,84(%esp) + # esi_stack = esi + movl %esi,88(%esp) + # edi_stack = edi + movl %edi,92(%esp) + # ebp_stack = ebp + movl %ebp,96(%esp) + # x = arg1 + movl 4(%esp,%eax),%edx + # m = arg2 + movl 8(%esp,%eax),%esi + # out = arg3 + movl 12(%esp,%eax),%edi + # bytes = arg4 + movl 16(%esp,%eax),%ebx + # bytes -= 0 + sub $0,%ebx + # goto done if unsigned<= + jbe ._done +._start: + # in0 = *(uint32 *) (x + 0) + movl 0(%edx),%eax + # in1 = *(uint32 *) (x + 4) + movl 4(%edx),%ecx + # in2 = *(uint32 *) (x + 8) + movl 8(%edx),%ebp + # j0 = in0 + movl %eax,164(%esp) + # in3 = *(uint32 *) (x + 12) + movl 12(%edx),%eax + # j1 = in1 + movl %ecx,168(%esp) + # in4 = *(uint32 *) (x + 16) + movl 16(%edx),%ecx + # j2 = in2 + movl %ebp,172(%esp) + # in5 = *(uint32 *) (x + 20) + movl 20(%edx),%ebp + # j3 = in3 + movl %eax,176(%esp) + # in6 = *(uint32 *) (x + 24) + movl 24(%edx),%eax + # j4 = in4 + movl %ecx,180(%esp) + # in7 = *(uint32 *) (x + 28) + movl 28(%edx),%ecx + # j5 = in5 + movl %ebp,184(%esp) + # in8 = *(uint32 *) (x + 32) + movl 32(%edx),%ebp + # j6 = in6 + movl %eax,188(%esp) + # in9 = *(uint32 *) (x + 36) + movl 36(%edx),%eax + # j7 = in7 + movl %ecx,192(%esp) + # in10 = *(uint32 *) (x + 40) + movl 40(%edx),%ecx + # j8 = in8 + movl %ebp,196(%esp) + # in11 = *(uint32 *) (x + 44) + movl 44(%edx),%ebp + # j9 = in9 + movl %eax,200(%esp) + # in12 = *(uint32 *) (x + 48) + movl 48(%edx),%eax + # j10 = in10 + movl %ecx,204(%esp) + # in13 = *(uint32 *) (x + 52) + movl 52(%edx),%ecx + # j11 = in11 + movl %ebp,208(%esp) + # in14 = *(uint32 *) (x + 56) + movl 56(%edx),%ebp + # j12 = in12 + movl %eax,212(%esp) + # in15 = *(uint32 *) (x + 60) + movl 60(%edx),%eax + # j13 = in13 + movl %ecx,216(%esp) + # j14 = in14 + movl %ebp,220(%esp) + # j15 = in15 + movl %eax,224(%esp) + # x_backup = x + movl %edx,64(%esp) +._bytesatleast1: + # bytes - 64 + cmp $64,%ebx + # goto nocopy if unsigned>= + jae ._nocopy + # ctarget = out + movl %edi,228(%esp) + # out = &tmp + leal 0(%esp),%edi + # i = bytes + mov %ebx,%ecx + # while (i) { *out++ = *m++; --i } + rep movsb + # out = &tmp + leal 0(%esp),%edi + # m = &tmp + leal 0(%esp),%esi +._nocopy: + # out_backup = out + movl %edi,72(%esp) + # m_backup = m + movl %esi,68(%esp) + # bytes_backup = bytes + movl %ebx,76(%esp) + # in0 = j0 + movl 164(%esp),%eax + # in1 = j1 + movl 168(%esp),%ecx + # in2 = j2 + movl 172(%esp),%edx + # in3 = j3 + movl 176(%esp),%ebx + # x0 = in0 + movl %eax,100(%esp) + # x1 = in1 + movl %ecx,104(%esp) + # x2 = in2 + movl %edx,108(%esp) + # x3 = in3 + movl %ebx,112(%esp) + # in4 = j4 + movl 180(%esp),%eax + # in5 = j5 + movl 184(%esp),%ecx + # in6 = j6 + movl 188(%esp),%edx + # in7 = j7 + movl 192(%esp),%ebx + # x4 = in4 + movl %eax,116(%esp) + # x5 = in5 + movl %ecx,120(%esp) + # x6 = in6 + movl %edx,124(%esp) + # x7 = in7 + movl %ebx,128(%esp) + # in8 = j8 + movl 196(%esp),%eax + # in9 = j9 + movl 200(%esp),%ecx + # in10 = j10 + movl 204(%esp),%edx + # in11 = j11 + movl 208(%esp),%ebx + # x8 = in8 + movl %eax,132(%esp) + # x9 = in9 + movl %ecx,136(%esp) + # x10 = in10 + movl %edx,140(%esp) + # x11 = in11 + movl %ebx,144(%esp) + # in12 = j12 + movl 212(%esp),%eax + # in13 = j13 + movl 216(%esp),%ecx + # in14 = j14 + movl 220(%esp),%edx + # in15 = j15 + movl 224(%esp),%ebx + # x12 = in12 + movl %eax,148(%esp) + # x13 = in13 + 
movl %ecx,152(%esp) + # x14 = in14 + movl %edx,156(%esp) + # x15 = in15 + movl %ebx,160(%esp) + # i = 20 + mov $20,%ebp + # p = x0 + movl 100(%esp),%eax + # s = x5 + movl 120(%esp),%ecx + # t = x10 + movl 140(%esp),%edx + # w = x15 + movl 160(%esp),%ebx +._mainloop: + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x12 + addl 148(%esp),%eax + # x5 = s + movl %ecx,120(%esp) + # t += x6 + addl 124(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x1 + movl 104(%esp),%esi + # r += s + add %ecx,%esi + # v = x11 + movl 144(%esp),%edi + # v += w + add %ebx,%edi + # p <<<= 7 + rol $7,%eax + # p ^= x4 + xorl 116(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x14 + xorl 156(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x9 + xorl 136(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x3 + xorl 112(%esp),%edi + # x4 = p + movl %eax,116(%esp) + # x14 = t + movl %edx,156(%esp) + # p += x0 + addl 100(%esp),%eax + # x9 = r + movl %esi,136(%esp) + # t += x10 + addl 140(%esp),%edx + # x3 = v + movl %edi,112(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x8 + xorl 132(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x2 + xorl 108(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x13 + xorl 152(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x7 + xorl 128(%esp),%ebx + # x8 = p + movl %eax,132(%esp) + # x2 = t + movl %edx,108(%esp) + # p += x4 + addl 116(%esp),%eax + # x13 = s + movl %ecx,152(%esp) + # t += x14 + addl 156(%esp),%edx + # x7 = w + movl %ebx,128(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x12 + xorl 148(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x6 + xorl 124(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x1 + xorl 104(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x11 + xorl 144(%esp),%edi + # x12 = p + movl %eax,148(%esp) + # x6 = t + movl %edx,124(%esp) + # p += x8 + addl 132(%esp),%eax + # x1 = r + movl %esi,104(%esp) + # t += x2 + addl 108(%esp),%edx + # x11 = v + movl %edi,144(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x3 + addl 112(%esp),%eax + # p <<<= 7 + rol $7,%eax + # x5 = s + movl %ecx,120(%esp) + # t += x9 + addl 136(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x4 + movl 116(%esp),%esi + # r += s + add %ecx,%esi + # v = x14 + movl 156(%esp),%edi + # v += w + add %ebx,%edi + # p ^= x1 + xorl 104(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x11 + xorl 144(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x6 + xorl 124(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x12 + xorl 148(%esp),%edi + # x1 = p + movl %eax,104(%esp) + # x11 = t + movl %edx,144(%esp) + # p += x0 + addl 100(%esp),%eax + # x6 = r + movl %esi,124(%esp) + # t += x10 + addl 140(%esp),%edx + # x12 = v + movl %edi,148(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x2 + xorl 108(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x8 + xorl 132(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x7 + xorl 128(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x13 + xorl 152(%esp),%ebx + # x2 = p + movl %eax,108(%esp) + # x8 = t + movl %edx,132(%esp) + # p += x1 + addl 104(%esp),%eax + # x7 = s + movl 
%ecx,128(%esp) + # t += x11 + addl 144(%esp),%edx + # x13 = w + movl %ebx,152(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x3 + xorl 112(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x9 + xorl 136(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x4 + xorl 116(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x14 + xorl 156(%esp),%edi + # x3 = p + movl %eax,112(%esp) + # x9 = t + movl %edx,136(%esp) + # p += x2 + addl 108(%esp),%eax + # x4 = r + movl %esi,116(%esp) + # t += x8 + addl 132(%esp),%edx + # x14 = v + movl %edi,156(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x12 + addl 148(%esp),%eax + # x5 = s + movl %ecx,120(%esp) + # t += x6 + addl 124(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x1 + movl 104(%esp),%esi + # r += s + add %ecx,%esi + # v = x11 + movl 144(%esp),%edi + # v += w + add %ebx,%edi + # p <<<= 7 + rol $7,%eax + # p ^= x4 + xorl 116(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x14 + xorl 156(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x9 + xorl 136(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x3 + xorl 112(%esp),%edi + # x4 = p + movl %eax,116(%esp) + # x14 = t + movl %edx,156(%esp) + # p += x0 + addl 100(%esp),%eax + # x9 = r + movl %esi,136(%esp) + # t += x10 + addl 140(%esp),%edx + # x3 = v + movl %edi,112(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x8 + xorl 132(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x2 + xorl 108(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x13 + xorl 152(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x7 + xorl 128(%esp),%ebx + # x8 = p + movl %eax,132(%esp) + # x2 = t + movl %edx,108(%esp) + # p += x4 + addl 116(%esp),%eax + # x13 = s + movl %ecx,152(%esp) + # t += x14 + addl 156(%esp),%edx + # x7 = w + movl %ebx,128(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x12 + xorl 148(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x6 + xorl 124(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x1 + xorl 104(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x11 + xorl 144(%esp),%edi + # x12 = p + movl %eax,148(%esp) + # x6 = t + movl %edx,124(%esp) + # p += x8 + addl 132(%esp),%eax + # x1 = r + movl %esi,104(%esp) + # t += x2 + addl 108(%esp),%edx + # x11 = v + movl %edi,144(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # x0 = p + movl %eax,100(%esp) + # x10 = t + movl %edx,140(%esp) + # p += x3 + addl 112(%esp),%eax + # p <<<= 7 + rol $7,%eax + # x5 = s + movl %ecx,120(%esp) + # t += x9 + addl 136(%esp),%edx + # x15 = w + movl %ebx,160(%esp) + # r = x4 + movl 116(%esp),%esi + # r += s + add %ecx,%esi + # v = x14 + movl 156(%esp),%edi + # v += w + add %ebx,%edi + # p ^= x1 + xorl 104(%esp),%eax + # t <<<= 7 + rol $7,%edx + # t ^= x11 + xorl 144(%esp),%edx + # r <<<= 7 + rol $7,%esi + # r ^= x6 + xorl 124(%esp),%esi + # v <<<= 7 + rol $7,%edi + # v ^= x12 + xorl 
148(%esp),%edi + # x1 = p + movl %eax,104(%esp) + # x11 = t + movl %edx,144(%esp) + # p += x0 + addl 100(%esp),%eax + # x6 = r + movl %esi,124(%esp) + # t += x10 + addl 140(%esp),%edx + # x12 = v + movl %edi,148(%esp) + # p <<<= 9 + rol $9,%eax + # p ^= x2 + xorl 108(%esp),%eax + # t <<<= 9 + rol $9,%edx + # t ^= x8 + xorl 132(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 9 + rol $9,%ecx + # s ^= x7 + xorl 128(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 9 + rol $9,%ebx + # w ^= x13 + xorl 152(%esp),%ebx + # x2 = p + movl %eax,108(%esp) + # x8 = t + movl %edx,132(%esp) + # p += x1 + addl 104(%esp),%eax + # x7 = s + movl %ecx,128(%esp) + # t += x11 + addl 144(%esp),%edx + # x13 = w + movl %ebx,152(%esp) + # p <<<= 13 + rol $13,%eax + # p ^= x3 + xorl 112(%esp),%eax + # t <<<= 13 + rol $13,%edx + # t ^= x9 + xorl 136(%esp),%edx + # r += s + add %ecx,%esi + # r <<<= 13 + rol $13,%esi + # r ^= x4 + xorl 116(%esp),%esi + # v += w + add %ebx,%edi + # v <<<= 13 + rol $13,%edi + # v ^= x14 + xorl 156(%esp),%edi + # x3 = p + movl %eax,112(%esp) + # x9 = t + movl %edx,136(%esp) + # p += x2 + addl 108(%esp),%eax + # x4 = r + movl %esi,116(%esp) + # t += x8 + addl 132(%esp),%edx + # x14 = v + movl %edi,156(%esp) + # p <<<= 18 + rol $18,%eax + # p ^= x0 + xorl 100(%esp),%eax + # t <<<= 18 + rol $18,%edx + # t ^= x10 + xorl 140(%esp),%edx + # s += r + add %esi,%ecx + # s <<<= 18 + rol $18,%ecx + # s ^= x5 + xorl 120(%esp),%ecx + # w += v + add %edi,%ebx + # w <<<= 18 + rol $18,%ebx + # w ^= x15 + xorl 160(%esp),%ebx + # i -= 4 + sub $4,%ebp + # goto mainloop if unsigned > + ja ._mainloop + # x0 = p + movl %eax,100(%esp) + # x5 = s + movl %ecx,120(%esp) + # x10 = t + movl %edx,140(%esp) + # x15 = w + movl %ebx,160(%esp) + # out = out_backup + movl 72(%esp),%edi + # m = m_backup + movl 68(%esp),%esi + # in0 = x0 + movl 100(%esp),%eax + # in1 = x1 + movl 104(%esp),%ecx + # in0 += j0 + addl 164(%esp),%eax + # in1 += j1 + addl 168(%esp),%ecx + # in0 ^= *(uint32 *) (m + 0) + xorl 0(%esi),%eax + # in1 ^= *(uint32 *) (m + 4) + xorl 4(%esi),%ecx + # *(uint32 *) (out + 0) = in0 + movl %eax,0(%edi) + # *(uint32 *) (out + 4) = in1 + movl %ecx,4(%edi) + # in2 = x2 + movl 108(%esp),%eax + # in3 = x3 + movl 112(%esp),%ecx + # in2 += j2 + addl 172(%esp),%eax + # in3 += j3 + addl 176(%esp),%ecx + # in2 ^= *(uint32 *) (m + 8) + xorl 8(%esi),%eax + # in3 ^= *(uint32 *) (m + 12) + xorl 12(%esi),%ecx + # *(uint32 *) (out + 8) = in2 + movl %eax,8(%edi) + # *(uint32 *) (out + 12) = in3 + movl %ecx,12(%edi) + # in4 = x4 + movl 116(%esp),%eax + # in5 = x5 + movl 120(%esp),%ecx + # in4 += j4 + addl 180(%esp),%eax + # in5 += j5 + addl 184(%esp),%ecx + # in4 ^= *(uint32 *) (m + 16) + xorl 16(%esi),%eax + # in5 ^= *(uint32 *) (m + 20) + xorl 20(%esi),%ecx + # *(uint32 *) (out + 16) = in4 + movl %eax,16(%edi) + # *(uint32 *) (out + 20) = in5 + movl %ecx,20(%edi) + # in6 = x6 + movl 124(%esp),%eax + # in7 = x7 + movl 128(%esp),%ecx + # in6 += j6 + addl 188(%esp),%eax + # in7 += j7 + addl 192(%esp),%ecx + # in6 ^= *(uint32 *) (m + 24) + xorl 24(%esi),%eax + # in7 ^= *(uint32 *) (m + 28) + xorl 28(%esi),%ecx + # *(uint32 *) (out + 24) = in6 + movl %eax,24(%edi) + # *(uint32 *) (out + 28) = in7 + movl %ecx,28(%edi) + # in8 = x8 + movl 132(%esp),%eax + # in9 = x9 + movl 136(%esp),%ecx + # in8 += j8 + addl 196(%esp),%eax + # in9 += j9 + addl 200(%esp),%ecx + # in8 ^= *(uint32 *) (m + 32) + xorl 32(%esi),%eax + # in9 ^= *(uint32 *) (m + 36) + xorl 36(%esi),%ecx + # *(uint32 *) (out + 32) = in8 + movl %eax,32(%edi) + # *(uint32 *) 
(out + 36) = in9 + movl %ecx,36(%edi) + # in10 = x10 + movl 140(%esp),%eax + # in11 = x11 + movl 144(%esp),%ecx + # in10 += j10 + addl 204(%esp),%eax + # in11 += j11 + addl 208(%esp),%ecx + # in10 ^= *(uint32 *) (m + 40) + xorl 40(%esi),%eax + # in11 ^= *(uint32 *) (m + 44) + xorl 44(%esi),%ecx + # *(uint32 *) (out + 40) = in10 + movl %eax,40(%edi) + # *(uint32 *) (out + 44) = in11 + movl %ecx,44(%edi) + # in12 = x12 + movl 148(%esp),%eax + # in13 = x13 + movl 152(%esp),%ecx + # in12 += j12 + addl 212(%esp),%eax + # in13 += j13 + addl 216(%esp),%ecx + # in12 ^= *(uint32 *) (m + 48) + xorl 48(%esi),%eax + # in13 ^= *(uint32 *) (m + 52) + xorl 52(%esi),%ecx + # *(uint32 *) (out + 48) = in12 + movl %eax,48(%edi) + # *(uint32 *) (out + 52) = in13 + movl %ecx,52(%edi) + # in14 = x14 + movl 156(%esp),%eax + # in15 = x15 + movl 160(%esp),%ecx + # in14 += j14 + addl 220(%esp),%eax + # in15 += j15 + addl 224(%esp),%ecx + # in14 ^= *(uint32 *) (m + 56) + xorl 56(%esi),%eax + # in15 ^= *(uint32 *) (m + 60) + xorl 60(%esi),%ecx + # *(uint32 *) (out + 56) = in14 + movl %eax,56(%edi) + # *(uint32 *) (out + 60) = in15 + movl %ecx,60(%edi) + # bytes = bytes_backup + movl 76(%esp),%ebx + # in8 = j8 + movl 196(%esp),%eax + # in9 = j9 + movl 200(%esp),%ecx + # in8 += 1 + add $1,%eax + # in9 += 0 + carry + adc $0,%ecx + # j8 = in8 + movl %eax,196(%esp) + # j9 = in9 + movl %ecx,200(%esp) + # bytes - 64 + cmp $64,%ebx + # goto bytesatleast65 if unsigned> + ja ._bytesatleast65 + # goto bytesatleast64 if unsigned>= + jae ._bytesatleast64 + # m = out + mov %edi,%esi + # out = ctarget + movl 228(%esp),%edi + # i = bytes + mov %ebx,%ecx + # while (i) { *out++ = *m++; --i } + rep movsb +._bytesatleast64: + # x = x_backup + movl 64(%esp),%eax + # in8 = j8 + movl 196(%esp),%ecx + # in9 = j9 + movl 200(%esp),%edx + # *(uint32 *) (x + 32) = in8 + movl %ecx,32(%eax) + # *(uint32 *) (x + 36) = in9 + movl %edx,36(%eax) +._done: + # eax = eax_stack + movl 80(%esp),%eax + # ebx = ebx_stack + movl 84(%esp),%ebx + # esi = esi_stack + movl 88(%esp),%esi + # edi = edi_stack + movl 92(%esp),%edi + # ebp = ebp_stack + movl 96(%esp),%ebp + # leave + add %eax,%esp + ret +._bytesatleast65: + # bytes -= 64 + sub $64,%ebx + # out += 64 + add $64,%edi + # m += 64 + add $64,%esi + # goto bytesatleast1 + jmp ._bytesatleast1 +# enter ECRYPT_keysetup +.text +.p2align 5 +.globl ECRYPT_keysetup +ECRYPT_keysetup: + mov %esp,%eax + and $31,%eax + add $256,%eax + sub %eax,%esp + # eax_stack = eax + movl %eax,64(%esp) + # ebx_stack = ebx + movl %ebx,68(%esp) + # esi_stack = esi + movl %esi,72(%esp) + # edi_stack = edi + movl %edi,76(%esp) + # ebp_stack = ebp + movl %ebp,80(%esp) + # k = arg2 + movl 8(%esp,%eax),%ecx + # kbits = arg3 + movl 12(%esp,%eax),%edx + # x = arg1 + movl 4(%esp,%eax),%eax + # in1 = *(uint32 *) (k + 0) + movl 0(%ecx),%ebx + # in2 = *(uint32 *) (k + 4) + movl 4(%ecx),%esi + # in3 = *(uint32 *) (k + 8) + movl 8(%ecx),%edi + # in4 = *(uint32 *) (k + 12) + movl 12(%ecx),%ebp + # *(uint32 *) (x + 4) = in1 + movl %ebx,4(%eax) + # *(uint32 *) (x + 8) = in2 + movl %esi,8(%eax) + # *(uint32 *) (x + 12) = in3 + movl %edi,12(%eax) + # *(uint32 *) (x + 16) = in4 + movl %ebp,16(%eax) + # kbits - 256 + cmp $256,%edx + # goto kbits128 if unsigned< + jb ._kbits128 +._kbits256: + # in11 = *(uint32 *) (k + 16) + movl 16(%ecx),%edx + # in12 = *(uint32 *) (k + 20) + movl 20(%ecx),%ebx + # in13 = *(uint32 *) (k + 24) + movl 24(%ecx),%esi + # in14 = *(uint32 *) (k + 28) + movl 28(%ecx),%ecx + # *(uint32 *) (x + 44) = in11 + movl %edx,44(%eax) + # 
*(uint32 *) (x + 48) = in12 + movl %ebx,48(%eax) + # *(uint32 *) (x + 52) = in13 + movl %esi,52(%eax) + # *(uint32 *) (x + 56) = in14 + movl %ecx,56(%eax) + # in0 = 1634760805 + mov $1634760805,%ecx + # in5 = 857760878 + mov $857760878,%edx + # in10 = 2036477234 + mov $2036477234,%ebx + # in15 = 1797285236 + mov $1797285236,%esi + # *(uint32 *) (x + 0) = in0 + movl %ecx,0(%eax) + # *(uint32 *) (x + 20) = in5 + movl %edx,20(%eax) + # *(uint32 *) (x + 40) = in10 + movl %ebx,40(%eax) + # *(uint32 *) (x + 60) = in15 + movl %esi,60(%eax) + # goto keysetupdone + jmp ._keysetupdone +._kbits128: + # in11 = *(uint32 *) (k + 0) + movl 0(%ecx),%edx + # in12 = *(uint32 *) (k + 4) + movl 4(%ecx),%ebx + # in13 = *(uint32 *) (k + 8) + movl 8(%ecx),%esi + # in14 = *(uint32 *) (k + 12) + movl 12(%ecx),%ecx + # *(uint32 *) (x + 44) = in11 + movl %edx,44(%eax) + # *(uint32 *) (x + 48) = in12 + movl %ebx,48(%eax) + # *(uint32 *) (x + 52) = in13 + movl %esi,52(%eax) + # *(uint32 *) (x + 56) = in14 + movl %ecx,56(%eax) + # in0 = 1634760805 + mov $1634760805,%ecx + # in5 = 824206446 + mov $824206446,%edx + # in10 = 2036477238 + mov $2036477238,%ebx + # in15 = 1797285236 + mov $1797285236,%esi + # *(uint32 *) (x + 0) = in0 + movl %ecx,0(%eax) + # *(uint32 *) (x + 20) = in5 + movl %edx,20(%eax) + # *(uint32 *) (x + 40) = in10 + movl %ebx,40(%eax) + # *(uint32 *) (x + 60) = in15 + movl %esi,60(%eax) +._keysetupdone: + # eax = eax_stack + movl 64(%esp),%eax + # ebx = ebx_stack + movl 68(%esp),%ebx + # esi = esi_stack + movl 72(%esp),%esi + # edi = edi_stack + movl 76(%esp),%edi + # ebp = ebp_stack + movl 80(%esp),%ebp + # leave + add %eax,%esp + ret +# enter ECRYPT_ivsetup +.text +.p2align 5 +.globl ECRYPT_ivsetup +ECRYPT_ivsetup: + mov %esp,%eax + and $31,%eax + add $256,%eax + sub %eax,%esp + # eax_stack = eax + movl %eax,64(%esp) + # ebx_stack = ebx + movl %ebx,68(%esp) + # esi_stack = esi + movl %esi,72(%esp) + # edi_stack = edi + movl %edi,76(%esp) + # ebp_stack = ebp + movl %ebp,80(%esp) + # iv = arg2 + movl 8(%esp,%eax),%ecx + # x = arg1 + movl 4(%esp,%eax),%eax + # in6 = *(uint32 *) (iv + 0) + movl 0(%ecx),%edx + # in7 = *(uint32 *) (iv + 4) + movl 4(%ecx),%ecx + # in8 = 0 + mov $0,%ebx + # in9 = 0 + mov $0,%esi + # *(uint32 *) (x + 24) = in6 + movl %edx,24(%eax) + # *(uint32 *) (x + 28) = in7 + movl %ecx,28(%eax) + # *(uint32 *) (x + 32) = in8 + movl %ebx,32(%eax) + # *(uint32 *) (x + 36) = in9 + movl %esi,36(%eax) + # eax = eax_stack + movl 64(%esp),%eax + # ebx = ebx_stack + movl 68(%esp),%ebx + # esi = esi_stack + movl 72(%esp),%esi + # edi = edi_stack + movl 76(%esp),%edi + # ebp = ebp_stack + movl 80(%esp),%ebp + # leave + add %eax,%esp + ret diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c new file mode 100644 index 00000000000..3be443995ed --- /dev/null +++ b/arch/x86/crypto/salsa20_glue.c @@ -0,0 +1,127 @@ +/* + * Glue code for optimized assembly version of Salsa20. + * + * Copyright (c) 2007 Tan Swee Heng + * + * The assembly codes are public domain assembly codes written by Daniel. J. + * Bernstein . The codes are modified to include indentation + * and to remove extraneous comments and functions that are not needed. + * - i586 version, renamed as salsa20-i586-asm_32.S + * available from + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. 
+ * + */ + +#include +#include +#include + +#define SALSA20_IV_SIZE 8U +#define SALSA20_MIN_KEY_SIZE 16U +#define SALSA20_MAX_KEY_SIZE 32U + +// use the ECRYPT_* function names +#define salsa20_keysetup ECRYPT_keysetup +#define salsa20_ivsetup ECRYPT_ivsetup +#define salsa20_encrypt_bytes ECRYPT_encrypt_bytes + +struct salsa20_ctx +{ + u32 input[16]; +}; + +asmlinkage void salsa20_keysetup(struct salsa20_ctx *ctx, const u8 *k, + u32 keysize, u32 ivsize); +asmlinkage void salsa20_ivsetup(struct salsa20_ctx *ctx, const u8 *iv); +asmlinkage void salsa20_encrypt_bytes(struct salsa20_ctx *ctx, + const u8 *src, u8 *dst, u32 bytes); + +static int setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keysize) +{ + struct salsa20_ctx *ctx = crypto_tfm_ctx(tfm); + salsa20_keysetup(ctx, key, keysize*8, SALSA20_IV_SIZE*8); + return 0; +} + +static int encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes) +{ + struct blkcipher_walk walk; + struct crypto_blkcipher *tfm = desc->tfm; + struct salsa20_ctx *ctx = crypto_blkcipher_ctx(tfm); + int err; + + blkcipher_walk_init(&walk, dst, src, nbytes); + err = blkcipher_walk_virt_block(desc, &walk, 64); + + salsa20_ivsetup(ctx, walk.iv); + + if (likely(walk.nbytes == nbytes)) + { + salsa20_encrypt_bytes(ctx, walk.src.virt.addr, + walk.dst.virt.addr, nbytes); + return blkcipher_walk_done(desc, &walk, 0); + } + + while (walk.nbytes >= 64) { + salsa20_encrypt_bytes(ctx, walk.src.virt.addr, + walk.dst.virt.addr, + walk.nbytes - (walk.nbytes % 64)); + err = blkcipher_walk_done(desc, &walk, walk.nbytes % 64); + } + + if (walk.nbytes) { + salsa20_encrypt_bytes(ctx, walk.src.virt.addr, + walk.dst.virt.addr, walk.nbytes); + err = blkcipher_walk_done(desc, &walk, 0); + } + + return err; +} + +static struct crypto_alg alg = { + .cra_name = "salsa20", + .cra_driver_name = "salsa20-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, + .cra_type = &crypto_blkcipher_type, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct salsa20_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .blkcipher = { + .setkey = setkey, + .encrypt = encrypt, + .decrypt = encrypt, + .min_keysize = SALSA20_MIN_KEY_SIZE, + .max_keysize = SALSA20_MAX_KEY_SIZE, + .ivsize = SALSA20_IV_SIZE, + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); +MODULE_ALIAS("salsa20"); +MODULE_ALIAS("salsa20-asm"); -- cgit v1.2.3 From 9a7dafbba47384c330779c75a1546684efaa8c1a Mon Sep 17 00:00:00 2001 From: Tan Swee Heng Date: Tue, 18 Dec 2007 00:04:40 +0800 Subject: [CRYPTO] salsa20: Add x86-64 assembly version This is the x86-64 version of the Salsa20 stream cipher algorithm. The original assembly code came from . It has been reformatted for clarity. 
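
The add/rotate/xor sequences annotated in the assembly are Salsa20 quarter-rounds. As a reading aid only (this sketch is not part of the patch; the ROTL32 helper and the function name are illustrative), the "a = x12 + x0; a <<<= 7; x4 ^= a; ..." pattern in the register comments corresponds to:

	#include <stdint.h>

	#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

	/* One Salsa20 quarter-round, shown as it acts on the state
	 * words (x0, x4, x8, x12) in the first column of a round. */
	static void quarterround(uint32_t *y0, uint32_t *y1,
				 uint32_t *y2, uint32_t *y3)
	{
		*y1 ^= ROTL32(*y0 + *y3, 7);
		*y2 ^= ROTL32(*y1 + *y0, 9);
		*y3 ^= ROTL32(*y2 + *y1, 13);
		*y0 ^= ROTL32(*y3 + *y2, 18);
	}

Each pass of ._mainloop performs a column round followed by a row round, twice over (each round being four such quarter-rounds); the counter starts at 20 and drops by 4 per pass, giving Salsa20's full 20 rounds before the saved j0..j15 words are added back in and the result is XORed with the message. The constants stored by ECRYPT_keysetup (1634760805, 857760878, 2036477234, 1797285236) are the little-endian words of the string "expand 32-byte k"; the 128-bit key path substitutes the words of "expand 16-byte k".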
Signed-off-by: Tan Swee Heng Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 2 + arch/x86/crypto/salsa20-x86_64-asm_64.S | 920 ++++++++++++++++++++++++++++++++ arch/x86/crypto/salsa20_glue.c | 2 + 3 files changed, 924 insertions(+) create mode 100644 arch/x86/crypto/salsa20-x86_64-asm_64.S (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 25cc8441046..09200e12f14 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o +obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_32.o @@ -15,3 +16,4 @@ salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o +salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S new file mode 100644 index 00000000000..6214a9b0970 --- /dev/null +++ b/arch/x86/crypto/salsa20-x86_64-asm_64.S @@ -0,0 +1,920 @@ +# enter ECRYPT_encrypt_bytes +.text +.p2align 5 +.globl ECRYPT_encrypt_bytes +ECRYPT_encrypt_bytes: + mov %rsp,%r11 + and $31,%r11 + add $256,%r11 + sub %r11,%rsp + # x = arg1 + mov %rdi,%r8 + # m = arg2 + mov %rsi,%rsi + # out = arg3 + mov %rdx,%rdi + # bytes = arg4 + mov %rcx,%rdx + # unsigned>? bytes - 0 + cmp $0,%rdx + # comment:fp stack unchanged by jump + # goto done if !unsigned> + jbe ._done + # comment:fp stack unchanged by fallthrough +# start: +._start: + # r11_stack = r11 + movq %r11,0(%rsp) + # r12_stack = r12 + movq %r12,8(%rsp) + # r13_stack = r13 + movq %r13,16(%rsp) + # r14_stack = r14 + movq %r14,24(%rsp) + # r15_stack = r15 + movq %r15,32(%rsp) + # rbx_stack = rbx + movq %rbx,40(%rsp) + # rbp_stack = rbp + movq %rbp,48(%rsp) + # in0 = *(uint64 *) (x + 0) + movq 0(%r8),%rcx + # in2 = *(uint64 *) (x + 8) + movq 8(%r8),%r9 + # in4 = *(uint64 *) (x + 16) + movq 16(%r8),%rax + # in6 = *(uint64 *) (x + 24) + movq 24(%r8),%r10 + # in8 = *(uint64 *) (x + 32) + movq 32(%r8),%r11 + # in10 = *(uint64 *) (x + 40) + movq 40(%r8),%r12 + # in12 = *(uint64 *) (x + 48) + movq 48(%r8),%r13 + # in14 = *(uint64 *) (x + 56) + movq 56(%r8),%r14 + # j0 = in0 + movq %rcx,56(%rsp) + # j2 = in2 + movq %r9,64(%rsp) + # j4 = in4 + movq %rax,72(%rsp) + # j6 = in6 + movq %r10,80(%rsp) + # j8 = in8 + movq %r11,88(%rsp) + # j10 = in10 + movq %r12,96(%rsp) + # j12 = in12 + movq %r13,104(%rsp) + # j14 = in14 + movq %r14,112(%rsp) + # x_backup = x + movq %r8,120(%rsp) +# bytesatleast1: +._bytesatleast1: + # unsigned>= 32 + shr $32,%rdi + # x3 = j2 + movq 64(%rsp),%rsi + # x2 = x3 + mov %rsi,%rcx + # (uint64) x3 >>= 32 + shr $32,%rsi + # x5 = j4 + movq 72(%rsp),%r8 + # x4 = x5 + mov %r8,%r9 + # (uint64) x5 >>= 32 + shr $32,%r8 + # x5_stack = x5 + movq %r8,160(%rsp) + # x7 = j6 + movq 80(%rsp),%r8 + # x6 = x7 + mov %r8,%rax + # (uint64) x7 >>= 32 + shr $32,%r8 + # x9 = j8 + movq 88(%rsp),%r10 + # x8 = x9 + mov %r10,%r11 + # (uint64) x9 >>= 32 + shr $32,%r10 + # x11 = j10 + movq 96(%rsp),%r12 + # x10 = x11 + mov %r12,%r13 + # x10_stack = x10 + movq %r13,168(%rsp) + # (uint64) x11 >>= 32 + shr $32,%r12 + # x13 = j12 + movq 104(%rsp),%r13 + # x12 = x13 + mov %r13,%r14 + # (uint64) x13 >>= 32 + shr $32,%r13 + # x15 = j14 + movq 112(%rsp),%r15 + # x14 = x15 + mov 
%r15,%rbx + # (uint64) x15 >>= 32 + shr $32,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # i = 20 + mov $20,%r15 +# mainloop: +._mainloop: + # i_backup = i + movq %r15,184(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x12 + x0 + lea (%r14,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x4 ^= a + xor %rbp,%r9 + # b = x1 + x5 + lea (%rdi,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x9 ^= b + xor %rbp,%r10 + # a = x0 + x4 + lea (%rdx,%r9),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x8 ^= a + xor %rbp,%r11 + # b = x5 + x9 + lea (%r15,%r10),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x13 ^= b + xor %rbp,%r13 + # a = x4 + x8 + lea (%r9,%r11),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x12 ^= a + xor %rbp,%r14 + # b = x9 + x13 + lea (%r10,%r13),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x1 ^= b + xor %rbp,%rdi + # a = x8 + x12 + lea (%r11,%r14),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x13 + x1 + lea (%r13,%rdi),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x6 + x10 + lea (%rax,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x14 ^= c + xor %r15,%rbx + # c = x10 + x14 + lea (%rbp,%rbx),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x2 ^= c + xor %r15,%rcx + # c = x14 + x2 + lea (%rbx,%rcx),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x6 ^= c + xor %r15,%rax + # c = x2 + x6 + lea (%rcx,%rax),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x11 + x15 + lea (%r12,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x3 ^= d + xor %rbp,%rsi + # d = x15 + x3 + lea (%r15,%rsi),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x7 ^= d + xor %rbp,%r8 + # d = x3 + x7 + lea (%rsi,%r8),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x11 ^= d + xor %rbp,%r12 + # d = x7 + x11 + lea (%r8,%r12),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x3 + x0 + lea (%rsi,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x1 ^= a + xor %rbp,%rdi + # b = x4 + x5 + lea (%r9,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x6 ^= b + xor %rbp,%rax + # a = x0 + x1 + lea (%rdx,%rdi),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x2 ^= a + xor %rbp,%rcx + # b = x5 + x6 + lea (%r15,%rax),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x7 ^= b + xor %rbp,%r8 + # a = x1 + x2 + lea (%rdi,%rcx),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x3 ^= a + xor %rbp,%rsi + # b = x6 + x7 + lea (%rax,%r8),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x4 ^= b + xor %rbp,%r9 + # a = x2 + x3 + lea (%rcx,%rsi),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x7 + x4 + lea (%r8,%r9),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x9 + x10 + lea (%r10,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x11 ^= c + xor %r15,%r12 + # c = x10 + x11 + lea (%rbp,%r12),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x8 ^= c + xor %r15,%r11 + # c = x11 + x8 + lea (%r12,%r11),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x9 ^= c + xor %r15,%r10 + # c = x8 + x9 + lea (%r11,%r10),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq 
%rbp,168(%rsp) + # d = x14 + x15 + lea (%rbx,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x12 ^= d + xor %rbp,%r14 + # d = x15 + x12 + lea (%r15,%r14),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x13 ^= d + xor %rbp,%r13 + # d = x12 + x13 + lea (%r14,%r13),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x14 ^= d + xor %rbp,%rbx + # d = x13 + x14 + lea (%r13,%rbx),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x12 + x0 + lea (%r14,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x4 ^= a + xor %rbp,%r9 + # b = x1 + x5 + lea (%rdi,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x9 ^= b + xor %rbp,%r10 + # a = x0 + x4 + lea (%rdx,%r9),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x8 ^= a + xor %rbp,%r11 + # b = x5 + x9 + lea (%r15,%r10),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x13 ^= b + xor %rbp,%r13 + # a = x4 + x8 + lea (%r9,%r11),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x12 ^= a + xor %rbp,%r14 + # b = x9 + x13 + lea (%r10,%r13),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x1 ^= b + xor %rbp,%rdi + # a = x8 + x12 + lea (%r11,%r14),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x13 + x1 + lea (%r13,%rdi),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x6 + x10 + lea (%rax,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x14 ^= c + xor %r15,%rbx + # c = x10 + x14 + lea (%rbp,%rbx),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x2 ^= c + xor %r15,%rcx + # c = x14 + x2 + lea (%rbx,%rcx),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x6 ^= c + xor %r15,%rax + # c = x2 + x6 + lea (%rcx,%rax),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x11 + x15 + lea (%r12,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x3 ^= d + xor %rbp,%rsi + # d = x15 + x3 + lea (%r15,%rsi),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x7 ^= d + xor %rbp,%r8 + # d = x3 + x7 + lea (%rsi,%r8),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x11 ^= d + xor %rbp,%r12 + # d = x7 + x11 + lea (%r8,%r12),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # x5 = x5_stack + movq 160(%rsp),%r15 + # a = x3 + x0 + lea (%rsi,%rdx),%rbp + # (uint32) a <<<= 7 + rol $7,%ebp + # x1 ^= a + xor %rbp,%rdi + # b = x4 + x5 + lea (%r9,%r15),%rbp + # (uint32) b <<<= 7 + rol $7,%ebp + # x6 ^= b + xor %rbp,%rax + # a = x0 + x1 + lea (%rdx,%rdi),%rbp + # (uint32) a <<<= 9 + rol $9,%ebp + # x2 ^= a + xor %rbp,%rcx + # b = x5 + x6 + lea (%r15,%rax),%rbp + # (uint32) b <<<= 9 + rol $9,%ebp + # x7 ^= b + xor %rbp,%r8 + # a = x1 + x2 + lea (%rdi,%rcx),%rbp + # (uint32) a <<<= 13 + rol $13,%ebp + # x3 ^= a + xor %rbp,%rsi + # b = x6 + x7 + lea (%rax,%r8),%rbp + # (uint32) b <<<= 13 + rol $13,%ebp + # x4 ^= b + xor %rbp,%r9 + # a = x2 + x3 + lea (%rcx,%rsi),%rbp + # (uint32) a <<<= 18 + rol $18,%ebp + # x0 ^= a + xor %rbp,%rdx + # b = x7 + x4 + lea (%r8,%r9),%rbp + # (uint32) b <<<= 18 + rol $18,%ebp + # x5 ^= b + xor %rbp,%r15 + # x10 = x10_stack + movq 168(%rsp),%rbp + # x5_stack = x5 + movq %r15,160(%rsp) + # c = x9 + x10 + lea (%r10,%rbp),%r15 + # (uint32) c <<<= 7 + rol $7,%r15d + # x11 ^= c + xor %r15,%r12 + # c = x10 + x11 + lea (%rbp,%r12),%r15 + # (uint32) c <<<= 9 + rol $9,%r15d + # x8 ^= 
c + xor %r15,%r11 + # c = x11 + x8 + lea (%r12,%r11),%r15 + # (uint32) c <<<= 13 + rol $13,%r15d + # x9 ^= c + xor %r15,%r10 + # c = x8 + x9 + lea (%r11,%r10),%r15 + # (uint32) c <<<= 18 + rol $18,%r15d + # x10 ^= c + xor %r15,%rbp + # x15 = x15_stack + movq 176(%rsp),%r15 + # x10_stack = x10 + movq %rbp,168(%rsp) + # d = x14 + x15 + lea (%rbx,%r15),%rbp + # (uint32) d <<<= 7 + rol $7,%ebp + # x12 ^= d + xor %rbp,%r14 + # d = x15 + x12 + lea (%r15,%r14),%rbp + # (uint32) d <<<= 9 + rol $9,%ebp + # x13 ^= d + xor %rbp,%r13 + # d = x12 + x13 + lea (%r14,%r13),%rbp + # (uint32) d <<<= 13 + rol $13,%ebp + # x14 ^= d + xor %rbp,%rbx + # d = x13 + x14 + lea (%r13,%rbx),%rbp + # (uint32) d <<<= 18 + rol $18,%ebp + # x15 ^= d + xor %rbp,%r15 + # x15_stack = x15 + movq %r15,176(%rsp) + # i = i_backup + movq 184(%rsp),%r15 + # unsigned>? i -= 4 + sub $4,%r15 + # comment:fp stack unchanged by jump + # goto mainloop if unsigned> + ja ._mainloop + # (uint32) x2 += j2 + addl 64(%rsp),%ecx + # x3 <<= 32 + shl $32,%rsi + # x3 += j2 + addq 64(%rsp),%rsi + # (uint64) x3 >>= 32 + shr $32,%rsi + # x3 <<= 32 + shl $32,%rsi + # x2 += x3 + add %rsi,%rcx + # (uint32) x6 += j6 + addl 80(%rsp),%eax + # x7 <<= 32 + shl $32,%r8 + # x7 += j6 + addq 80(%rsp),%r8 + # (uint64) x7 >>= 32 + shr $32,%r8 + # x7 <<= 32 + shl $32,%r8 + # x6 += x7 + add %r8,%rax + # (uint32) x8 += j8 + addl 88(%rsp),%r11d + # x9 <<= 32 + shl $32,%r10 + # x9 += j8 + addq 88(%rsp),%r10 + # (uint64) x9 >>= 32 + shr $32,%r10 + # x9 <<= 32 + shl $32,%r10 + # x8 += x9 + add %r10,%r11 + # (uint32) x12 += j12 + addl 104(%rsp),%r14d + # x13 <<= 32 + shl $32,%r13 + # x13 += j12 + addq 104(%rsp),%r13 + # (uint64) x13 >>= 32 + shr $32,%r13 + # x13 <<= 32 + shl $32,%r13 + # x12 += x13 + add %r13,%r14 + # (uint32) x0 += j0 + addl 56(%rsp),%edx + # x1 <<= 32 + shl $32,%rdi + # x1 += j0 + addq 56(%rsp),%rdi + # (uint64) x1 >>= 32 + shr $32,%rdi + # x1 <<= 32 + shl $32,%rdi + # x0 += x1 + add %rdi,%rdx + # x5 = x5_stack + movq 160(%rsp),%rdi + # (uint32) x4 += j4 + addl 72(%rsp),%r9d + # x5 <<= 32 + shl $32,%rdi + # x5 += j4 + addq 72(%rsp),%rdi + # (uint64) x5 >>= 32 + shr $32,%rdi + # x5 <<= 32 + shl $32,%rdi + # x4 += x5 + add %rdi,%r9 + # x10 = x10_stack + movq 168(%rsp),%r8 + # (uint32) x10 += j10 + addl 96(%rsp),%r8d + # x11 <<= 32 + shl $32,%r12 + # x11 += j10 + addq 96(%rsp),%r12 + # (uint64) x11 >>= 32 + shr $32,%r12 + # x11 <<= 32 + shl $32,%r12 + # x10 += x11 + add %r12,%r8 + # x15 = x15_stack + movq 176(%rsp),%rdi + # (uint32) x14 += j14 + addl 112(%rsp),%ebx + # x15 <<= 32 + shl $32,%rdi + # x15 += j14 + addq 112(%rsp),%rdi + # (uint64) x15 >>= 32 + shr $32,%rdi + # x15 <<= 32 + shl $32,%rdi + # x14 += x15 + add %rdi,%rbx + # out = out_backup + movq 136(%rsp),%rdi + # m = m_backup + movq 144(%rsp),%rsi + # x0 ^= *(uint64 *) (m + 0) + xorq 0(%rsi),%rdx + # *(uint64 *) (out + 0) = x0 + movq %rdx,0(%rdi) + # x2 ^= *(uint64 *) (m + 8) + xorq 8(%rsi),%rcx + # *(uint64 *) (out + 8) = x2 + movq %rcx,8(%rdi) + # x4 ^= *(uint64 *) (m + 16) + xorq 16(%rsi),%r9 + # *(uint64 *) (out + 16) = x4 + movq %r9,16(%rdi) + # x6 ^= *(uint64 *) (m + 24) + xorq 24(%rsi),%rax + # *(uint64 *) (out + 24) = x6 + movq %rax,24(%rdi) + # x8 ^= *(uint64 *) (m + 32) + xorq 32(%rsi),%r11 + # *(uint64 *) (out + 32) = x8 + movq %r11,32(%rdi) + # x10 ^= *(uint64 *) (m + 40) + xorq 40(%rsi),%r8 + # *(uint64 *) (out + 40) = x10 + movq %r8,40(%rdi) + # x12 ^= *(uint64 *) (m + 48) + xorq 48(%rsi),%r14 + # *(uint64 *) (out + 48) = x12 + movq %r14,48(%rdi) + # x14 ^= *(uint64 *) (m + 56) + 
xorq 56(%rsi),%rbx + # *(uint64 *) (out + 56) = x14 + movq %rbx,56(%rdi) + # bytes = bytes_backup + movq 152(%rsp),%rdx + # in8 = j8 + movq 88(%rsp),%rcx + # in8 += 1 + add $1,%rcx + # j8 = in8 + movq %rcx,88(%rsp) + # unsigned>? unsigned + ja ._bytesatleast65 + # comment:fp stack unchanged by jump + # goto bytesatleast64 if !unsigned< + jae ._bytesatleast64 + # m = out + mov %rdi,%rsi + # out = ctarget + movq 128(%rsp),%rdi + # i = bytes + mov %rdx,%rcx + # while (i) { *out++ = *m++; --i } + rep movsb + # comment:fp stack unchanged by fallthrough +# bytesatleast64: +._bytesatleast64: + # x = x_backup + movq 120(%rsp),%rdi + # in8 = j8 + movq 88(%rsp),%rsi + # *(uint64 *) (x + 32) = in8 + movq %rsi,32(%rdi) + # r11 = r11_stack + movq 0(%rsp),%r11 + # r12 = r12_stack + movq 8(%rsp),%r12 + # r13 = r13_stack + movq 16(%rsp),%r13 + # r14 = r14_stack + movq 24(%rsp),%r14 + # r15 = r15_stack + movq 32(%rsp),%r15 + # rbx = rbx_stack + movq 40(%rsp),%rbx + # rbp = rbp_stack + movq 48(%rsp),%rbp + # comment:fp stack unchanged by fallthrough +# done: +._done: + # leave + add %r11,%rsp + mov %rdi,%rax + mov %rsi,%rdx + ret +# bytesatleast65: +._bytesatleast65: + # bytes -= 64 + sub $64,%rdx + # out += 64 + add $64,%rdi + # m += 64 + add $64,%rsi + # comment:fp stack unchanged by jump + # goto bytesatleast1 + jmp ._bytesatleast1 +# enter ECRYPT_keysetup +.text +.p2align 5 +.globl ECRYPT_keysetup +ECRYPT_keysetup: + mov %rsp,%r11 + and $31,%r11 + add $256,%r11 + sub %r11,%rsp + # k = arg2 + mov %rsi,%rsi + # kbits = arg3 + mov %rdx,%rdx + # x = arg1 + mov %rdi,%rdi + # in0 = *(uint64 *) (k + 0) + movq 0(%rsi),%r8 + # in2 = *(uint64 *) (k + 8) + movq 8(%rsi),%r9 + # *(uint64 *) (x + 4) = in0 + movq %r8,4(%rdi) + # *(uint64 *) (x + 12) = in2 + movq %r9,12(%rdi) + # unsigned + * - x86-64 version, renamed as salsa20-x86_64-asm_64.S + * available from * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free -- cgit v1.2.3 From 15e7b4452b72ae890f2fcb027b4c4fa63a1c9a7a Mon Sep 17 00:00:00 2001 From: Sebastian Siewior Date: Mon, 14 Jan 2008 17:07:57 +1100 Subject: [CRYPTO] twofish: Merge common glue code There is almost no difference between 32 & 64 bit glue code. 
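
As the diff below shows, the two deleted files were identical apart from their header comments, one extra #include in the 64-bit version, and the strings identifying each driver. Schematically (an illustration of the unification, not a hunk from this patch):

	-	.cra_driver_name  =  "twofish-i586",	/* twofish_32.c   */
	-	.cra_driver_name  =  "twofish-x86_64",	/* twofish_64.c   */
	+	.cra_driver_name  =  "twofish-asm",	/* twofish_glue.c */

The merged file also adds MODULE_ALIAS("twofish-asm") to match the new driver name, while the "twofish" cra_name and module alias are unchanged, so existing users are unaffected.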
Signed-off-by: Sebastian Siewior Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 4 +- arch/x86/crypto/twofish_32.c | 97 ------------------------------------------ arch/x86/crypto/twofish_64.c | 97 ------------------------------------------ arch/x86/crypto/twofish_glue.c | 97 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 196 deletions(-) delete mode 100644 arch/x86/crypto/twofish_32.c delete mode 100644 arch/x86/crypto/twofish_64.c create mode 100644 arch/x86/crypto/twofish_glue.c (limited to 'arch') diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 09200e12f14..3874c2de540 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -11,9 +11,9 @@ obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o aes-i586-y := aes-i586-asm_32.o aes_glue.o -twofish-i586-y := twofish-i586-asm_32.o twofish_32.o +twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o -twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_64.o +twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o diff --git a/arch/x86/crypto/twofish_32.c b/arch/x86/crypto/twofish_32.c deleted file mode 100644 index e3004dfe9c7..00000000000 --- a/arch/x86/crypto/twofish_32.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized 586 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
- */ - -#include -#include -#include -#include -#include - - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-i586", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, i586 asm optimized"); -MODULE_ALIAS("twofish"); diff --git a/arch/x86/crypto/twofish_64.c b/arch/x86/crypto/twofish_64.c deleted file mode 100644 index 182d91d5cfb..00000000000 --- a/arch/x86/crypto/twofish_64.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Glue Code for optimized x86_64 assembler version of TWOFISH - * - * Originally Twofish for GPG - * By Matthew Skala , July 26, 1998 - * 256-bit key length added March 20, 1999 - * Some modifications to reduce the text size by Werner Koch, April, 1998 - * Ported to the kerneli patch by Marc Mutz - * Ported to CryptoAPI by Colin Slater - * - * The original author has disclaimed all copyright interest in this - * code and thus put it in the public domain. The subsequent authors - * have put this under the GNU General Public License. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 - * USA - * - * This code is a "clean room" implementation, written from the paper - * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, - * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available - * through http://www.counterpane.com/twofish.html - * - * For background information on multiplication in finite fields, used for - * the matrix operations in the key schedule, see the book _Contemporary - * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the - * Third Edition. 
- */ - -#include -#include -#include -#include -#include -#include - -asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); -asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); - -static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_enc_blk(tfm, dst, src); -} - -static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - twofish_dec_blk(tfm, dst, src); -} - -static struct crypto_alg alg = { - .cra_name = "twofish", - .cra_driver_name = "twofish-x86_64", - .cra_priority = 200, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER, - .cra_blocksize = TF_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct twofish_ctx), - .cra_alignmask = 3, - .cra_module = THIS_MODULE, - .cra_list = LIST_HEAD_INIT(alg.cra_list), - .cra_u = { - .cipher = { - .cia_min_keysize = TF_MIN_KEY_SIZE, - .cia_max_keysize = TF_MAX_KEY_SIZE, - .cia_setkey = twofish_setkey, - .cia_encrypt = twofish_encrypt, - .cia_decrypt = twofish_decrypt - } - } -}; - -static int __init init(void) -{ - return crypto_register_alg(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_alg(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Twofish Cipher Algorithm, x86_64 asm optimized"); -MODULE_ALIAS("twofish"); diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c new file mode 100644 index 00000000000..cefaf8b9aa1 --- /dev/null +++ b/arch/x86/crypto/twofish_glue.c @@ -0,0 +1,97 @@ +/* + * Glue Code for assembler optimized version of TWOFISH + * + * Originally Twofish for GPG + * By Matthew Skala , July 26, 1998 + * 256-bit key length added March 20, 1999 + * Some modifications to reduce the text size by Werner Koch, April, 1998 + * Ported to the kerneli patch by Marc Mutz + * Ported to CryptoAPI by Colin Slater + * + * The original author has disclaimed all copyright interest in this + * code and thus put it in the public domain. The subsequent authors + * have put this under the GNU General Public License. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * This code is a "clean room" implementation, written from the paper + * _Twofish: A 128-Bit Block Cipher_ by Bruce Schneier, John Kelsey, + * Doug Whiting, David Wagner, Chris Hall, and Niels Ferguson, available + * through http://www.counterpane.com/twofish.html + * + * For background information on multiplication in finite fields, used for + * the matrix operations in the key schedule, see the book _Contemporary + * Abstract Algebra_ by Joseph A. Gallian, especially chapter 22 in the + * Third Edition. 
+ */ + +#include +#include +#include +#include +#include + +asmlinkage void twofish_enc_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +asmlinkage void twofish_dec_blk(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + +static void twofish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_enc_blk(tfm, dst, src); +} + +static void twofish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) +{ + twofish_dec_blk(tfm, dst, src); +} + +static struct crypto_alg alg = { + .cra_name = "twofish", + .cra_driver_name = "twofish-asm", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = TF_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct twofish_ctx), + .cra_alignmask = 3, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(alg.cra_list), + .cra_u = { + .cipher = { + .cia_min_keysize = TF_MIN_KEY_SIZE, + .cia_max_keysize = TF_MAX_KEY_SIZE, + .cia_setkey = twofish_setkey, + .cia_encrypt = twofish_encrypt, + .cia_decrypt = twofish_decrypt + } + } +}; + +static int __init init(void) +{ + return crypto_register_alg(&alg); +} + +static void __exit fini(void) +{ + crypto_unregister_alg(&alg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION ("Twofish Cipher Algorithm, asm optimized"); +MODULE_ALIAS("twofish"); +MODULE_ALIAS("twofish-asm"); -- cgit v1.2.3