/* Source: arch/x86_64/crypto/aes-x86_64-asm.S (blob 483cbb23ab8d0da13c7f6c8262d95fc6206ef7b3) */
/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
 *
 * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
 *
 * License:
 * This code can be distributed under the terms of the GNU General Public
 * License (GPL) Version 2 provided that the above header down to and
 * including this sentence is retained in full.
 */

/*
 * Lookup tables defined outside this file.  The round() macro below reads
 * each of them as four consecutive 1024-byte sub-tables (at +0, +1024,
 * +2048 and +3072), every sub-table being indexed by a byte value scaled
 * by 4, i.e. 256 entries of 32 bits.
 *
 *   aes_ft_tab - passed to every encrypt_round() in aes_encrypt
 *   aes_it_tab - passed to every decrypt_round() in aes_decrypt
 *   aes_fl_tab - passed to encrypt_final() (last step of aes_encrypt)
 *   aes_il_tab - passed to decrypt_final() (last step of aes_decrypt)
 */
.extern aes_ft_tab
.extern aes_it_tab
.extern aes_fl_tab
.extern aes_il_tab

.text

/*
 * Register aliases.  The macros below receive the bare alias (R1, R2, ...)
 * and paste a suffix onto it with "##" to select a narrower view:
 *
 *   (none)  64-bit register
 *   E       32-bit view
 *   X       16-bit view
 *   H       bits 8-15
 *   L       bits 0-7
 *
 * R1-R4 (%rax, %rbx, %rcx, %rdx) have all five views defined; round()
 * pastes X, H and L onto its r1-r4 parameters, so only these four aliases
 * can be passed in those positions.  R5-R7 provide 64- and 32-bit views
 * only, R8-R11 the 64-bit view only.
 */
#define R1	%rax
#define R1E	%eax
#define R1X	%ax
#define R1H	%ah
#define R1L	%al
#define R2	%rbx
#define R2E	%ebx
#define R2X	%bx
#define R2H	%bh
#define R2L	%bl
#define R3	%rcx
#define R3E	%ecx
#define R3X	%cx
#define R3H	%ch
#define R3L	%cl
#define R4	%rdx
#define R4E	%edx
#define R4X	%dx
#define R4H	%dh
#define R4L	%dl
#define R5	%rsi
#define R5E	%esi
#define R6	%rdi
#define R6E	%edi
#define R7	%rbp
#define R7E	%ebp
#define R8	%r8
#define R9	%r9
#define R10	%r10
#define R11	%r11

/*
 * prologue - define the global symbol FUNC and emit the common entry code.
 *
 * FUNC        symbol to define; marked .global, @function, aligned to 8
 * BASE        constant added when forming the key pointer r9 = r8 + BASE + 52
 * B128, B192  branch targets, see the end of the sequence below
 * r1..r11     64-bit register names; "## E" selects their 32-bit views, so
 *             r1, r5, r6, r7 and r10 must be aliases that have an E variant
 *
 * Emitted sequence:
 *   - copy r1 to r2, r3 to r4 and r10 to r11 (64-bit moves); entry() uses
 *     these to park %rbx, %rbp and %rsi in %r8, %r9 and %r11
 *   - load the four 32-bit words at 0/4/8/12(r7) into r5, r1, r6, r7; r7 is
 *     loaded last because it is also the source pointer
 *   - load the 32-bit word at (r8) into r10
 *   - XOR the four words at r8+BASE+4 .. r8+BASE+16 (-48..-36 relative to
 *     r9) into r5, r1, r6, r7
 *   - compare r10 with 24: below -> jump to B128 with r9 unchanged;
 *     equal -> jump to B192 with r9 advanced by 32; otherwise fall through
 *     with r9 advanced by 64.  leaq is used for the advance because it
 *     leaves the flags of cmpl intact for the following je.
 *
 * NOTE(review): the word at (r8) is presumably the AES key length in bytes
 * (16/24/32) - confirm against the C code that fills in the context.
 */
#define prologue(FUNC,BASE,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
	.global	FUNC;			\
	.type	FUNC,@function;		\
	.align	8;			\
FUNC:	movq	r1,r2;			\
	movq	r3,r4;			\
	leaq	BASE+52(r8),r9;		\
	movq	r10,r11;		\
	movl	(r7),r5 ## E;		\
	movl	4(r7),r1 ## E;		\
	movl	8(r7),r6 ## E;		\
	movl	12(r7),r7 ## E;		\
	movl	(r8),r10 ## E;		\
	xorl	-48(r9),r5 ## E;	\
	xorl	-44(r9),r1 ## E;	\
	xorl	-40(r9),r6 ## E;	\
	xorl	-36(r9),r7 ## E;	\
	cmpl	$24,r10 ## E;		\
	jb	B128;			\
	leaq	32(r9),r9;		\
	je	B192;			\
	leaq	32(r9),r9;

/*
 * epilogue - copy two registers back, store four 32-bit words and return.
 *
 * r1 -> r2    64-bit register copy
 * r3 -> r4    64-bit register copy
 * r5..r8      registers whose 32-bit views ("## E") are stored, in this
 *             order, at offsets 0, 4, 8 and 12 from r9
 * r9          destination pointer for the 16 bytes written
 *
 * The sequence ends with ret, so nothing may follow an expansion of this
 * macro on the same code path.
 */
#define epilogue(r1,r2,r3,r4,r5,r6,r7,r8,r9) \
	movq	r1,r2;			\
	movq	r3,r4;			\
	movl	r5 ## E,(r9);		\
	movl	r6 ## E,4(r9);		\
	movl	r7 ## E,8(r9);		\
	movl	r8 ## E,12(r9);		\
	ret;

/*
 * round - one table-driven transformation of four 32-bit words.
 *
 * TAB          base of a lookup table read as four 1024-byte sub-tables
 *              T0 = TAB, T1 = TAB+1024, T2 = TAB+2048, T3 = TAB+3072, each
 *              indexed by a byte value scaled by 4
 * OFFSET       displacement from r8 of the four 32-bit words XORed in
 * r1,r2,r3,r4  input words; must be aliases with X, H and L views
 * r5,r6,r7     scratch; r5 and r6 also receive results
 * r8           base pointer for the OFFSET accesses
 * ra,rb,rc,rd  registers that get the words at OFFSET, OFFSET+4, OFFSET+8
 *              and OFFSET+12(r8) XORed in; every user in this file passes
 *              a permutation of the result registers r5, r6, r3, r4 here
 *
 * Writing w0..w3 for the values of r1..r4 on entry and w.bN for byte N of
 * a word (b0 = least significant), the table part of the result is:
 *
 *   r5 = T0[w0.b0] ^ T1[w1.b1] ^ T2[w2.b2] ^ T3[w3.b3]
 *   r6 = T0[w1.b0] ^ T1[w2.b1] ^ T2[w3.b2] ^ T3[w0.b3]
 *   r3 = T0[w2.b0] ^ T1[w3.b1] ^ T2[w0.b2] ^ T3[w1.b3]
 *   r4 = T0[w3.b0] ^ T1[w0.b1] ^ T2[w1.b2] ^ T3[w2.b3]
 *
 * On exit r1, r2 and r7 hold leftovers of the byte extraction and the
 * flags are modified.
 *
 * How the bytes are reached: only bits 0-15 of a register can be addressed
 * as H/L bytes.  Once the low half of r2 (resp. r1) has been consumed, the
 * low 16 bits of r4 (resp. r3) are copied over it with movw and the
 * register is rotated by 16, so one register then carries the still unused
 * upper half of one input word and the lower half of another; the shrl $16
 * steps bring the remaining halves into H/L range.  The statement order is
 * therefore significant and must not be changed casually.
 */
#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
	movzbl	r2 ## H,r5 ## E;	\
	movzbl	r2 ## L,r6 ## E;	\
	movl	TAB+1024(,r5,4),r5 ## E;\
	movw	r4 ## X,r2 ## X;	\
	movl	TAB(,r6,4),r6 ## E;	\
	roll	$16,r2 ## E;		\
	shrl	$16,r4 ## E;		\
	movzbl	r4 ## H,r7 ## E;	\
	movzbl	r4 ## L,r4 ## E;	\
	xorl	OFFSET(r8),ra ## E;	\
	xorl	OFFSET+4(r8),rb ## E;	\
	xorl	TAB+3072(,r7,4),r5 ## E;\
	xorl	TAB+2048(,r4,4),r6 ## E;\
	movzbl	r1 ## L,r7 ## E;	\
	movzbl	r1 ## H,r4 ## E;	\
	movl	TAB+1024(,r4,4),r4 ## E;\
	movw	r3 ## X,r1 ## X;	\
	roll	$16,r1 ## E;		\
	shrl	$16,r3 ## E;		\
	xorl	TAB(,r7,4),r5 ## E;	\
	movzbl	r3 ## H,r7 ## E;	\
	movzbl	r3 ## L,r3 ## E;	\
	xorl	TAB+3072(,r7,4),r4 ## E;\
	xorl	TAB+2048(,r3,4),r5 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r3 ## E;	\
	shrl	$16,r1 ## E;		\
	xorl	TAB+3072(,r7,4),r6 ## E;\
	movl	TAB+2048(,r3,4),r3 ## E;\
	movzbl	r1 ## H,r7 ## E;	\
	movzbl	r1 ## L,r1 ## E;	\
	xorl	TAB+1024(,r7,4),r6 ## E;\
	xorl	TAB(,r1,4),r3 ## E;	\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r7 ## E;	\
	shrl	$16,r2 ## E;		\
	xorl	TAB+3072(,r1,4),r3 ## E;\
	xorl	TAB+2048(,r7,4),r4 ## E;\
	movzbl	r2 ## H,r1 ## E;	\
	movzbl	r2 ## L,r2 ## E;	\
	xorl	OFFSET+8(r8),rc ## E;	\
	xorl	OFFSET+12(r8),rd ## E;	\
	xorl	TAB+1024(,r1,4),r3 ## E;\
	xorl	TAB(,r2,4),r4 ## E;

/*
 * move_regs - 32-bit copies r3 -> r1 and r4 -> r2.
 *
 * Used after round() to bring the two result words that round() leaves in
 * its scratch registers back into the registers the next round() reads.
 */
#define move_regs(r1,r2,r3,r4) \
	movl	r3 ## E,r1 ## E;	\
	movl	r4 ## E,r2 ## E;

/*
 * entry - prologue() with the register assignment used by this file.
 *
 * After expansion of the aliases the emitted code
 *   - parks %rbx in %r8, %rbp in %r9 and %rsi in %r11
 *   - sets %r10 = %rdi + BASE + 52, later advanced by 0, 32 or 64
 *   - loads the 16 bytes at (%rdx) into %eax, %ebx, %ecx, %edx (R1..R4) and
 *     XORs them with the four words at %rdi+BASE+4
 *   - loads the word at (%rdi) into %esi for the comparison with 24 that
 *     selects B128, B192 or the fall-through path
 */
#define entry(FUNC,BASE,B128,B192) \
	prologue(FUNC,BASE,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)

/*
 * return - epilogue() with the register assignment used by this file:
 * copies %r8 back to %rbx and %r9 back to %rbp (the registers entry()
 * parked there), stores the 32-bit views of R5, R6, R3, R4 at offsets 0,
 * 4, 8, 12 from %r11 and executes ret.  R5, R6, R3, R4 are the registers
 * in which encrypt_final()/decrypt_final() leave their result.
 */
#define return epilogue(R8,R2,R9,R7,R5,R6,R3,R4,R11)

/*
 * encrypt_round - one non-final encryption round.
 *
 * Reads the four input words from R1..R4, XORs in the four 32-bit words at
 * OFFSET(R10) and leaves the new words in R1..R4 again: round() produces
 * them in R5, R6, R3, R4 and move_regs() copies R5 -> R1 and R6 -> R2.
 * R5, R6 and R7 are used as scratch.
 *
 * With s0..s3 = R1..R4 on entry, T0..T3 the sub-tables at TAB + 0, 1024,
 * 2048, 3072 and k0..k3 the words at OFFSET(R10):
 *   new s0 = T0[s0.b0] ^ T1[s1.b1] ^ T2[s2.b2] ^ T3[s3.b3] ^ k0
 *   new s1 = T0[s1.b0] ^ T1[s2.b1] ^ T2[s3.b2] ^ T3[s0.b3] ^ k1
 *   new s2 = T0[s2.b0] ^ T1[s3.b1] ^ T2[s0.b2] ^ T3[s1.b3] ^ k2
 *   new s3 = T0[s3.b0] ^ T1[s0.b1] ^ T2[s1.b2] ^ T3[s2.b3] ^ k3
 */
#define encrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * encrypt_final - last encryption step.
 *
 * Same round() expansion as encrypt_round() but without the trailing
 * move_regs(): the four result words stay in R5, R6, R3, R4, which is
 * where the "return" macro stores them from.
 */
#define encrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)

/*
 * decrypt_round - one non-final decryption round.
 *
 * Same instruction sequence as encrypt_round(), but the register pairs
 * R1/R2, R3/R4 and R5/R6 are passed to round() in swapped order.  This
 * changes which input word supplies each byte position; input and output
 * registers stay R1..R4 (round() leaves results in R5, R6, R3, R4 and
 * move_regs() copies R5 -> R1, R6 -> R2).
 *
 * With s0..s3 = R1..R4 on entry, T0..T3 the sub-tables at TAB + 0, 1024,
 * 2048, 3072 and k0..k3 the words at OFFSET(R10):
 *   new s0 = T0[s0.b0] ^ T1[s3.b1] ^ T2[s2.b2] ^ T3[s1.b3] ^ k0
 *   new s1 = T0[s1.b0] ^ T1[s0.b1] ^ T2[s3.b2] ^ T3[s2.b3] ^ k1
 *   new s2 = T0[s2.b0] ^ T1[s1.b1] ^ T2[s0.b2] ^ T3[s3.b3] ^ k2
 *   new s3 = T0[s3.b0] ^ T1[s2.b1] ^ T2[s1.b2] ^ T3[s0.b3] ^ k3
 */
#define decrypt_round(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
	move_regs(R1,R2,R5,R6)

/*
 * decrypt_final - last decryption step.
 *
 * Same round() expansion as decrypt_round() but without the trailing
 * move_regs(): the four result words stay in R5, R6, R3, R4, which is
 * where the "return" macro stores them from.
 */
#define decrypt_final(TAB,OFFSET) \
	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)

/* void aes_encrypt(void *ctx, u8 *out, const u8 *in)
 *
 * Register use as coded in entry(): ctx is read through %rdi, the 16 input
 * bytes through %rdx, and the 16 output bytes are written through the
 * value %rsi had on entry.
 *
 * %rbx and %rbp are parked in %r8/%r9 on entry and copied back before ret;
 * no stack is used.  %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11 are
 * overwritten.
 *
 * entry() XORs the input with the four words at ctx+4 and then selects the
 * starting point from the 32-bit word at (ctx):
 *   below 24  -> enc128:  9 x encrypt_round + encrypt_final
 *   equal 24  -> enc192: 11 x encrypt_round + encrypt_final
 *   otherwise -> fall through: 13 x encrypt_round + encrypt_final
 * On each path the key pointer R10 has been advanced by 0, 32 or 64 so that
 * the first round executed reads the words at ctx+20 and each following
 * step reads the next 16 bytes.
 */

	entry(aes_encrypt,0,enc128,enc192)
	encrypt_round(aes_ft_tab,-96)
	encrypt_round(aes_ft_tab,-80)
enc192:	encrypt_round(aes_ft_tab,-64)
	encrypt_round(aes_ft_tab,-48)
enc128:	encrypt_round(aes_ft_tab,-32)
	encrypt_round(aes_ft_tab,-16)
	encrypt_round(aes_ft_tab,  0)
	encrypt_round(aes_ft_tab, 16)
	encrypt_round(aes_ft_tab, 32)
	encrypt_round(aes_ft_tab, 48)
	encrypt_round(aes_ft_tab, 64)
	encrypt_round(aes_ft_tab, 80)
	encrypt_round(aes_ft_tab, 96)
	encrypt_final(aes_fl_tab,112)
	return

/* void aes_decrypt(void *ctx, u8 *out, const u8 *in)
 *
 * Register use as coded in entry(): ctx is read through %rdi, the 16 input
 * bytes through %rdx, and the 16 output bytes are written through the
 * value %rsi had on entry.
 *
 * %rbx and %rbp are parked in %r8/%r9 on entry and copied back before ret;
 * no stack is used.  %rax, %rcx, %rdx, %rsi, %rdi and %r8-%r11 are
 * overwritten.
 *
 * Identical in structure to aes_encrypt, except that BASE is 240, so every
 * key access is 240 bytes further into ctx (the first XOR reads ctx+244),
 * and the decrypt_round()/decrypt_final() macros with the aes_it_tab and
 * aes_il_tab tables are used.  Path selection from the word at (ctx):
 *   below 24  -> dec128:  9 x decrypt_round + decrypt_final
 *   equal 24  -> dec192: 11 x decrypt_round + decrypt_final
 *   otherwise -> fall through: 13 x decrypt_round + decrypt_final
 *
 * NOTE(review): presumably ctx holds a separate decryption key schedule
 * starting at offset 244 - confirm against the key setup code.
 */

	entry(aes_decrypt,240,dec128,dec192)
	decrypt_round(aes_it_tab,-96)
	decrypt_round(aes_it_tab,-80)
dec192:	decrypt_round(aes_it_tab,-64)
	decrypt_round(aes_it_tab,-48)
dec128:	decrypt_round(aes_it_tab,-32)
	decrypt_round(aes_it_tab,-16)
	decrypt_round(aes_it_tab,  0)
	decrypt_round(aes_it_tab, 16)
	decrypt_round(aes_it_tab, 32)
	decrypt_round(aes_it_tab, 48)
	decrypt_round(aes_it_tab, 64)
	decrypt_round(aes_it_tab, 80)
	decrypt_round(aes_it_tab, 96)
	decrypt_final(aes_il_tab,112)
	return