Diffstat (limited to 'kernel/arch/arm64/crypto')
-rw-r--r--  kernel/arch/arm64/crypto/aes-ce-ccm-core.S  | 53
-rw-r--r--  kernel/arch/arm64/crypto/aes-ce-cipher.c    | 25
-rw-r--r--  kernel/arch/arm64/crypto/aes-ce.S           |  1
-rw-r--r--  kernel/arch/arm64/crypto/aes-glue.c         |  2
-rw-r--r--  kernel/arch/arm64/crypto/aes-modes.S        | 91
-rw-r--r--  kernel/arch/arm64/crypto/aes-neon.S         | 25
-rw-r--r--  kernel/arch/arm64/crypto/ghash-ce-core.S    |  6
-rw-r--r--  kernel/arch/arm64/crypto/sha1-ce-core.S     |  4
-rw-r--r--  kernel/arch/arm64/crypto/sha2-ce-core.S     |  4
9 files changed, 110 insertions(+), 101 deletions(-)
diff --git a/kernel/arch/arm64/crypto/aes-ce-ccm-core.S b/kernel/arch/arm64/crypto/aes-ce-ccm-core.S
index a2a7fbcac..3363560c7 100644
--- a/kernel/arch/arm64/crypto/aes-ce-ccm-core.S
+++ b/kernel/arch/arm64/crypto/aes-ce-ccm-core.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
.text
.arch armv8-a+crypto
@@ -19,7 +20,7 @@
*/
ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
- ld1 {v0.2d}, [x0] /* load mac */
+ ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f
sub w8, w8, #16
eor v1.16b, v1.16b, v1.16b
@@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data)
beq 8f /* out of input? */
cbnz w8, 0b
eor v0.16b, v0.16b, v1.16b
-1: ld1 {v3.2d}, [x4] /* load first round key */
+1: ld1 {v3.16b}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
@@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data)
mov v5.16b, v3.16b
b 4f
2: mov v4.16b, v3.16b
- ld1 {v5.2d}, [x6], #16 /* load 2nd round key */
+ ld1 {v5.16b}, [x6], #16 /* load 2nd round key */
3: aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
-4: ld1 {v3.2d}, [x6], #16 /* load next round key */
+4: ld1 {v3.16b}, [x6], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
-5: ld1 {v4.2d}, [x6], #16 /* load next round key */
+5: ld1 {v4.16b}, [x6], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
- ld1 {v5.2d}, [x6], #16 /* load next round key */
+ ld1 {v5.16b}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
@@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v1.16b}, [x1], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
-6: st1 {v0.2d}, [x0] /* store mac */
+6: st1 {v0.16b}, [x0] /* store mac */
beq 10f
adds w2, w2, #16
beq 10f
@@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data)
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
- st1 {v0.2d}, [x0]
+ st1 {v0.16b}, [x0]
10: str w8, [x3]
ret
ENDPROC(ce_aes_ccm_auth_data)
@@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data)
* u32 rounds);
*/
ENTRY(ce_aes_ccm_final)
- ld1 {v3.2d}, [x2], #16 /* load first round key */
- ld1 {v0.2d}, [x0] /* load mac */
+ ld1 {v3.16b}, [x2], #16 /* load first round key */
+ ld1 {v0.16b}, [x0] /* load mac */
cmp w3, #12 /* which key size? */
sub w3, w3, #2 /* modified # of rounds */
- ld1 {v1.2d}, [x1] /* load 1st ctriv */
+ ld1 {v1.16b}, [x1] /* load 1st ctriv */
bmi 0f
bne 3f
mov v5.16b, v3.16b
b 2f
0: mov v4.16b, v3.16b
-1: ld1 {v5.2d}, [x2], #16 /* load next round key */
+1: ld1 {v5.16b}, [x2], #16 /* load next round key */
aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
-2: ld1 {v3.2d}, [x2], #16 /* load next round key */
+2: ld1 {v3.16b}, [x2], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
-3: ld1 {v4.2d}, [x2], #16 /* load next round key */
+3: ld1 {v4.16b}, [x2], #16 /* load next round key */
subs w3, w3, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
@@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final)
aese v1.16b, v4.16b
/* final round key cancels out */
eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */
- st1 {v0.2d}, [x0] /* store result */
+ st1 {v0.16b}, [x0] /* store result */
ret
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
- ld1 {v0.2d}, [x5] /* load mac */
- rev x8, x8 /* keep swabbed ctr in reg */
+ ld1 {v0.16b}, [x5] /* load mac */
+CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
0: /* outer loop */
- ld1 {v1.1d}, [x6] /* load upper ctr */
+ ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
- ld1 {v3.2d}, [x3] /* load first round key */
+ ld1 {v3.16b}, [x3] /* load first round key */
add x10, x3, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
b 3f
1: mov v4.16b, v3.16b
- ld1 {v5.2d}, [x10], #16 /* load 2nd round key */
+ ld1 {v5.16b}, [x10], #16 /* load 2nd round key */
2: /* inner loop: 3 rounds, 2x interleaved */
aese v0.16b, v4.16b
aesmc v0.16b, v0.16b
aese v1.16b, v4.16b
aesmc v1.16b, v1.16b
-3: ld1 {v3.2d}, [x10], #16 /* load next round key */
+3: ld1 {v3.16b}, [x10], #16 /* load next round key */
aese v0.16b, v5.16b
aesmc v0.16b, v0.16b
aese v1.16b, v5.16b
aesmc v1.16b, v1.16b
-4: ld1 {v4.2d}, [x10], #16 /* load next round key */
+4: ld1 {v4.16b}, [x10], #16 /* load next round key */
subs w7, w7, #3
aese v0.16b, v3.16b
aesmc v0.16b, v0.16b
aese v1.16b, v3.16b
aesmc v1.16b, v1.16b
- ld1 {v5.2d}, [x10], #16 /* load next round key */
+ ld1 {v5.16b}, [x10], #16 /* load next round key */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
@@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final)
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b
- rev x8, x8
- st1 {v0.2d}, [x5] /* store mac */
+CPU_LE( rev x8, x8 )
+ st1 {v0.16b}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
- st1 {v0.2d}, [x5] /* store mac */
+ st1 {v0.16b}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
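
The `.2d` to `.16b` changes above look like big-endian fixes: the MAC, the IV, and the round keys are handled as byte streams here, and `ld1`/`st1` with 64-bit elements byte-swaps each lane on a big-endian CPU, whereas the byte-wise `.16b` arrangement preserves memory order on either endianness. The new `CPU_LE( rev x8, x8 )` lines apply the same logic to the counter kept in a general register, hence the added `<asm/assembler.h>` include. A minimal host-side sketch of the effect, in plain C with illustrative names only:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            uint8_t bytes[16];
            uint64_t lanes[2];      /* stand-in for a v-register's .2d view */
            int i;

            for (i = 0; i < 16; i++)
                    bytes[i] = i;   /* a byte stream: 00 01 02 ... 0f */

            /* like ld1 {v.2d}: lane values depend on host endianness */
            memcpy(lanes, bytes, 16);
            printf("%016llx\n", (unsigned long long)lanes[0]);
            /* little endian: 0706050403020100
             * big endian:    0001020304050607 */
            return 0;
    }

A `.16b` load, by contrast, places byte i of memory into byte element i of the register on both endiannesses, so byte-stream data round-trips unchanged.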
diff --git a/kernel/arch/arm64/crypto/aes-ce-cipher.c b/kernel/arch/arm64/crypto/aes-ce-cipher.c
index f7bd9bf0b..50d9fe11d 100644
--- a/kernel/arch/arm64/crypto/aes-ce-cipher.c
+++ b/kernel/arch/arm64/crypto/aes-ce-cipher.c
@@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.2d}, [%[key]], #16 ;"
+ " ld1 {v1.16b}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
"1: aese v0.16b, v2.16b ;"
" aesmc v0.16b, v0.16b ;"
- "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ "2: ld1 {v1.16b}, [%[key]], #16 ;"
" aese v0.16b, v3.16b ;"
" aesmc v0.16b, v0.16b ;"
- "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ "3: ld1 {v2.16b}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aese v0.16b, v1.16b ;"
" aesmc v0.16b, v0.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
" bpl 1b ;"
" aese v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
@@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
kernel_neon_begin_partial(4);
__asm__(" ld1 {v0.16b}, %[in] ;"
- " ld1 {v1.2d}, [%[key]], #16 ;"
+ " ld1 {v1.16b}, [%[key]], #16 ;"
" cmp %w[rounds], #10 ;"
" bmi 0f ;"
" bne 3f ;"
" mov v3.16b, v1.16b ;"
" b 2f ;"
"0: mov v2.16b, v1.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
"1: aesd v0.16b, v2.16b ;"
" aesimc v0.16b, v0.16b ;"
- "2: ld1 {v1.2d}, [%[key]], #16 ;"
+ "2: ld1 {v1.16b}, [%[key]], #16 ;"
" aesd v0.16b, v3.16b ;"
" aesimc v0.16b, v0.16b ;"
- "3: ld1 {v2.2d}, [%[key]], #16 ;"
+ "3: ld1 {v2.16b}, [%[key]], #16 ;"
" subs %w[rounds], %w[rounds], #3 ;"
" aesd v0.16b, v1.16b ;"
" aesimc v0.16b, v0.16b ;"
- " ld1 {v3.2d}, [%[key]], #16 ;"
+ " ld1 {v3.16b}, [%[key]], #16 ;"
" bpl 1b ;"
" aesd v0.16b, v2.16b ;"
" eor v0.16b, v0.16b, v3.16b ;"
@@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
u32 *rki = ctx->key_enc + (i * kwords);
u32 *rko = rki + kwords;
+#ifndef CONFIG_CPU_BIG_ENDIAN
rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
+#else
+ rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^
+ rki[0];
+#endif
rko[1] = rko[0] ^ rki[1];
rko[2] = rko[1] ^ rki[2];
rko[3] = rko[2] ^ rki[3];
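
The inline-asm changes mirror the `.16b` fixes above; the new `#ifdef` in `ce_aes_expandkey()` adjusts the key-schedule word rotation for big endian, where each 32-bit word is the byte-reversed image of its little-endian counterpart. The two branches produce byte-identical schedules because byte-swapping commutes with the byte-wise `aes_sub()`, turns a right-rotate by 8 into a left-rotate by 8, and moves `rcon[i]` from the low byte to the high byte. A standalone check of the rotation identity, with local stand-ins for the kernel helpers:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t ror32(uint32_t w, unsigned int s)
    {
            return (w >> s) | (w << (32 - s));
    }

    static uint32_t rol32(uint32_t w, unsigned int s)
    {
            return (w << s) | (w >> (32 - s));
    }

    static uint32_t bswap32(uint32_t w)
    {
            return (w >> 24) | ((w >> 8) & 0xff00) |
                   ((w << 8) & 0xff0000) | (w << 24);
    }

    int main(void)
    {
            uint32_t x = 0x01020304;

            /* swapping then rotating left == rotating right then swapping */
            printf("%08x %08x\n", rol32(bswap32(x), 8),
                   bswap32(ror32(x, 8)));  /* both print 03020104 */
            return 0;
    }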
diff --git a/kernel/arch/arm64/crypto/aes-ce.S b/kernel/arch/arm64/crypto/aes-ce.S
index 78f3cfe92..b46093d56 100644
--- a/kernel/arch/arm64/crypto/aes-ce.S
+++ b/kernel/arch/arm64/crypto/aes-ce.S
@@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#define AES_ENTRY(func) ENTRY(ce_ ## func)
#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
diff --git a/kernel/arch/arm64/crypto/aes-glue.c b/kernel/arch/arm64/crypto/aes-glue.c
index 05d9e16c0..6a51dfccf 100644
--- a/kernel/arch/arm64/crypto/aes-glue.c
+++ b/kernel/arch/arm64/crypto/aes-glue.c
@@ -211,7 +211,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
err = blkcipher_walk_done(desc, &walk,
walk.nbytes % AES_BLOCK_SIZE);
}
- if (nbytes) {
+ if (walk.nbytes % AES_BLOCK_SIZE) {
u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE;
u8 __aligned(8) tail[AES_BLOCK_SIZE];
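
The `ctr_encrypt()` hunk makes the partial-block path depend on the walk's own residue rather than the running request count: the copy through the stack `tail` buffer is only valid when the final walk chunk actually ended mid-block. The old `if (nbytes)` test could apparently still be true after the walk had been wound down, at which point `walk.dst.virt.addr` no longer points at mapped data. A toy model of the corrected predicate (not the kernel walk API):

    #include <stdio.h>

    #define AES_BLOCK_SIZE 16u

    static int has_partial_tail(unsigned int walk_nbytes)
    {
            /* leftover bytes of the last walk chunk; zero means
             * there is nothing for the tail path to do */
            return walk_nbytes % AES_BLOCK_SIZE != 0;
    }

    int main(void)
    {
            printf("%d\n", has_partial_tail(32));   /* 0: block-aligned */
            printf("%d\n", has_partial_tail(37));   /* 1: 5 tail bytes */
            return 0;
    }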
diff --git a/kernel/arch/arm64/crypto/aes-modes.S b/kernel/arch/arm64/crypto/aes-modes.S
index f6e372c52..838dad5c2 100644
--- a/kernel/arch/arm64/crypto/aes-modes.S
+++ b/kernel/arch/arm64/crypto/aes-modes.S
@@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt)
cbz w6, .Lcbcencloop
ld1 {v0.16b}, [x5] /* get iv */
- enc_prepare w3, x2, x5
+ enc_prepare w3, x2, x6
.Lcbcencloop:
ld1 {v1.16b}, [x1], #16 /* get next pt block */
eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */
- encrypt_block v0, w3, x2, x5, w6
+ encrypt_block v0, w3, x2, x6, w7
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcencloop
+ st1 {v0.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_encrypt)
@@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt)
cbz w6, .LcbcdecloopNx
ld1 {v7.16b}, [x5] /* get iv */
- dec_prepare w3, x2, x5
+ dec_prepare w3, x2, x6
.LcbcdecloopNx:
#if INTERLEAVE >= 2
@@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt)
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
- decrypt_block v0, w3, x2, x5, w6
+ decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
@@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt)
bne .Lcbcdecloop
.Lcbcdecout:
FRAME_POP
+ st1 {v7.16b}, [x5] /* return iv */
ret
AES_ENDPROC(aes_cbc_decrypt)
@@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt)
AES_ENTRY(aes_ctr_encrypt)
FRAME_PUSH
- cbnz w6, .Lctrfirst /* 1st time around? */
- umov x5, v4.d[1] /* keep swabbed ctr in reg */
- rev x5, x5
-#if INTERLEAVE >= 2
- cmn w5, w4 /* 32 bit overflow? */
- bcs .Lctrinc
- add x5, x5, #1 /* increment BE ctr */
- b .LctrincNx
-#else
- b .Lctrinc
-#endif
-.Lctrfirst:
+ cbz w6, .Lctrnotfirst /* 1st time around? */
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]
- umov x5, v4.d[1] /* keep swabbed ctr in reg */
- rev x5, x5
+
+.Lctrnotfirst:
+ umov x8, v4.d[1] /* keep swabbed ctr in reg */
+ rev x8, x8
#if INTERLEAVE >= 2
- cmn w5, w4 /* 32 bit overflow? */
+ cmn w8, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
subs w4, w4, #INTERLEAVE
@@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt)
#if INTERLEAVE == 2
mov v0.8b, v4.8b
mov v1.8b, v4.8b
- rev x7, x5
- add x5, x5, #1
+ rev x7, x8
+ add x8, x8, #1
ins v0.d[1], x7
- rev x7, x5
- add x5, x5, #1
+ rev x7, x8
+ add x8, x8, #1
ins v1.d[1], x7
ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */
do_encrypt_block2x
@@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt)
st1 {v0.16b-v1.16b}, [x0], #32
#else
ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */
- dup v7.4s, w5
+ dup v7.4s, w8
mov v0.16b, v4.16b
add v7.4s, v7.4s, v8.4s
mov v1.16b, v4.16b
@@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt)
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
st1 {v0.16b-v3.16b}, [x0], #64
- add x5, x5, #INTERLEAVE
+ add x8, x8, #INTERLEAVE
#endif
- cbz w4, .LctroutNx
-.LctrincNx:
- rev x7, x5
+ rev x7, x8
ins v4.d[1], x7
+ cbz w4, .Lctrout
b .LctrloopNx
-.LctroutNx:
- sub x5, x5, #1
- rev x7, x5
- ins v4.d[1], x7
- b .Lctrout
.Lctr1x:
adds w4, w4, #INTERLEAVE
beq .Lctrout
@@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt)
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x6, w7
+
+ adds x8, x8, #1 /* increment BE ctr */
+ rev x7, x8
+ ins v4.d[1], x7
+ bcs .Lctrcarry /* overflow? */
+
+.Lctrcarrydone:
subs w4, w4, #1
bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
- beq .Lctrout
-.Lctrinc:
- adds x5, x5, #1 /* increment BE ctr */
- rev x7, x5
- ins v4.d[1], x7
- bcc .Lctrloop /* no overflow? */
- umov x7, v4.d[0] /* load upper word of ctr */
- rev x7, x7 /* ... to handle the carry */
- add x7, x7, #1
- rev x7, x7
- ins v4.d[0], x7
- b .Lctrloop
+ bne .Lctrloop
+
+.Lctrout:
+ st1 {v4.16b}, [x5] /* return next CTR value */
+ FRAME_POP
+ ret
+
.Lctrhalfblock:
ld1 {v3.8b}, [x1]
eor v3.8b, v0.8b, v3.8b
st1 {v3.8b}, [x0]
-.Lctrout:
FRAME_POP
ret
+
+.Lctrcarry:
+ umov x7, v4.d[0] /* load upper word of ctr */
+ rev x7, x7 /* ... to handle the carry */
+ add x7, x7, #1
+ rev x7, x7
+ ins v4.d[0], x7
+ b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
.ltorg
@@ -386,7 +382,8 @@ AES_ENDPROC(aes_ctr_encrypt)
.endm
.Lxts_mul_x:
- .word 1, 0, 0x87, 0
+CPU_LE( .quad 1, 0x87 )
+CPU_BE( .quad 0x87, 1 )
AES_ENTRY(aes_xts_encrypt)
FRAME_PUSH
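
Two separable changes in aes-modes.S: the CBC and CTR paths now use x6/x7 (and x8 for the counter) as scratch so x5 stays a valid IV pointer, letting each mode store its final IV or counter back (`st1 ... [x5]`) for chained calls; and the CTR increment is restructured so the common path just bumps the swabbed low word in x8, branching to `.Lctrcarry` only when it wraps. A sketch of that counter handling in C, host word order, illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    /* ctr[0] = high 64 bits, ctr[1] = low 64 bits of a 128-bit
     * big-endian counter, both held byte-swapped to host order */
    static void ctr_inc(uint64_t ctr[2])
    {
            if (++ctr[1] == 0)      /* low half wrapped: .Lctrcarry */
                    ctr[0]++;
    }

    int main(void)
    {
            uint64_t ctr[2] = { 0, ~0ULL };

            ctr_inc(ctr);
            printf("%016llx %016llx\n",
                   (unsigned long long)ctr[0],
                   (unsigned long long)ctr[1]);  /* ...0001 ...0000 */
            return 0;
    }

The `.Lxts_mul_x` constant, loaded as a single 128-bit quantity, is likewise split into endian-specific `.quad` pairs so the register ends up with lanes d[0] = 1, d[1] = 0x87 on both LE and BE.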
diff --git a/kernel/arch/arm64/crypto/aes-neon.S b/kernel/arch/arm64/crypto/aes-neon.S
index b93170e1c..85f07ead7 100644
--- a/kernel/arch/arm64/crypto/aes-neon.S
+++ b/kernel/arch/arm64/crypto/aes-neon.S
@@ -9,6 +9,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)
@@ -83,13 +84,13 @@
.endm
.macro do_block, enc, in, rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
sub_bytes \in
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -229,7 +230,7 @@
.endm
.macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
@@ -237,7 +238,7 @@
sub_bytes_2x \in0, \in1
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -254,7 +255,7 @@
.endm
.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
- ld1 {v15.16b}, [\rk]
+ ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
@@ -266,7 +267,7 @@
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
- ld1 {v15.16b}, [\rkp], #16
+ ld1 {v15.4s}, [\rkp], #16
subs \i, \i, #1
beq 2222f
.if \enc == 1
@@ -306,12 +307,16 @@
.text
.align 4
.LForward_ShiftRows:
- .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
- .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
+CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 )
+CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb )
+CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 )
+CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 )
.LReverse_ShiftRows:
- .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
- .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
+CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb )
+CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 )
+CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 )
+CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 )
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
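
In the NEON-only code the round keys come from the generic key-expansion routines, which store them as arrays of 32-bit words, so `.4s` is the arrangement that yields the same lane values on both endiannesses; the ShiftRows index tables likewise gain endian-specific variants. As a sketch, this is what the `tbl`-based ShiftRows computes with the little-endian table (each entry is a source byte index, so `out[i] = in[table[i]]`):

    #include <stdint.h>
    #include <stdio.h>

    static const uint8_t forward_shiftrows[16] = {
            0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
            0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
    };

    int main(void)
    {
            uint8_t in[16], out[16];
            int i;

            for (i = 0; i < 16; i++)
                    in[i] = i;              /* block bytes 0..15 */
            for (i = 0; i < 16; i++)
                    out[i] = in[forward_shiftrows[i]];      /* like tbl */
            for (i = 0; i < 16; i++)
                    printf("%x ", out[i]);
            printf("\n");   /* 0 5 a f 4 9 e 3 8 d 2 7 c 1 6 b */
            return 0;
    }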
diff --git a/kernel/arch/arm64/crypto/ghash-ce-core.S b/kernel/arch/arm64/crypto/ghash-ce-core.S
index dc4570158..f0bb9f0b5 100644
--- a/kernel/arch/arm64/crypto/ghash-ce-core.S
+++ b/kernel/arch/arm64/crypto/ghash-ce-core.S
@@ -29,8 +29,8 @@
* struct ghash_key const *k, const char *head)
*/
ENTRY(pmull_ghash_update)
- ld1 {SHASH.16b}, [x3]
- ld1 {XL.16b}, [x1]
+ ld1 {SHASH.2d}, [x3]
+ ld1 {XL.2d}, [x1]
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
shl MASK.2d, MASK.2d, #57
@@ -74,6 +74,6 @@ CPU_LE( rev64 T1.16b, T1.16b )
cbnz w0, 0b
- st1 {XL.16b}, [x1]
+ st1 {XL.2d}, [x1]
ret
ENDPROC(pmull_ghash_update)
diff --git a/kernel/arch/arm64/crypto/sha1-ce-core.S b/kernel/arch/arm64/crypto/sha1-ce-core.S
index 033aae6d7..c98e7e849 100644
--- a/kernel/arch/arm64/crypto/sha1-ce-core.S
+++ b/kernel/arch/arm64/crypto/sha1-ce-core.S
@@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform)
ld1r {k3.4s}, [x6]
/* load state */
- ldr dga, [x0]
+ ld1 {dgav.4s}, [x0]
ldr dgb, [x0, #16]
/* load sha1_ce_state::finalize */
@@ -144,7 +144,7 @@ CPU_LE( rev32 v11.16b, v11.16b )
b 1b
/* store new state */
-3: str dga, [x0]
+3: st1 {dgav.4s}, [x0]
str dgb, [x0, #16]
ret
ENDPROC(sha1_ce_transform)
diff --git a/kernel/arch/arm64/crypto/sha2-ce-core.S b/kernel/arch/arm64/crypto/sha2-ce-core.S
index 5df9d9d47..01cfee066 100644
--- a/kernel/arch/arm64/crypto/sha2-ce-core.S
+++ b/kernel/arch/arm64/crypto/sha2-ce-core.S
@@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform)
ld1 {v12.4s-v15.4s}, [x8]
/* load state */
- ldp dga, dgb, [x0]
+ ld1 {dgav.4s, dgbv.4s}, [x0]
/* load sha256_ce_state::finalize */
ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
@@ -148,6 +148,6 @@ CPU_LE( rev32 v19.16b, v19.16b )
b 1b
/* store new state */
-3: stp dga, dgb, [x0]
+3: st1 {dgav.4s, dgbv.4s}, [x0]
ret
ENDPROC(sha2_ce_transform)
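
The last two files, together with the ghash change above, go the opposite way from the `.16b` fixes: the GHASH key and state are pairs of 64-bit quantities (hence `.2d`), and the SHA-1/SHA-256 state is an array of 32-bit words (hence `.4s`), whereas `ldr`/`str`/`ldp`/`stp` on q registers move one 128-bit quantity, which lands the 32-bit lanes in reversed order on big endian. A host-independent sketch of the SHA case, assuming the state words are stored as native (big-endian) u32 values:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t be32(const uint8_t *p)
    {
            return (uint32_t)p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
    }

    int main(void)
    {
            /* SHA-1 H0..H3 as a big-endian kernel lays out u32s */
            const uint8_t mem[16] = {
                    0x67, 0x45, 0x23, 0x01, 0xef, 0xcd, 0xab, 0x89,
                    0x98, 0xba, 0xdc, 0xfe, 0x10, 0x32, 0x54, 0x76,
            };
            int i;

            /* ld1 {v.4s} on BE: lane i = 32-bit read of word i */
            for (i = 0; i < 4; i++)
                    printf("%08x ", be32(mem + 4 * i));
            printf("\n");   /* 67452301 efcdab89 98badcfe 10325476 */

            /* ldr q on BE: mem[0] becomes the register's most
             * significant byte, so lane i holds word (3 - i) */
            for (i = 0; i < 4; i++)
                    printf("%08x ", be32(mem + 4 * (3 - i)));
            printf("\n");   /* 10325476 98badcfe efcdab89 67452301 */
            return 0;
    }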