diff options
Diffstat (limited to 'kernel/arch/arm64')
43 files changed, 517 insertions, 255 deletions
diff --git a/kernel/arch/arm64/Kconfig b/kernel/arch/arm64/Kconfig index 1baa6537c..9196cf82f 100644 --- a/kernel/arch/arm64/Kconfig +++ b/kernel/arch/arm64/Kconfig @@ -392,6 +392,15 @@ config CAVIUM_ERRATUM_22375 If unsure, say Y. +config CAVIUM_ERRATUM_23144 + bool "Cavium erratum 23144: ITS SYNC hang on dual socket system" + depends on NUMA + default y + help + ITS SYNC command hang for cross node io and collections/cpu mapping. + + If unsure, say Y. + config CAVIUM_ERRATUM_23154 bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" default y @@ -402,6 +411,17 @@ config CAVIUM_ERRATUM_23154 If unsure, say Y. +config CAVIUM_ERRATUM_27456 + bool "Cavium erratum 27456: Broadcast TLBI instructions may cause icache corruption" + default y + help + On ThunderX T88 pass 1.x through 2.1 parts, broadcast TLBI + instructions may cause the icache to become corrupted if it + contains data for a non-current ASID. The fix is to + invalidate the icache when changing the mm context. + + If unsure, say Y. + endmenu diff --git a/kernel/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/kernel/arch/arm64/boot/dts/rockchip/rk3368.dtsi index cc093a482..e0ee2b00d 100644 --- a/kernel/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/kernel/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -262,6 +262,8 @@ #io-channel-cells = <1>; clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>; clock-names = "saradc", "apb_pclk"; + resets = <&cru SRST_SARADC>; + reset-names = "saradc-apb"; status = "disabled"; }; @@ -517,7 +519,7 @@ #address-cells = <0>; reg = <0x0 0xffb71000 0x0 0x1000>, - <0x0 0xffb72000 0x0 0x1000>, + <0x0 0xffb72000 0x0 0x2000>, <0x0 0xffb74000 0x0 0x2000>, <0x0 0xffb76000 0x0 0x2000>; interrupts = <GIC_PPI 9 diff --git a/kernel/arch/arm64/crypto/aes-ce-ccm-core.S b/kernel/arch/arm64/crypto/aes-ce-ccm-core.S index a2a7fbcac..3363560c7 100644 --- a/kernel/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/kernel/arch/arm64/crypto/aes-ce-ccm-core.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> .text .arch armv8-a+crypto @@ -19,7 +20,7 @@ */ ENTRY(ce_aes_ccm_auth_data) ldr w8, [x3] /* leftover from prev round? */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v0.16b}, [x0] /* load mac */ cbz w8, 1f sub w8, w8, #16 eor v1.16b, v1.16b, v1.16b @@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) beq 8f /* out of input? */ cbnz w8, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.2d}, [x4] /* load first round key */ +1: ld1 {v3.16b}, [x4] /* load first round key */ prfm pldl1strm, [x1] cmp w5, #12 /* which key size? */ add x6, x4, #16 @@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) mov v5.16b, v3.16b b 4f 2: mov v4.16b, v3.16b - ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ 3: aese v0.16b, v4.16b aesmc v0.16b, v0.16b -4: ld1 {v3.2d}, [x6], #16 /* load next round key */ +4: ld1 {v3.16b}, [x6], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b -5: ld1 {v4.2d}, [x6], #16 /* load next round key */ +5: ld1 {v4.16b}, [x6], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b - ld1 {v5.2d}, [x6], #16 /* load next round key */ + ld1 {v5.16b}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b subs w2, w2, #16 /* last data? */ @@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v1.16b}, [x1], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ bne 1b -6: st1 {v0.2d}, [x0] /* store mac */ +6: st1 {v0.16b}, [x0] /* store mac */ beq 10f adds w2, w2, #16 beq 10f @@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data) adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.2d}, [x0] + st1 {v0.16b}, [x0] 10: str w8, [x3] ret ENDPROC(ce_aes_ccm_auth_data) @@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data) * u32 rounds); */ ENTRY(ce_aes_ccm_final) - ld1 {v3.2d}, [x2], #16 /* load first round key */ - ld1 {v0.2d}, [x0] /* load mac */ + ld1 {v3.16b}, [x2], #16 /* load first round key */ + ld1 {v0.16b}, [x0] /* load mac */ cmp w3, #12 /* which key size? */ sub w3, w3, #2 /* modified # of rounds */ - ld1 {v1.2d}, [x1] /* load 1st ctriv */ + ld1 {v1.16b}, [x1] /* load 1st ctriv */ bmi 0f bne 3f mov v5.16b, v3.16b b 2f 0: mov v4.16b, v3.16b -1: ld1 {v5.2d}, [x2], #16 /* load next round key */ +1: ld1 {v5.16b}, [x2], #16 /* load next round key */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -2: ld1 {v3.2d}, [x2], #16 /* load next round key */ +2: ld1 {v3.16b}, [x2], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -3: ld1 {v4.2d}, [x2], #16 /* load next round key */ +3: ld1 {v4.16b}, [x2], #16 /* load next round key */ subs w3, w3, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b @@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final) aese v1.16b, v4.16b /* final round key cancels out */ eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ - st1 {v0.2d}, [x0] /* store result */ + st1 {v0.16b}, [x0] /* store result */ ret ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.2d}, [x5] /* load mac */ - rev x8, x8 /* keep swabbed ctr in reg */ + ld1 {v0.16b}, [x5] /* load mac */ +CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.1d}, [x6] /* load upper ctr */ + ld1 {v1.8b}, [x6] /* load upper ctr */ prfm pldl1strm, [x1] add x8, x8, #1 rev x9, x8 cmp w4, #12 /* which key size? */ sub w7, w4, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.2d}, [x3] /* load first round key */ + ld1 {v3.16b}, [x3] /* load first round key */ add x10, x3, #16 bmi 1f bne 4f mov v5.16b, v3.16b b 3f 1: mov v4.16b, v3.16b - ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ + ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ 2: /* inner loop: 3 rounds, 2x interleaved */ aese v0.16b, v4.16b aesmc v0.16b, v0.16b aese v1.16b, v4.16b aesmc v1.16b, v1.16b -3: ld1 {v3.2d}, [x10], #16 /* load next round key */ +3: ld1 {v3.16b}, [x10], #16 /* load next round key */ aese v0.16b, v5.16b aesmc v0.16b, v0.16b aese v1.16b, v5.16b aesmc v1.16b, v1.16b -4: ld1 {v4.2d}, [x10], #16 /* load next round key */ +4: ld1 {v4.16b}, [x10], #16 /* load next round key */ subs w7, w7, #3 aese v0.16b, v3.16b aesmc v0.16b, v0.16b aese v1.16b, v3.16b aesmc v1.16b, v1.16b - ld1 {v5.2d}, [x10], #16 /* load next round key */ + ld1 {v5.16b}, [x10], #16 /* load next round key */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b @@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final) eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ st1 {v1.16b}, [x0], #16 /* write output block */ bne 0b - rev x8, x8 - st1 {v0.2d}, [x5] /* store mac */ +CPU_LE( rev x8, x8 ) + st1 {v0.16b}, [x5] /* store mac */ str x8, [x6, #8] /* store lsb end of ctr (BE) */ 5: ret 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.2d}, [x5] /* store mac */ + st1 {v0.16b}, [x5] /* store mac */ add w2, w2, #16 /* process partial tail block */ 7: ldrb w9, [x1], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ diff --git a/kernel/arch/arm64/crypto/aes-ce-cipher.c b/kernel/arch/arm64/crypto/aes-ce-cipher.c index f7bd9bf0b..50d9fe11d 100644 --- a/kernel/arch/arm64/crypto/aes-ce-cipher.c +++ b/kernel/arch/arm64/crypto/aes-ce-cipher.c @@ -47,24 +47,24 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aese v0.16b, v2.16b ;" " aesmc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aese v0.16b, v3.16b ;" " aesmc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aese v0.16b, v1.16b ;" " aesmc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aese v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -92,24 +92,24 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[]) kernel_neon_begin_partial(4); __asm__(" ld1 {v0.16b}, %[in] ;" - " ld1 {v1.2d}, [%[key]], #16 ;" + " ld1 {v1.16b}, [%[key]], #16 ;" " cmp %w[rounds], #10 ;" " bmi 0f ;" " bne 3f ;" " mov v3.16b, v1.16b ;" " b 2f ;" "0: mov v2.16b, v1.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" "1: aesd v0.16b, v2.16b ;" " aesimc v0.16b, v0.16b ;" - "2: ld1 {v1.2d}, [%[key]], #16 ;" + "2: ld1 {v1.16b}, [%[key]], #16 ;" " aesd v0.16b, v3.16b ;" " aesimc v0.16b, v0.16b ;" - "3: ld1 {v2.2d}, [%[key]], #16 ;" + "3: ld1 {v2.16b}, [%[key]], #16 ;" " subs %w[rounds], %w[rounds], #3 ;" " aesd v0.16b, v1.16b ;" " aesimc v0.16b, v0.16b ;" - " ld1 {v3.2d}, [%[key]], #16 ;" + " ld1 {v3.16b}, [%[key]], #16 ;" " bpl 1b ;" " aesd v0.16b, v2.16b ;" " eor v0.16b, v0.16b, v3.16b ;" @@ -173,7 +173,12 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key, u32 *rki = ctx->key_enc + (i * kwords); u32 *rko = rki + kwords; +#ifndef CONFIG_CPU_BIG_ENDIAN rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0]; +#else + rko[0] = rol32(aes_sub(rki[kwords - 1]), 8) ^ (rcon[i] << 24) ^ + rki[0]; +#endif rko[1] = rko[0] ^ rki[1]; rko[2] = rko[1] ^ rki[2]; rko[3] = rko[2] ^ rki[3]; diff --git a/kernel/arch/arm64/crypto/aes-ce.S b/kernel/arch/arm64/crypto/aes-ce.S index 78f3cfe92..b46093d56 100644 --- a/kernel/arch/arm64/crypto/aes-ce.S +++ b/kernel/arch/arm64/crypto/aes-ce.S @@ -10,6 +10,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #define AES_ENTRY(func) ENTRY(ce_ ## func) #define AES_ENDPROC(func) ENDPROC(ce_ ## func) diff --git a/kernel/arch/arm64/crypto/aes-glue.c b/kernel/arch/arm64/crypto/aes-glue.c index 05d9e16c0..6a51dfccf 100644 --- a/kernel/arch/arm64/crypto/aes-glue.c +++ b/kernel/arch/arm64/crypto/aes-glue.c @@ -211,7 +211,7 @@ static int ctr_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, err = blkcipher_walk_done(desc, &walk, walk.nbytes % AES_BLOCK_SIZE); } - if (nbytes) { + if (walk.nbytes % AES_BLOCK_SIZE) { u8 *tdst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; u8 *tsrc = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; u8 __aligned(8) tail[AES_BLOCK_SIZE]; diff --git a/kernel/arch/arm64/crypto/aes-modes.S b/kernel/arch/arm64/crypto/aes-modes.S index f6e372c52..838dad5c2 100644 --- a/kernel/arch/arm64/crypto/aes-modes.S +++ b/kernel/arch/arm64/crypto/aes-modes.S @@ -193,15 +193,16 @@ AES_ENTRY(aes_cbc_encrypt) cbz w6, .Lcbcencloop ld1 {v0.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x5 + enc_prepare w3, x2, x6 .Lcbcencloop: ld1 {v1.16b}, [x1], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x5, w6 + encrypt_block v0, w3, x2, x6, w7 st1 {v0.16b}, [x0], #16 subs w4, w4, #1 bne .Lcbcencloop + st1 {v0.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_encrypt) @@ -211,7 +212,7 @@ AES_ENTRY(aes_cbc_decrypt) cbz w6, .LcbcdecloopNx ld1 {v7.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x5 + dec_prepare w3, x2, x6 .LcbcdecloopNx: #if INTERLEAVE >= 2 @@ -248,7 +249,7 @@ AES_ENTRY(aes_cbc_decrypt) .Lcbcdecloop: ld1 {v1.16b}, [x1], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x5, w6 + decrypt_block v0, w3, x2, x6, w7 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ mov v7.16b, v1.16b /* ct is next iv */ st1 {v0.16b}, [x0], #16 @@ -256,6 +257,7 @@ AES_ENTRY(aes_cbc_decrypt) bne .Lcbcdecloop .Lcbcdecout: FRAME_POP + st1 {v7.16b}, [x5] /* return iv */ ret AES_ENDPROC(aes_cbc_decrypt) @@ -267,24 +269,15 @@ AES_ENDPROC(aes_cbc_decrypt) AES_ENTRY(aes_ctr_encrypt) FRAME_PUSH - cbnz w6, .Lctrfirst /* 1st time around? */ - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 -#if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ - bcs .Lctrinc - add x5, x5, #1 /* increment BE ctr */ - b .LctrincNx -#else - b .Lctrinc -#endif -.Lctrfirst: + cbz w6, .Lctrnotfirst /* 1st time around? */ enc_prepare w3, x2, x6 ld1 {v4.16b}, [x5] - umov x5, v4.d[1] /* keep swabbed ctr in reg */ - rev x5, x5 + +.Lctrnotfirst: + umov x8, v4.d[1] /* keep swabbed ctr in reg */ + rev x8, x8 #if INTERLEAVE >= 2 - cmn w5, w4 /* 32 bit overflow? */ + cmn w8, w4 /* 32 bit overflow? */ bcs .Lctrloop .LctrloopNx: subs w4, w4, #INTERLEAVE @@ -292,11 +285,11 @@ AES_ENTRY(aes_ctr_encrypt) #if INTERLEAVE == 2 mov v0.8b, v4.8b mov v1.8b, v4.8b - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v0.d[1], x7 - rev x7, x5 - add x5, x5, #1 + rev x7, x8 + add x8, x8, #1 ins v1.d[1], x7 ld1 {v2.16b-v3.16b}, [x1], #32 /* get 2 input blocks */ do_encrypt_block2x @@ -305,7 +298,7 @@ AES_ENTRY(aes_ctr_encrypt) st1 {v0.16b-v1.16b}, [x0], #32 #else ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ - dup v7.4s, w5 + dup v7.4s, w8 mov v0.16b, v4.16b add v7.4s, v7.4s, v8.4s mov v1.16b, v4.16b @@ -323,18 +316,12 @@ AES_ENTRY(aes_ctr_encrypt) eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b st1 {v0.16b-v3.16b}, [x0], #64 - add x5, x5, #INTERLEAVE + add x8, x8, #INTERLEAVE #endif - cbz w4, .LctroutNx -.LctrincNx: - rev x7, x5 + rev x7, x8 ins v4.d[1], x7 + cbz w4, .Lctrout b .LctrloopNx -.LctroutNx: - sub x5, x5, #1 - rev x7, x5 - ins v4.d[1], x7 - b .Lctrout .Lctr1x: adds w4, w4, #INTERLEAVE beq .Lctrout @@ -342,30 +329,39 @@ AES_ENTRY(aes_ctr_encrypt) .Lctrloop: mov v0.16b, v4.16b encrypt_block v0, w3, x2, x6, w7 + + adds x8, x8, #1 /* increment BE ctr */ + rev x7, x8 + ins v4.d[1], x7 + bcs .Lctrcarry /* overflow? */ + +.Lctrcarrydone: subs w4, w4, #1 bmi .Lctrhalfblock /* blocks < 0 means 1/2 block */ ld1 {v3.16b}, [x1], #16 eor v3.16b, v0.16b, v3.16b st1 {v3.16b}, [x0], #16 - beq .Lctrout -.Lctrinc: - adds x5, x5, #1 /* increment BE ctr */ - rev x7, x5 - ins v4.d[1], x7 - bcc .Lctrloop /* no overflow? */ - umov x7, v4.d[0] /* load upper word of ctr */ - rev x7, x7 /* ... to handle the carry */ - add x7, x7, #1 - rev x7, x7 - ins v4.d[0], x7 - b .Lctrloop + bne .Lctrloop + +.Lctrout: + st1 {v4.16b}, [x5] /* return next CTR value */ + FRAME_POP + ret + .Lctrhalfblock: ld1 {v3.8b}, [x1] eor v3.8b, v0.8b, v3.8b st1 {v3.8b}, [x0] -.Lctrout: FRAME_POP ret + +.Lctrcarry: + umov x7, v4.d[0] /* load upper word of ctr */ + rev x7, x7 /* ... to handle the carry */ + add x7, x7, #1 + rev x7, x7 + ins v4.d[0], x7 + b .Lctrcarrydone AES_ENDPROC(aes_ctr_encrypt) .ltorg @@ -386,7 +382,8 @@ AES_ENDPROC(aes_ctr_encrypt) .endm .Lxts_mul_x: - .word 1, 0, 0x87, 0 +CPU_LE( .quad 1, 0x87 ) +CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) FRAME_PUSH diff --git a/kernel/arch/arm64/crypto/aes-neon.S b/kernel/arch/arm64/crypto/aes-neon.S index b93170e1c..85f07ead7 100644 --- a/kernel/arch/arm64/crypto/aes-neon.S +++ b/kernel/arch/arm64/crypto/aes-neon.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #define AES_ENTRY(func) ENTRY(neon_ ## func) #define AES_ENDPROC(func) ENDPROC(neon_ ## func) @@ -83,13 +84,13 @@ .endm .macro do_block, enc, in, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */ tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */ sub_bytes \in - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -229,7 +230,7 @@ .endm .macro do_block_2x, enc, in0, in1 rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -237,7 +238,7 @@ sub_bytes_2x \in0, \in1 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -254,7 +255,7 @@ .endm .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i - ld1 {v15.16b}, [\rk] + ld1 {v15.4s}, [\rk] add \rkp, \rk, #16 mov \i, \rounds 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */ @@ -266,7 +267,7 @@ tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */ tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */ tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */ - ld1 {v15.16b}, [\rkp], #16 + ld1 {v15.4s}, [\rkp], #16 subs \i, \i, #1 beq 2222f .if \enc == 1 @@ -306,12 +307,16 @@ .text .align 4 .LForward_ShiftRows: - .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 - .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb +CPU_LE( .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3 ) +CPU_LE( .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb ) +CPU_BE( .byte 0xb, 0x6, 0x1, 0xc, 0x7, 0x2, 0xd, 0x8 ) +CPU_BE( .byte 0x3, 0xe, 0x9, 0x4, 0xf, 0xa, 0x5, 0x0 ) .LReverse_ShiftRows: - .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb - .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 +CPU_LE( .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb ) +CPU_LE( .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3 ) +CPU_BE( .byte 0x3, 0x6, 0x9, 0xc, 0xf, 0x2, 0x5, 0x8 ) +CPU_BE( .byte 0xb, 0xe, 0x1, 0x4, 0x7, 0xa, 0xd, 0x0 ) .LForward_Sbox: .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5 diff --git a/kernel/arch/arm64/crypto/ghash-ce-core.S b/kernel/arch/arm64/crypto/ghash-ce-core.S index dc4570158..f0bb9f0b5 100644 --- a/kernel/arch/arm64/crypto/ghash-ce-core.S +++ b/kernel/arch/arm64/crypto/ghash-ce-core.S @@ -29,8 +29,8 @@ * struct ghash_key const *k, const char *head) */ ENTRY(pmull_ghash_update) - ld1 {SHASH.16b}, [x3] - ld1 {XL.16b}, [x1] + ld1 {SHASH.2d}, [x3] + ld1 {XL.2d}, [x1] movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 shl MASK.2d, MASK.2d, #57 @@ -74,6 +74,6 @@ CPU_LE( rev64 T1.16b, T1.16b ) cbnz w0, 0b - st1 {XL.16b}, [x1] + st1 {XL.2d}, [x1] ret ENDPROC(pmull_ghash_update) diff --git a/kernel/arch/arm64/crypto/sha1-ce-core.S b/kernel/arch/arm64/crypto/sha1-ce-core.S index 033aae6d7..c98e7e849 100644 --- a/kernel/arch/arm64/crypto/sha1-ce-core.S +++ b/kernel/arch/arm64/crypto/sha1-ce-core.S @@ -78,7 +78,7 @@ ENTRY(sha1_ce_transform) ld1r {k3.4s}, [x6] /* load state */ - ldr dga, [x0] + ld1 {dgav.4s}, [x0] ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ @@ -144,7 +144,7 @@ CPU_LE( rev32 v11.16b, v11.16b ) b 1b /* store new state */ -3: str dga, [x0] +3: st1 {dgav.4s}, [x0] str dgb, [x0, #16] ret ENDPROC(sha1_ce_transform) diff --git a/kernel/arch/arm64/crypto/sha2-ce-core.S b/kernel/arch/arm64/crypto/sha2-ce-core.S index 5df9d9d47..01cfee066 100644 --- a/kernel/arch/arm64/crypto/sha2-ce-core.S +++ b/kernel/arch/arm64/crypto/sha2-ce-core.S @@ -85,7 +85,7 @@ ENTRY(sha2_ce_transform) ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ldp dga, dgb, [x0] + ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] @@ -148,6 +148,6 @@ CPU_LE( rev32 v19.16b, v19.16b ) b 1b /* store new state */ -3: stp dga, dgb, [x0] +3: st1 {dgav.4s, dgbv.4s}, [x0] ret ENDPROC(sha2_ce_transform) diff --git a/kernel/arch/arm64/include/asm/arch_gicv3.h b/kernel/arch/arm64/include/asm/arch_gicv3.h index 2731d3b25..8ec88e5b2 100644 --- a/kernel/arch/arm64/include/asm/arch_gicv3.h +++ b/kernel/arch/arm64/include/asm/arch_gicv3.h @@ -103,6 +103,7 @@ static inline u64 gic_read_iar_common(void) u64 irqstat; asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + dsb(sy); return irqstat; } diff --git a/kernel/arch/arm64/include/asm/cpufeature.h b/kernel/arch/arm64/include/asm/cpufeature.h index 8f271b83f..8884b5d5f 100644 --- a/kernel/arch/arm64/include/asm/cpufeature.h +++ b/kernel/arch/arm64/include/asm/cpufeature.h @@ -30,8 +30,9 @@ #define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_WORKAROUND_CAVIUM_23154 6 #define ARM64_WORKAROUND_834220 7 +#define ARM64_WORKAROUND_CAVIUM_27456 8 -#define ARM64_NCAPS 8 +#define ARM64_NCAPS 9 #ifndef __ASSEMBLY__ @@ -76,7 +77,7 @@ struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); - void (*enable)(void *); /* Called on all active CPUs */ + int (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/kernel/arch/arm64/include/asm/elf.h b/kernel/arch/arm64/include/asm/elf.h index faad6df49..44dd892a4 100644 --- a/kernel/arch/arm64/include/asm/elf.h +++ b/kernel/arch/arm64/include/asm/elf.h @@ -136,6 +136,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; #define SET_PERSONALITY(ex) clear_thread_flag(TIF_32BIT); +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ #define ARCH_DLINFO \ do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ @@ -156,14 +157,14 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12)) #endif -#ifdef CONFIG_COMPAT - #ifdef __AARCH64EB__ #define COMPAT_ELF_PLATFORM ("v8b") #else #define COMPAT_ELF_PLATFORM ("v8l") #endif +#ifdef CONFIG_COMPAT + #define COMPAT_ELF_ET_DYN_BASE (2 * TASK_SIZE_32 / 3) /* AArch32 registers. */ diff --git a/kernel/arch/arm64/include/asm/futex.h b/kernel/arch/arm64/include/asm/futex.h index 007a69fc4..5f3ab8c1d 100644 --- a/kernel/arch/arm64/include/asm/futex.h +++ b/kernel/arch/arm64/include/asm/futex.h @@ -121,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, CONFIG_ARM64_PAN) " prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" @@ -137,6 +138,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .align 3\n" " .quad 1b, 4b, 2b, 4b\n" " .popsection\n" +ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) : "memory"); diff --git a/kernel/arch/arm64/include/asm/kvm_arm.h b/kernel/arch/arm64/include/asm/kvm_arm.h index 5e6857b6b..2d960f858 100644 --- a/kernel/arch/arm64/include/asm/kvm_arm.h +++ b/kernel/arch/arm64/include/asm/kvm_arm.h @@ -107,8 +107,6 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_RES1 | TCR_EL2_PS_40B) - /* VTCR_EL2 Registers bits */ #define VTCR_EL2_RES1 (1 << 31) #define VTCR_EL2_PS_MASK (7 << 16) diff --git a/kernel/arch/arm64/include/asm/opcodes.h b/kernel/arch/arm64/include/asm/opcodes.h index 4e603ea36..123f45d92 100644 --- a/kernel/arch/arm64/include/asm/opcodes.h +++ b/kernel/arch/arm64/include/asm/opcodes.h @@ -1 +1,5 @@ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define CONFIG_CPU_ENDIAN_BE8 CONFIG_CPU_BIG_ENDIAN +#endif + #include <../../arm/include/asm/opcodes.h> diff --git a/kernel/arch/arm64/include/asm/percpu.h b/kernel/arch/arm64/include/asm/percpu.h index 0a456bef8..8a336852e 100644 --- a/kernel/arch/arm64/include/asm/percpu.h +++ b/kernel/arch/arm64/include/asm/percpu.h @@ -44,48 +44,44 @@ static inline unsigned long __percpu_##op(void *ptr, \ \ switch (size) { \ case 1: \ - do { \ - asm ("//__per_cpu_" #op "_1\n" \ - "ldxrb %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_1\n" \ + "1: ldxrb %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrb %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u8 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrb %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u8 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 2: \ - do { \ - asm ("//__per_cpu_" #op "_2\n" \ - "ldxrh %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_2\n" \ + "1: ldxrh %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrh %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u16 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrh %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u16 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 4: \ - do { \ - asm ("//__per_cpu_" #op "_4\n" \ - "ldxr %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_4\n" \ + "1: ldxr %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxr %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u32 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u32 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 8: \ - do { \ - asm ("//__per_cpu_" #op "_8\n" \ - "ldxr %[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_8\n" \ + "1: ldxr %[ret], %[ptr]\n" \ #asm_op " %[ret], %[ret], %[val]\n" \ - "stxr %w[loop], %[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u64 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u64 *)ptr) \ + : [val] "Ir" (val)); \ break; \ default: \ BUILD_BUG(); \ @@ -150,44 +146,40 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, switch (size) { case 1: - do { - asm ("//__percpu_xchg_1\n" - "ldxrb %w[ret], %[ptr]\n" - "stxrb %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u8 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_1\n" + "1: ldxrb %w[ret], %[ptr]\n" + " stxrb %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u8 *)ptr) + : [val] "r" (val)); break; case 2: - do { - asm ("//__percpu_xchg_2\n" - "ldxrh %w[ret], %[ptr]\n" - "stxrh %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u16 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_2\n" + "1: ldxrh %w[ret], %[ptr]\n" + " stxrh %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u16 *)ptr) + : [val] "r" (val)); break; case 4: - do { - asm ("//__percpu_xchg_4\n" - "ldxr %w[ret], %[ptr]\n" - "stxr %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u32 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_4\n" + "1: ldxr %w[ret], %[ptr]\n" + " stxr %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u32 *)ptr) + : [val] "r" (val)); break; case 8: - do { - asm ("//__percpu_xchg_8\n" - "ldxr %[ret], %[ptr]\n" - "stxr %w[loop], %[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u64 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_8\n" + "1: ldxr %[ret], %[ptr]\n" + " stxr %w[loop], %[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u64 *)ptr) + : [val] "r" (val)); break; default: BUILD_BUG(); diff --git a/kernel/arch/arm64/include/asm/pgtable-hwdef.h b/kernel/arch/arm64/include/asm/pgtable-hwdef.h index d6739e836..b9da9545b 100644 --- a/kernel/arch/arm64/include/asm/pgtable-hwdef.h +++ b/kernel/arch/arm64/include/asm/pgtable-hwdef.h @@ -117,7 +117,6 @@ * Section */ #define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0) -#define PMD_SECT_PROT_NONE (_AT(pmdval_t, 1) << 58) #define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */ #define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) diff --git a/kernel/arch/arm64/include/asm/pgtable.h b/kernel/arch/arm64/include/asm/pgtable.h index eaa9cabf4..67c2ad6d3 100644 --- a/kernel/arch/arm64/include/asm/pgtable.h +++ b/kernel/arch/arm64/include/asm/pgtable.h @@ -69,11 +69,11 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) -#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) -#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) -#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT)) -#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) +#define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_DEVICE_nGnRE)) +#define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT)) +#define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_SECT_NORMAL (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL)) @@ -83,7 +83,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) -#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) @@ -155,6 +155,7 @@ extern struct page *empty_zero_page; #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #ifdef CONFIG_ARM64_HW_AFDBM #define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) @@ -165,8 +166,6 @@ extern struct page *empty_zero_page; #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) #define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) -#define pte_valid_user(pte) \ - ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID) @@ -264,13 +263,13 @@ extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - if (pte_valid_user(pte)) { - if (!pte_special(pte) && pte_exec(pte)) - __sync_icache_dcache(pte, addr); + if (pte_present(pte)) { if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; + if (pte_user(pte) && pte_exec(pte) && !pte_special(pte)) + __sync_icache_dcache(pte, addr); } /* @@ -348,6 +347,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #endif /* CONFIG_HAVE_RCU_TABLE_FREE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#define pmd_present(pmd) pte_present(pmd_pte(pmd)) #define pmd_dirty(pmd) pte_dirty(pmd_pte(pmd)) #define pmd_young(pmd) pte_young(pmd_pte(pmd)) #define pmd_wrprotect(pmd) pte_pmd(pte_wrprotect(pmd_pte(pmd))) @@ -356,7 +356,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address, #define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd))) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd))) -#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_TYPE_MASK)) +#define pmd_mknotpresent(pmd) (__pmd(pmd_val(pmd) & ~PMD_SECT_VALID)) #define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) @@ -395,7 +395,6 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot); #define pmd_none(pmd) (!pmd_val(pmd)) -#define pmd_present(pmd) (pmd_val(pmd)) #define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) @@ -539,6 +538,21 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #ifdef CONFIG_ARM64_HW_AFDBM +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +extern int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); +} +#endif + /* * Atomic pte/pmd modifications. */ @@ -591,9 +605,9 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define __HAVE_ARCH_PMDP_GET_AND_CLEAR -static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR +static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) { return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); } @@ -641,6 +655,7 @@ extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; * bits 0-1: present (must be zero) * bits 2-7: swap type * bits 8-57: swap offset + * bit 58: PTE_PROT_NONE (must be zero) */ #define __SWP_TYPE_SHIFT 2 #define __SWP_TYPE_BITS 6 diff --git a/kernel/arch/arm64/include/asm/processor.h b/kernel/arch/arm64/include/asm/processor.h index 4acb7ca94..d08559528 100644 --- a/kernel/arch/arm64/include/asm/processor.h +++ b/kernel/arch/arm64/include/asm/processor.h @@ -186,6 +186,6 @@ static inline void spin_lock_prefetch(const void *x) #endif -void cpu_enable_pan(void *__unused); +int cpu_enable_pan(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/kernel/arch/arm64/include/asm/ptrace.h b/kernel/arch/arm64/include/asm/ptrace.h index e9e5467e0..7f9475508 100644 --- a/kernel/arch/arm64/include/asm/ptrace.h +++ b/kernel/arch/arm64/include/asm/ptrace.h @@ -58,6 +58,7 @@ #define COMPAT_PSR_Z_BIT 0x40000000 #define COMPAT_PSR_N_BIT 0x80000000 #define COMPAT_PSR_IT_MASK 0x0600fc00 /* If-Then execution state mask */ +#define COMPAT_PSR_GE_MASK 0x000f0000 #ifdef CONFIG_CPU_BIG_ENDIAN #define COMPAT_PSR_ENDSTATE COMPAT_PSR_E_BIT @@ -116,6 +117,8 @@ struct pt_regs { }; u64 orig_x0; u64 syscallno; + u64 orig_addr_limit; + u64 unused; // maintain 16 byte alignment }; #define arch_has_single_step() (1) @@ -151,35 +154,9 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->regs[0]; } -/* - * Are the current registers suitable for user mode? (used to maintain - * security in signal handlers) - */ -static inline int valid_user_regs(struct user_pt_regs *regs) -{ - if (user_mode(regs) && (regs->pstate & PSR_I_BIT) == 0) { - regs->pstate &= ~(PSR_F_BIT | PSR_A_BIT); - - /* The T bit is reserved for AArch64 */ - if (!(regs->pstate & PSR_MODE32_BIT)) - regs->pstate &= ~COMPAT_PSR_T_BIT; - - return 1; - } - - /* - * Force PSR to something logical... - */ - regs->pstate &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | \ - COMPAT_PSR_T_BIT | PSR_MODE32_BIT; - - if (!(regs->pstate & PSR_MODE32_BIT)) { - regs->pstate &= ~COMPAT_PSR_T_BIT; - regs->pstate |= PSR_MODE_EL0t; - } - - return 0; -} +/* We must avoid circular header include via sched.h */ +struct task_struct; +int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task); #define instruction_pointer(regs) ((unsigned long)(regs)->pc) diff --git a/kernel/arch/arm64/include/asm/spinlock.h b/kernel/arch/arm64/include/asm/spinlock.h index c85e96d17..499e8de33 100644 --- a/kernel/arch/arm64/include/asm/spinlock.h +++ b/kernel/arch/arm64/include/asm/spinlock.h @@ -312,4 +312,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() +/* + * Accesses appearing in program order before a spin_lock() operation + * can be reordered with accesses inside the critical section, by virtue + * of arch_spin_lock being constructed using acquire semantics. + * + * In cases where this is problematic (e.g. try_to_wake_up), an + * smp_mb__before_spinlock() can restore the required ordering. + */ +#define smp_mb__before_spinlock() smp_mb() + #endif /* __ASM_SPINLOCK_H */ diff --git a/kernel/arch/arm64/include/uapi/asm/auxvec.h b/kernel/arch/arm64/include/uapi/asm/auxvec.h index 22d6d8885..4cf0c1778 100644 --- a/kernel/arch/arm64/include/uapi/asm/auxvec.h +++ b/kernel/arch/arm64/include/uapi/asm/auxvec.h @@ -19,4 +19,6 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ + #endif diff --git a/kernel/arch/arm64/include/uapi/asm/ptrace.h b/kernel/arch/arm64/include/uapi/asm/ptrace.h index 208db3df1..3378238b5 100644 --- a/kernel/arch/arm64/include/uapi/asm/ptrace.h +++ b/kernel/arch/arm64/include/uapi/asm/ptrace.h @@ -76,6 +76,7 @@ struct user_fpsimd_state { __uint128_t vregs[32]; __u32 fpsr; __u32 fpcr; + __u32 __reserved[2]; }; struct user_hwdebug_state { diff --git a/kernel/arch/arm64/kernel/asm-offsets.c b/kernel/arch/arm64/kernel/asm-offsets.c index c5038409e..d74475928 100644 --- a/kernel/arch/arm64/kernel/asm-offsets.c +++ b/kernel/arch/arm64/kernel/asm-offsets.c @@ -59,6 +59,7 @@ int main(void) DEFINE(S_PC, offsetof(struct pt_regs, pc)); DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_SYSCALLNO, offsetof(struct pt_regs, syscallno)); + DEFINE(S_ORIG_ADDR_LIMIT, offsetof(struct pt_regs, orig_addr_limit)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); diff --git a/kernel/arch/arm64/kernel/cpu_errata.c b/kernel/arch/arm64/kernel/cpu_errata.c index feb6b4efa..a3e846a28 100644 --- a/kernel/arch/arm64/kernel/cpu_errata.c +++ b/kernel/arch/arm64/kernel/cpu_errata.c @@ -100,6 +100,15 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_RANGE(MIDR_THUNDERX, 0x00, 0x01), }, #endif +#ifdef CONFIG_CAVIUM_ERRATUM_27456 + { + /* Cavium ThunderX, T88 pass 1.x - 2.1 */ + .desc = "Cavium erratum 27456", + .capability = ARM64_WORKAROUND_CAVIUM_27456, + MIDR_RANGE(MIDR_THUNDERX, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 1), + }, +#endif { } }; diff --git a/kernel/arch/arm64/kernel/cpufeature.c b/kernel/arch/arm64/kernel/cpufeature.c index 0669c6328..2735bf814 100644 --- a/kernel/arch/arm64/kernel/cpufeature.c +++ b/kernel/arch/arm64/kernel/cpufeature.c @@ -19,7 +19,9 @@ #define pr_fmt(fmt) "CPU features: " fmt #include <linux/bsearch.h> +#include <linux/cpumask.h> #include <linux/sort.h> +#include <linux/stop_machine.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cpufeature.h> @@ -764,7 +766,13 @@ static void enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) for (i = 0; caps[i].desc; i++) if (caps[i].enable && cpus_have_cap(caps[i].capability)) - on_each_cpu(caps[i].enable, NULL, true); + /* + * Use stop_machine() as it schedules the work allowing + * us to modify PSTATE, instead of on_each_cpu() which + * uses an IPI, giving us a PSTATE that disappears when + * we return. + */ + stop_machine(caps[i].enable, NULL, cpu_online_mask); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/arch/arm64/kernel/cpuinfo.c b/kernel/arch/arm64/kernel/cpuinfo.c index 212ae6361..0166cfbc8 100644 --- a/kernel/arch/arm64/kernel/cpuinfo.c +++ b/kernel/arch/arm64/kernel/cpuinfo.c @@ -22,6 +22,8 @@ #include <linux/bitops.h> #include <linux/bug.h> +#include <linux/compat.h> +#include <linux/elf.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/personality.h> @@ -85,7 +87,8 @@ static const char *const compat_hwcap_str[] = { "idivt", "vfpd32", "lpae", - "evtstrm" + "evtstrm", + NULL }; static const char *const compat_hwcap2_str[] = { @@ -101,6 +104,7 @@ static const char *const compat_hwcap2_str[] = { static int c_show(struct seq_file *m, void *v) { int i, j; + bool compat = personality(current->personality) == PER_LINUX32; for_each_online_cpu(i) { struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); @@ -112,6 +116,9 @@ static int c_show(struct seq_file *m, void *v) * "processor". Give glibc what it expects. */ seq_printf(m, "processor\t: %d\n", i); + if (compat) + seq_printf(m, "model name\t: ARMv8 Processor rev %d (%s)\n", + MIDR_REVISION(midr), COMPAT_ELF_PLATFORM); seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", loops_per_jiffy / (500000UL/HZ), @@ -124,7 +131,7 @@ static int c_show(struct seq_file *m, void *v) * software which does already (at least for 32-bit). */ seq_puts(m, "Features\t:"); - if (personality(current->personality) == PER_LINUX32) { + if (compat) { #ifdef CONFIG_COMPAT for (j = 0; compat_hwcap_str[j]; j++) if (compat_elf_hwcap & (1 << j)) diff --git a/kernel/arch/arm64/kernel/debug-monitors.c b/kernel/arch/arm64/kernel/debug-monitors.c index c1492ba1f..c8875b64b 100644 --- a/kernel/arch/arm64/kernel/debug-monitors.c +++ b/kernel/arch/arm64/kernel/debug-monitors.c @@ -152,7 +152,6 @@ static int debug_monitors_init(void) /* Clear the OS lock. */ on_each_cpu(clear_os_lock, NULL, 1); isb(); - local_dbg_enable(); /* Register hotplug handler. */ __register_cpu_notifier(&os_lock_nb); @@ -423,8 +422,10 @@ int kernel_active_single_step(void) /* ptrace API */ void user_enable_single_step(struct task_struct *task) { - set_ti_thread_flag(task_thread_info(task), TIF_SINGLESTEP); - set_regs_spsr_ss(task_pt_regs(task)); + struct thread_info *ti = task_thread_info(task); + + if (!test_and_set_ti_thread_flag(ti, TIF_SINGLESTEP)) + set_regs_spsr_ss(task_pt_regs(task)); } void user_disable_single_step(struct task_struct *task) diff --git a/kernel/arch/arm64/kernel/entry.S b/kernel/arch/arm64/kernel/entry.S index dd8fd31f8..cf92d4ee5 100644 --- a/kernel/arch/arm64/kernel/entry.S +++ b/kernel/arch/arm64/kernel/entry.S @@ -27,6 +27,7 @@ #include <asm/cpufeature.h> #include <asm/errno.h> #include <asm/esr.h> +#include <asm/memory.h> #include <asm/thread_info.h> #include <asm/unistd.h> @@ -93,7 +94,13 @@ disable_step_tsk x19, x20 // exceptions when scheduling. .else add x21, sp, #S_FRAME_SIZE - .endif + get_thread_info tsk + /* Save the task's original addr_limit and set USER_DS (TASK_SIZE_64) */ + ldr x20, [tsk, #TI_ADDR_LIMIT] + str x20, [sp, #S_ORIG_ADDR_LIMIT] + mov x20, #TASK_SIZE_64 + str x20, [tsk, #TI_ADDR_LIMIT] + .endif /* \el == 0 */ mrs x22, elr_el1 mrs x23, spsr_el1 stp lr, x21, [sp, #S_LR] @@ -117,6 +124,12 @@ .endm .macro kernel_exit, el + .if \el != 0 + /* Restore the task's original addr_limit. */ + ldr x20, [sp, #S_ORIG_ADDR_LIMIT] + str x20, [tsk, #TI_ADDR_LIMIT] + .endif + ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -555,7 +568,7 @@ el0_inv: mov x0, sp mov x1, #BAD_SYNC mov x2, x25 - bl bad_mode + bl bad_el0_sync b ret_to_user ENDPROC(el0_sync) diff --git a/kernel/arch/arm64/kernel/head.S b/kernel/arch/arm64/kernel/head.S index b68525792..20ceb5edf 100644 --- a/kernel/arch/arm64/kernel/head.S +++ b/kernel/arch/arm64/kernel/head.S @@ -518,8 +518,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to - msr mdcr_el2, x0 // all PMU counters from EL1 4: + csel x0, xzr, x0, lt // all PMU counters from EL1 + msr mdcr_el2, x0 // (if they exist) /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/kernel/arch/arm64/kernel/ptrace.c b/kernel/arch/arm64/kernel/ptrace.c index ff7f13239..55909b220 100644 --- a/kernel/arch/arm64/kernel/ptrace.c +++ b/kernel/arch/arm64/kernel/ptrace.c @@ -39,6 +39,7 @@ #include <linux/elf.h> #include <asm/compat.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/pgtable.h> #include <asm/syscall.h> @@ -449,6 +450,8 @@ static int hw_break_set(struct task_struct *target, /* (address, ctrl) registers */ limit = regset->n * regset->size; while (count && offset < limit) { + if (count < PTRACE_HBP_ADDR_SZ) + return -EINVAL; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &addr, offset, offset + PTRACE_HBP_ADDR_SZ); if (ret) @@ -458,6 +461,8 @@ static int hw_break_set(struct task_struct *target, return ret; offset += PTRACE_HBP_ADDR_SZ; + if (!count) + break; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, offset, offset + PTRACE_HBP_CTRL_SZ); if (ret) @@ -494,13 +499,13 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_pt_regs newregs; + struct user_pt_regs newregs = task_pt_regs(target)->user_regs; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newregs, 0, -1); if (ret) return ret; - if (!valid_user_regs(&newregs)) + if (!valid_user_regs(&newregs, target)) return -EINVAL; task_pt_regs(target)->user_regs = newregs; @@ -524,7 +529,8 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - struct user_fpsimd_state newstate; + struct user_fpsimd_state newstate = + target->thread.fpsimd_state.user_fpsimd; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1); if (ret) @@ -548,7 +554,7 @@ static int tls_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { int ret; - unsigned long tls; + unsigned long tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) @@ -574,7 +580,8 @@ static int system_call_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - int syscallno, ret; + int syscallno = task_pt_regs(target)->syscallno; + int ret; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &syscallno, 0, -1); if (ret) @@ -770,7 +777,7 @@ static int compat_gpr_set(struct task_struct *target, } - if (valid_user_regs(&newregs.user_regs)) + if (valid_user_regs(&newregs.user_regs, target)) *task_pt_regs(target) = newregs; else ret = -EINVAL; @@ -846,7 +853,7 @@ static int compat_tls_set(struct task_struct *target, const void __user *ubuf) { int ret; - compat_ulong_t tls; + compat_ulong_t tls = target->thread.tp_value; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); if (ret) @@ -1272,3 +1279,79 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); } + +/* + * Bits which are always architecturally RES0 per ARM DDI 0487A.h + * Userspace cannot use these until they have an architectural meaning. + * We also reserve IL for the kernel; SS is handled dynamically. + */ +#define SPSR_EL1_AARCH64_RES0_BITS \ + (GENMASK_ULL(63,32) | GENMASK_ULL(27, 22) | GENMASK_ULL(20, 10) | \ + GENMASK_ULL(5, 5)) +#define SPSR_EL1_AARCH32_RES0_BITS \ + (GENMASK_ULL(63,32) | GENMASK_ULL(24, 22) | GENMASK_ULL(20,20)) + +static int valid_compat_regs(struct user_pt_regs *regs) +{ + regs->pstate &= ~SPSR_EL1_AARCH32_RES0_BITS; + + if (!system_supports_mixed_endian_el0()) { + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + regs->pstate |= COMPAT_PSR_E_BIT; + else + regs->pstate &= ~COMPAT_PSR_E_BIT; + } + + if (user_mode(regs) && (regs->pstate & PSR_MODE32_BIT) && + (regs->pstate & COMPAT_PSR_A_BIT) == 0 && + (regs->pstate & COMPAT_PSR_I_BIT) == 0 && + (regs->pstate & COMPAT_PSR_F_BIT) == 0) { + return 1; + } + + /* + * Force PSR to a valid 32-bit EL0t, preserving the same bits as + * arch/arm. + */ + regs->pstate &= COMPAT_PSR_N_BIT | COMPAT_PSR_Z_BIT | + COMPAT_PSR_C_BIT | COMPAT_PSR_V_BIT | + COMPAT_PSR_Q_BIT | COMPAT_PSR_IT_MASK | + COMPAT_PSR_GE_MASK | COMPAT_PSR_E_BIT | + COMPAT_PSR_T_BIT; + regs->pstate |= PSR_MODE32_BIT; + + return 0; +} + +static int valid_native_regs(struct user_pt_regs *regs) +{ + regs->pstate &= ~SPSR_EL1_AARCH64_RES0_BITS; + + if (user_mode(regs) && !(regs->pstate & PSR_MODE32_BIT) && + (regs->pstate & PSR_D_BIT) == 0 && + (regs->pstate & PSR_A_BIT) == 0 && + (regs->pstate & PSR_I_BIT) == 0 && + (regs->pstate & PSR_F_BIT) == 0) { + return 1; + } + + /* Force PSR to a valid 64-bit EL0t */ + regs->pstate &= PSR_N_BIT | PSR_Z_BIT | PSR_C_BIT | PSR_V_BIT; + + return 0; +} + +/* + * Are the current registers suitable for user mode? (used to maintain + * security in signal handlers) + */ +int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task) +{ + if (!test_tsk_thread_flag(task, TIF_SINGLESTEP)) + regs->pstate &= ~DBG_SPSR_SS; + + if (is_compat_thread(task_thread_info(task))) + return valid_compat_regs(regs); + else + return valid_native_regs(regs); +} diff --git a/kernel/arch/arm64/kernel/signal.c b/kernel/arch/arm64/kernel/signal.c index e18c48cb6..a8eafdbc7 100644 --- a/kernel/arch/arm64/kernel/signal.c +++ b/kernel/arch/arm64/kernel/signal.c @@ -115,7 +115,7 @@ static int restore_sigframe(struct pt_regs *regs, */ regs->syscallno = ~0UL; - err |= !valid_user_regs(®s->user_regs); + err |= !valid_user_regs(®s->user_regs, current); if (err == 0) { struct fpsimd_context *fpsimd_ctx = @@ -307,7 +307,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Check that the resulting registers are actually sane. */ - ret |= !valid_user_regs(®s->user_regs); + ret |= !valid_user_regs(®s->user_regs, current); /* * Fast forward the stepping logic so we step into the signal diff --git a/kernel/arch/arm64/kernel/signal32.c b/kernel/arch/arm64/kernel/signal32.c index 71ef6dc89..107335637 100644 --- a/kernel/arch/arm64/kernel/signal32.c +++ b/kernel/arch/arm64/kernel/signal32.c @@ -356,7 +356,7 @@ static int compat_restore_sigframe(struct pt_regs *regs, */ regs->syscallno = ~0UL; - err |= !valid_user_regs(®s->user_regs); + err |= !valid_user_regs(®s->user_regs, current); aux = (struct compat_aux_sigframe __user *) sf->uc.uc_regspace; if (err == 0) diff --git a/kernel/arch/arm64/kernel/smp.c b/kernel/arch/arm64/kernel/smp.c index b1adc51b2..f3c3d8fee 100644 --- a/kernel/arch/arm64/kernel/smp.c +++ b/kernel/arch/arm64/kernel/smp.c @@ -188,7 +188,6 @@ asmlinkage void secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); - local_dbg_enable(); local_irq_enable(); local_async_enable(); @@ -334,8 +333,8 @@ void __init smp_cpus_done(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { - cpuinfo_store_boot_cpu(); set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + cpuinfo_store_boot_cpu(); } static u64 __init of_get_cpu_mpidr(struct device_node *dn) diff --git a/kernel/arch/arm64/kernel/suspend.c b/kernel/arch/arm64/kernel/suspend.c index 1095aa483..00c1372bf 100644 --- a/kernel/arch/arm64/kernel/suspend.c +++ b/kernel/arch/arm64/kernel/suspend.c @@ -1,7 +1,9 @@ #include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/slab.h> +#include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> #include <asm/pgtable.h> #include <asm/memory.h> @@ -111,6 +113,13 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) set_my_cpu_offset(per_cpu_offset(smp_processor_id())); /* + * PSTATE was not saved over suspend/resume, re-enable any + * detected features that might not have been set correctly. + */ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, + CONFIG_ARM64_PAN)); + + /* * Restore HW breakpoint registers to sane values * before debug exceptions are possibly reenabled * through local_dbg_restore. diff --git a/kernel/arch/arm64/kernel/traps.c b/kernel/arch/arm64/kernel/traps.c index e9b9b5364..ca7f0ac5f 100644 --- a/kernel/arch/arm64/kernel/traps.c +++ b/kernel/arch/arm64/kernel/traps.c @@ -434,16 +434,33 @@ const char *esr_get_class_string(u32 esr) } /* - * bad_mode handles the impossible case in the exception vector. + * bad_mode handles the impossible case in the exception vector. This is always + * fatal. */ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { - siginfo_t info; - void __user *pc = (void __user *)instruction_pointer(regs); console_verbose(); pr_crit("Bad mode in %s handler detected, code 0x%08x -- %s\n", handler[reason], esr, esr_get_class_string(esr)); + + die("Oops - bad mode", regs, 0); + local_irq_disable(); + panic("bad mode"); +} + +/* + * bad_el0_sync handles unexpected, but potentially recoverable synchronous + * exceptions taken from EL0. Unlike bad_mode, this returns. + */ +asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +{ + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); + console_verbose(); + + pr_crit("Bad EL0 synchronous exception detected on CPU%d, code 0x%08x -- %s\n", + smp_processor_id(), esr, esr_get_class_string(esr)); __show_regs(regs); info.si_signo = SIGILL; @@ -451,7 +468,10 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) info.si_code = ILL_ILLOPC; info.si_addr = pc; - arm64_notify_die("Oops - bad mode", regs, &info, 0); + current->thread.fault_address = 0; + current->thread.fault_code = 0; + + force_sig_info(info.si_signo, &info, current); } void __pte_error(const char *file, int line, unsigned long val) diff --git a/kernel/arch/arm64/kvm/hyp-init.S b/kernel/arch/arm64/kvm/hyp-init.S index 178ba2248..84c338f01 100644 --- a/kernel/arch/arm64/kvm/hyp-init.S +++ b/kernel/arch/arm64/kvm/hyp-init.S @@ -64,7 +64,7 @@ __do_hyp_init: mrs x4, tcr_el1 ldr x5, =TCR_EL2_MASK and x4, x4, x5 - ldr x5, =TCR_EL2_FLAGS + mov x5, #TCR_EL2_RES1 orr x4, x4, x5 #ifndef CONFIG_ARM64_VA_BITS_48 @@ -85,15 +85,18 @@ __do_hyp_init: ldr_l x5, idmap_t0sz bfi x4, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH #endif - msr tcr_el2, x4 - - ldr x4, =VTCR_EL2_FLAGS /* * Read the PARange bits from ID_AA64MMFR0_EL1 and set the PS bits in - * VTCR_EL2. + * TCR_EL2 and VTCR_EL2. */ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 + + msr tcr_el2, x4 + + ldr x4, =VTCR_EL2_FLAGS + bfi x4, x5, #16, #3 + msr vtcr_el2, x4 mrs x4, mair_el1 diff --git a/kernel/arch/arm64/kvm/inject_fault.c b/kernel/arch/arm64/kvm/inject_fault.c index 648112e90..3972e65fb 100644 --- a/kernel/arch/arm64/kvm/inject_fault.c +++ b/kernel/arch/arm64/kvm/inject_fault.c @@ -130,7 +130,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); if (!is_iabt) - esr |= ESR_ELx_EC_DABT_LOW; + esr |= ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT; vcpu_sys_reg(vcpu, ESR_EL1) = esr | ESR_ELx_FSC_EXTABT; } diff --git a/kernel/arch/arm64/mm/fault.c b/kernel/arch/arm64/mm/fault.c index 92ddac1e8..247bae758 100644 --- a/kernel/arch/arm64/mm/fault.c +++ b/kernel/arch/arm64/mm/fault.c @@ -29,7 +29,9 @@ #include <linux/sched.h> #include <linux/highmem.h> #include <linux/perf_event.h> +#include <linux/preempt.h> +#include <asm/bug.h> #include <asm/cpufeature.h> #include <asm/exception.h> #include <asm/debug-monitors.h> @@ -81,6 +83,56 @@ void show_pte(struct mm_struct *mm, unsigned long addr) printk("\n"); } +#ifdef CONFIG_ARM64_HW_AFDBM +/* + * This function sets the access flags (dirty, accessed), as well as write + * permission, and only to a more permissive setting. + * + * It needs to cope with hardware update of the accessed/dirty state by other + * agents in the system and can safely skip the __sync_icache_dcache() call as, + * like set_pte_at(), the PTE is never changed from no-exec to exec here. + * + * Returns whether or not the PTE actually changed. + */ +int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + pteval_t old_pteval; + unsigned int tmp; + + if (pte_same(*ptep, entry)) + return 0; + + /* only preserve the access flags and write permission */ + pte_val(entry) &= PTE_AF | PTE_WRITE | PTE_DIRTY; + + /* + * PTE_RDONLY is cleared by default in the asm below, so set it in + * back if necessary (read-only or clean PTE). + */ + if (!pte_write(entry) || !pte_sw_dirty(entry)) + pte_val(entry) |= PTE_RDONLY; + + /* + * Setting the flags must be done atomically to avoid racing with the + * hardware update of the access/dirty state. + */ + asm volatile("// ptep_set_access_flags\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " and %0, %0, %3 // clear PTE_RDONLY\n" + " orr %0, %0, %4 // set flags\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) + : "L" (~PTE_RDONLY), "r" (pte_val(entry))); + + flush_tlb_fix_spurious_fault(vma, address); + return 1; +} +#endif + /* * The kernel tried to access some page that wasn't present. */ @@ -556,8 +608,16 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, } #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void *__unused) +int cpu_enable_pan(void *__unused) { + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + config_sctlr_el1(SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); + return 0; } #endif /* CONFIG_ARM64_PAN */ diff --git a/kernel/arch/arm64/mm/mmu.c b/kernel/arch/arm64/mm/mmu.c index 116ad654d..653735a8c 100644 --- a/kernel/arch/arm64/mm/mmu.c +++ b/kernel/arch/arm64/mm/mmu.c @@ -652,9 +652,9 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) /* * Check whether the physical FDT address is set and meets the minimum * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be - * at least 8 bytes so that we can always access the size field of the - * FDT header after mapping the first chunk, double check here if that - * is indeed the case. + * at least 8 bytes so that we can always access the magic and size + * fields of the FDT header after mapping the first chunk, double check + * here if that is indeed the case. */ BUILD_BUG_ON(MIN_FDT_ALIGN < 8); if (!dt_phys || dt_phys % MIN_FDT_ALIGN) @@ -682,7 +682,7 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) create_mapping(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base, SWAPPER_BLOCK_SIZE, prot); - if (fdt_check_header(dt_virt) != 0) + if (fdt_magic(dt_virt) != FDT_MAGIC) return NULL; size = fdt_totalsize(dt_virt); diff --git a/kernel/arch/arm64/mm/proc.S b/kernel/arch/arm64/mm/proc.S index b8f04b3f2..18201e9e8 100644 --- a/kernel/arch/arm64/mm/proc.S +++ b/kernel/arch/arm64/mm/proc.S @@ -25,6 +25,8 @@ #include <asm/hwcap.h> #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> +#include <asm/cpufeature.h> +#include <asm/alternative.h> #include "proc-macros.S" @@ -137,7 +139,17 @@ ENTRY(cpu_do_switch_mm) bfi x0, x1, #48, #16 // set the ASID msr ttbr0_el1, x0 // set TTBR0 isb +alternative_if_not ARM64_WORKAROUND_CAVIUM_27456 ret + nop + nop + nop +alternative_else + ic iallu + dsb nsh + isb + ret +alternative_endif ENDPROC(cpu_do_switch_mm) .section ".text.init", #alloc, #execinstr @@ -156,6 +168,8 @@ ENTRY(__cpu_setup) msr cpacr_el1, x0 // Enable FP/ASIMD mov x0, #1 << 12 // Reset mdscr_el1 and disable msr mdscr_el1, x0 // access to the DCC from EL0 + isb // Unmask debug exceptions now, + enable_dbg // since this is per-cpu reset_pmuserenr_el0 x0 // Disable PMU access from EL0 /* * Memory region attributes for LPAE: |