diff options
author | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-11 10:41:07 +0300 |
---|---|---|
committer | José Pekkarinen <jose.pekkarinen@nokia.com> | 2016-04-13 08:17:18 +0300 |
commit | e09b41010ba33a20a87472ee821fa407a5b8da36 (patch) | |
tree | d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/arch/arm64/include | |
parent | f93b97fd65072de626c074dbe099a1fff05ce060 (diff) |
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page.
During the rebasing, the following patch collided:
Force tick interrupt and get rid of softirq magic(I70131fb85).
Collisions have been removed because its logic was found on the
source already.
Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/arch/arm64/include')
80 files changed, 2795 insertions, 1189 deletions
diff --git a/kernel/arch/arm64/include/asm/Kbuild b/kernel/arch/arm64/include/asm/Kbuild index 55103e50c..70fd9ffb5 100644 --- a/kernel/arch/arm64/include/asm/Kbuild +++ b/kernel/arch/arm64/include/asm/Kbuild @@ -25,6 +25,7 @@ generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h +generic-y += mm-arch-hooks.h generic-y += mman.h generic-y += msgbuf.h generic-y += msi.h @@ -35,7 +36,6 @@ generic-y += poll.h generic-y += preempt.h generic-y += resource.h generic-y += rwsem.h -generic-y += scatterlist.h generic-y += sections.h generic-y += segment.h generic-y += sembuf.h diff --git a/kernel/arch/arm64/include/asm/acpi.h b/kernel/arch/arm64/include/asm/acpi.h index 59c05d8ea..caafd63b8 100644 --- a/kernel/arch/arm64/include/asm/acpi.h +++ b/kernel/arch/arm64/include/asm/acpi.h @@ -13,11 +13,19 @@ #define _ASM_ACPI_H #include <linux/mm.h> -#include <linux/irqchip/arm-gic-acpi.h> +#include <linux/psci.h> #include <asm/cputype.h> #include <asm/smp_plat.h> +/* Macros for consistency checks of the GICC subtable of MADT */ +#define ACPI_MADT_GICC_LENGTH \ + (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) + +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ + (entry)->header.length != ACPI_MADT_GICC_LENGTH) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI /* ACPI table mapping after acpi_gbl_permanent_mmap is set */ @@ -39,18 +47,6 @@ extern int acpi_disabled; extern int acpi_noirq; extern int acpi_pci_disabled; -/* 1 to indicate PSCI 0.2+ is implemented */ -static inline bool acpi_psci_present(void) -{ - return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_COMPLIANT; -} - -/* 1 to indicate HVC must be used instead of SMC as the PSCI conduit */ -static inline bool acpi_psci_use_hvc(void) -{ - return acpi_gbl_FADT.arm_boot_flags & ACPI_FADT_PSCI_USE_HVC; -} - static inline void disable_acpi(void) { acpi_disabled = 1; @@ -88,9 +84,16 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); #else -static inline bool acpi_psci_present(void) { return false; } -static inline bool acpi_psci_use_hvc(void) { return false; } static inline void acpi_init_cpus(void) { } #endif /* CONFIG_ACPI */ +static inline const char *acpi_get_enable_method(int cpu) +{ + return acpi_psci_present() ? "psci" : NULL; +} + +#ifdef CONFIG_ACPI_APEI +pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr); +#endif + #endif /*_ASM_ACPI_H*/ diff --git a/kernel/arch/arm64/include/asm/alternative-asm.h b/kernel/arch/arm64/include/asm/alternative-asm.h deleted file mode 100644 index 919a67855..000000000 --- a/kernel/arch/arm64/include/asm/alternative-asm.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef __ASM_ALTERNATIVE_ASM_H -#define __ASM_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len - .word \orig_offset - . - .word \alt_offset - . - .hword \feature - .byte \orig_len - .byte \alt_len -.endm - -.macro alternative_insn insn1 insn2 cap -661: \insn1 -662: .pushsection .altinstructions, "a" - altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f - .popsection - .pushsection .altinstr_replacement, "ax" -663: \insn2 -664: .popsection - .if ((664b-663b) != (662b-661b)) - .error "Alternatives instruction length mismatch" - .endif -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* __ASM_ALTERNATIVE_ASM_H */ diff --git a/kernel/arch/arm64/include/asm/alternative.h b/kernel/arch/arm64/include/asm/alternative.h index d261f01e2..d56ec0715 100644 --- a/kernel/arch/arm64/include/asm/alternative.h +++ b/kernel/arch/arm64/include/asm/alternative.h @@ -1,6 +1,10 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H +#ifndef __ASSEMBLY__ + +#include <linux/init.h> +#include <linux/kconfig.h> #include <linux/types.h> #include <linux/stddef.h> #include <linux/stringify.h> @@ -13,7 +17,7 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; -void apply_alternatives_all(void); +void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); void free_alternatives_memory(void); @@ -24,8 +28,22 @@ void free_alternatives_memory(void); " .byte 662b-661b\n" /* source len */ \ " .byte 664f-663f\n" /* replacement len */ -/* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, newinstr, feature) \ +/* + * alternative assembly primitive: + * + * If any of these .org directive fail, it means that insn1 and insn2 + * don't have the same length. This used to be written as + * + * .if ((664b-663b) != (662b-661b)) + * .error "Alternatives instruction length mismatch" + * .endif + * + * but most assemblers die if insn1 or insn2 have a .inst. This should + * be fixed in a binutils release posterior to 2.25.51.0.2 (anything + * containing commit 4e4d08cf7399b606 or c1baaddf8861). + */ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ @@ -37,8 +55,98 @@ void free_alternatives_memory(void); newinstr "\n" \ "664:\n\t" \ ".popsection\n\t" \ - ".if ((664b-663b) != (662b-661b))\n\t" \ - " .error \"Alternatives instruction length mismatch\"\n\t"\ + ".org . - (664b-663b) + (662b-661b)\n\t" \ + ".org . - (662b-661b) + (664b-663b)\n" \ ".endif\n" +#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \ + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) + +#else + +.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len + .word \orig_offset - . + .word \alt_offset - . + .hword \feature + .byte \orig_len + .byte \alt_len +.endm + +.macro alternative_insn insn1, insn2, cap, enable = 1 + .if \enable +661: \insn1 +662: .pushsection .altinstructions, "a" + altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f + .popsection + .pushsection .altinstr_replacement, "ax" +663: \insn2 +664: .popsection + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) + .endif +.endm + +/* + * Begin an alternative code sequence. + * + * The code that follows this macro will be assembled and linked as + * normal. There are no restrictions on this code. + */ +.macro alternative_if_not cap, enable = 1 + .if \enable + .pushsection .altinstructions, "a" + altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f + .popsection +661: + .endif +.endm + +/* + * Provide the alternative code sequence. + * + * The code that follows this macro is assembled into a special + * section to be used for dynamic patching. Code that follows this + * macro must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. + * + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ +.macro alternative_else, enable = 1 + .if \enable +662: .pushsection .altinstr_replacement, "ax" +663: + .endif +.endm + +/* + * Complete an alternative code sequence. + */ +.macro alternative_endif, enable = 1 + .if \enable +664: .popsection + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) + .endif +.endm + +#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ + alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) + + +#endif /* __ASSEMBLY__ */ + +/* + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * N.B. If CONFIG_FOO is specified, but not selected, the whole block + * will be omitted, including oldinstr. + */ +#define ALTERNATIVE(oldinstr, newinstr, ...) \ + _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) + #endif /* __ASM_ALTERNATIVE_H */ diff --git a/kernel/arch/arm64/include/asm/arch_gicv3.h b/kernel/arch/arm64/include/asm/arch_gicv3.h new file mode 100644 index 000000000..2731d3b25 --- /dev/null +++ b/kernel/arch/arm64/include/asm/arch_gicv3.h @@ -0,0 +1,171 @@ +/* + * arch/arm64/include/asm/arch_gicv3.h + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_ARCH_GICV3_H +#define __ASM_ARCH_GICV3_H + +#include <asm/sysreg.h> + +#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) + +#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) + +/* + * System register definitions + */ +#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) + +#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) + +#define ICH_LR0_EL2 __LR0_EL2(0) +#define ICH_LR1_EL2 __LR0_EL2(1) +#define ICH_LR2_EL2 __LR0_EL2(2) +#define ICH_LR3_EL2 __LR0_EL2(3) +#define ICH_LR4_EL2 __LR0_EL2(4) +#define ICH_LR5_EL2 __LR0_EL2(5) +#define ICH_LR6_EL2 __LR0_EL2(6) +#define ICH_LR7_EL2 __LR0_EL2(7) +#define ICH_LR8_EL2 __LR8_EL2(0) +#define ICH_LR9_EL2 __LR8_EL2(1) +#define ICH_LR10_EL2 __LR8_EL2(2) +#define ICH_LR11_EL2 __LR8_EL2(3) +#define ICH_LR12_EL2 __LR8_EL2(4) +#define ICH_LR13_EL2 __LR8_EL2(5) +#define ICH_LR14_EL2 __LR8_EL2(6) +#define ICH_LR15_EL2 __LR8_EL2(7) + +#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) +#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) +#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) +#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) +#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) + +#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) +#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) +#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) +#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) +#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) + +#ifndef __ASSEMBLY__ + +#include <linux/stringify.h> +#include <asm/barrier.h> + +/* + * Low-level accessors + * + * These system registers are 32 bits, but we make sure that the compiler + * sets the GP register's most significant bits to 0 with an explicit cast. + */ + +static inline void gic_write_eoir(u32 irq) +{ + asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" ((u64)irq)); + isb(); +} + +static inline void gic_write_dir(u32 irq) +{ + asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" ((u64)irq)); + isb(); +} + +static inline u64 gic_read_iar_common(void) +{ + u64 irqstat; + + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + return irqstat; +} + +/* + * Cavium ThunderX erratum 23154 + * + * The gicv3 of ThunderX requires a modified version for reading the + * IAR status to ensure data synchronization (access to icc_iar1_el1 + * is not sync'ed before and after). + */ +static inline u64 gic_read_iar_cavium_thunderx(void) +{ + u64 irqstat; + + asm volatile( + "nop;nop;nop;nop\n\t" + "nop;nop;nop;nop\n\t" + "mrs_s %0, " __stringify(ICC_IAR1_EL1) "\n\t" + "nop;nop;nop;nop" + : "=r" (irqstat)); + mb(); + + return irqstat; +} + +static inline void gic_write_pmr(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" ((u64)val)); +} + +static inline void gic_write_ctlr(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" ((u64)val)); + isb(); +} + +static inline void gic_write_grpen1(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" ((u64)val)); + isb(); +} + +static inline void gic_write_sgi1r(u64 val) +{ + asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); +} + +static inline u32 gic_read_sre(void) +{ + u64 val; + + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + return val; +} + +static inline void gic_write_sre(u32 val) +{ + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" ((u64)val)); + isb(); +} + +#define gic_read_typer(c) readq_relaxed(c) +#define gic_write_irouter(v, c) writeq_relaxed(v, c) + +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_ARCH_GICV3_H */ diff --git a/kernel/arch/arm64/include/asm/assembler.h b/kernel/arch/arm64/include/asm/assembler.h index 144b64ad9..12eff928e 100644 --- a/kernel/arch/arm64/include/asm/assembler.h +++ b/kernel/arch/arm64/include/asm/assembler.h @@ -50,18 +50,6 @@ .endm /* - * Save/disable and restore interrupts. - */ - .macro save_and_disable_irqs, olddaif - mrs \olddaif, daif - disable_irq - .endm - - .macro restore_irqs, olddaif - msr daif, \olddaif - .endm - -/* * Enable and disable debug exceptions. */ .macro disable_dbg @@ -103,9 +91,7 @@ * SMP data memory barrier */ .macro smp_dmb, opt -#ifdef CONFIG_SMP dmb \opt -#endif .endm #define USER(l, x...) \ @@ -207,4 +193,15 @@ lr .req x30 // link register str \src, [\tmp, :lo12:\sym] .endm +/* + * Annotate a function as position independent, i.e., safe to be called before + * the kernel virtual mapping is activated. + */ +#define ENDPIPROC(x) \ + .globl __pi_##x; \ + .type __pi_##x, %function; \ + .set __pi_##x, x; \ + .size __pi_##x, . - x; \ + ENDPROC(x) + #endif /* __ASM_ASSEMBLER_H */ diff --git a/kernel/arch/arm64/include/asm/atomic.h b/kernel/arch/arm64/include/asm/atomic.h index 7047051de..f3a3586a4 100644 --- a/kernel/arch/arm64/include/asm/atomic.h +++ b/kernel/arch/arm64/include/asm/atomic.h @@ -24,233 +24,127 @@ #include <linux/types.h> #include <asm/barrier.h> -#include <asm/cmpxchg.h> - -#define ATOMIC_INIT(i) { (i) } +#include <asm/lse.h> #ifdef __KERNEL__ -/* - * On ARM, ordinary assignment (str instruction) doesn't clear the local - * strex/ldrex monitor on some implementations. The reason we can use it for - * atomic_set() is the clrex or dummy strex done on every exception return. - */ -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) - -/* - * AArch64 UP and SMP safe atomic ops. We use load exclusive and - * store exclusive to ensure that these are atomic. We may loop - * to ensure that the update happens. - */ - -#define ATOMIC_OP(op, asm_op) \ -static inline void atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned long tmp; \ - int result; \ - \ - asm volatile("// atomic_" #op "\n" \ -"1: ldxr %w0, %2\n" \ -" " #asm_op " %w0, %w0, %w3\n" \ -" stxr %w1, %w0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i)); \ -} \ +#define __ARM64_IN_ATOMIC_IMPL -#define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ -{ \ - unsigned long tmp; \ - int result; \ - \ - asm volatile("// atomic_" #op "_return\n" \ -"1: ldxr %w0, %2\n" \ -" " #asm_op " %w0, %w0, %w3\n" \ -" stlxr %w1, %w0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i) \ - : "memory"); \ - \ - smp_mb(); \ - return result; \ -} - -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN(op, asm_op) - -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, sub) - -#undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN -#undef ATOMIC_OP - -static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) -{ - unsigned long tmp; - int oldval; - - smp_mb(); - - asm volatile("// atomic_cmpxchg\n" -"1: ldxr %w1, %2\n" -" cmp %w1, %w3\n" -" b.ne 2f\n" -" stxr %w0, %w4, %2\n" -" cbnz %w0, 1b\n" -"2:" - : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) - : "Ir" (old), "r" (new) - : "cc"); - - smp_mb(); - return oldval; -} - -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) +#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE) +#include <asm/atomic_lse.h> +#else +#include <asm/atomic_ll_sc.h> +#endif -static inline int __atomic_add_unless(atomic_t *v, int a, int u) -{ - int c, old; +#undef __ARM64_IN_ATOMIC_IMPL - c = atomic_read(v); - while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c) - c = old; - return c; -} +#include <asm/cmpxchg.h> -#define atomic_inc(v) atomic_add(1, v) -#define atomic_dec(v) atomic_sub(1, v) +#define ___atomic_add_unless(v, a, u, sfx) \ +({ \ + typeof((v)->counter) c, old; \ + \ + c = atomic##sfx##_read(v); \ + while (c != (u) && \ + (old = atomic##sfx##_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c; \ + }) -#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) -#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) -#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) +#define ATOMIC_INIT(i) { (i) } -#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i)) + +#define atomic_add_return_relaxed atomic_add_return_relaxed +#define atomic_add_return_acquire atomic_add_return_acquire +#define atomic_add_return_release atomic_add_return_release +#define atomic_add_return atomic_add_return + +#define atomic_inc_return_relaxed(v) atomic_add_return_relaxed(1, (v)) +#define atomic_inc_return_acquire(v) atomic_add_return_acquire(1, (v)) +#define atomic_inc_return_release(v) atomic_add_return_release(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +#define atomic_sub_return_relaxed atomic_sub_return_relaxed +#define atomic_sub_return_acquire atomic_sub_return_acquire +#define atomic_sub_return_release atomic_sub_return_release +#define atomic_sub_return atomic_sub_return + +#define atomic_dec_return_relaxed(v) atomic_sub_return_relaxed(1, (v)) +#define atomic_dec_return_acquire(v) atomic_sub_return_acquire(1, (v)) +#define atomic_dec_return_release(v) atomic_sub_return_release(1, (v)) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) + +#define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new)) +#define atomic_xchg_acquire(v, new) xchg_acquire(&((v)->counter), (new)) +#define atomic_xchg_release(v, new) xchg_release(&((v)->counter), (new)) +#define atomic_xchg(v, new) xchg(&((v)->counter), (new)) + +#define atomic_cmpxchg_relaxed(v, old, new) \ + cmpxchg_relaxed(&((v)->counter), (old), (new)) +#define atomic_cmpxchg_acquire(v, old, new) \ + cmpxchg_acquire(&((v)->counter), (old), (new)) +#define atomic_cmpxchg_release(v, old, new) \ + cmpxchg_release(&((v)->counter), (old), (new)) +#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) + +#define atomic_inc(v) atomic_add(1, (v)) +#define atomic_dec(v) atomic_sub(1, (v)) +#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) +#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) +#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) +#define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) +#define atomic_andnot atomic_andnot /* * 64-bit atomic operations. */ -#define ATOMIC64_INIT(i) { (i) } - -#define atomic64_read(v) ACCESS_ONCE((v)->counter) -#define atomic64_set(v,i) (((v)->counter) = (i)) - -#define ATOMIC64_OP(op, asm_op) \ -static inline void atomic64_##op(long i, atomic64_t *v) \ -{ \ - long result; \ - unsigned long tmp; \ - \ - asm volatile("// atomic64_" #op "\n" \ -"1: ldxr %0, %2\n" \ -" " #asm_op " %0, %0, %3\n" \ -" stxr %w1, %0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i)); \ -} \ - -#define ATOMIC64_OP_RETURN(op, asm_op) \ -static inline long atomic64_##op##_return(long i, atomic64_t *v) \ -{ \ - long result; \ - unsigned long tmp; \ - \ - asm volatile("// atomic64_" #op "_return\n" \ -"1: ldxr %0, %2\n" \ -" " #asm_op " %0, %0, %3\n" \ -" stlxr %w1, %0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i) \ - : "memory"); \ - \ - smp_mb(); \ - return result; \ -} - -#define ATOMIC64_OPS(op, asm_op) \ - ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN(op, asm_op) - -ATOMIC64_OPS(add, add) -ATOMIC64_OPS(sub, sub) - -#undef ATOMIC64_OPS -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP - -static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) -{ - long oldval; - unsigned long res; - - smp_mb(); - - asm volatile("// atomic64_cmpxchg\n" -"1: ldxr %1, %2\n" -" cmp %1, %3\n" -" b.ne 2f\n" -" stxr %w0, %4, %2\n" -" cbnz %w0, 1b\n" -"2:" - : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) - : "Ir" (old), "r" (new) - : "cc"); - - smp_mb(); - return oldval; -} - -#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) - -static inline long atomic64_dec_if_positive(atomic64_t *v) -{ - long result; - unsigned long tmp; - - asm volatile("// atomic64_dec_if_positive\n" -"1: ldxr %0, %2\n" -" subs %0, %0, #1\n" -" b.mi 2f\n" -" stlxr %w1, %0, %2\n" -" cbnz %w1, 1b\n" -" dmb ish\n" -"2:" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : - : "cc", "memory"); - - return result; -} - -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) -{ - long c, old; - - c = atomic64_read(v); - while (c != u && (old = atomic64_cmpxchg((v), c, c + a)) != c) - c = old; - - return c != u; -} - -#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) -#define atomic64_inc(v) atomic64_add(1LL, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define ATOMIC64_INIT ATOMIC_INIT +#define atomic64_read atomic_read +#define atomic64_set atomic_set + +#define atomic64_add_return_relaxed atomic64_add_return_relaxed +#define atomic64_add_return_acquire atomic64_add_return_acquire +#define atomic64_add_return_release atomic64_add_return_release +#define atomic64_add_return atomic64_add_return + +#define atomic64_inc_return_relaxed(v) atomic64_add_return_relaxed(1, (v)) +#define atomic64_inc_return_acquire(v) atomic64_add_return_acquire(1, (v)) +#define atomic64_inc_return_release(v) atomic64_add_return_release(1, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) + +#define atomic64_sub_return_relaxed atomic64_sub_return_relaxed +#define atomic64_sub_return_acquire atomic64_sub_return_acquire +#define atomic64_sub_return_release atomic64_sub_return_release +#define atomic64_sub_return atomic64_sub_return + +#define atomic64_dec_return_relaxed(v) atomic64_sub_return_relaxed(1, (v)) +#define atomic64_dec_return_acquire(v) atomic64_sub_return_acquire(1, (v)) +#define atomic64_dec_return_release(v) atomic64_sub_return_release(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) + +#define atomic64_xchg_relaxed atomic_xchg_relaxed +#define atomic64_xchg_acquire atomic_xchg_acquire +#define atomic64_xchg_release atomic_xchg_release +#define atomic64_xchg atomic_xchg + +#define atomic64_cmpxchg_relaxed atomic_cmpxchg_relaxed +#define atomic64_cmpxchg_acquire atomic_cmpxchg_acquire +#define atomic64_cmpxchg_release atomic_cmpxchg_release +#define atomic64_cmpxchg atomic_cmpxchg + +#define atomic64_inc(v) atomic64_add(1, (v)) +#define atomic64_dec(v) atomic64_sub(1, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) -#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) -#define atomic64_dec(v) atomic64_sub(1LL, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) -#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) +#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) +#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) +#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0) +#define atomic64_add_unless(v, a, u) (___atomic_add_unless(v, a, u, 64) != u) +#define atomic64_andnot atomic64_andnot + +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #endif #endif diff --git a/kernel/arch/arm64/include/asm/atomic_ll_sc.h b/kernel/arch/arm64/include/asm/atomic_ll_sc.h new file mode 100644 index 000000000..f61c84f6b --- /dev/null +++ b/kernel/arch/arm64/include/asm/atomic_ll_sc.h @@ -0,0 +1,269 @@ +/* + * Based on arch/arm/include/asm/atomic.h + * + * Copyright (C) 1996 Russell King. + * Copyright (C) 2002 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_ATOMIC_LL_SC_H +#define __ASM_ATOMIC_LL_SC_H + +#ifndef __ARM64_IN_ATOMIC_IMPL +#error "please don't include this file directly" +#endif + +/* + * AArch64 UP and SMP safe atomic ops. We use load exclusive and + * store exclusive to ensure that these are atomic. We may loop + * to ensure that the update happens. + * + * NOTE: these functions do *not* follow the PCS and must explicitly + * save any clobbered registers other than x0 (regardless of return + * value). This is achieved through -fcall-saved-* compiler flags for + * this file, which unfortunately don't work on a per-function basis + * (the optimize attribute silently ignores these options). + */ + +#define ATOMIC_OP(op, asm_op) \ +__LL_SC_INLINE void \ +__LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + asm volatile("// atomic_" #op "\n" \ +" prfm pstl1strm, %2\n" \ +"1: ldxr %w0, %2\n" \ +" " #asm_op " %w0, %w0, %w3\n" \ +" stxr %w1, %w0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i)); \ +} \ +__LL_SC_EXPORT(atomic_##op); + +#define ATOMIC_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_##op##_return##name(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + asm volatile("// atomic_" #op "_return" #name "\n" \ +" prfm pstl1strm, %2\n" \ +"1: ld" #acq "xr %w0, %2\n" \ +" " #asm_op " %w0, %w0, %w3\n" \ +" st" #rel "xr %w1, %w0, %2\n" \ +" cbnz %w1, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic_##op##_return##name); + +#define ATOMIC_OPS(...) \ + ATOMIC_OP(__VA_ARGS__) \ + ATOMIC_OP_RETURN( , dmb ish, , l, "memory", __VA_ARGS__) + +#define ATOMIC_OPS_RLX(...) \ + ATOMIC_OPS(__VA_ARGS__) \ + ATOMIC_OP_RETURN(_relaxed, , , , , __VA_ARGS__)\ + ATOMIC_OP_RETURN(_acquire, , a, , "memory", __VA_ARGS__)\ + ATOMIC_OP_RETURN(_release, , , l, "memory", __VA_ARGS__) + +ATOMIC_OPS_RLX(add, add) +ATOMIC_OPS_RLX(sub, sub) + +ATOMIC_OP(and, and) +ATOMIC_OP(andnot, bic) +ATOMIC_OP(or, orr) +ATOMIC_OP(xor, eor) + +#undef ATOMIC_OPS_RLX +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define ATOMIC64_OP(op, asm_op) \ +__LL_SC_INLINE void \ +__LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ +{ \ + long result; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "\n" \ +" prfm pstl1strm, %2\n" \ +"1: ldxr %0, %2\n" \ +" " #asm_op " %0, %0, %3\n" \ +" stxr %w1, %0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i)); \ +} \ +__LL_SC_EXPORT(atomic64_##op); + +#define ATOMIC64_OP_RETURN(name, mb, acq, rel, cl, op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_##op##_return##name(long i, atomic64_t *v)) \ +{ \ + long result; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "_return" #name "\n" \ +" prfm pstl1strm, %2\n" \ +"1: ld" #acq "xr %0, %2\n" \ +" " #asm_op " %0, %0, %3\n" \ +" st" #rel "xr %w1, %0, %2\n" \ +" cbnz %w1, 1b\n" \ +" " #mb \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : cl); \ + \ + return result; \ +} \ +__LL_SC_EXPORT(atomic64_##op##_return##name); + +#define ATOMIC64_OPS(...) \ + ATOMIC64_OP(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(, dmb ish, , l, "memory", __VA_ARGS__) + +#define ATOMIC64_OPS_RLX(...) \ + ATOMIC64_OPS(__VA_ARGS__) \ + ATOMIC64_OP_RETURN(_relaxed,, , , , __VA_ARGS__) \ + ATOMIC64_OP_RETURN(_acquire,, a, , "memory", __VA_ARGS__) \ + ATOMIC64_OP_RETURN(_release,, , l, "memory", __VA_ARGS__) + +ATOMIC64_OPS_RLX(add, add) +ATOMIC64_OPS_RLX(sub, sub) + +ATOMIC64_OP(and, and) +ATOMIC64_OP(andnot, bic) +ATOMIC64_OP(or, orr) +ATOMIC64_OP(xor, eor) + +#undef ATOMIC64_OPS_RLX +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +__LL_SC_INLINE long +__LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_dec_if_positive\n" +" prfm pstl1strm, %2\n" +"1: ldxr %0, %2\n" +" subs %0, %0, #1\n" +" b.lt 2f\n" +" stlxr %w1, %0, %2\n" +" cbnz %w1, 1b\n" +" dmb ish\n" +"2:" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : + : "cc", "memory"); + + return result; +} +__LL_SC_EXPORT(atomic64_dec_if_positive); + +#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \ +__LL_SC_INLINE unsigned long \ +__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ + unsigned long old, \ + unsigned long new)) \ +{ \ + unsigned long tmp, oldval; \ + \ + asm volatile( \ + " prfm pstl1strm, %[v]\n" \ + "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ + " cbnz %" #w "[tmp], 2f\n" \ + " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ + " cbnz %w[tmp], 1b\n" \ + " " #mb "\n" \ + " mov %" #w "[oldval], %" #w "[old]\n" \ + "2:" \ + : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ + [v] "+Q" (*(unsigned long *)ptr) \ + : [old] "Lr" (old), [new] "r" (new) \ + : cl); \ + \ + return oldval; \ +} \ +__LL_SC_EXPORT(__cmpxchg_case_##name); + +__CMPXCHG_CASE(w, b, 1, , , , ) +__CMPXCHG_CASE(w, h, 2, , , , ) +__CMPXCHG_CASE(w, , 4, , , , ) +__CMPXCHG_CASE( , , 8, , , , ) +__CMPXCHG_CASE(w, b, acq_1, , a, , "memory") +__CMPXCHG_CASE(w, h, acq_2, , a, , "memory") +__CMPXCHG_CASE(w, , acq_4, , a, , "memory") +__CMPXCHG_CASE( , , acq_8, , a, , "memory") +__CMPXCHG_CASE(w, b, rel_1, , , l, "memory") +__CMPXCHG_CASE(w, h, rel_2, , , l, "memory") +__CMPXCHG_CASE(w, , rel_4, , , l, "memory") +__CMPXCHG_CASE( , , rel_8, , , l, "memory") +__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory") +__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory") +__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory") + +#undef __CMPXCHG_CASE + +#define __CMPXCHG_DBL(name, mb, rel, cl) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ + unsigned long old2, \ + unsigned long new1, \ + unsigned long new2, \ + volatile void *ptr)) \ +{ \ + unsigned long tmp, ret; \ + \ + asm volatile("// __cmpxchg_double" #name "\n" \ + " prfm pstl1strm, %2\n" \ + "1: ldxp %0, %1, %2\n" \ + " eor %0, %0, %3\n" \ + " eor %1, %1, %4\n" \ + " orr %1, %0, %1\n" \ + " cbnz %1, 2f\n" \ + " st" #rel "xp %w0, %5, %6, %2\n" \ + " cbnz %w0, 1b\n" \ + " " #mb "\n" \ + "2:" \ + : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ + : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ + : cl); \ + \ + return ret; \ +} \ +__LL_SC_EXPORT(__cmpxchg_double##name); + +__CMPXCHG_DBL( , , , ) +__CMPXCHG_DBL(_mb, dmb ish, l, "memory") + +#undef __CMPXCHG_DBL + +#endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/kernel/arch/arm64/include/asm/atomic_lse.h b/kernel/arch/arm64/include/asm/atomic_lse.h new file mode 100644 index 000000000..197e06afb --- /dev/null +++ b/kernel/arch/arm64/include/asm/atomic_lse.h @@ -0,0 +1,430 @@ +/* + * Based on arch/arm/include/asm/atomic.h + * + * Copyright (C) 1996 Russell King. + * Copyright (C) 2002 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_ATOMIC_LSE_H +#define __ASM_ATOMIC_LSE_H + +#ifndef __ARM64_IN_ATOMIC_IMPL +#error "please don't include this file directly" +#endif + +#define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) + +static inline void atomic_andnot(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), + " stclr %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_or(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), + " stset %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_xor(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), + " steor %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_add(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), + " stadd %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +#define ATOMIC_OP_ADD_RETURN(name, mb, cl...) \ +static inline int atomic_add_return##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(add_return##name), \ + /* LSE atomics */ \ + " ldadd" #mb " %w[i], w30, %[v]\n" \ + " add %w[i], %w[i], w30") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return w0; \ +} + +ATOMIC_OP_ADD_RETURN(_relaxed, ) +ATOMIC_OP_ADD_RETURN(_acquire, a, "memory") +ATOMIC_OP_ADD_RETURN(_release, l, "memory") +ATOMIC_OP_ADD_RETURN( , al, "memory") + +#undef ATOMIC_OP_ADD_RETURN + +static inline void atomic_and(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(and), + /* LSE atomics */ + " mvn %w[i], %w[i]\n" + " stclr %w[i], %[v]") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(sub), + /* LSE atomics */ + " neg %w[i], %w[i]\n" + " stadd %w[i], %[v]") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +#define ATOMIC_OP_SUB_RETURN(name, mb, cl...) \ +static inline int atomic_sub_return##name(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC(sub_return##name) \ + " nop", \ + /* LSE atomics */ \ + " neg %w[i], %w[i]\n" \ + " ldadd" #mb " %w[i], w30, %[v]\n" \ + " add %w[i], %w[i], w30") \ + : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return w0; \ +} + +ATOMIC_OP_SUB_RETURN(_relaxed, ) +ATOMIC_OP_SUB_RETURN(_acquire, a, "memory") +ATOMIC_OP_SUB_RETURN(_release, l, "memory") +ATOMIC_OP_SUB_RETURN( , al, "memory") + +#undef ATOMIC_OP_SUB_RETURN +#undef __LL_SC_ATOMIC + +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) + +static inline void atomic64_andnot(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), + " stclr %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_or(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), + " stset %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_xor(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), + " steor %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_add(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), + " stadd %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +#define ATOMIC64_OP_ADD_RETURN(name, mb, cl...) \ +static inline long atomic64_add_return##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(add_return##name), \ + /* LSE atomics */ \ + " ldadd" #mb " %[i], x30, %[v]\n" \ + " add %[i], %[i], x30") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return x0; \ +} + +ATOMIC64_OP_ADD_RETURN(_relaxed, ) +ATOMIC64_OP_ADD_RETURN(_acquire, a, "memory") +ATOMIC64_OP_ADD_RETURN(_release, l, "memory") +ATOMIC64_OP_ADD_RETURN( , al, "memory") + +#undef ATOMIC64_OP_ADD_RETURN + +static inline void atomic64_and(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(and), + /* LSE atomics */ + " mvn %[i], %[i]\n" + " stclr %[i], %[v]") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_sub(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(sub), + /* LSE atomics */ + " neg %[i], %[i]\n" + " stadd %[i], %[v]") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +#define ATOMIC64_OP_SUB_RETURN(name, mb, cl...) \ +static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_ATOMIC64(sub_return##name) \ + " nop", \ + /* LSE atomics */ \ + " neg %[i], %[i]\n" \ + " ldadd" #mb " %[i], x30, %[v]\n" \ + " add %[i], %[i], x30") \ + : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : "r" (x1) \ + : "x30" , ##cl); \ + \ + return x0; \ +} + +ATOMIC64_OP_SUB_RETURN(_relaxed, ) +ATOMIC64_OP_SUB_RETURN(_acquire, a, "memory") +ATOMIC64_OP_SUB_RETURN(_release, l, "memory") +ATOMIC64_OP_SUB_RETURN( , al, "memory") + +#undef ATOMIC64_OP_SUB_RETURN + +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + register long x0 asm ("x0") = (long)v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(dec_if_positive) + " nop\n" + " nop\n" + " nop\n" + " nop\n" + " nop", + /* LSE atomics */ + "1: ldr x30, %[v]\n" + " subs %[ret], x30, #1\n" + " b.lt 2f\n" + " casal x30, %[ret], %[v]\n" + " sub x30, x30, #1\n" + " sub x30, x30, %[ret]\n" + " cbnz x30, 1b\n" + "2:") + : [ret] "+&r" (x0), [v] "+Q" (v->counter) + : + : "x30", "cc", "memory"); + + return x0; +} + +#undef __LL_SC_ATOMIC64 + +#define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op) + +#define __CMPXCHG_CASE(w, sz, name, mb, cl...) \ +static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ + unsigned long old, \ + unsigned long new) \ +{ \ + register unsigned long x0 asm ("x0") = (unsigned long)ptr; \ + register unsigned long x1 asm ("x1") = old; \ + register unsigned long x2 asm ("x2") = new; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + __LL_SC_CMPXCHG(name) \ + " nop", \ + /* LSE atomics */ \ + " mov " #w "30, %" #w "[old]\n" \ + " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ + " mov %" #w "[ret], " #w "30") \ + : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \ + : [old] "r" (x1), [new] "r" (x2) \ + : "x30" , ##cl); \ + \ + return x0; \ +} + +__CMPXCHG_CASE(w, b, 1, ) +__CMPXCHG_CASE(w, h, 2, ) +__CMPXCHG_CASE(w, , 4, ) +__CMPXCHG_CASE(x, , 8, ) +__CMPXCHG_CASE(w, b, acq_1, a, "memory") +__CMPXCHG_CASE(w, h, acq_2, a, "memory") +__CMPXCHG_CASE(w, , acq_4, a, "memory") +__CMPXCHG_CASE(x, , acq_8, a, "memory") +__CMPXCHG_CASE(w, b, rel_1, l, "memory") +__CMPXCHG_CASE(w, h, rel_2, l, "memory") +__CMPXCHG_CASE(w, , rel_4, l, "memory") +__CMPXCHG_CASE(x, , rel_8, l, "memory") +__CMPXCHG_CASE(w, b, mb_1, al, "memory") +__CMPXCHG_CASE(w, h, mb_2, al, "memory") +__CMPXCHG_CASE(w, , mb_4, al, "memory") +__CMPXCHG_CASE(x, , mb_8, al, "memory") + +#undef __LL_SC_CMPXCHG +#undef __CMPXCHG_CASE + +#define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op) + +#define __CMPXCHG_DBL(name, mb, cl...) \ +static inline long __cmpxchg_double##name(unsigned long old1, \ + unsigned long old2, \ + unsigned long new1, \ + unsigned long new2, \ + volatile void *ptr) \ +{ \ + unsigned long oldval1 = old1; \ + unsigned long oldval2 = old2; \ + register unsigned long x0 asm ("x0") = old1; \ + register unsigned long x1 asm ("x1") = old2; \ + register unsigned long x2 asm ("x2") = new1; \ + register unsigned long x3 asm ("x3") = new2; \ + register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + " nop\n" \ + " nop\n" \ + __LL_SC_CMPXCHG_DBL(name), \ + /* LSE atomics */ \ + " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ + " eor %[old1], %[old1], %[oldval1]\n" \ + " eor %[old2], %[old2], %[oldval2]\n" \ + " orr %[old1], %[old1], %[old2]") \ + : [old1] "+r" (x0), [old2] "+r" (x1), \ + [v] "+Q" (*(unsigned long *)ptr) \ + : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ + [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ + : "x30" , ##cl); \ + \ + return x0; \ +} + +__CMPXCHG_DBL( , ) +__CMPXCHG_DBL(_mb, al, "memory") + +#undef __LL_SC_CMPXCHG_DBL +#undef __CMPXCHG_DBL + +#endif /* __ASM_ATOMIC_LSE_H */ diff --git a/kernel/arch/arm64/include/asm/barrier.h b/kernel/arch/arm64/include/asm/barrier.h index 71f19c4dc..9622eb48f 100644 --- a/kernel/arch/arm64/include/asm/barrier.h +++ b/kernel/arch/arm64/include/asm/barrier.h @@ -35,28 +35,6 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) -#ifndef CONFIG_SMP -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() - -#define smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ACCESS_ONCE(*p) = (v); \ -} while (0) - -#define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ___p1; \ -}) - -#else - #define smp_mb() dmb(ish) #define smp_rmb() dmb(ishld) #define smp_wmb() dmb(ishst) @@ -86,35 +64,37 @@ do { \ #define smp_load_acquire(p) \ ({ \ - typeof(*p) ___p1; \ + union { typeof(*p) __val; char __c[1]; } __u; \ compiletime_assert_atomic_type(*p); \ switch (sizeof(*p)) { \ case 1: \ asm volatile ("ldarb %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 2: \ asm volatile ("ldarh %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 4: \ asm volatile ("ldar %w0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ case 8: \ asm volatile ("ldar %0, %1" \ - : "=r" (___p1) : "Q" (*p) : "memory"); \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*p) : "memory"); \ break; \ } \ - ___p1; \ + __u.__val; \ }) -#endif - #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) -#define set_mb(var, value) do { var = value; smp_mb(); } while (0) +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) #define nop() asm volatile("nop"); #define smp_mb__before_atomic() smp_mb() diff --git a/kernel/arch/arm64/include/asm/boot.h b/kernel/arch/arm64/include/asm/boot.h new file mode 100644 index 000000000..81151b67b --- /dev/null +++ b/kernel/arch/arm64/include/asm/boot.h @@ -0,0 +1,14 @@ + +#ifndef __ASM_BOOT_H +#define __ASM_BOOT_H + +#include <asm/sizes.h> + +/* + * arm64 requires the DTB to be 8 byte aligned and + * not exceed 2MB in size. + */ +#define MIN_FDT_ALIGN 8 +#define MAX_FDT_SIZE SZ_2M + +#endif diff --git a/kernel/arch/arm64/include/asm/bug.h b/kernel/arch/arm64/include/asm/bug.h new file mode 100644 index 000000000..4a748ce9b --- /dev/null +++ b/kernel/arch/arm64/include/asm/bug.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2015 ARM Limited + * Author: Dave Martin <Dave.Martin@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _ARCH_ARM64_ASM_BUG_H +#define _ARCH_ARM64_ASM_BUG_H + +#include <asm/debug-monitors.h> + +#ifdef CONFIG_GENERIC_BUG +#define HAVE_ARCH_BUG + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#define __BUGVERBOSE_LOCATION(file, line) \ + ".pushsection .rodata.str,\"aMS\",@progbits,1\n" \ + "2: .string \"" file "\"\n\t" \ + ".popsection\n\t" \ + \ + ".long 2b - 0b\n\t" \ + ".short " #line "\n\t" +#else +#define _BUGVERBOSE_LOCATION(file, line) +#endif + +#define _BUG_FLAGS(flags) __BUG_FLAGS(flags) + +#define __BUG_FLAGS(flags) asm volatile ( \ + ".pushsection __bug_table,\"a\"\n\t" \ + ".align 2\n\t" \ + "0: .long 1f - 0b\n\t" \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + ".short " #flags "\n\t" \ + ".popsection\n" \ + \ + "1: brk %[imm]" \ + :: [imm] "i" (BUG_BRK_IMM) \ +) + +#define BUG() do { \ + _BUG_FLAGS(0); \ + unreachable(); \ +} while (0) + +#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint)) + +#endif /* ! CONFIG_GENERIC_BUG */ + +#include <asm-generic/bug.h> + +#endif /* ! _ARCH_ARM64_ASM_BUG_H */ diff --git a/kernel/arch/arm64/include/asm/cache.h b/kernel/arch/arm64/include/asm/cache.h index bde449936..5082b30bc 100644 --- a/kernel/arch/arm64/include/asm/cache.h +++ b/kernel/arch/arm64/include/asm/cache.h @@ -18,7 +18,7 @@ #include <asm/cachetype.h> -#define L1_CACHE_SHIFT 6 +#define L1_CACHE_SHIFT 7 #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* diff --git a/kernel/arch/arm64/include/asm/cacheflush.h b/kernel/arch/arm64/include/asm/cacheflush.h index 67d309cc3..54efedaf3 100644 --- a/kernel/arch/arm64/include/asm/cacheflush.h +++ b/kernel/arch/arm64/include/asm/cacheflush.h @@ -40,10 +40,6 @@ * the implementation assumes non-aliasing VIPT D-cache and (aliasing) * VIPT or ASID-tagged VIVT I-cache. * - * flush_cache_all() - * - * Unconditionally clean and invalidate the entire cache. - * * flush_cache_mm(mm) * * Clean and invalidate all user space cache entries @@ -69,7 +65,6 @@ * - kaddr - page address * - size - region size */ -extern void flush_cache_all(void); extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern void flush_icache_range(unsigned long start, unsigned long end); extern void __flush_dcache_area(void *addr, size_t len); @@ -120,6 +115,13 @@ extern void copy_to_user_page(struct vm_area_struct *, struct page *, #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 extern void flush_dcache_page(struct page *); +static inline void __local_flush_icache_all(void) +{ + asm("ic iallu"); + dsb(nsh); + isb(); +} + static inline void __flush_icache_all(void) { asm("ic ialluis"); diff --git a/kernel/arch/arm64/include/asm/cachetype.h b/kernel/arch/arm64/include/asm/cachetype.h index da2fc9e3c..f5588692f 100644 --- a/kernel/arch/arm64/include/asm/cachetype.h +++ b/kernel/arch/arm64/include/asm/cachetype.h @@ -34,8 +34,8 @@ #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) -#define ICACHEF_ALIASING BIT(0) -#define ICACHEF_AIVIVT BIT(1) +#define ICACHEF_ALIASING 0 +#define ICACHEF_AIVIVT 1 extern unsigned long __icache_flags; diff --git a/kernel/arch/arm64/include/asm/cmpxchg.h b/kernel/arch/arm64/include/asm/cmpxchg.h index d8c25b7b1..9ea611ea6 100644 --- a/kernel/arch/arm64/include/asm/cmpxchg.h +++ b/kernel/arch/arm64/include/asm/cmpxchg.h @@ -21,231 +21,185 @@ #include <linux/bug.h> #include <linux/mmdebug.h> +#include <asm/atomic.h> #include <asm/barrier.h> +#include <asm/lse.h> -static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) -{ - unsigned long ret, tmp; - - switch (size) { - case 1: - asm volatile("// __xchg1\n" - "1: ldxrb %w0, %2\n" - " stlxrb %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) - : "r" (x) - : "memory"); - break; - case 2: - asm volatile("// __xchg2\n" - "1: ldxrh %w0, %2\n" - " stlxrh %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) - : "r" (x) - : "memory"); - break; - case 4: - asm volatile("// __xchg4\n" - "1: ldxr %w0, %2\n" - " stlxr %w1, %w3, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) - : "r" (x) - : "memory"); - break; - case 8: - asm volatile("// __xchg8\n" - "1: ldxr %0, %2\n" - " stlxr %w1, %3, %2\n" - " cbnz %w1, 1b\n" - : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) - : "r" (x) - : "memory"); - break; - default: - BUILD_BUG(); - } - - smp_mb(); - return ret; +/* + * We need separate acquire parameters for ll/sc and lse, since the full + * barrier case is generated as release+dmb for the former and + * acquire+release for the latter. + */ +#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \ +static inline unsigned long __xchg_case_##name(unsigned long x, \ + volatile void *ptr) \ +{ \ + unsigned long ret, tmp; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " prfm pstl1strm, %2\n" \ + "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \ + " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \ + " cbnz %w1, 1b\n" \ + " " #mb, \ + /* LSE atomics */ \ + " nop\n" \ + " nop\n" \ + " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \ + " nop\n" \ + " " #nop_lse) \ + : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) \ + : "r" (x) \ + : cl); \ + \ + return ret; \ } -#define xchg(ptr,x) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \ - __ret; \ -}) - -static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long oldval = 0, res; - - switch (size) { - case 1: - do { - asm volatile("// __cmpxchg1\n" - " ldxrb %w1, %2\n" - " mov %w0, #0\n" - " cmp %w1, %w3\n" - " b.ne 1f\n" - " stxrb %w0, %w4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - - case 2: - do { - asm volatile("// __cmpxchg2\n" - " ldxrh %w1, %2\n" - " mov %w0, #0\n" - " cmp %w1, %w3\n" - " b.ne 1f\n" - " stxrh %w0, %w4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - - case 4: - do { - asm volatile("// __cmpxchg4\n" - " ldxr %w1, %2\n" - " mov %w0, #0\n" - " cmp %w1, %w3\n" - " b.ne 1f\n" - " stxr %w0, %w4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - - case 8: - do { - asm volatile("// __cmpxchg8\n" - " ldxr %1, %2\n" - " mov %w0, #0\n" - " cmp %1, %3\n" - " b.ne 1f\n" - " stxr %w0, %4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - - default: - BUILD_BUG(); - } - - return oldval; +__XCHG_CASE(w, b, 1, , , , , , ) +__XCHG_CASE(w, h, 2, , , , , , ) +__XCHG_CASE(w, , 4, , , , , , ) +__XCHG_CASE( , , 8, , , , , , ) +__XCHG_CASE(w, b, acq_1, , , a, a, , "memory") +__XCHG_CASE(w, h, acq_2, , , a, a, , "memory") +__XCHG_CASE(w, , acq_4, , , a, a, , "memory") +__XCHG_CASE( , , acq_8, , , a, a, , "memory") +__XCHG_CASE(w, b, rel_1, , , , , l, "memory") +__XCHG_CASE(w, h, rel_2, , , , , l, "memory") +__XCHG_CASE(w, , rel_4, , , , , l, "memory") +__XCHG_CASE( , , rel_8, , , , , l, "memory") +__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory") +__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory") +__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory") + +#undef __XCHG_CASE + +#define __XCHG_GEN(sfx) \ +static inline unsigned long __xchg##sfx(unsigned long x, \ + volatile void *ptr, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __xchg_case##sfx##_1(x, ptr); \ + case 2: \ + return __xchg_case##sfx##_2(x, ptr); \ + case 4: \ + return __xchg_case##sfx##_4(x, ptr); \ + case 8: \ + return __xchg_case##sfx##_8(x, ptr); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ } -#define system_has_cmpxchg_double() 1 - -static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2, - unsigned long old1, unsigned long old2, - unsigned long new1, unsigned long new2, int size) -{ - unsigned long loop, lost; +__XCHG_GEN() +__XCHG_GEN(_acq) +__XCHG_GEN(_rel) +__XCHG_GEN(_mb) - switch (size) { - case 8: - VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1); - do { - asm volatile("// __cmpxchg_double8\n" - " ldxp %0, %1, %2\n" - " eor %0, %0, %3\n" - " eor %1, %1, %4\n" - " orr %1, %0, %1\n" - " mov %w0, #0\n" - " cbnz %1, 1f\n" - " stxp %w0, %5, %6, %2\n" - "1:\n" - : "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1) - : "r" (old1), "r"(old2), "r"(new1), "r"(new2)); - } while (loop); - break; - default: - BUILD_BUG(); - } +#undef __XCHG_GEN - return !lost; -} - -static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2, - unsigned long old1, unsigned long old2, - unsigned long new1, unsigned long new2, int size) -{ - int ret; - - smp_mb(); - ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size); - smp_mb(); +#define __xchg_wrapper(sfx, ptr, x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg##sfx((unsigned long)(x), (ptr), sizeof(*(ptr))); \ + __ret; \ +}) - return ret; +/* xchg */ +#define xchg_relaxed(...) __xchg_wrapper( , __VA_ARGS__) +#define xchg_acquire(...) __xchg_wrapper(_acq, __VA_ARGS__) +#define xchg_release(...) __xchg_wrapper(_rel, __VA_ARGS__) +#define xchg(...) __xchg_wrapper( _mb, __VA_ARGS__) + +#define __CMPXCHG_GEN(sfx) \ +static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \ + unsigned long old, \ + unsigned long new, \ + int size) \ +{ \ + switch (size) { \ + case 1: \ + return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \ + case 2: \ + return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \ + case 4: \ + return __cmpxchg_case##sfx##_4(ptr, old, new); \ + case 8: \ + return __cmpxchg_case##sfx##_8(ptr, old, new); \ + default: \ + BUILD_BUG(); \ + } \ + \ + unreachable(); \ } -static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long ret; - - smp_mb(); - ret = __cmpxchg(ptr, old, new, size); - smp_mb(); +__CMPXCHG_GEN() +__CMPXCHG_GEN(_acq) +__CMPXCHG_GEN(_rel) +__CMPXCHG_GEN(_mb) - return ret; -} +#undef __CMPXCHG_GEN -#define cmpxchg(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg_mb((ptr), (unsigned long)(o), (unsigned long)(n), \ - sizeof(*(ptr))); \ - __ret; \ +#define __cmpxchg_wrapper(sfx, ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __cmpxchg##sfx((ptr), (unsigned long)(o), \ + (unsigned long)(n), sizeof(*(ptr))); \ + __ret; \ }) -#define cmpxchg_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr))); \ - __ret; \ +/* cmpxchg */ +#define cmpxchg_relaxed(...) __cmpxchg_wrapper( , __VA_ARGS__) +#define cmpxchg_acquire(...) __cmpxchg_wrapper(_acq, __VA_ARGS__) +#define cmpxchg_release(...) __cmpxchg_wrapper(_rel, __VA_ARGS__) +#define cmpxchg(...) __cmpxchg_wrapper( _mb, __VA_ARGS__) +#define cmpxchg_local cmpxchg_relaxed + +/* cmpxchg64 */ +#define cmpxchg64_relaxed cmpxchg_relaxed +#define cmpxchg64_acquire cmpxchg_acquire +#define cmpxchg64_release cmpxchg_release +#define cmpxchg64 cmpxchg +#define cmpxchg64_local cmpxchg_local + +/* cmpxchg_double */ +#define system_has_cmpxchg_double() 1 + +#define __cmpxchg_double_check(ptr1, ptr2) \ +({ \ + if (sizeof(*(ptr1)) != 8) \ + BUILD_BUG(); \ + VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \ }) #define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ ({\ int __ret;\ - __ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \ - (unsigned long)(o2), (unsigned long)(n1), \ - (unsigned long)(n2), sizeof(*(ptr1)));\ + __cmpxchg_double_check(ptr1, ptr2); \ + __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2), \ + ptr1); \ __ret; \ }) #define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ ({\ int __ret;\ - __ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \ - (unsigned long)(o2), (unsigned long)(n1), \ - (unsigned long)(n2), sizeof(*(ptr1)));\ + __cmpxchg_double_check(ptr1, ptr2); \ + __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2), \ + ptr1); \ __ret; \ }) +/* this_cpu_cmpxchg */ #define _protect_cmpxchg_local(pcp, o, n) \ ({ \ typeof(*raw_cpu_ptr(&(pcp))) __ret; \ @@ -271,9 +225,4 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) -#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n)) -#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n)) - -#define cmpxchg64_relaxed(ptr,o,n) cmpxchg_local((ptr),(o),(n)) - #endif /* __ASM_CMPXCHG_H */ diff --git a/kernel/arch/arm64/include/asm/compat.h b/kernel/arch/arm64/include/asm/compat.h index 7fbed6919..eb8432bb8 100644 --- a/kernel/arch/arm64/include/asm/compat.h +++ b/kernel/arch/arm64/include/asm/compat.h @@ -23,7 +23,6 @@ */ #include <linux/types.h> #include <linux/sched.h> -#include <linux/ptrace.h> #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ @@ -234,7 +233,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -#define compat_user_stack_pointer() (user_stack_pointer(current_pt_regs())) +#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current))) static inline void __user *arch_compat_alloc_user_space(long len) { diff --git a/kernel/arch/arm64/include/asm/cpu.h b/kernel/arch/arm64/include/asm/cpu.h index 8e797b2fc..b5e9cee4b 100644 --- a/kernel/arch/arm64/include/asm/cpu.h +++ b/kernel/arch/arm64/include/asm/cpu.h @@ -63,4 +63,8 @@ DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); void cpuinfo_store_cpu(void); void __init cpuinfo_store_boot_cpu(void); +void __init init_cpu_features(struct cpuinfo_arm64 *info); +void update_cpu_features(int cpu, struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot); + #endif /* __ASM_CPU_H */ diff --git a/kernel/arch/arm64/include/asm/cpu_ops.h b/kernel/arch/arm64/include/asm/cpu_ops.h index 5a31d6716..8f03446cf 100644 --- a/kernel/arch/arm64/include/asm/cpu_ops.h +++ b/kernel/arch/arm64/include/asm/cpu_ops.h @@ -19,15 +19,15 @@ #include <linux/init.h> #include <linux/threads.h> -struct device_node; - /** * struct cpu_operations - Callback operations for hotplugging CPUs. * * @name: Name of the property as appears in a devicetree cpu node's - * enable-method property. - * @cpu_init: Reads any data necessary for a specific enable-method from the - * devicetree, for a given cpu node and proposed logical id. + * enable-method property. On systems booting with ACPI, @name + * identifies the struct cpu_operations entry corresponding to + * the boot protocol specified in the ACPI MADT table. + * @cpu_init: Reads any data necessary for a specific enable-method for a + * proposed logical id. * @cpu_prepare: Early one-time preparation step for a cpu. If there is a * mechanism for doing so, tests whether it is possible to boot * the given CPU. @@ -40,15 +40,15 @@ struct device_node; * @cpu_die: Makes a cpu leave the kernel. Must not fail. Called from the * cpu being killed. * @cpu_kill: Ensures a cpu has left the kernel. Called from another cpu. - * @cpu_init_idle: Reads any data necessary to initialize CPU idle states from - * devicetree, for a given cpu node and proposed logical id. + * @cpu_init_idle: Reads any data necessary to initialize CPU idle states for + * a proposed logical id. * @cpu_suspend: Suspends a cpu and saves the required context. May fail owing * to wrong parameters or error conditions. Called from the * CPU being suspended. Must be called with IRQs disabled. */ struct cpu_operations { const char *name; - int (*cpu_init)(struct device_node *, unsigned int); + int (*cpu_init)(unsigned int); int (*cpu_prepare)(unsigned int); int (*cpu_boot)(unsigned int); void (*cpu_postboot)(void); @@ -58,14 +58,17 @@ struct cpu_operations { int (*cpu_kill)(unsigned int cpu); #endif #ifdef CONFIG_CPU_IDLE - int (*cpu_init_idle)(struct device_node *, unsigned int); + int (*cpu_init_idle)(unsigned int); int (*cpu_suspend)(unsigned long); #endif }; extern const struct cpu_operations *cpu_ops[NR_CPUS]; -int __init cpu_read_ops(struct device_node *dn, int cpu); -void __init cpu_read_bootcpu_ops(void); -const struct cpu_operations *cpu_get_ops(const char *name); +int __init cpu_read_ops(int cpu); + +static inline void __init cpu_read_bootcpu_ops(void) +{ + cpu_read_ops(0); +} #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/kernel/arch/arm64/include/asm/cpufeature.h b/kernel/arch/arm64/include/asm/cpufeature.h index 82cb9f98b..8f271b83f 100644 --- a/kernel/arch/arm64/include/asm/cpufeature.h +++ b/kernel/arch/arm64/include/asm/cpufeature.h @@ -10,6 +10,7 @@ #define __ASM_CPUFEATURE_H #include <asm/hwcap.h> +#include <asm/sysreg.h> /* * In the arm64 world (as in the ARM world), elf_hwcap is used both internally @@ -24,20 +25,71 @@ #define ARM64_WORKAROUND_CLEAN_CACHE 0 #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 #define ARM64_WORKAROUND_845719 2 +#define ARM64_HAS_SYSREG_GIC_CPUIF 3 +#define ARM64_HAS_PAN 4 +#define ARM64_HAS_LSE_ATOMICS 5 +#define ARM64_WORKAROUND_CAVIUM_23154 6 +#define ARM64_WORKAROUND_834220 7 -#define ARM64_NCAPS 3 +#define ARM64_NCAPS 8 #ifndef __ASSEMBLY__ +#include <linux/kernel.h> + +/* CPU feature register tracking */ +enum ftr_type { + FTR_EXACT, /* Use a predefined safe value */ + FTR_LOWER_SAFE, /* Smaller value is safe */ + FTR_HIGHER_SAFE,/* Bigger value is safe */ +}; + +#define FTR_STRICT true /* SANITY check strict matching required */ +#define FTR_NONSTRICT false /* SANITY check ignored */ + +#define FTR_SIGNED true /* Value should be treated as signed */ +#define FTR_UNSIGNED false /* Value should be treated as unsigned */ + +struct arm64_ftr_bits { + bool sign; /* Value is signed ? */ + bool strict; /* CPU Sanity check: strict matching required ? */ + enum ftr_type type; + u8 shift; + u8 width; + s64 safe_val; /* safe value for discrete features */ +}; + +/* + * @arm64_ftr_reg - Feature register + * @strict_mask Bits which should match across all CPUs for sanity. + * @sys_val Safe value across the CPUs (system view) + */ +struct arm64_ftr_reg { + u32 sys_id; + const char *name; + u64 strict_mask; + u64 sys_val; + struct arm64_ftr_bits *ftr_bits; +}; + struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); + void (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; u32 midr_range_min, midr_range_max; }; + + struct { /* Feature register checking */ + u32 sys_reg; + int field_pos; + int min_field_value; + int hwcap_type; + unsigned long hwcap; + }; }; }; @@ -64,12 +116,73 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } -void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, +static inline int __attribute_const__ +cpuid_feature_extract_field_width(u64 features, int field, int width) +{ + return (s64)(features << (64 - width - field)) >> (64 - width); +} + +static inline int __attribute_const__ +cpuid_feature_extract_field(u64 features, int field) +{ + return cpuid_feature_extract_field_width(features, field, 4); +} + +static inline unsigned int __attribute_const__ +cpuid_feature_extract_unsigned_field_width(u64 features, int field, int width) +{ + return (u64)(features << (64 - width - field)) >> (64 - width); +} + +static inline unsigned int __attribute_const__ +cpuid_feature_extract_unsigned_field(u64 features, int field) +{ + return cpuid_feature_extract_unsigned_field_width(features, field, 4); +} + +static inline u64 arm64_ftr_mask(struct arm64_ftr_bits *ftrp) +{ + return (u64)GENMASK(ftrp->shift + ftrp->width - 1, ftrp->shift); +} + +static inline s64 arm64_ftr_value(struct arm64_ftr_bits *ftrp, u64 val) +{ + return ftrp->sign ? + cpuid_feature_extract_field_width(val, ftrp->shift, ftrp->width) : + cpuid_feature_extract_unsigned_field_width(val, ftrp->shift, ftrp->width); +} + +static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) +{ + return cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL_SHIFT) == 0x1 || + cpuid_feature_extract_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; +} + +void __init setup_cpu_features(void); + +void update_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); -void check_local_cpu_features(void); -bool cpu_supports_mixed_endian_el0(void); -bool system_supports_mixed_endian_el0(void); + +#ifdef CONFIG_HOTPLUG_CPU +void verify_local_cpu_capabilities(void); +#else +static inline void verify_local_cpu_capabilities(void) +{ +} +#endif + +u64 read_system_reg(u32 id); + +static inline bool cpu_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); +} + +static inline bool system_supports_mixed_endian_el0(void) +{ + return id_aa64mmfr0_mixed_endian_el0(read_system_reg(SYS_ID_AA64MMFR0_EL1)); +} #endif /* __ASSEMBLY__ */ diff --git a/kernel/arch/arm64/include/asm/cpuidle.h b/kernel/arch/arm64/include/asm/cpuidle.h index 141b2fcab..0f74f05d6 100644 --- a/kernel/arch/arm64/include/asm/cpuidle.h +++ b/kernel/arch/arm64/include/asm/cpuidle.h @@ -5,20 +5,16 @@ #ifdef CONFIG_CPU_IDLE extern int arm_cpuidle_init(unsigned int cpu); -extern int cpu_suspend(unsigned long arg); +extern int arm_cpuidle_suspend(int index); #else static inline int arm_cpuidle_init(unsigned int cpu) { return -EOPNOTSUPP; } -static inline int cpu_suspend(unsigned long arg) +static inline int arm_cpuidle_suspend(int index) { return -EOPNOTSUPP; } #endif -static inline int arm_cpuidle_suspend(int index) -{ - return cpu_suspend(index); -} #endif diff --git a/kernel/arch/arm64/include/asm/cputype.h b/kernel/arch/arm64/include/asm/cputype.h index a84ec605b..1a5949364 100644 --- a/kernel/arch/arm64/include/asm/cputype.h +++ b/kernel/arch/arm64/include/asm/cputype.h @@ -62,27 +62,18 @@ (0xf << MIDR_ARCHITECTURE_SHIFT) | \ ((partnum) << MIDR_PARTNUM_SHIFT)) -#define ARM_CPU_IMP_ARM 0x41 -#define ARM_CPU_IMP_APM 0x50 +#define ARM_CPU_IMP_ARM 0x41 +#define ARM_CPU_IMP_APM 0x50 +#define ARM_CPU_IMP_CAVIUM 0x43 -#define ARM_CPU_PART_AEM_V8 0xD0F -#define ARM_CPU_PART_FOUNDATION 0xD00 -#define ARM_CPU_PART_CORTEX_A57 0xD07 -#define ARM_CPU_PART_CORTEX_A53 0xD03 +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A53 0xD03 -#define APM_CPU_PART_POTENZA 0x000 +#define APM_CPU_PART_POTENZA 0x000 -#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 -#define ID_AA64MMFR0_BIGENDEL0_MASK (0xf << ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGENDEL0(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGENDEL0_MASK) >> ID_AA64MMFR0_BIGENDEL0_SHIFT) -#define ID_AA64MMFR0_BIGEND_SHIFT 8 -#define ID_AA64MMFR0_BIGEND_MASK (0xf << ID_AA64MMFR0_BIGEND_SHIFT) -#define ID_AA64MMFR0_BIGEND(mmfr0) \ - (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) - -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) +#define CAVIUM_CPU_PART_THUNDERX 0x0A1 #ifndef __ASSEMBLY__ @@ -115,12 +106,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void) { return read_cpuid(CTR_EL0); } - -static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) -{ - return (ID_AA64MMFR0_BIGEND(mmfr0) == 0x1) || - (ID_AA64MMFR0_BIGENDEL0(mmfr0) == 0x1); -} #endif /* __ASSEMBLY__ */ #endif diff --git a/kernel/arch/arm64/include/asm/dcc.h b/kernel/arch/arm64/include/asm/dcc.h new file mode 100644 index 000000000..65e0190e9 --- /dev/null +++ b/kernel/arch/arm64/include/asm/dcc.h @@ -0,0 +1,55 @@ +/* Copyright (c) 2014-2015 The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * A call to __dcc_getchar() or __dcc_putchar() is typically followed by + * a call to __dcc_getstatus(). We want to make sure that the CPU does + * not speculative read the DCC status before executing the read or write + * instruction. That's what the ISBs are for. + * + * The 'volatile' ensures that the compiler does not cache the status bits, + * and instead reads the DCC register every time. + */ +#ifndef __ASM_DCC_H +#define __ASM_DCC_H + +#include <asm/barrier.h> + +static inline u32 __dcc_getstatus(void) +{ + u32 ret; + + asm volatile("mrs %0, mdccsr_el0" : "=r" (ret)); + + return ret; +} + +static inline char __dcc_getchar(void) +{ + char c; + + asm volatile("mrs %0, dbgdtrrx_el0" : "=r" (c)); + isb(); + + return c; +} + +static inline void __dcc_putchar(char c) +{ + /* + * The typecast is to make absolutely certain that 'c' is + * zero-extended. + */ + asm volatile("msr dbgdtrtx_el0, %0" + : : "r" ((unsigned long)(unsigned char)c)); + isb(); +} + +#endif diff --git a/kernel/arch/arm64/include/asm/debug-monitors.h b/kernel/arch/arm64/include/asm/debug-monitors.h index 40ec68aa6..279c85b5e 100644 --- a/kernel/arch/arm64/include/asm/debug-monitors.h +++ b/kernel/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,12 @@ #ifdef __KERNEL__ +#include <linux/errno.h> +#include <linux/types.h> +#include <asm/esr.h> +#include <asm/insn.h> +#include <asm/ptrace.h> + /* Low-level stepping controls. */ #define DBG_MDSCR_SS (1 << 0) #define DBG_SPSR_SS (1 << 21) @@ -38,12 +44,7 @@ /* * Break point instruction encoding */ -#define BREAK_INSTR_SIZE 4 - -/* - * ESR values expected for dynamic and compile time BRK instruction - */ -#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff)) +#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE /* * #imm16 values used for BRK instruction generation @@ -51,10 +52,12 @@ * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps */ #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 /* * BRK instruction encoding @@ -68,25 +71,10 @@ */ #define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) -/* - * Extract byte from BRK instruction - */ -#define KGDB_DYN_DBG_BRK_INS_BYTE(x) \ - ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff) - -/* - * Extract byte from BRK #imm16 - */ -#define KGBD_DYN_DBG_BRK_IMM_BYTE(x) \ - (((((KGDB_DYN_DBG_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff) - -#define KGDB_DYN_DBG_BRK_BYTE(x) \ - (KGDB_DYN_DBG_BRK_INS_BYTE(x) | KGBD_DYN_DBG_BRK_IMM_BYTE(x)) - -#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DBG_BRK_BYTE(0) -#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DBG_BRK_BYTE(1) -#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DBG_BRK_BYTE(2) -#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DBG_BRK_BYTE(3) +#define AARCH64_BREAK_KGDB_DYN_DBG \ + (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) +#define KGDB_DYN_BRK_INS_BYTE(x) \ + ((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff) #define CACHE_FLUSH_IS_SAFE 1 @@ -127,13 +115,13 @@ void unregister_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); -enum debug_el { +enum dbg_active_el { DBG_ACTIVE_EL0 = 0, DBG_ACTIVE_EL1, }; -void enable_debug_monitors(enum debug_el el); -void disable_debug_monitors(enum debug_el el); +void enable_debug_monitors(enum dbg_active_el el); +void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); diff --git a/kernel/arch/arm64/include/asm/dma-mapping.h b/kernel/arch/arm64/include/asm/dma-mapping.h index 9437e3dc5..61e08f360 100644 --- a/kernel/arch/arm64/include/asm/dma-mapping.h +++ b/kernel/arch/arm64/include/asm/dma-mapping.h @@ -21,20 +21,22 @@ #include <linux/types.h> #include <linux/vmalloc.h> -#include <asm-generic/dma-coherent.h> - #include <xen/xen.h> #include <asm/xen/hypervisor.h> #define DMA_ERROR_CODE (~(dma_addr_t)0) -extern struct dma_map_ops *dma_ops; +extern struct dma_map_ops dummy_dma_ops; static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (unlikely(!dev) || !dev->archdata.dma_ops) - return dma_ops; - else + if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; + + /* + * We expect no ISA devices, and all other DMA masters are expected to + * have someone call arch_setup_dma_ops at device creation time. + */ + return &dummy_dma_ops; } static inline struct dma_map_ops *get_dma_ops(struct device *dev) @@ -45,13 +47,15 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - struct iommu_ops *iommu, bool coherent) -{ - dev->archdata.dma_coherent = coherent; -} +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + struct iommu_ops *iommu, bool coherent); #define arch_setup_dma_ops arch_setup_dma_ops +#ifdef CONFIG_IOMMU_DMA +void arch_teardown_dma_ops(struct device *dev); +#define arch_teardown_dma_ops arch_teardown_dma_ops +#endif + /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { @@ -72,28 +76,6 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) return (phys_addr_t)dev_addr; } -static inline int dma_mapping_error(struct device *dev, dma_addr_t dev_addr) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - debug_dma_mapping_error(dev, dev_addr); - return ops->mapping_error(dev, dev_addr); -} - -static inline int dma_supported(struct device *dev, u64 mask) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - return ops->dma_supported(dev, mask); -} - -static inline int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - - return 0; -} - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { if (!dev->dma_mask) @@ -106,50 +88,5 @@ static inline void dma_mark_clean(void *addr, size_t size) { } -#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL) -#define dma_free_coherent(d, s, h, f) dma_free_attrs(d, s, h, f, NULL) - -static inline void *dma_alloc_attrs(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - void *vaddr; - - if (dma_alloc_from_coherent(dev, size, dma_handle, &vaddr)) - return vaddr; - - vaddr = ops->alloc(dev, size, dma_handle, flags, attrs); - debug_dma_alloc_coherent(dev, size, *dma_handle, vaddr); - return vaddr; -} - -static inline void dma_free_attrs(struct device *dev, size_t size, - void *vaddr, dma_addr_t dev_addr, - struct dma_attrs *attrs) -{ - struct dma_map_ops *ops = get_dma_ops(dev); - - if (dma_release_from_coherent(dev, get_order(size), vaddr)) - return; - - debug_dma_free_coherent(dev, size, vaddr, dev_addr); - ops->free(dev, size, vaddr, dev_addr, attrs); -} - -/* - * There is no dma_cache_sync() implementation, so just return NULL here. - */ -static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *handle, gfp_t flags) -{ - return NULL; -} - -static inline void dma_free_noncoherent(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t handle) -{ -} - #endif /* __KERNEL__ */ #endif /* __ASM_DMA_MAPPING_H */ diff --git a/kernel/arch/arm64/include/asm/esr.h b/kernel/arch/arm64/include/asm/esr.h index 70522450c..77eeb2cc6 100644 --- a/kernel/arch/arm64/include/asm/esr.h +++ b/kernel/arch/arm64/include/asm/esr.h @@ -18,6 +18,8 @@ #ifndef __ASM_ESR_H #define __ASM_ESR_H +#include <asm/memory.h> + #define ESR_ELx_EC_UNKNOWN (0x00) #define ESR_ELx_EC_WFx (0x01) /* Unallocated EC: 0x02 */ @@ -99,6 +101,13 @@ #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) +/* ESR value templates for specific events */ + +/* BRK instruction trap from AArch64 state */ +#define ESR_ELx_VAL_BRK64(imm) \ + ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ + ((imm) & 0xffff)) + #ifndef __ASSEMBLY__ #include <asm/types.h> diff --git a/kernel/arch/arm64/include/asm/exception.h b/kernel/arch/arm64/include/asm/exception.h index 0303705fc..6cb7e1a6b 100644 --- a/kernel/arch/arm64/include/asm/exception.h +++ b/kernel/arch/arm64/include/asm/exception.h @@ -18,7 +18,13 @@ #ifndef __ASM_EXCEPTION_H #define __ASM_EXCEPTION_H +#include <linux/ftrace.h> + #define __exception __attribute__((section(".exception.text"))) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define __exception_irq_entry __irq_entry +#else #define __exception_irq_entry __exception +#endif #endif /* __ASM_EXCEPTION_H */ diff --git a/kernel/arch/arm64/include/asm/fixmap.h b/kernel/arch/arm64/include/asm/fixmap.h index 95e6b6dcb..309704544 100644 --- a/kernel/arch/arm64/include/asm/fixmap.h +++ b/kernel/arch/arm64/include/asm/fixmap.h @@ -8,7 +8,7 @@ * Copyright (C) 1998 Ingo Molnar * Copyright (C) 2013 Mark Salter <msalter@redhat.com> * - * Adapted from arch/x86_64 version. + * Adapted from arch/x86 version. * */ @@ -17,6 +17,8 @@ #ifndef __ASSEMBLY__ #include <linux/kernel.h> +#include <linux/sizes.h> +#include <asm/boot.h> #include <asm/page.h> /* @@ -32,6 +34,20 @@ */ enum fixed_addresses { FIX_HOLE, + + /* + * Reserve a virtual window for the FDT that is 2 MB larger than the + * maximum supported size, and put it at the top of the fixmap region. + * The additional space ensures that any FDT that does not exceed + * MAX_FDT_SIZE can be mapped regardless of whether it crosses any + * 2 MB alignment boundaries. + * + * Keep this at the top so it remains 2 MB aligned. + */ +#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) + FIX_FDT_END, + FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, + FIX_EARLYCON_MEM_BASE, FIX_TEXT_POKE0, __end_of_permanent_fixed_addresses, @@ -40,11 +56,7 @@ enum fixed_addresses { * Temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define NR_FIX_BTMAPS 4 -#else -#define NR_FIX_BTMAPS 64 -#endif +#define NR_FIX_BTMAPS (SZ_256K / PAGE_SIZE) #define FIX_BTMAPS_SLOTS 7 #define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) diff --git a/kernel/arch/arm64/include/asm/futex.h b/kernel/arch/arm64/include/asm/futex.h index 74069b3bd..007a69fc4 100644 --- a/kernel/arch/arm64/include/asm/futex.h +++ b/kernel/arch/arm64/include/asm/futex.h @@ -20,10 +20,17 @@ #include <linux/futex.h> #include <linux/uaccess.h> + +#include <asm/alternative.h> +#include <asm/cpufeature.h> #include <asm/errno.h> +#include <asm/sysreg.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ + ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ +" prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ @@ -39,6 +46,8 @@ " .align 3\n" \ " .quad 1b, 4b, 2b, 4b\n" \ " .popsection\n" \ + ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ : "memory") @@ -112,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +" prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" " cbnz %w3, 3f\n" diff --git a/kernel/arch/arm64/include/asm/hardirq.h b/kernel/arch/arm64/include/asm/hardirq.h index 6aae421f4..a57601f9d 100644 --- a/kernel/arch/arm64/include/asm/hardirq.h +++ b/kernel/arch/arm64/include/asm/hardirq.h @@ -24,9 +24,7 @@ typedef struct { unsigned int __softirq_pending; -#ifdef CONFIG_SMP unsigned int ipi_irqs[NR_IPI]; -#endif } ____cacheline_aligned irq_cpustat_t; #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ @@ -34,10 +32,8 @@ typedef struct { #define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ #define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) -#ifdef CONFIG_SMP u64 smp_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu smp_irq_stat_cpu -#endif #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 @@ -47,9 +43,4 @@ static inline void ack_bad_irq(unsigned int irq) irq_err_count++; } -/* - * No arch-specific IRQ flags. - */ -#define set_irq_flags(irq, flags) - #endif /* __ASM_HARDIRQ_H */ diff --git a/kernel/arch/arm64/include/asm/hugetlb.h b/kernel/arch/arm64/include/asm/hugetlb.h index 5b7ca8ace..bb4052e85 100644 --- a/kernel/arch/arm64/include/asm/hugetlb.h +++ b/kernel/arch/arm64/include/asm/hugetlb.h @@ -13,10 +13,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __ASM_HUGETLB_H @@ -86,10 +82,6 @@ static inline int prepare_hugepage_range(struct file *file, return 0; } -static inline void hugetlb_prefault_arch_hook(struct mm_struct *mm) -{ -} - static inline int huge_pte_none(pte_t pte) { return pte_none(pte); @@ -100,15 +92,6 @@ static inline pte_t huge_pte_wrprotect(pte_t pte) return pte_wrprotect(pte); } -static inline int arch_prepare_hugepage(struct page *page) -{ - return 0; -} - -static inline void arch_release_hugepage(struct page *page) -{ -} - static inline void arch_clear_hugepage_flags(struct page *page) { clear_bit(PG_dcache_clean, &page->flags); diff --git a/kernel/arch/arm64/include/asm/hw_breakpoint.h b/kernel/arch/arm64/include/asm/hw_breakpoint.h index 52b484b6a..9732908bf 100644 --- a/kernel/arch/arm64/include/asm/hw_breakpoint.h +++ b/kernel/arch/arm64/include/asm/hw_breakpoint.h @@ -16,6 +16,9 @@ #ifndef __ASM_HW_BREAKPOINT_H #define __ASM_HW_BREAKPOINT_H +#include <asm/cputype.h> +#include <asm/cpufeature.h> + #ifdef __KERNEL__ struct arch_hw_breakpoint_ctrl { @@ -132,5 +135,23 @@ static inline void ptrace_hw_copy_thread(struct task_struct *task) extern struct pmu perf_ops_bp; +/* Determine number of BRP registers available. */ +static inline int get_num_brps(void) +{ + u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + return 1 + + cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_BRPS_SHIFT); +} + +/* Determine number of WRP registers available. */ +static inline int get_num_wrps(void) +{ + u64 dfr0 = read_system_reg(SYS_ID_AA64DFR0_EL1); + return 1 + + cpuid_feature_extract_unsigned_field(dfr0, + ID_AA64DFR0_WRPS_SHIFT); +} + #endif /* __KERNEL__ */ #endif /* __ASM_BREAKPOINT_H */ diff --git a/kernel/arch/arm64/include/asm/hwcap.h b/kernel/arch/arm64/include/asm/hwcap.h index 0ad735166..400b80b49 100644 --- a/kernel/arch/arm64/include/asm/hwcap.h +++ b/kernel/arch/arm64/include/asm/hwcap.h @@ -52,6 +52,14 @@ extern unsigned int compat_elf_hwcap, compat_elf_hwcap2; #endif +enum { + CAP_HWCAP = 1, +#ifdef CONFIG_COMPAT + CAP_COMPAT_HWCAP, + CAP_COMPAT_HWCAP2, +#endif +}; + extern unsigned long elf_hwcap; #endif #endif diff --git a/kernel/arch/arm64/include/asm/insn.h b/kernel/arch/arm64/include/asm/insn.h index f81b328d9..30e50eb54 100644 --- a/kernel/arch/arm64/include/asm/insn.h +++ b/kernel/arch/arm64/include/asm/insn.h @@ -281,6 +281,7 @@ __AARCH64_INSN_FUNCS(ret, 0xFFFFFC1F, 0xD65F0000) #undef __AARCH64_INSN_FUNCS bool aarch64_insn_is_nop(u32 insn); +bool aarch64_insn_is_branch_imm(u32 insn); int aarch64_insn_read(void *addr, u32 *insnp); int aarch64_insn_write(void *addr, u32 insn); @@ -351,6 +352,8 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +s32 aarch64_get_branch_offset(u32 insn); +u32 aarch64_set_branch_offset(u32 insn, s32 offset); bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn); diff --git a/kernel/arch/arm64/include/asm/io.h b/kernel/arch/arm64/include/asm/io.h index 540f7c0ae..44be1e03e 100644 --- a/kernel/arch/arm64/include/asm/io.h +++ b/kernel/arch/arm64/include/asm/io.h @@ -117,10 +117,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * ordering rules but do not guarantee any ordering relative to Normal memory * accesses. */ -#define readb_relaxed(c) ({ u8 __v = __raw_readb(c); __v; }) -#define readw_relaxed(c) ({ u16 __v = le16_to_cpu((__force __le16)__raw_readw(c)); __v; }) -#define readl_relaxed(c) ({ u32 __v = le32_to_cpu((__force __le32)__raw_readl(c)); __v; }) -#define readq_relaxed(c) ({ u64 __v = le64_to_cpu((__force __le64)__raw_readq(c)); __v; }) +#define readb_relaxed(c) ({ u8 __r = __raw_readb(c); __r; }) +#define readw_relaxed(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; }) +#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; }) +#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; }) #define writeb_relaxed(v,c) ((void)__raw_writeb((v),(c))) #define writew_relaxed(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c))) @@ -170,6 +170,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size); #define ioremap(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_nocache(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define ioremap_wc(addr, size) __ioremap((addr), (size), __pgprot(PROT_NORMAL_NC)) +#define ioremap_wt(addr, size) __ioremap((addr), (size), __pgprot(PROT_DEVICE_nGnRE)) #define iounmap __iounmap /* diff --git a/kernel/arch/arm64/include/asm/irq.h b/kernel/arch/arm64/include/asm/irq.h index bbb251b14..8e8d30684 100644 --- a/kernel/arch/arm64/include/asm/irq.h +++ b/kernel/arch/arm64/include/asm/irq.h @@ -1,24 +1,15 @@ #ifndef __ASM_IRQ_H #define __ASM_IRQ_H -#include <linux/irqchip/arm-gic-acpi.h> - #include <asm-generic/irq.h> struct pt_regs; -extern void migrate_irqs(void); extern void set_handle_irq(void (*handle_irq)(struct pt_regs *)); -static inline void acpi_irq_init(void) +static inline int nr_legacy_irqs(void) { - /* - * Hardcode ACPI IRQ chip initialization to GICv2 for now. - * Proper irqchip infrastructure will be implemented along with - * incoming GICv2m|GICv3|ITS bits. - */ - acpi_gic_init(); + return 0; } -#define acpi_irq_init acpi_irq_init #endif diff --git a/kernel/arch/arm64/include/asm/irq_work.h b/kernel/arch/arm64/include/asm/irq_work.h index b4f6b19a8..8e24ef3f7 100644 --- a/kernel/arch/arm64/include/asm/irq_work.h +++ b/kernel/arch/arm64/include/asm/irq_work.h @@ -1,8 +1,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -#ifdef CONFIG_SMP - #include <asm/smp.h> static inline bool arch_irq_work_has_interrupt(void) @@ -10,13 +8,4 @@ static inline bool arch_irq_work_has_interrupt(void) return !!__smp_cross_call; } -#else - -static inline bool arch_irq_work_has_interrupt(void) -{ - return false; -} - -#endif - #endif /* __ASM_IRQ_WORK_H */ diff --git a/kernel/arch/arm64/include/asm/jump_label.h b/kernel/arch/arm64/include/asm/jump_label.h index c0e5165c2..1b5e0e843 100644 --- a/kernel/arch/arm64/include/asm/jump_label.h +++ b/kernel/arch/arm64/include/asm/jump_label.h @@ -26,14 +26,28 @@ #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE -static __always_inline bool arch_static_branch(struct static_key *key) +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { asm goto("1: nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" ".align 3\n\t" ".quad 1b, %l[l_yes], %c0\n\t" ".popsection\n\t" - : : "i"(key) : : l_yes); + : : "i"(&((char *)key)[branch]) : : l_yes); + + return false; +l_yes: + return true; +} + +static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch) +{ + asm goto("1: b %l[l_yes]\n\t" + ".pushsection __jump_table, \"aw\"\n\t" + ".align 3\n\t" + ".quad 1b, %l[l_yes], %c0\n\t" + ".popsection\n\t" + : : "i"(&((char *)key)[branch]) : : l_yes); return false; l_yes: diff --git a/kernel/arch/arm64/include/asm/kasan.h b/kernel/arch/arm64/include/asm/kasan.h new file mode 100644 index 000000000..2774fa384 --- /dev/null +++ b/kernel/arch/arm64/include/asm/kasan.h @@ -0,0 +1,38 @@ +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_KASAN + +#include <linux/linkage.h> +#include <asm/memory.h> + +/* + * KASAN_SHADOW_START: beginning of the kernel virtual addresses. + * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/8 of kernel virtual addresses. + */ +#define KASAN_SHADOW_START (VA_START) +#define KASAN_SHADOW_END (KASAN_SHADOW_START + (1UL << (VA_BITS - 3))) + +/* + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * (1 << 61) shadow addresses - [KASAN_SHADOW_OFFSET,KASAN_SHADOW_END] + * cover all 64-bits of virtual addresses. So KASAN_SHADOW_OFFSET + * should satisfy the following equation: + * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END - (1ULL << 61) + */ +#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << (64 - 3))) + +void kasan_init(void); +asmlinkage void kasan_early_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif +#endif diff --git a/kernel/arch/arm64/include/asm/kernel-pgtable.h b/kernel/arch/arm64/include/asm/kernel-pgtable.h new file mode 100644 index 000000000..a459714ee --- /dev/null +++ b/kernel/arch/arm64/include/asm/kernel-pgtable.h @@ -0,0 +1,83 @@ +/* + * Kernel page table mapping + * + * Copyright (C) 2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef __ASM_KERNEL_PGTABLE_H +#define __ASM_KERNEL_PGTABLE_H + + +/* + * The linear mapping and the start of memory are both 2M aligned (per + * the arm64 booting.txt requirements). Hence we can use section mapping + * with 4K (section size = 2M) but not with 16K (section size = 32M) or + * 64K (section size = 512M). + */ +#ifdef CONFIG_ARM64_4K_PAGES +#define ARM64_SWAPPER_USES_SECTION_MAPS 1 +#else +#define ARM64_SWAPPER_USES_SECTION_MAPS 0 +#endif + +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. With the 64K page configuration, swapper and idmap need to + * map to pte level. The swapper also maps the FDT (see __create_page_tables + * for more information). Note that the number of ID map translation levels + * could be increased on the fly if system RAM is out of reach for the default + * VA range, so pages required to map highest possible PA are reserved in all + * cases. + */ +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) +#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1) +#else +#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) +#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT)) +#endif + +#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) +#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE) + +/* Initial memory map size */ +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_BLOCK_SHIFT SECTION_SHIFT +#define SWAPPER_BLOCK_SIZE SECTION_SIZE +#define SWAPPER_TABLE_SHIFT PUD_SHIFT +#else +#define SWAPPER_BLOCK_SHIFT PAGE_SHIFT +#define SWAPPER_BLOCK_SIZE PAGE_SIZE +#define SWAPPER_TABLE_SHIFT PMD_SHIFT +#endif + +/* The size of the initial kernel direct mapping */ +#define SWAPPER_INIT_MAP_SIZE (_AC(1, UL) << SWAPPER_TABLE_SHIFT) + +/* + * Initial memory map attributes. + */ +#define SWAPPER_PTE_FLAGS (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) +#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) + +#if ARM64_SWAPPER_USES_SECTION_MAPS +#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS) +#else +#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS) +#endif + + +#endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/kernel/arch/arm64/include/asm/kvm_arm.h b/kernel/arch/arm64/include/asm/kvm_arm.h index ac6fafb95..5e6857b6b 100644 --- a/kernel/arch/arm64/include/asm/kvm_arm.h +++ b/kernel/arch/arm64/include/asm/kvm_arm.h @@ -95,6 +95,7 @@ SCTLR_EL2_SA | SCTLR_EL2_I) /* TCR_EL2 Registers bits */ +#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS (7 << 16) #define TCR_EL2_PS_40B (2 << 16) @@ -106,9 +107,10 @@ #define TCR_EL2_MASK (TCR_EL2_TG0 | TCR_EL2_SH0 | \ TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ) -#define TCR_EL2_FLAGS (TCR_EL2_PS_40B) +#define TCR_EL2_FLAGS (TCR_EL2_RES1 | TCR_EL2_PS_40B) /* VTCR_EL2 Registers bits */ +#define VTCR_EL2_RES1 (1 << 31) #define VTCR_EL2_PS_MASK (7 << 16) #define VTCR_EL2_TG0_MASK (1 << 14) #define VTCR_EL2_TG0_4K (0 << 14) @@ -147,7 +149,8 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ + VTCR_EL2_RES1) #define VTTBR_X (38 - VTCR_EL2_T0SZ_40B) #else /* @@ -158,7 +161,8 @@ */ #define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \ - VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B) + VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \ + VTCR_EL2_RES1) #define VTTBR_X (37 - VTCR_EL2_T0SZ_40B) #endif @@ -168,13 +172,15 @@ #define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ -#define HSTR_EL2_TTEE (1 << 16) #define HSTR_EL2_T(x) (1 << x) +/* Hyp Coproccessor Trap Register Shifts */ +#define CPTR_EL2_TFP_SHIFT 10 + /* Hyp Coprocessor Trap Register */ #define CPTR_EL2_TCPAC (1 << 31) #define CPTR_EL2_TTA (1 << 20) -#define CPTR_EL2_TFP (1 << 10) +#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) /* Hyp Debug Configuration Register bits */ #define MDCR_EL2_TDRA (1 << 11) @@ -194,4 +200,20 @@ /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) +#define kvm_arm_exception_type \ + {0, "IRQ" }, \ + {1, "TRAP" } + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \ + ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \ + ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/kernel/arch/arm64/include/asm/kvm_asm.h b/kernel/arch/arm64/include/asm/kvm_asm.h index 4f7310fa7..5e377101f 100644 --- a/kernel/arch/arm64/include/asm/kvm_asm.h +++ b/kernel/arch/arm64/include/asm/kvm_asm.h @@ -27,7 +27,7 @@ #define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ #define CSSELR_EL1 2 /* Cache Size Selection Register */ #define SCTLR_EL1 3 /* System Control Register */ -#define ACTLR_EL1 4 /* Auxilliary Control Register */ +#define ACTLR_EL1 4 /* Auxiliary Control Register */ #define CPACR_EL1 5 /* Coprocessor Access Control */ #define TTBR0_EL1 6 /* Translation Table Base Register 0 */ #define TTBR1_EL1 7 /* Translation Table Base Register 1 */ @@ -46,24 +46,14 @@ #define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ #define PAR_EL1 21 /* Physical Address Register */ #define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define DBGBCR0_EL1 23 /* Debug Breakpoint Control Registers (0-15) */ -#define DBGBCR15_EL1 38 -#define DBGBVR0_EL1 39 /* Debug Breakpoint Value Registers (0-15) */ -#define DBGBVR15_EL1 54 -#define DBGWCR0_EL1 55 /* Debug Watchpoint Control Registers (0-15) */ -#define DBGWCR15_EL1 70 -#define DBGWVR0_EL1 71 /* Debug Watchpoint Value Registers (0-15) */ -#define DBGWVR15_EL1 86 -#define MDCCINT_EL1 87 /* Monitor Debug Comms Channel Interrupt Enable Reg */ +#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ /* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 88 /* Domain Access Control Register */ -#define IFSR32_EL2 89 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 90 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 91 /* Debug Vector Catch Register */ -#define TEECR32_EL1 92 /* ThumbEE Configuration Register */ -#define TEEHBR32_EL1 93 /* ThumbEE Handler Base Register */ -#define NR_SYS_REGS 94 +#define DACR32_EL2 24 /* Domain Access Control Register */ +#define IFSR32_EL2 25 /* Instruction Fault Status Register */ +#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ +#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ +#define NR_SYS_REGS 28 /* 32bit mapping */ #define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ @@ -132,10 +122,7 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_ich_vtr_el2(void); -extern char __save_vgic_v2_state[]; -extern char __restore_vgic_v2_state[]; -extern char __save_vgic_v3_state[]; -extern char __restore_vgic_v3_state[]; +extern u32 __kvm_get_mdcr_el2(void); #endif diff --git a/kernel/arch/arm64/include/asm/kvm_emulate.h b/kernel/arch/arm64/include/asm/kvm_emulate.h index 17e92f05b..25a40213b 100644 --- a/kernel/arch/arm64/include/asm/kvm_emulate.h +++ b/kernel/arch/arm64/include/asm/kvm_emulate.h @@ -99,12 +99,22 @@ static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) *vcpu_cpsr(vcpu) |= COMPAT_PSR_T_BIT; } -static inline unsigned long *vcpu_reg(const struct kvm_vcpu *vcpu, u8 reg_num) +/* + * vcpu_get_reg and vcpu_set_reg should always be passed a register number + * coming from a read of ESR_EL2. Otherwise, it may give the wrong result on + * AArch32 with banked registers. + */ +static inline unsigned long vcpu_get_reg(const struct kvm_vcpu *vcpu, + u8 reg_num) { - if (vcpu_mode_is_32bit(vcpu)) - return vcpu_reg32(vcpu, reg_num); + return (reg_num == 31) ? 0 : vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +} - return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.regs[reg_num]; +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, + unsigned long val) +{ + if (reg_num != 31) + vcpu_gp_regs(vcpu)->regs.regs[reg_num] = val; } /* Get vcpu SPSR for current mode */ diff --git a/kernel/arch/arm64/include/asm/kvm_host.h b/kernel/arch/arm64/include/asm/kvm_host.h index f0f58c9be..a35ce7266 100644 --- a/kernel/arch/arm64/include/asm/kvm_host.h +++ b/kernel/arch/arm64/include/asm/kvm_host.h @@ -30,19 +30,16 @@ #define __KVM_HAVE_ARCH_INTC_INITIALIZED -#if defined(CONFIG_KVM_ARM_MAX_VCPUS) -#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS -#else -#define KVM_MAX_VCPUS 0 -#endif - #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_HALT_POLL_NS_DEFAULT 500000 #include <kvm/arm_vgic.h> #include <kvm/arm_arch_timer.h> +#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS + #define KVM_VCPU_MAX_FEATURES 3 int __attribute_const__ kvm_target_cpu(void); @@ -103,15 +100,34 @@ struct kvm_vcpu_arch { /* HYP configuration */ u64 hcr_el2; + u32 mdcr_el2; /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* Debug state */ + /* Guest debug state */ u64 debug_flags; + /* + * We maintain more than a single set of debug registers to support + * debugging the guest from the host and to maintain separate host and + * guest state during world switches. vcpu_debug_state are the debug + * registers of the vcpu as the guest sees them. host_debug_state are + * the host registers which are saved and restored during + * world switches. external_debug_state contains the debug + * values we want to debug the guest. This is set via the + * KVM_SET_GUEST_DEBUG ioctl. + * + * debug_ptr points to the set of debug registers that should be loaded + * onto the hardware when running the guest. + */ + struct kvm_guest_debug_arch *debug_ptr; + struct kvm_guest_debug_arch vcpu_debug_state; + struct kvm_guest_debug_arch external_debug_state; + /* Pointer to host CPU context */ kvm_cpu_context_t *host_cpu_context; + struct kvm_guest_debug_arch host_debug_state; /* VGIC state */ struct vgic_cpu vgic_cpu; @@ -122,7 +138,21 @@ struct kvm_vcpu_arch { * here. */ - /* Don't run the guest */ + /* + * Guest registers we preserve during guest debugging. + * + * These shadow registers are updated by the kvm_handle_sys_reg + * trap handler if the guest accesses or updates them while we + * are using guest debug. + */ + struct { + u32 mdscr_el1; + } guest_debug_preserved; + + /* vcpu power-off state */ + bool power_off; + + /* Don't run the guest (internal implementation need) */ bool pause; /* IO related fields */ @@ -165,6 +195,7 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 halt_successful_poll; + u32 halt_attempted_poll; u32 halt_wakeup; }; @@ -216,38 +247,15 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, hyp_stack_ptr, vector_ptr); } -struct vgic_sr_vectors { - void *save_vgic; - void *restore_vgic; -}; - -static inline void vgic_arch_setup(const struct vgic_params *vgic) -{ - extern struct vgic_sr_vectors __vgic_sr_vectors; - - switch(vgic->type) - { - case VGIC_V2: - __vgic_sr_vectors.save_vgic = __save_vgic_v2_state; - __vgic_sr_vectors.restore_vgic = __restore_vgic_v2_state; - break; - -#ifdef CONFIG_ARM_GIC_V3 - case VGIC_V3: - __vgic_sr_vectors.save_vgic = __save_vgic_v3_state; - __vgic_sr_vectors.restore_vgic = __restore_vgic_v3_state; - break; -#endif - - default: - BUG(); - } -} - static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +void kvm_arm_init_debug(void); +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); +void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/kernel/arch/arm64/include/asm/lse.h b/kernel/arch/arm64/include/asm/lse.h new file mode 100644 index 000000000..3de42d686 --- /dev/null +++ b/kernel/arch/arm64/include/asm/lse.h @@ -0,0 +1,53 @@ +#ifndef __ASM_LSE_H +#define __ASM_LSE_H + +#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) + +#include <linux/stringify.h> +#include <asm/alternative.h> +#include <asm/cpufeature.h> + +#ifdef __ASSEMBLER__ + +.arch_extension lse + +.macro alt_lse, llsc, lse + alternative_insn "\llsc", "\lse", ARM64_HAS_LSE_ATOMICS +.endm + +#else /* __ASSEMBLER__ */ + +__asm__(".arch_extension lse"); + +/* Move the ll/sc atomics out-of-line */ +#define __LL_SC_INLINE +#define __LL_SC_PREFIX(x) __ll_sc_##x +#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x)) + +/* Macro for constructing calls to out-of-line ll/sc atomics */ +#define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n" + +/* In-line patching at runtime */ +#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ + ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) + +#endif /* __ASSEMBLER__ */ +#else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ + +#ifdef __ASSEMBLER__ + +.macro alt_lse, llsc, lse + \llsc +.endm + +#else /* __ASSEMBLER__ */ + +#define __LL_SC_INLINE static inline +#define __LL_SC_PREFIX(x) x +#define __LL_SC_EXPORT(x) + +#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc + +#endif /* __ASSEMBLER__ */ +#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#endif /* __ASM_LSE_H */ diff --git a/kernel/arch/arm64/include/asm/memory.h b/kernel/arch/arm64/include/asm/memory.h index 44a59c20e..853953cd1 100644 --- a/kernel/arch/arm64/include/asm/memory.h +++ b/kernel/arch/arm64/include/asm/memory.h @@ -42,12 +42,14 @@ * PAGE_OFFSET - the virtual address of the start of the kernel image (top * (VA_BITS - 1)) * VA_BITS - the maximum number of bits for virtual addresses. + * VA_START - the first kernel virtual address. * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area. * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ #define VA_BITS (CONFIG_ARM64_VA_BITS) +#define VA_START (UL(0xffffffffffffffff) << VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) @@ -68,10 +70,6 @@ #define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4)) -#if TASK_SIZE_64 > MODULES_VADDR -#error Top of 64-bit user space clashes with start of module space -#endif - /* * Physical vs virtual RAM address space conversion. These are * private definitions which should NOT be used outside memory.h @@ -81,12 +79,6 @@ #define __phys_to_virt(x) ((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET)) /* - * Convert a physical address to a Page Frame Number and back - */ -#define __phys_to_pfn(paddr) ((unsigned long)((paddr) >> PAGE_SHIFT)) -#define __pfn_to_phys(pfn) ((phys_addr_t)(pfn) << PAGE_SHIFT) - -/* * Convert a page to/from a physical address */ #define page_to_phys(page) (__pfn_to_phys(page_to_pfn(page))) @@ -100,6 +92,7 @@ #define MT_DEVICE_GRE 2 #define MT_NORMAL_NC 3 #define MT_NORMAL 4 +#define MT_NORMAL_WT 5 /* * Memory types for Stage-2 translation diff --git a/kernel/arch/arm64/include/asm/mmu.h b/kernel/arch/arm64/include/asm/mmu.h index 3d311761e..990124a67 100644 --- a/kernel/arch/arm64/include/asm/mmu.h +++ b/kernel/arch/arm64/include/asm/mmu.h @@ -17,22 +17,23 @@ #define __ASM_MMU_H typedef struct { - unsigned int id; - raw_spinlock_t id_lock; - void *vdso; + atomic64_t id; + void *vdso; } mm_context_t; -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __RAW_SPIN_LOCK_UNLOCKED(name.context.id_lock), - -#define ASID(mm) ((mm)->context.id & 0xffff) +/* + * This macro is only used by the TLBI code, which cannot race with an + * ASID change and therefore doesn't need to reload the counter using + * atomic64_read. + */ +#define ASID(mm) ((mm)->context.id.counter & 0xffff) extern void paging_init(void); -extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); +extern void *fixmap_remap_fdt(phys_addr_t dt_phys); #endif diff --git a/kernel/arch/arm64/include/asm/mmu_context.h b/kernel/arch/arm64/include/asm/mmu_context.h index 8ec41e5f5..24165784b 100644 --- a/kernel/arch/arm64/include/asm/mmu_context.h +++ b/kernel/arch/arm64/include/asm/mmu_context.h @@ -28,13 +28,6 @@ #include <asm/cputype.h> #include <asm/pgtable.h> -#define MAX_ASID_BITS 16 - -extern unsigned int cpu_last_asid; - -void __init_new_context(struct task_struct *tsk, struct mm_struct *mm); -void __new_context(struct mm_struct *mm); - #ifdef CONFIG_PID_IN_CONTEXTIDR static inline void contextidr_thread_switch(struct task_struct *next) { @@ -77,96 +70,38 @@ static inline bool __cpu_uses_extended_idmap(void) unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS))); } -static inline void __cpu_set_tcr_t0sz(u64 t0sz) -{ - unsigned long tcr; - - if (__cpu_uses_extended_idmap()) - asm volatile ( - " mrs %0, tcr_el1 ;" - " bfi %0, %1, %2, %3 ;" - " msr tcr_el1, %0 ;" - " isb" - : "=&r" (tcr) - : "r"(t0sz), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); -} - -/* - * Set TCR.T0SZ to the value appropriate for activating the identity map. - */ -static inline void cpu_set_idmap_tcr_t0sz(void) -{ - __cpu_set_tcr_t0sz(idmap_t0sz); -} - /* * Set TCR.T0SZ to its default value (based on VA_BITS) */ static inline void cpu_set_default_tcr_t0sz(void) { - __cpu_set_tcr_t0sz(TCR_T0SZ(VA_BITS)); -} - -static inline void switch_new_context(struct mm_struct *mm) -{ - unsigned long flags; - - __new_context(mm); + unsigned long tcr; - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); -} + if (!__cpu_uses_extended_idmap()) + return; -static inline void check_and_switch_context(struct mm_struct *mm, - struct task_struct *tsk) -{ - /* - * Required during context switch to avoid speculative page table - * walking with the wrong TTBR. - */ - cpu_set_reserved_ttbr0(); - - if (!((mm->context.id ^ cpu_last_asid) >> MAX_ASID_BITS)) - /* - * The ASID is from the current generation, just switch to the - * new pgd. This condition is only true for calls from - * context_switch() and interrupts are already disabled. - */ - cpu_switch_mm(mm->pgd, mm); - else if (irqs_disabled()) - /* - * Defer the new ASID allocation until after the context - * switch critical region since __new_context() cannot be - * called with interrupts disabled. - */ - set_ti_thread_flag(task_thread_info(tsk), TIF_SWITCH_MM); - else - /* - * That is a direct call to switch_mm() or activate_mm() with - * interrupts enabled and a new context. - */ - switch_new_context(mm); + asm volatile ( + " mrs %0, tcr_el1 ;" + " bfi %0, %1, %2, %3 ;" + " msr tcr_el1, %0 ;" + " isb" + : "=&r" (tcr) + : "r"(TCR_T0SZ(VA_BITS)), "I"(TCR_T0SZ_OFFSET), "I"(TCR_TxSZ_WIDTH)); } -#define init_new_context(tsk,mm) (__init_new_context(tsk,mm),0) +/* + * It would be nice to return ASIDs back to the allocator, but unfortunately + * that introduces a race with a generation rollover where we could erroneously + * free an ASID allocated in a future generation. We could workaround this by + * freeing the ASID from the context of the dying mm (e.g. in arch_exit_mmap), + * but we'd then need to make sure that we didn't dirty any TLBs afterwards. + * Setting a reserved TTBR0 or EPD0 would work, but it all gets ugly when you + * take CPU migration into account. + */ #define destroy_context(mm) do { } while(0) +void check_and_switch_context(struct mm_struct *mm, unsigned int cpu); -#define finish_arch_post_lock_switch \ - finish_arch_post_lock_switch -static inline void finish_arch_post_lock_switch(void) -{ - if (test_and_clear_thread_flag(TIF_SWITCH_MM)) { - struct mm_struct *mm = current->mm; - unsigned long flags; - - __new_context(mm); - - local_irq_save(flags); - cpu_switch_mm(mm->pgd, mm); - local_irq_restore(flags); - } -} +#define init_new_context(tsk,mm) ({ atomic64_set(&(mm)->context.id, 0); 0; }) /* * This is called when "tsk" is about to enter lazy TLB mode. @@ -194,6 +129,9 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, { unsigned int cpu = smp_processor_id(); + if (prev == next) + return; + /* * init_mm.pgd does not contain any user mappings and it is always * active for kernel addresses in TTBR1. Just set the reserved TTBR0. @@ -203,8 +141,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, return; } - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next) - check_and_switch_context(next, tsk); + check_and_switch_context(next, cpu); } #define deactivate_mm(tsk,mm) do { } while (0) diff --git a/kernel/arch/arm64/include/asm/page.h b/kernel/arch/arm64/include/asm/page.h index 7d9c7e4a4..9b2f5a9d0 100644 --- a/kernel/arch/arm64/include/asm/page.h +++ b/kernel/arch/arm64/include/asm/page.h @@ -20,31 +20,22 @@ #define __ASM_PAGE_H /* PAGE_SHIFT determines the page size */ +/* CONT_SHIFT determines the number of pages which can be tracked together */ #ifdef CONFIG_ARM64_64K_PAGES #define PAGE_SHIFT 16 +#define CONT_SHIFT 5 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define PAGE_SHIFT 14 +#define CONT_SHIFT 7 #else #define PAGE_SHIFT 12 +#define CONT_SHIFT 4 #endif -#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) -/* - * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require pgd, pud (4 levels only) and pmd tables to (section) - * map the kernel. With the 64K page configuration, swapper and idmap need to - * map to pte level. The swapper also maps the FDT (see __create_page_tables - * for more information). Note that the number of ID map translation levels - * could be increased on the fly if system RAM is out of reach for the default - * VA range, so 3 pages are reserved in all cases. - */ -#ifdef CONFIG_ARM64_64K_PAGES -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS) -#else -#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1) -#endif - -#define SWAPPER_DIR_SIZE (SWAPPER_PGTABLE_LEVELS * PAGE_SIZE) -#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) +#define CONT_SIZE (_AC(1, UL) << (CONT_SHIFT + PAGE_SHIFT)) +#define CONT_MASK (~(CONT_SIZE-1)) #ifndef __ASSEMBLY__ diff --git a/kernel/arch/arm64/include/asm/percpu.h b/kernel/arch/arm64/include/asm/percpu.h index 4fde8c1df..0a456bef8 100644 --- a/kernel/arch/arm64/include/asm/percpu.h +++ b/kernel/arch/arm64/include/asm/percpu.h @@ -16,8 +16,6 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H -#ifdef CONFIG_SMP - static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); @@ -38,12 +36,6 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() -#else /* !CONFIG_SMP */ - -#define set_my_cpu_offset(x) do { } while (0) - -#endif /* CONFIG_SMP */ - #define PERCPU_OP(op, asm_op) \ static inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ diff --git a/kernel/arch/arm64/include/asm/perf_event.h b/kernel/arch/arm64/include/asm/perf_event.h index d26d1d53c..7bd3cdb53 100644 --- a/kernel/arch/arm64/include/asm/perf_event.h +++ b/kernel/arch/arm64/include/asm/perf_event.h @@ -17,11 +17,18 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H -#ifdef CONFIG_HW_PERF_EVENTS +#ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); #define perf_misc_flags(regs) perf_misc_flags(regs) #endif +#define perf_arch_fetch_caller_regs(regs, __ip) { \ + (regs)->pc = (__ip); \ + (regs)->regs[29] = (unsigned long) __builtin_frame_address(0); \ + (regs)->sp = current_stack_pointer; \ + (regs)->pstate = PSR_MODE_EL1h; \ +} + #endif diff --git a/kernel/arch/arm64/include/asm/pgalloc.h b/kernel/arch/arm64/include/asm/pgalloc.h index 76420568d..c15053902 100644 --- a/kernel/arch/arm64/include/asm/pgalloc.h +++ b/kernel/arch/arm64/include/asm/pgalloc.h @@ -27,6 +27,7 @@ #define check_pgt_cache() do { } while (0) #define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO) +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) #if CONFIG_PGTABLE_LEVELS > 2 diff --git a/kernel/arch/arm64/include/asm/pgtable-hwdef.h b/kernel/arch/arm64/include/asm/pgtable-hwdef.h index 59bfae75d..d6739e836 100644 --- a/kernel/arch/arm64/include/asm/pgtable-hwdef.h +++ b/kernel/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,13 +16,46 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H +/* + * Number of page-table levels required to address 'va_bits' wide + * address, without section mapping. We resolve the top (va_bits - PAGE_SHIFT) + * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: + * + * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) + * + * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) + * + * We cannot include linux/kernel.h which defines DIV_ROUND_UP here + * due to build issues. So we open code DIV_ROUND_UP here: + * + * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) + * + * which gets simplified as : + */ +#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) + +/* + * Size mapped by an entry at level n ( 0 <= n <= 3) + * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits + * in the final page. The maximum number of translation levels supported by + * the architecture is 4. Hence, starting at at level n, we have further + * ((4 - n) - 1) levels of translation excluding the offset within the page. + * So, the total number of bits mapped by an entry at level n is : + * + * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT + * + * Rearranging it a bit we get : + * (4 - n) * (PAGE_SHIFT - 3) + 3 + */ +#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) + #define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) /* * PMD_SHIFT determines the size a level 2 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 2 -#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) +#define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) #define PTRS_PER_PMD PTRS_PER_PTE @@ -32,7 +65,7 @@ * PUD_SHIFT determines the size a level 1 page table entry can map. */ #if CONFIG_PGTABLE_LEVELS > 3 -#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) +#define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) #define PTRS_PER_PUD PTRS_PER_PTE @@ -42,7 +75,7 @@ * PGDIR_SHIFT determines the size a top-level page table entry can map * (depending on the configuration, this level can be 0, 1 or 2). */ -#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_PGTABLE_LEVELS + 3) +#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) #define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) @@ -55,6 +88,13 @@ #define SECTION_MASK (~(SECTION_SIZE-1)) /* + * Contiguous page definitions. + */ +#define CONT_PTES (_AC(1, UL) << CONT_SHIFT) +/* the the numerical offset of the PTE within a range of CONT_PTES */ +#define CONT_RANGE_OFFSET(addr) (((addr)>>PAGE_SHIFT)&(CONT_PTES-1)) + +/* * Hardware page table definitions. * * Level 1 descriptor (PUD). @@ -83,6 +123,7 @@ #define PMD_SECT_S (_AT(pmdval_t, 3) << 8) #define PMD_SECT_AF (_AT(pmdval_t, 1) << 10) #define PMD_SECT_NG (_AT(pmdval_t, 1) << 11) +#define PMD_SECT_CONT (_AT(pmdval_t, 1) << 52) #define PMD_SECT_PXN (_AT(pmdval_t, 1) << 53) #define PMD_SECT_UXN (_AT(pmdval_t, 1) << 54) @@ -104,6 +145,8 @@ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ +#define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ @@ -168,5 +211,7 @@ #define TCR_TG1_64K (UL(3) << 30) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) +#define TCR_HA (UL(1) << 39) +#define TCR_HD (UL(1) << 40) #endif diff --git a/kernel/arch/arm64/include/asm/pgtable.h b/kernel/arch/arm64/include/asm/pgtable.h index 56283f8a6..eaa9cabf4 100644 --- a/kernel/arch/arm64/include/asm/pgtable.h +++ b/kernel/arch/arm64/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef __ASM_PGTABLE_H #define __ASM_PGTABLE_H +#include <asm/bug.h> #include <asm/proc-fns.h> #include <asm/memory.h> @@ -25,44 +26,53 @@ * Software defined PTE bits definition. */ #define PTE_VALID (_AT(pteval_t, 1) << 0) +#define PTE_WRITE (PTE_DBM) /* same as DBM (51) */ #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) -#define PTE_WRITE (_AT(pteval_t, 1) << 57) #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* * VMALLOC and SPARSEMEM_VMEMMAP ranges. * - * VMEMAP_SIZE: allows the whole VA space to be covered by a struct page array + * VMEMAP_SIZE: allows the whole linear region to be covered by a struct page array * (rounded up to PUD_SIZE). * VMALLOC_START: beginning of the kernel VA space * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space, * fixed mappings and modules */ #define VMEMMAP_SIZE ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE) -#define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) + +#ifndef CONFIG_KASAN +#define VMALLOC_START (VA_START) +#else +#include <asm/kasan.h> +#define VMALLOC_START (KASAN_SHADOW_END + SZ_64K) +#endif + #define VMALLOC_END (PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K) -#define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) +#define VMEMMAP_START (VMALLOC_END + SZ_64K) +#define vmemmap ((struct page *)VMEMMAP_START - \ + SECTION_ALIGN_DOWN(memstart_addr >> PAGE_SHIFT)) #define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ + +#include <linux/mmdebug.h> + extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); -#ifdef CONFIG_SMP #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#else -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) -#endif +#define PROT_DEVICE_nGnRnE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRnE)) #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) +#define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_WT)) #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL)) #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE)) @@ -72,7 +82,10 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define _PAGE_DEFAULT (PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_RO __pgprot(_PAGE_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | PTE_RDONLY) +#define PAGE_KERNEL_ROX __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) +#define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) #define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) @@ -80,7 +93,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) #define PAGE_S2_DEVICE __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_DEVICE_nGnRE) | PTE_S2_RDONLY | PTE_UXN) -#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_TYPE_MASK) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) +#define PAGE_NONE __pgprot(((_PAGE_DEFAULT) & ~PTE_VALID) | PTE_PROT_NONE | PTE_PXN | PTE_UXN) #define PAGE_SHARED __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN | PTE_WRITE) #define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_WRITE) #define PAGE_COPY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_NG | PTE_PXN | PTE_UXN) @@ -137,12 +150,21 @@ extern struct page *empty_zero_page; * The following only work if pte_present(). Undefined behaviour otherwise. */ #define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) -#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) +#define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) +#ifdef CONFIG_ARM64_HW_AFDBM +#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) +#else +#define pte_hw_dirty(pte) (0) +#endif +#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) + +#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ @@ -195,6 +217,16 @@ static inline pte_t pte_mkspecial(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_SPECIAL)); } +static inline pte_t pte_mkcont(pte_t pte) +{ + return set_pte_bit(pte, __pgprot(PTE_CONT)); +} + +static inline pte_t pte_mknoncont(pte_t pte) +{ + return clear_pte_bit(pte, __pgprot(PTE_CONT)); +} + static inline void set_pte(pte_t *ptep, pte_t pte) { *ptep = pte; @@ -209,20 +241,53 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } +struct mm_struct; +struct vm_area_struct; + extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); +/* + * PTE bits configuration in the presence of hardware Dirty Bit Management + * (PTE_WRITE == PTE_DBM): + * + * Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw) + * 0 0 | 1 0 0 + * 0 1 | 1 1 0 + * 1 0 | 1 0 1 + * 1 1 | 0 1 x + * + * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via + * the page fault mechanism. Checking the dirty status of a pte becomes: + * + * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) + */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (pte_dirty(pte) && pte_write(pte)) + if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; } + /* + * If the existing pte is valid, check for potential race with + * hardware updates of the pte (ptep_set_access_flags safely changes + * valid ptes without going through an invalid entry). + */ + if (IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && + pte_valid(*ptep) && pte_valid(pte)) { + VM_WARN_ONCE(!pte_young(pte), + "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(*ptep), pte_val(pte)); + VM_WARN_ONCE(pte_write(*ptep) && !pte_dirty(pte), + "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", + __func__, pte_val(*ptep), pte_val(pte)); + } + set_pte(ptep, pte); } @@ -460,7 +525,10 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE; + /* preserve the hardware dirty information */ + if (pte_hw_dirty(pte)) + pte = pte_mkdirty(pte); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } @@ -470,6 +538,101 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } +#ifdef CONFIG_ARM64_HW_AFDBM +/* + * Atomic pte/pmd modifications. + */ +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) +{ + pteval_t pteval; + unsigned int tmp, res; + + asm volatile("// ptep_test_and_clear_young\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" + " and %0, %0, %4 // clear PTE_AF\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res) + : "L" (~PTE_AF), "I" (ilog2(PTE_AF))); + + return res; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pteval_t old_pteval; + unsigned int tmp; + + asm volatile("// ptep_get_and_clear\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " stxr %w1, xzr, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))); + + return __pte(old_pteval); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* + * ptep_set_wrprotect - mark read-only while trasferring potential hardware + * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. + */ +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) +{ + pteval_t pteval; + unsigned long tmp; + + asm volatile("// ptep_set_wrprotect\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n" + " csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n" + " orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n" + " and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) + : "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE) + : "cc"); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + ptep_set_wrprotect(mm, address, (pte_t *)pmdp); +} +#endif +#endif /* CONFIG_ARM64_HW_AFDBM */ + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; @@ -505,6 +668,24 @@ extern int kern_addr_valid(unsigned long addr); #define pgtable_cache_init() do { } while (0) +/* + * On AArch64, the cache coherency is handled via the set_pte_at() function. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + /* + * We don't do anything here, so there's a very small chance of + * us retaking a user fault which we just fixed up. The alternative + * is doing a dsb(ishst), but that penalises the fastpath. + */ +} + +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + +#define kc_vaddr_to_offset(v) ((v) & ~VA_START) +#define kc_offset_to_vaddr(o) ((o) | VA_START) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/kernel/arch/arm64/include/asm/pmu.h b/kernel/arch/arm64/include/asm/pmu.h deleted file mode 100644 index b7710a596..000000000 --- a/kernel/arch/arm64/include/asm/pmu.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Based on arch/arm/include/asm/pmu.h - * - * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_PMU_H -#define __ASM_PMU_H - -#ifdef CONFIG_HW_PERF_EVENTS - -/* The events for a given PMU register set. */ -struct pmu_hw_events { - /* - * The events that are active on the PMU for the given index. - */ - struct perf_event **events; - - /* - * A 1 bit for an index indicates that the counter is being used for - * an event. A 0 means that the counter can be used. - */ - unsigned long *used_mask; - - /* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ - raw_spinlock_t pmu_lock; -}; - -struct arm_pmu { - struct pmu pmu; - cpumask_t active_irqs; - int *irq_affinity; - const char *name; - irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); - int (*get_event_idx)(struct pmu_hw_events *hw_events, - struct hw_perf_event *hwc); - int (*set_event_filter)(struct hw_perf_event *evt, - struct perf_event_attr *attr); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); - void (*reset)(void *); - int (*map_event)(struct perf_event *event); - int num_events; - atomic_t active_events; - struct mutex reserve_mutex; - u64 max_period; - struct platform_device *plat_device; - struct pmu_hw_events *(*get_hw_events)(void); -}; - -#define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) - -int __init armpmu_register(struct arm_pmu *armpmu, char *name, int type); - -u64 armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); - -int armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx); - -#endif /* CONFIG_HW_PERF_EVENTS */ -#endif /* __ASM_PMU_H */ diff --git a/kernel/arch/arm64/include/asm/proc-fns.h b/kernel/arch/arm64/include/asm/proc-fns.h index 220633b79..14ad6e4e8 100644 --- a/kernel/arch/arm64/include/asm/proc-fns.h +++ b/kernel/arch/arm64/include/asm/proc-fns.h @@ -28,12 +28,8 @@ struct mm_struct; struct cpu_suspend_ctx; -extern void cpu_cache_off(void); extern void cpu_do_idle(void); extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); -extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); -void cpu_soft_restart(phys_addr_t cpu_reset, - unsigned long addr) __attribute__((noreturn)); extern void cpu_do_suspend(struct cpu_suspend_ctx *ptr); extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr); diff --git a/kernel/arch/arm64/include/asm/processor.h b/kernel/arch/arm64/include/asm/processor.h index d2c37a1df..4acb7ca94 100644 --- a/kernel/arch/arm64/include/asm/processor.h +++ b/kernel/arch/arm64/include/asm/processor.h @@ -78,13 +78,30 @@ struct cpu_context { struct thread_struct { struct cpu_context cpu_context; /* cpu context */ - unsigned long tp_value; + unsigned long tp_value; /* TLS register */ +#ifdef CONFIG_COMPAT + unsigned long tp2_value; +#endif struct fpsimd_state fpsimd_state; unsigned long fault_address; /* fault info */ unsigned long fault_code; /* ESR_EL1 value */ struct debug_info debug; /* debugging */ }; +#ifdef CONFIG_COMPAT +#define task_user_tls(t) \ +({ \ + unsigned long *__tls; \ + if (is_compat_thread(task_thread_info(t))) \ + __tls = &(t)->thread.tp2_value; \ + else \ + __tls = &(t)->thread.tp_value; \ + __tls; \ + }) +#else +#define task_user_tls(t) (&(t)->thread.tp_value) +#endif + #define INIT_THREAD { } static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) @@ -169,4 +186,6 @@ static inline void spin_lock_prefetch(const void *x) #endif +void cpu_enable_pan(void *__unused); + #endif /* __ASM_PROCESSOR_H */ diff --git a/kernel/arch/arm64/include/asm/psci.h b/kernel/arch/arm64/include/asm/psci.h deleted file mode 100644 index 2454bc59c..000000000 --- a/kernel/arch/arm64/include/asm/psci.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2013 ARM Limited - */ - -#ifndef __ASM_PSCI_H -#define __ASM_PSCI_H - -int psci_dt_init(void); -int psci_acpi_init(void); - -#endif /* __ASM_PSCI_H */ diff --git a/kernel/arch/arm64/include/asm/ptrace.h b/kernel/arch/arm64/include/asm/ptrace.h index d6dd9fdbc..e9e5467e0 100644 --- a/kernel/arch/arm64/include/asm/ptrace.h +++ b/kernel/arch/arm64/include/asm/ptrace.h @@ -83,14 +83,14 @@ #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] -#define compat_sp_irq regs[16] -#define compat_lr_irq regs[17] -#define compat_sp_svc regs[18] -#define compat_lr_svc regs[19] -#define compat_sp_abt regs[20] -#define compat_lr_abt regs[21] -#define compat_sp_und regs[22] -#define compat_lr_und regs[23] +#define compat_lr_irq regs[16] +#define compat_sp_irq regs[17] +#define compat_lr_svc regs[18] +#define compat_sp_svc regs[19] +#define compat_lr_abt regs[20] +#define compat_sp_abt regs[21] +#define compat_lr_und regs[22] +#define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] @@ -183,11 +183,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs) #define instruction_pointer(regs) ((unsigned long)(regs)->pc) -#ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/kernel/arch/arm64/include/asm/smp.h b/kernel/arch/arm64/include/asm/smp.h index bf22650b1..d9c3d6a61 100644 --- a/kernel/arch/arm64/include/asm/smp.h +++ b/kernel/arch/arm64/include/asm/smp.h @@ -20,10 +20,6 @@ #include <linux/cpumask.h> #include <linux/thread_info.h> -#ifndef CONFIG_SMP -# error "<asm/smp.h> included in non-SMP build" -#endif - #define raw_smp_processor_id() (current_thread_info()->cpu) struct seq_file; @@ -42,7 +38,7 @@ extern void handle_IPI(int ipinr, struct pt_regs *regs); * Discover the set of possible CPUs and determine their * SMP operations. */ -extern void of_smp_init_cpus(void); +extern void smp_init_cpus(void); /* * Provide a function to raise an IPI cross call on CPUs in callmap. diff --git a/kernel/arch/arm64/include/asm/smp_plat.h b/kernel/arch/arm64/include/asm/smp_plat.h index 8dcd61e32..af58dcdef 100644 --- a/kernel/arch/arm64/include/asm/smp_plat.h +++ b/kernel/arch/arm64/include/asm/smp_plat.h @@ -19,6 +19,8 @@ #ifndef __ASM_SMP_PLAT_H #define __ASM_SMP_PLAT_H +#include <linux/cpumask.h> + #include <asm/types.h> struct mpidr_hash { @@ -39,7 +41,19 @@ static inline u32 mpidr_hash_size(void) */ extern u64 __cpu_logical_map[NR_CPUS]; #define cpu_logical_map(cpu) __cpu_logical_map[cpu] - -void __init do_post_cpus_up_work(void); +/* + * Retrieve logical cpu index corresponding to a given MPIDR.Aff* + * - mpidr: MPIDR.Aff* bits to be used for the look-up + * + * Returns the cpu logical index or -EINVAL on look-up error + */ +static inline int get_logical_index(u64 mpidr) +{ + int cpu; + for (cpu = 0; cpu < nr_cpu_ids; cpu++) + if (cpu_logical_map(cpu) == mpidr) + return cpu; + return -EINVAL; +} #endif /* __ASM_SMP_PLAT_H */ diff --git a/kernel/arch/arm64/include/asm/spinlock.h b/kernel/arch/arm64/include/asm/spinlock.h index cee128732..c85e96d17 100644 --- a/kernel/arch/arm64/include/asm/spinlock.h +++ b/kernel/arch/arm64/include/asm/spinlock.h @@ -16,6 +16,7 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include <asm/lse.h> #include <asm/spinlock_types.h> #include <asm/processor.h> @@ -38,11 +39,21 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) asm volatile( /* Atomically increment the next ticket. */ + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ " prfm pstl1strm, %3\n" "1: ldaxr %w0, %3\n" " add %w1, %w0, %w5\n" " stxr %w2, %w1, %3\n" -" cbnz %w2, 1b\n" +" cbnz %w2, 1b\n", + /* LSE atomics */ +" mov %w2, %w5\n" +" ldadda %w2, %w0, %3\n" +" nop\n" +" nop\n" +" nop\n" + ) + /* Did we get the lock? */ " eor %w1, %w0, %w0, ror #16\n" " cbz %w1, 3f\n" @@ -67,15 +78,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned int tmp; arch_spinlock_t lockval; - asm volatile( -" prfm pstl1strm, %2\n" -"1: ldaxr %w0, %2\n" -" eor %w1, %w0, %w0, ror #16\n" -" cbnz %w1, 2f\n" -" add %w0, %w0, %3\n" -" stxr %w1, %w0, %2\n" -" cbnz %w1, 1b\n" -"2:" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " prfm pstl1strm, %2\n" + "1: ldaxr %w0, %2\n" + " eor %w1, %w0, %w0, ror #16\n" + " cbnz %w1, 2f\n" + " add %w0, %w0, %3\n" + " stxr %w1, %w0, %2\n" + " cbnz %w1, 1b\n" + "2:", + /* LSE atomics */ + " ldr %w0, %2\n" + " eor %w1, %w0, %w0, ror #16\n" + " cbnz %w1, 1f\n" + " add %w1, %w0, %3\n" + " casa %w0, %w1, %2\n" + " and %w1, %w1, #0xffff\n" + " eor %w1, %w1, %w0, lsr #16\n" + "1:") : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : "I" (1 << TICKET_SHIFT) : "memory"); @@ -85,10 +106,19 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - asm volatile( -" stlrh %w1, %0\n" - : "=Q" (lock->owner) - : "r" (lock->owner + 1) + unsigned long tmp; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " ldrh %w1, %0\n" + " add %w1, %w1, #1\n" + " stlrh %w1, %0", + /* LSE atomics */ + " mov %w1, #1\n" + " nop\n" + " staddlh %w1, %0") + : "=Q" (lock->owner), "=&r" (tmp) + : : "memory"); } @@ -123,13 +153,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned int tmp; - asm volatile( + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ " sevl\n" "1: wfe\n" "2: ldaxr %w0, %1\n" " cbnz %w0, 1b\n" " stxr %w0, %w2, %1\n" " cbnz %w0, 2b\n" + " nop", + /* LSE atomics */ + "1: mov %w0, wzr\n" + "2: casa %w0, %w2, %1\n" + " cbz %w0, 3f\n" + " ldxr %w0, %1\n" + " cbz %w0, 2b\n" + " wfe\n" + " b 1b\n" + "3:") : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -139,11 +180,18 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned int tmp; - asm volatile( - " ldaxr %w0, %1\n" - " cbnz %w0, 1f\n" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + "1: ldaxr %w0, %1\n" + " cbnz %w0, 2f\n" " stxr %w0, %w2, %1\n" - "1:\n" + " cbnz %w0, 1b\n" + "2:", + /* LSE atomics */ + " mov %w0, wzr\n" + " casa %w0, %w2, %1\n" + " nop\n" + " nop") : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -153,9 +201,10 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { - asm volatile( - " stlr %w1, %0\n" - : "=Q" (rw->lock) : "r" (0) : "memory"); + asm volatile(ARM64_LSE_ATOMIC_INSN( + " stlr wzr, %0", + " swpl wzr, wzr, %0") + : "=Q" (rw->lock) :: "memory"); } /* write_can_lock - would write_trylock() succeed? */ @@ -172,6 +221,10 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) * * The memory barriers are implicit with the load-acquire and store-release * instructions. + * + * Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC + * and LSE implementations may exhibit different behaviour (although this + * will have no effect on lockdep). */ static inline void arch_read_lock(arch_rwlock_t *rw) { @@ -179,26 +232,43 @@ static inline void arch_read_lock(arch_rwlock_t *rw) asm volatile( " sevl\n" + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: wfe\n" "2: ldaxr %w0, %2\n" " add %w0, %w0, #1\n" " tbnz %w0, #31, 1b\n" " stxr %w1, %w0, %2\n" - " cbnz %w1, 2b\n" + " nop\n" + " cbnz %w1, 2b", + /* LSE atomics */ + "1: wfe\n" + "2: ldxr %w0, %2\n" + " adds %w1, %w0, #1\n" + " tbnz %w1, #31, 1b\n" + " casa %w0, %w1, %2\n" + " sbc %w0, %w1, %w0\n" + " cbnz %w0, 2b") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "memory"); + : "cc", "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned int tmp, tmp2; - asm volatile( + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldxr %w0, %2\n" " sub %w0, %w0, #1\n" " stlxr %w1, %w0, %2\n" - " cbnz %w1, 1b\n" + " cbnz %w1, 1b", + /* LSE atomics */ + " movn %w0, #0\n" + " nop\n" + " nop\n" + " staddl %w0, %2") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : : "memory"); @@ -206,17 +276,28 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw) { - unsigned int tmp, tmp2 = 1; + unsigned int tmp, tmp2; - asm volatile( - " ldaxr %w0, %2\n" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " mov %w1, #1\n" + "1: ldaxr %w0, %2\n" " add %w0, %w0, #1\n" - " tbnz %w0, #31, 1f\n" + " tbnz %w0, #31, 2f\n" " stxr %w1, %w0, %2\n" - "1:\n" - : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) + " cbnz %w1, 1b\n" + "2:", + /* LSE atomics */ + " ldr %w0, %2\n" + " adds %w1, %w0, #1\n" + " tbnz %w1, #31, 1f\n" + " casa %w0, %w1, %2\n" + " sbc %w1, %w1, %w0\n" + " nop\n" + "1:") + : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "memory"); + : "cc", "memory"); return !tmp2; } diff --git a/kernel/arch/arm64/include/asm/spinlock_types.h b/kernel/arch/arm64/include/asm/spinlock_types.h index b8d383665..55be59a35 100644 --- a/kernel/arch/arm64/include/asm/spinlock_types.h +++ b/kernel/arch/arm64/include/asm/spinlock_types.h @@ -20,6 +20,8 @@ # error "please don't include this file directly" #endif +#include <linux/types.h> + #define TICKET_SHIFT 16 typedef struct { diff --git a/kernel/arch/arm64/include/asm/string.h b/kernel/arch/arm64/include/asm/string.h index 64d2d4884..2eb714c46 100644 --- a/kernel/arch/arm64/include/asm/string.h +++ b/kernel/arch/arm64/include/asm/string.h @@ -36,17 +36,33 @@ extern __kernel_size_t strnlen(const char *, __kernel_size_t); #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *, const void *, __kernel_size_t); #define __HAVE_ARCH_MEMCHR extern void *memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void *memset(void *, int, __kernel_size_t); +extern void *__memset(void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMCMP extern int memcmp(const void *, const void *, size_t); + +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * should use not instrumented version of mem* functions. + */ + +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) +#endif + #endif diff --git a/kernel/arch/arm64/include/asm/suspend.h b/kernel/arch/arm64/include/asm/suspend.h index 003802f58..59a5b0f1e 100644 --- a/kernel/arch/arm64/include/asm/suspend.h +++ b/kernel/arch/arm64/include/asm/suspend.h @@ -21,6 +21,6 @@ struct sleep_save_sp { phys_addr_t save_ptr_stash_phys; }; -extern int __cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); +extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)); extern void cpu_resume(void); #endif diff --git a/kernel/arch/arm64/include/asm/sysreg.h b/kernel/arch/arm64/include/asm/sysreg.h index 5c89df0ac..d48ab5b41 100644 --- a/kernel/arch/arm64/include/asm/sysreg.h +++ b/kernel/arch/arm64/include/asm/sysreg.h @@ -20,8 +20,176 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include <asm/opcodes.h> + +/* + * ARMv8 ARM reserves the following encoding for system registers: + * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", + * C5.2, version:ARM DDI 0487A.f) + * [20-19] : Op0 + * [18-16] : Op1 + * [15-12] : CRn + * [11-8] : CRm + * [7-5] : Op2 + */ #define sys_reg(op0, op1, crn, crm, op2) \ - ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + +#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0) +#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5) +#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6) + +#define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) +#define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) +#define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) +#define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) +#define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) + +#define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) +#define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) +#define SYS_ID_ISAR2_EL1 sys_reg(3, 0, 0, 2, 2) +#define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) +#define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) +#define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) + +#define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) +#define SYS_MVFR1_EL1 sys_reg(3, 0, 0, 3, 1) +#define SYS_MVFR2_EL1 sys_reg(3, 0, 0, 3, 2) + +#define SYS_ID_AA64PFR0_EL1 sys_reg(3, 0, 0, 4, 0) +#define SYS_ID_AA64PFR1_EL1 sys_reg(3, 0, 0, 4, 1) + +#define SYS_ID_AA64DFR0_EL1 sys_reg(3, 0, 0, 5, 0) +#define SYS_ID_AA64DFR1_EL1 sys_reg(3, 0, 0, 5, 1) + +#define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) +#define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) + +#define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) +#define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) + +#define SYS_CNTFRQ_EL0 sys_reg(3, 3, 14, 0, 0) +#define SYS_CTR_EL0 sys_reg(3, 3, 0, 0, 1) +#define SYS_DCZID_EL0 sys_reg(3, 3, 0, 0, 7) + +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) + +#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ + (!!x)<<8 | 0x1f) + +/* SCTLR_EL1 */ +#define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) +#define SCTLR_EL1_SPAN (0x1 << 23) + + +/* id_aa64isar0 */ +#define ID_AA64ISAR0_RDM_SHIFT 28 +#define ID_AA64ISAR0_ATOMICS_SHIFT 20 +#define ID_AA64ISAR0_CRC32_SHIFT 16 +#define ID_AA64ISAR0_SHA2_SHIFT 12 +#define ID_AA64ISAR0_SHA1_SHIFT 8 +#define ID_AA64ISAR0_AES_SHIFT 4 + +/* id_aa64pfr0 */ +#define ID_AA64PFR0_GIC_SHIFT 24 +#define ID_AA64PFR0_ASIMD_SHIFT 20 +#define ID_AA64PFR0_FP_SHIFT 16 +#define ID_AA64PFR0_EL3_SHIFT 12 +#define ID_AA64PFR0_EL2_SHIFT 8 +#define ID_AA64PFR0_EL1_SHIFT 4 +#define ID_AA64PFR0_EL0_SHIFT 0 + +#define ID_AA64PFR0_FP_NI 0xf +#define ID_AA64PFR0_FP_SUPPORTED 0x0 +#define ID_AA64PFR0_ASIMD_NI 0xf +#define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 +#define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 + +/* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 +#define ID_AA64MMFR0_BIGENDEL0_SHIFT 16 +#define ID_AA64MMFR0_SNSMEM_SHIFT 12 +#define ID_AA64MMFR0_BIGENDEL_SHIFT 8 +#define ID_AA64MMFR0_ASID_SHIFT 4 +#define ID_AA64MMFR0_PARANGE_SHIFT 0 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +/* id_aa64mmfr1 */ +#define ID_AA64MMFR1_PAN_SHIFT 20 +#define ID_AA64MMFR1_LOR_SHIFT 16 +#define ID_AA64MMFR1_HPD_SHIFT 12 +#define ID_AA64MMFR1_VHE_SHIFT 8 +#define ID_AA64MMFR1_VMIDBITS_SHIFT 4 +#define ID_AA64MMFR1_HADBS_SHIFT 0 + +/* id_aa64dfr0 */ +#define ID_AA64DFR0_CTX_CMPS_SHIFT 28 +#define ID_AA64DFR0_WRPS_SHIFT 20 +#define ID_AA64DFR0_BRPS_SHIFT 12 +#define ID_AA64DFR0_PMUVER_SHIFT 8 +#define ID_AA64DFR0_TRACEVER_SHIFT 4 +#define ID_AA64DFR0_DEBUGVER_SHIFT 0 + +#define ID_ISAR5_RDM_SHIFT 24 +#define ID_ISAR5_CRC32_SHIFT 16 +#define ID_ISAR5_SHA2_SHIFT 12 +#define ID_ISAR5_SHA1_SHIFT 8 +#define ID_ISAR5_AES_SHIFT 4 +#define ID_ISAR5_SEVL_SHIFT 0 + +#define MVFR0_FPROUND_SHIFT 28 +#define MVFR0_FPSHVEC_SHIFT 24 +#define MVFR0_FPSQRT_SHIFT 20 +#define MVFR0_FPDIVIDE_SHIFT 16 +#define MVFR0_FPTRAP_SHIFT 12 +#define MVFR0_FPDP_SHIFT 8 +#define MVFR0_FPSP_SHIFT 4 +#define MVFR0_SIMD_SHIFT 0 + +#define MVFR1_SIMDFMAC_SHIFT 28 +#define MVFR1_FPHP_SHIFT 24 +#define MVFR1_SIMDHP_SHIFT 20 +#define MVFR1_SIMDSP_SHIFT 16 +#define MVFR1_SIMDINT_SHIFT 12 +#define MVFR1_SIMDLS_SHIFT 8 +#define MVFR1_FPDNAN_SHIFT 4 +#define MVFR1_FPFTZ_SHIFT 0 + + +#define ID_AA64MMFR0_TGRAN4_SHIFT 28 +#define ID_AA64MMFR0_TGRAN64_SHIFT 24 +#define ID_AA64MMFR0_TGRAN16_SHIFT 20 + +#define ID_AA64MMFR0_TGRAN4_NI 0xf +#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN64_NI 0xf +#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 +#define ID_AA64MMFR0_TGRAN16_NI 0x0 +#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 + +#if defined(CONFIG_ARM64_4K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN4_SUPPORTED +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN16_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN16_SUPPORTED +#elif defined(CONFIG_ARM64_64K_PAGES) +#define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN64_SHIFT +#define ID_AA64MMFR0_TGRAN_SUPPORTED ID_AA64MMFR0_TGRAN64_SUPPORTED +#endif #ifdef __ASSEMBLY__ @@ -31,11 +199,11 @@ .equ __reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5300000|(\sreg)|(__reg_num_\rt) + .inst 0xd5200000|(\sreg)|(__reg_num_\rt) .endm .macro msr_s, sreg, rt - .inst 0xd5100000|(\sreg)|(__reg_num_\rt) + .inst 0xd5000000|(\sreg)|(__reg_num_\rt) .endm #else @@ -47,14 +215,23 @@ asm( " .equ __reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n" " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n" " .endm\n" ); +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + asm volatile("mrs %0, sctlr_el1" : "=r" (val)); + val &= ~clear; + val |= set; + asm volatile("msr sctlr_el1, %0" : : "r" (val)); +} #endif #endif /* __ASM_SYSREG_H */ diff --git a/kernel/arch/arm64/include/asm/system_misc.h b/kernel/arch/arm64/include/asm/system_misc.h index 7a18fabbe..57f110bea 100644 --- a/kernel/arch/arm64/include/asm/system_misc.h +++ b/kernel/arch/arm64/include/asm/system_misc.h @@ -23,6 +23,8 @@ #include <linux/compiler.h> #include <linux/linkage.h> #include <linux/irqflags.h> +#include <linux/signal.h> +#include <linux/ratelimit.h> #include <linux/reboot.h> struct pt_regs; @@ -41,9 +43,19 @@ struct mm_struct; extern void show_pte(struct mm_struct *mm, unsigned long addr); extern void __show_regs(struct pt_regs *); -void soft_restart(unsigned long); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +#define show_unhandled_signals_ratelimited() \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + bool __show_ratelimited = false; \ + if (show_unhandled_signals && __ratelimit(&_rs)) \ + __show_ratelimited = true; \ + __show_ratelimited; \ +}) + #define UDBG_UNDEFINED (1 << 0) #define UDBG_SYSCALL (1 << 1) #define UDBG_BADABORT (1 << 2) diff --git a/kernel/arch/arm64/include/asm/thread_info.h b/kernel/arch/arm64/include/asm/thread_info.h index 7d45b00db..5f4e89fbc 100644 --- a/kernel/arch/arm64/include/asm/thread_info.h +++ b/kernel/arch/arm64/include/asm/thread_info.h @@ -23,8 +23,10 @@ #include <linux/compiler.h> -#ifndef CONFIG_ARM64_64K_PAGES +#ifdef CONFIG_ARM64_4K_PAGES #define THREAD_SIZE_ORDER 2 +#elif defined(CONFIG_ARM64_16K_PAGES) +#define THREAD_SIZE_ORDER 0 #endif #define THREAD_SIZE 16384 @@ -113,7 +115,6 @@ static inline struct thread_info *current_thread_info(void) #define TIF_RESTORE_SIGMASK 20 #define TIF_SINGLESTEP 21 #define TIF_32BIT 22 /* 32bit process */ -#define TIF_SWITCH_MM 23 /* deferred switch_mm */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) @@ -128,7 +129,8 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ - _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) + _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ + _TIF_NEED_RESCHED_LAZY) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ diff --git a/kernel/arch/arm64/include/asm/tlb.h b/kernel/arch/arm64/include/asm/tlb.h index 3a0242c7e..ffdaea795 100644 --- a/kernel/arch/arm64/include/asm/tlb.h +++ b/kernel/arch/arm64/include/asm/tlb.h @@ -37,12 +37,21 @@ static inline void __tlb_remove_table(void *_table) static inline void tlb_flush(struct mmu_gather *tlb) { - if (tlb->fullmm) { - flush_tlb_mm(tlb->mm); - } else { - struct vm_area_struct vma = { .vm_mm = tlb->mm, }; - flush_tlb_range(&vma, tlb->start, tlb->end); - } + struct vm_area_struct vma = { .vm_mm = tlb->mm, }; + + /* + * The ASID allocator will either invalidate the ASID or mark + * it as used. + */ + if (tlb->fullmm) + return; + + /* + * The intermediate page table levels are already handled by + * the __(pte|pmd|pud)_free_tlb() functions, so last level + * TLBI is sufficient here. + */ + __flush_tlb_range(&vma, tlb->start, tlb->end, true); } static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, diff --git a/kernel/arch/arm64/include/asm/tlbflush.h b/kernel/arch/arm64/include/asm/tlbflush.h index c3bb05b98..b460ae28e 100644 --- a/kernel/arch/arm64/include/asm/tlbflush.h +++ b/kernel/arch/arm64/include/asm/tlbflush.h @@ -28,8 +28,6 @@ * TLB Management * ============== * - * The arch/arm64/mm/tlb.S files implement these methods. - * * The TLB specific code is expected to perform whatever tests it needs * to determine if it should invalidate the TLB for each call. Start * addresses are inclusive and end addresses are exclusive; it is safe to @@ -65,6 +63,14 @@ * only require the D-TLB to be invalidated. * - kaddr - Kernel virtual memory address */ +static inline void local_flush_tlb_all(void) +{ + dsb(nshst); + asm("tlbi vmalle1"); + dsb(nsh); + isb(); +} + static inline void flush_tlb_all(void) { dsb(ishst); @@ -75,7 +81,7 @@ static inline void flush_tlb_all(void) static inline void flush_tlb_mm(struct mm_struct *mm) { - unsigned long asid = (unsigned long)ASID(mm) << 48; + unsigned long asid = ASID(mm) << 48; dsb(ishst); asm("tlbi aside1is, %0" : : "r" (asid)); @@ -85,31 +91,59 @@ static inline void flush_tlb_mm(struct mm_struct *mm) static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | - ((unsigned long)ASID(vma->vm_mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(vma->vm_mm) << 48); dsb(ishst); - asm("tlbi vae1is, %0" : : "r" (addr)); + asm("tlbi vale1is, %0" : : "r" (addr)); dsb(ish); } +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement. + */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + static inline void __flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) + unsigned long start, unsigned long end, + bool last_level) { - unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; + unsigned long asid = ASID(vma->vm_mm) << 48; unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_mm(vma->vm_mm); + return; + } + start = asid | (start >> 12); end = asid | (end >> 12); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vae1is, %0" : : "r"(addr)); + for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + if (last_level) + asm("tlbi vale1is, %0" : : "r"(addr)); + else + asm("tlbi vae1is, %0" : : "r"(addr)); + } dsb(ish); } -static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + __flush_tlb_range(vma, start, end, false); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_all(); + return; + } + start >>= 12; end >>= 12; @@ -121,55 +155,17 @@ static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long e } /* - * This is meant to avoid soft lock-ups on large TLB flushing ranges and not - * necessarily a performance improvement. - */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_range(vma, start, end); - else - flush_tlb_mm(vma->vm_mm); -} - -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_kernel_range(start, end); - else - flush_tlb_all(); -} - -/* * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). */ static inline void __flush_tlb_pgtable(struct mm_struct *mm, unsigned long uaddr) { - unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48); + unsigned long addr = uaddr >> 12 | (ASID(mm) << 48); - dsb(ishst); asm("tlbi vae1is, %0" : : "r" (addr)); dsb(ish); } -/* - * On AArch64, the cache coherency is handled via the set_pte_at() function. - */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - /* - * set_pte() does not have a DSB for user mappings, so make sure that - * the page table write is visible. - */ - dsb(ishst); -} - -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #endif diff --git a/kernel/arch/arm64/include/asm/topology.h b/kernel/arch/arm64/include/asm/topology.h index 7ebcd31ce..a3e9d6fdb 100644 --- a/kernel/arch/arm64/include/asm/topology.h +++ b/kernel/arch/arm64/include/asm/topology.h @@ -1,8 +1,6 @@ #ifndef __ASM_TOPOLOGY_H #define __ASM_TOPOLOGY_H -#ifdef CONFIG_SMP - #include <linux/cpumask.h> struct cpu_topology { @@ -18,19 +16,12 @@ extern struct cpu_topology cpu_topology[NR_CPUS]; #define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) -#define topology_thread_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); -#else - -static inline void init_cpu_topology(void) { } -static inline void store_cpu_topology(unsigned int cpuid) { } - -#endif - #include <asm-generic/topology.h> #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/kernel/arch/arm64/include/asm/traps.h b/kernel/arch/arm64/include/asm/traps.h index 232e4ba5d..0cc2f29bf 100644 --- a/kernel/arch/arm64/include/asm/traps.h +++ b/kernel/arch/arm64/include/asm/traps.h @@ -34,13 +34,32 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static inline int __in_irqentry_text(unsigned long ptr) +{ + extern char __irqentry_text_start[]; + extern char __irqentry_text_end[]; + + return ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end; +} +#else +static inline int __in_irqentry_text(unsigned long ptr) +{ + return 0; +} +#endif + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; extern char __exception_text_end[]; + int in; + + in = ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; - return ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; + return in ? : __in_irqentry_text(ptr); } #endif diff --git a/kernel/arch/arm64/include/asm/uaccess.h b/kernel/arch/arm64/include/asm/uaccess.h index 07e1ba449..b2ede967f 100644 --- a/kernel/arch/arm64/include/asm/uaccess.h +++ b/kernel/arch/arm64/include/asm/uaccess.h @@ -24,7 +24,10 @@ #include <linux/string.h> #include <linux/thread_info.h> +#include <asm/alternative.h> +#include <asm/cpufeature.h> #include <asm/ptrace.h> +#include <asm/sysreg.h> #include <asm/errno.h> #include <asm/memory.h> #include <asm/compiler.h> @@ -131,6 +134,8 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ @@ -148,6 +153,8 @@ do { \ BUILD_BUG(); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -194,6 +201,8 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ @@ -210,6 +219,8 @@ do { \ default: \ BUILD_BUG(); \ } \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __put_user(x, ptr) \ diff --git a/kernel/arch/arm64/include/asm/unistd.h b/kernel/arch/arm64/include/asm/unistd.h index 3bc498c25..41e58fe3c 100644 --- a/kernel/arch/arm64/include/asm/unistd.h +++ b/kernel/arch/arm64/include/asm/unistd.h @@ -44,7 +44,7 @@ #define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) -#define __NR_compat_syscalls 388 +#define __NR_compat_syscalls 390 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/kernel/arch/arm64/include/asm/unistd32.h b/kernel/arch/arm64/include/asm/unistd32.h index cef934a90..5b925b761 100644 --- a/kernel/arch/arm64/include/asm/unistd32.h +++ b/kernel/arch/arm64/include/asm/unistd32.h @@ -797,3 +797,12 @@ __SYSCALL(__NR_memfd_create, sys_memfd_create) __SYSCALL(__NR_bpf, sys_bpf) #define __NR_execveat 387 __SYSCALL(__NR_execveat, compat_sys_execveat) +#define __NR_userfaultfd 388 +__SYSCALL(__NR_userfaultfd, sys_userfaultfd) +#define __NR_membarrier 389 +__SYSCALL(__NR_membarrier, sys_membarrier) + +/* + * Please add new compat syscalls above this comment and update + * __NR_compat_syscalls in asm/unistd.h. + */ diff --git a/kernel/arch/arm64/include/asm/xen/events.h b/kernel/arch/arm64/include/asm/xen/events.h index 86553213c..4318866d0 100644 --- a/kernel/arch/arm64/include/asm/xen/events.h +++ b/kernel/arch/arm64/include/asm/xen/events.h @@ -18,4 +18,10 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) #define xchg_xen_ulong(ptr, val) xchg((ptr), (val)) +/* Rebind event channel is supported by default */ +static inline bool xen_support_evtchn_rebind(void) +{ + return true; +} + #endif /* _ASM_ARM64_XEN_EVENTS_H */ diff --git a/kernel/arch/arm64/include/uapi/asm/hwcap.h b/kernel/arch/arm64/include/uapi/asm/hwcap.h index 73cf0f54d..361c8a8ef 100644 --- a/kernel/arch/arm64/include/uapi/asm/hwcap.h +++ b/kernel/arch/arm64/include/uapi/asm/hwcap.h @@ -27,5 +27,6 @@ #define HWCAP_SHA1 (1 << 5) #define HWCAP_SHA2 (1 << 6) #define HWCAP_CRC32 (1 << 7) +#define HWCAP_ATOMICS (1 << 8) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/kernel/arch/arm64/include/uapi/asm/kvm.h b/kernel/arch/arm64/include/uapi/asm/kvm.h index d26832022..2d4ca4bb0 100644 --- a/kernel/arch/arm64/include/uapi/asm/kvm.h +++ b/kernel/arch/arm64/include/uapi/asm/kvm.h @@ -32,7 +32,7 @@ #ifndef __ASSEMBLY__ #include <linux/psci.h> -#include <asm/types.h> +#include <linux/types.h> #include <asm/ptrace.h> #define __KVM_HAVE_GUEST_DEBUG @@ -53,14 +53,20 @@ struct kvm_regs { struct user_fpsimd_state fp_regs; }; -/* Supported Processor Types */ +/* + * Supported CPU Targets - Adding a new target type is not recommended, + * unless there are some special registers not supported by the + * genericv8 syreg table. + */ #define KVM_ARM_TARGET_AEM_V8 0 #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 #define KVM_ARM_TARGET_XGENE_POTENZA 3 #define KVM_ARM_TARGET_CORTEX_A53 4 +/* Generic ARM v8 target */ +#define KVM_ARM_TARGET_GENERIC_V8 5 -#define KVM_ARM_NUM_TARGETS 5 +#define KVM_ARM_NUM_TARGETS 6 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 @@ -100,12 +106,39 @@ struct kvm_sregs { struct kvm_fpu { }; +/* + * See v8 ARM ARM D7.3: Debug Registers + * + * The architectural limit is 16 debug registers of each type although + * in practice there are usually less (see ID_AA64DFR0_EL1). + * + * Although the control registers are architecturally defined as 32 + * bits wide we use a 64 bit structure here to keep parity with + * KVM_GET/SET_ONE_REG behaviour which treats all system registers as + * 64 bit values. It also allows for the possibility of the + * architecture expanding the control registers without having to + * change the userspace ABI. + */ +#define KVM_ARM_MAX_DBG_REGS 16 struct kvm_guest_debug_arch { + __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS]; + __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS]; }; struct kvm_debug_exit_arch { + __u32 hsr; + __u64 far; /* used for watchpoints */ }; +/* + * Architecture specific defines for kvm_guest_debug->control + */ + +#define KVM_GUESTDBG_USE_SW_BP (1 << 16) +#define KVM_GUESTDBG_USE_HW (1 << 17) + struct kvm_sync_regs { }; diff --git a/kernel/arch/arm64/include/uapi/asm/ptrace.h b/kernel/arch/arm64/include/uapi/asm/ptrace.h index 6913643bb..208db3df1 100644 --- a/kernel/arch/arm64/include/uapi/asm/ptrace.h +++ b/kernel/arch/arm64/include/uapi/asm/ptrace.h @@ -44,6 +44,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_PAN_BIT 0x00400000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/kernel/arch/arm64/include/uapi/asm/signal.h b/kernel/arch/arm64/include/uapi/asm/signal.h index 8d1e72364..991bf5db2 100644 --- a/kernel/arch/arm64/include/uapi/asm/signal.h +++ b/kernel/arch/arm64/include/uapi/asm/signal.h @@ -19,6 +19,9 @@ /* Required for AArch32 compatibility. */ #define SA_RESTORER 0x04000000 +#define MINSIGSTKSZ 5120 +#define SIGSTKSZ 16384 + #include <asm-generic/signal.h> #endif |