From e09b41010ba33a20a87472ee821fa407a5b8da36 Mon Sep 17 00:00:00 2001 From: José Pekkarinen Date: Mon, 11 Apr 2016 10:41:07 +0300 Subject: These changes are the raw update to linux-4.4.6-rt14. Kernel sources are taken from kernel.org, and rt patch from the rt wiki download page. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen --- kernel/arch/arm/mm/Kconfig | 50 ++- kernel/arch/arm/mm/Makefile | 4 + kernel/arch/arm/mm/abort-ev4.S | 1 + kernel/arch/arm/mm/abort-ev5t.S | 4 +- kernel/arch/arm/mm/abort-ev5tj.S | 4 +- kernel/arch/arm/mm/abort-ev6.S | 8 +- kernel/arch/arm/mm/abort-ev7.S | 1 + kernel/arch/arm/mm/abort-lv4t.S | 2 + kernel/arch/arm/mm/abort-macro.S | 14 +- kernel/arch/arm/mm/alignment.c | 32 +- kernel/arch/arm/mm/cache-feroceon-l2.c | 6 +- kernel/arch/arm/mm/cache-l2x0.c | 112 +++++-- kernel/arch/arm/mm/cache-uniphier.c | 555 +++++++++++++++++++++++++++++++++ kernel/arch/arm/mm/context.c | 38 ++- kernel/arch/arm/mm/dma-mapping.c | 81 +++-- kernel/arch/arm/mm/dma.h | 32 ++ kernel/arch/arm/mm/fault.c | 22 ++ kernel/arch/arm/mm/fault.h | 1 + kernel/arch/arm/mm/flush.c | 15 + kernel/arch/arm/mm/highmem.c | 26 +- kernel/arch/arm/mm/hugetlbpage.c | 5 - kernel/arch/arm/mm/init.c | 93 ++++-- kernel/arch/arm/mm/ioremap.c | 33 +- kernel/arch/arm/mm/mmu.c | 256 +++++++++------ kernel/arch/arm/mm/nommu.c | 48 +-- kernel/arch/arm/mm/pgd.c | 10 + kernel/arch/arm/mm/proc-v7-2level.S | 12 +- kernel/arch/arm/mm/proc-v7-3level.S | 14 +- kernel/arch/arm/mm/proc-v7.S | 186 ++++++----- kernel/arch/arm/mm/proc-v7m.S | 2 +- kernel/arch/arm/mm/pv-fixup-asm.S | 88 ++++++ 31 files changed, 1367 insertions(+), 388 deletions(-) create mode 100644 kernel/arch/arm/mm/cache-uniphier.c create mode 100644 kernel/arch/arm/mm/dma.h create mode 100644 kernel/arch/arm/mm/pv-fixup-asm.S (limited to 'kernel/arch/arm/mm') diff --git a/kernel/arch/arm/mm/Kconfig b/kernel/arch/arm/mm/Kconfig index b4f92b9a1..41218867a 100644 --- a/kernel/arch/arm/mm/Kconfig +++ b/kernel/arch/arm/mm/Kconfig @@ -6,7 +6,7 @@ comment "Processor Type" # ARM7TDMI config CPU_ARM7TDMI - bool "Support ARM7TDMI processor" + bool depends on !MMU select CPU_32v4T select CPU_ABRT_LV4T @@ -56,7 +56,7 @@ config CPU_ARM740T # ARM9TDMI config CPU_ARM9TDMI - bool "Support ARM9TDMI processor" + bool depends on !MMU select CPU_32v4T select CPU_ABRT_NOMMU @@ -419,28 +419,24 @@ config CPU_THUMBONLY config CPU_32v3 bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU config CPU_32v4 bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU config CPU_32v4T bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU config CPU_32v5 bool select CPU_USE_DOMAINS if MMU - select NEEDS_SYSCALL_FOR_CMPXCHG if SMP select NEED_KUSER_HELPERS select TLS_REG_EMUL if SMP || !MMU @@ -604,6 +600,22 @@ config CPU_USE_DOMAINS This option enables or disables the use of domain switching via the set_fs() function. +config CPU_V7M_NUM_IRQ + int "Number of external interrupts connected to the NVIC" + depends on CPU_V7M + default 90 if ARCH_STM32 + default 38 if ARCH_EFM32 + default 112 if SOC_VF610 + default 240 + help + This option indicates the number of interrupts connected to the NVIC. + The value can be larger than the real number of interrupts supported + by the system, but must not be lower. + The default value is 240, corresponding to the maximum number of + interrupts supported by the NVIC on Cortex-M family. + + If unsure, keep default value. + # # CPU supports 36-bit I/O # @@ -624,6 +636,10 @@ config ARM_LPAE If unsure, say N. +config ARM_PV_FIXUP + def_bool y + depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE + config ARCH_PHYS_ADDR_T_64BIT def_bool ARM_LPAE @@ -785,14 +801,6 @@ config TLS_REG_EMUL a few prototypes like that in existence) and therefore access to that required register must be emulated. -config NEEDS_SYSCALL_FOR_CMPXCHG - bool - select NEED_KUSER_HELPERS - help - SMP on a pre-ARMv6 processor? Well OK then. - Forget about fast user space cmpxchg support. - It is just not possible. - config NEED_KUSER_HELPERS bool @@ -863,6 +871,7 @@ config OUTER_CACHE config OUTER_CACHE_SYNC bool + select ARM_HEAVY_MB help The outer cache has a outer_cache_fns.sync function pointer that can be used to drain the write buffer of the outer cache. @@ -965,6 +974,16 @@ config CACHE_TAUROS2 This option enables the Tauros2 L2 cache controller (as found on PJ1/PJ4). +config CACHE_UNIPHIER + bool "Enable the UniPhier outer cache controller" + depends on ARCH_UNIPHIER + default y + select OUTER_CACHE + select OUTER_CACHE_SYNC + help + This option enables the UniPhier outer cache (system cache) + controller. + config CACHE_XSC3L2 bool "Enable the L2 cache on XScale3" depends on CPU_XSC3 @@ -1011,6 +1030,9 @@ config ARCH_HAS_BARRIERS This option allows the use of custom mandatory barriers included via the mach/barriers.h file. +config ARM_HEAVY_MB + bool + config ARCH_SUPPORTS_BIG_ENDIAN bool help diff --git a/kernel/arch/arm/mm/Makefile b/kernel/arch/arm/mm/Makefile index d3afdf9eb..7f76d96ce 100644 --- a/kernel/arch/arm/mm/Makefile +++ b/kernel/arch/arm/mm/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_MODULES) += proc-syms.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_ARM_PV_FIXUP) += pv-fixup-asm.o obj-$(CONFIG_CPU_ABRT_NOMMU) += abort-nommu.o obj-$(CONFIG_CPU_ABRT_EV4) += abort-ev4.o @@ -55,6 +56,8 @@ obj-$(CONFIG_CPU_XSCALE) += copypage-xscale.o obj-$(CONFIG_CPU_XSC3) += copypage-xsc3.o obj-$(CONFIG_CPU_COPY_FA) += copypage-fa.o +CFLAGS_copypage-feroceon.o := -march=armv5te + obj-$(CONFIG_CPU_TLB_V4WT) += tlb-v4.o obj-$(CONFIG_CPU_TLB_V4WB) += tlb-v4wb.o obj-$(CONFIG_CPU_TLB_V4WBI) += tlb-v4wbi.o @@ -100,3 +103,4 @@ obj-$(CONFIG_CACHE_FEROCEON_L2) += cache-feroceon-l2.o obj-$(CONFIG_CACHE_L2X0) += cache-l2x0.o l2c-l2x0-resume.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o +obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o diff --git a/kernel/arch/arm/mm/abort-ev4.S b/kernel/arch/arm/mm/abort-ev4.S index 54473cd4a..b3b31e30c 100644 --- a/kernel/arch/arm/mm/abort-ev4.S +++ b/kernel/arch/arm/mm/abort-ev4.S @@ -19,6 +19,7 @@ ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR ldr r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. diff --git a/kernel/arch/arm/mm/abort-ev5t.S b/kernel/arch/arm/mm/abort-ev5t.S index a0908d465..a6a381a6c 100644 --- a/kernel/arch/arm/mm/abort-ev5t.S +++ b/kernel/arch/arm/mm/abort-ev5t.S @@ -21,8 +21,10 @@ ENTRY(v5t_early_abort) mrc p15, 0, r0, c6, c0, 0 @ get FAR do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable user access bic r1, r1, #1 << 11 @ clear bits 11 of FSR - do_ldrd_abort tmp=ip, insn=r3 + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes tst r3, #1 << 20 @ check write orreq r1, r1, #1 << 11 b do_DataAbort diff --git a/kernel/arch/arm/mm/abort-ev5tj.S b/kernel/arch/arm/mm/abort-ev5tj.S index 4006b7a61..00ab011be 100644 --- a/kernel/arch/arm/mm/abort-ev5tj.S +++ b/kernel/arch/arm/mm/abort-ev5tj.S @@ -24,7 +24,9 @@ ENTRY(v5tj_early_abort) bne do_DataAbort do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction - do_ldrd_abort tmp=ip, insn=r3 + uaccess_disable ip @ disable userspace access + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. b do_DataAbort diff --git a/kernel/arch/arm/mm/abort-ev6.S b/kernel/arch/arm/mm/abort-ev6.S index 8c48c5c22..8801a15aa 100644 --- a/kernel/arch/arm/mm/abort-ev6.S +++ b/kernel/arch/arm/mm/abort-ev6.S @@ -26,16 +26,18 @@ ENTRY(v6_early_abort) ldr ip, =0x4107b36 mrc p15, 0, r3, c0, c0, 0 @ get processor id teq ip, r3, lsr #4 @ r0 ARM1136? - bne do_DataAbort + bne 1f tst r5, #PSR_J_BIT @ Java? tsteq r5, #PSR_T_BIT @ Thumb? - bne do_DataAbort + bne 1f bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction ARM_BE8(rev r3, r3) - do_ldrd_abort tmp=ip, insn=r3 + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq 1f @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. #endif +1: uaccess_disable ip @ disable userspace access b do_DataAbort diff --git a/kernel/arch/arm/mm/abort-ev7.S b/kernel/arch/arm/mm/abort-ev7.S index 4812ad054..e8d0e08c2 100644 --- a/kernel/arch/arm/mm/abort-ev7.S +++ b/kernel/arch/arm/mm/abort-ev7.S @@ -15,6 +15,7 @@ ENTRY(v7_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR + uaccess_disable ip @ disable userspace access /* * V6 code adjusts the returned DFSR. diff --git a/kernel/arch/arm/mm/abort-lv4t.S b/kernel/arch/arm/mm/abort-lv4t.S index f3982580c..6d8e8e336 100644 --- a/kernel/arch/arm/mm/abort-lv4t.S +++ b/kernel/arch/arm/mm/abort-lv4t.S @@ -26,6 +26,7 @@ ENTRY(v4t_late_abort) #endif bne .data_thumb_abort ldr r8, [r4] @ read arm instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. and r7, r8, #15 << 24 @@ -155,6 +156,7 @@ ENTRY(v4t_late_abort) .data_thumb_abort: ldrh r8, [r4] @ read instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 11 @ L = 1 -> write? orreq r1, r1, #1 << 8 @ yes and r7, r8, #15 << 12 diff --git a/kernel/arch/arm/mm/abort-macro.S b/kernel/arch/arm/mm/abort-macro.S index 2cbf68ef0..4509bee4e 100644 --- a/kernel/arch/arm/mm/abort-macro.S +++ b/kernel/arch/arm/mm/abort-macro.S @@ -13,6 +13,7 @@ tst \psr, #PSR_T_BIT beq not_thumb ldrh \tmp, [\pc] @ Read aborted Thumb instruction + uaccess_disable ip @ disable userspace access and \tmp, \tmp, # 0xfe00 @ Mask opcode field cmp \tmp, # 0x5600 @ Is it ldrsb? orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes @@ -29,12 +30,9 @@ not_thumb: * [7:4] == 1101 * [20] == 0 */ - .macro do_ldrd_abort, tmp, insn - tst \insn, #0x0e100000 @ [27:25,20] == 0 - bne not_ldrd - and \tmp, \insn, #0x000000f0 @ [7:4] == 1101 - cmp \tmp, #0x000000d0 - beq do_DataAbort -not_ldrd: + .macro teq_ldrd, tmp, insn + mov \tmp, #0x0e100000 + orr \tmp, #0x000000f0 + and \tmp, \insn, \tmp + teq \tmp, #0x000000d0 .endm - diff --git a/kernel/arch/arm/mm/alignment.c b/kernel/arch/arm/mm/alignment.c index 9769f1eef..7d5f4c736 100644 --- a/kernel/arch/arm/mm/alignment.c +++ b/kernel/arch/arm/mm/alignment.c @@ -365,15 +365,21 @@ do_alignment_ldrhstrh(unsigned long addr, unsigned long instr, struct pt_regs *r user: if (LDST_L_BIT(instr)) { unsigned long val; + unsigned int __ua_flags = uaccess_save_and_enable(); + get16t_unaligned_check(val, addr); + uaccess_restore(__ua_flags); /* signed half-word? */ if (instr & 0x40) val = (signed long)((signed short) val); regs->uregs[rd] = val; - } else + } else { + unsigned int __ua_flags = uaccess_save_and_enable(); put16t_unaligned_check(regs->uregs[rd], addr); + uaccess_restore(__ua_flags); + } return TYPE_LDST; @@ -420,14 +426,21 @@ do_alignment_ldrdstrd(unsigned long addr, unsigned long instr, user: if (load) { - unsigned long val; + unsigned long val, val2; + unsigned int __ua_flags = uaccess_save_and_enable(); + get32t_unaligned_check(val, addr); + get32t_unaligned_check(val2, addr + 4); + + uaccess_restore(__ua_flags); + regs->uregs[rd] = val; - get32t_unaligned_check(val, addr + 4); - regs->uregs[rd2] = val; + regs->uregs[rd2] = val2; } else { + unsigned int __ua_flags = uaccess_save_and_enable(); put32t_unaligned_check(regs->uregs[rd], addr); put32t_unaligned_check(regs->uregs[rd2], addr + 4); + uaccess_restore(__ua_flags); } return TYPE_LDST; @@ -458,10 +471,15 @@ do_alignment_ldrstr(unsigned long addr, unsigned long instr, struct pt_regs *reg trans: if (LDST_L_BIT(instr)) { unsigned int val; + unsigned int __ua_flags = uaccess_save_and_enable(); get32t_unaligned_check(val, addr); + uaccess_restore(__ua_flags); regs->uregs[rd] = val; - } else + } else { + unsigned int __ua_flags = uaccess_save_and_enable(); put32t_unaligned_check(regs->uregs[rd], addr); + uaccess_restore(__ua_flags); + } return TYPE_LDST; fault: @@ -531,6 +549,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg #endif if (user_mode(regs)) { + unsigned int __ua_flags = uaccess_save_and_enable(); for (regbits = REGMASK_BITS(instr), rd = 0; regbits; regbits >>= 1, rd += 1) if (regbits & 1) { @@ -542,6 +561,7 @@ do_alignment_ldmstm(unsigned long addr, unsigned long instr, struct pt_regs *reg put32t_unaligned_check(regs->uregs[rd], eaddr); eaddr += 4; } + uaccess_restore(__ua_flags); } else { for (regbits = REGMASK_BITS(instr), rd = 0; regbits; regbits >>= 1, rd += 1) @@ -783,7 +803,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) } } } else { - fault = probe_kernel_address(instrptr, instr); + fault = probe_kernel_address((void *)instrptr, instr); instr = __mem_to_opcode_arm(instr); } diff --git a/kernel/arch/arm/mm/cache-feroceon-l2.c b/kernel/arch/arm/mm/cache-feroceon-l2.c index 097181e08..5c1b7a7b9 100644 --- a/kernel/arch/arm/mm/cache-feroceon-l2.c +++ b/kernel/arch/arm/mm/cache-feroceon-l2.c @@ -368,7 +368,6 @@ int __init feroceon_of_init(void) struct device_node *node; void __iomem *base; bool l2_wt_override = false; - struct resource res; #if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) l2_wt_override = true; @@ -376,10 +375,7 @@ int __init feroceon_of_init(void) node = of_find_matching_node(NULL, feroceon_ids); if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { - if (of_address_to_resource(node, 0, &res)) - return -ENODEV; - - base = ioremap(res.start, resource_size(&res)); + base = of_iomap(node, 0); if (!base) return -ENOMEM; diff --git a/kernel/arch/arm/mm/cache-l2x0.c b/kernel/arch/arm/mm/cache-l2x0.c index e309c8f35..493692d83 100644 --- a/kernel/arch/arm/mm/cache-l2x0.c +++ b/kernel/arch/arm/mm/cache-l2x0.c @@ -38,10 +38,11 @@ struct l2c_init_data { unsigned way_size_0; unsigned num_lock; void (*of_parse)(const struct device_node *, u32 *, u32 *); - void (*enable)(void __iomem *, u32, unsigned); + void (*enable)(void __iomem *, unsigned); void (*fixup)(void __iomem *, u32, struct outer_cache_fns *); void (*save)(void __iomem *); void (*configure)(void __iomem *); + void (*unlock)(void __iomem *, unsigned); struct outer_cache_fns outer_cache; }; @@ -110,14 +111,6 @@ static inline void l2c_unlock(void __iomem *base, unsigned num) static void l2c_configure(void __iomem *base) { - if (outer_cache.configure) { - outer_cache.configure(&l2x0_saved_regs); - return; - } - - if (l2x0_data->configure) - l2x0_data->configure(base); - l2c_write_sec(l2x0_saved_regs.aux_ctrl, base, L2X0_AUX_CTRL); } @@ -125,18 +118,16 @@ static void l2c_configure(void __iomem *base) * Enable the L2 cache controller. This function must only be * called when the cache controller is known to be disabled. */ -static void l2c_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void l2c_enable(void __iomem *base, unsigned num_lock) { unsigned long flags; - /* Do not touch the controller if already enabled. */ - if (readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN) - return; - - l2x0_saved_regs.aux_ctrl = aux; - l2c_configure(base); + if (outer_cache.configure) + outer_cache.configure(&l2x0_saved_regs); + else + l2x0_data->configure(base); - l2c_unlock(base, num_lock); + l2x0_data->unlock(base, num_lock); local_irq_save(flags); __l2c_op_way(base + L2X0_INV_WAY); @@ -163,7 +154,11 @@ static void l2c_save(void __iomem *base) static void l2c_resume(void) { - l2c_enable(l2x0_base, l2x0_saved_regs.aux_ctrl, l2x0_data->num_lock); + void __iomem *base = l2x0_base; + + /* Do not touch the controller if already enabled. */ + if (!(readl_relaxed(base + L2X0_CTRL) & L2X0_CTRL_EN)) + l2c_enable(base, l2x0_data->num_lock); } /* @@ -252,6 +247,8 @@ static const struct l2c_init_data l2c210_data __initconst = { .num_lock = 1, .enable = l2c_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -391,16 +388,22 @@ static void l2c220_sync(void) raw_spin_unlock_irqrestore(&l2x0_lock, flags); } -static void l2c220_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void l2c220_enable(void __iomem *base, unsigned num_lock) { /* * Always enable non-secure access to the lockdown registers - * we write to them as part of the L2C enable sequence so they * need to be accessible. */ - aux |= L220_AUX_CTRL_NS_LOCKDOWN; + l2x0_saved_regs.aux_ctrl |= L220_AUX_CTRL_NS_LOCKDOWN; - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); +} + +static void l2c220_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L220_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); } static const struct l2c_init_data l2c220_data = { @@ -409,6 +412,8 @@ static const struct l2c_init_data l2c220_data = { .num_lock = 1, .enable = l2c220_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, .outer_cache = { .inv_range = l2c220_inv_range, .clean_range = l2c220_clean_range, @@ -569,6 +574,8 @@ static void l2c310_configure(void __iomem *base) { unsigned revision; + l2c_configure(base); + /* restore pl310 setup */ l2c_write_sec(l2x0_saved_regs.tag_latency, base, L310_TAG_LATENCY_CTRL); @@ -603,10 +610,11 @@ static int l2c310_cpu_enable_flz(struct notifier_block *nb, unsigned long act, v return NOTIFY_OK; } -static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) +static void __init l2c310_enable(void __iomem *base, unsigned num_lock) { unsigned rev = readl_relaxed(base + L2X0_CACHE_ID) & L2X0_CACHE_ID_RTL_MASK; bool cortex_a9 = read_cpuid_part() == ARM_CPU_PART_CORTEX_A9; + u32 aux = l2x0_saved_regs.aux_ctrl; if (rev >= L310_CACHE_ID_RTL_R2P0) { if (cortex_a9) { @@ -649,9 +657,9 @@ static void __init l2c310_enable(void __iomem *base, u32 aux, unsigned num_lock) * we write to them as part of the L2C enable sequence so they * need to be accessible. */ - aux |= L310_AUX_CTRL_NS_LOCKDOWN; + l2x0_saved_regs.aux_ctrl = aux | L310_AUX_CTRL_NS_LOCKDOWN; - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); /* Read back resulting AUX_CTRL value as it could have been altered. */ aux = readl_relaxed(base + L2X0_AUX_CTRL); @@ -755,6 +763,12 @@ static void l2c310_resume(void) set_auxcr(get_auxcr() | BIT(3) | BIT(2) | BIT(1)); } +static void l2c310_unlock(void __iomem *base, unsigned num_lock) +{ + if (readl_relaxed(base + L2X0_AUX_CTRL) & L310_AUX_CTRL_NS_LOCKDOWN) + l2c_unlock(base, num_lock); +} + static const struct l2c_init_data l2c310_init_fns __initconst = { .type = "L2C-310", .way_size_0 = SZ_8K, @@ -763,6 +777,7 @@ static const struct l2c_init_data l2c310_init_fns __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -856,8 +871,11 @@ static int __init __l2c_init(const struct l2c_init_data *data, * Check if l2x0 controller is already enabled. If we are booting * in non-secure mode accessing the below registers will fault. */ - if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) - data->enable(l2x0_base, aux, data->num_lock); + if (!(readl_relaxed(l2x0_base + L2X0_CTRL) & L2X0_CTRL_EN)) { + l2x0_saved_regs.aux_ctrl = aux; + + data->enable(l2x0_base, data->num_lock); + } outer_cache = fns; @@ -1066,6 +1084,8 @@ static const struct l2c_init_data of_l2c210_data __initconst = { .of_parse = l2x0_of_parse, .enable = l2c_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1084,6 +1104,8 @@ static const struct l2c_init_data of_l2c220_data __initconst = { .of_parse = l2x0_of_parse, .enable = l2c220_enable, .save = l2c_save, + .configure = l2c_configure, + .unlock = l2c220_unlock, .outer_cache = { .inv_range = l2c220_inv_range, .clean_range = l2c220_clean_range, @@ -1149,6 +1171,11 @@ static void __init l2c310_of_parse(const struct device_node *np, } } + if (of_property_read_bool(np, "arm,shared-override")) { + *aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE; + *aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; + } + prefetch = l2x0_saved_regs.prefetch_ctrl; ret = of_property_read_u32(np, "arm,double-linefill", &val); @@ -1199,6 +1226,26 @@ static void __init l2c310_of_parse(const struct device_node *np, pr_err("L2C-310 OF arm,prefetch-offset property value is missing\n"); } + ret = of_property_read_u32(np, "prefetch-data", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_DATA_PREFETCH; + else + prefetch &= ~L310_PREFETCH_CTRL_DATA_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-data property value is missing\n"); + } + + ret = of_property_read_u32(np, "prefetch-instr", &val); + if (ret == 0) { + if (val) + prefetch |= L310_PREFETCH_CTRL_INSTR_PREFETCH; + else + prefetch &= ~L310_PREFETCH_CTRL_INSTR_PREFETCH; + } else if (ret != -EINVAL) { + pr_err("L2C-310 OF prefetch-instr property value is missing\n"); + } + l2x0_saved_regs.prefetch_ctrl = prefetch; } @@ -1211,6 +1258,7 @@ static const struct l2c_init_data of_l2c310_data __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1240,6 +1288,7 @@ static const struct l2c_init_data of_l2c310_coherent_data __initconst = { .fixup = l2c310_fixup, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = l2c210_inv_range, .clean_range = l2c210_clean_range, @@ -1366,7 +1415,7 @@ static void aurora_save(void __iomem *base) * For Aurora cache in no outer mode, enable via the CP15 coprocessor * broadcasting of cache commands to L2. */ -static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, +static void __init aurora_enable_no_outer(void __iomem *base, unsigned num_lock) { u32 u; @@ -1377,7 +1426,7 @@ static void __init aurora_enable_no_outer(void __iomem *base, u32 aux, isb(); - l2c_enable(base, aux, num_lock); + l2c_enable(base, num_lock); } static void __init aurora_fixup(void __iomem *base, u32 cache_id, @@ -1416,6 +1465,8 @@ static const struct l2c_init_data of_aurora_with_outer_data __initconst = { .enable = l2c_enable, .fixup = aurora_fixup, .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .inv_range = aurora_inv_range, .clean_range = aurora_clean_range, @@ -1435,6 +1486,8 @@ static const struct l2c_init_data of_aurora_no_outer_data __initconst = { .enable = aurora_enable_no_outer, .fixup = aurora_fixup, .save = aurora_save, + .configure = l2c_configure, + .unlock = l2c_unlock, .outer_cache = { .resume = l2c_resume, }, @@ -1585,6 +1638,7 @@ static const struct l2c_init_data of_bcm_l2x0_data __initconst = { .enable = l2c310_enable, .save = l2c310_save, .configure = l2c310_configure, + .unlock = l2c310_unlock, .outer_cache = { .inv_range = bcm_inv_range, .clean_range = bcm_clean_range, @@ -1608,6 +1662,7 @@ static void __init tauros3_save(void __iomem *base) static void tauros3_configure(void __iomem *base) { + l2c_configure(base); writel_relaxed(l2x0_saved_regs.aux2_ctrl, base + TAUROS3_AUX2_CTRL); writel_relaxed(l2x0_saved_regs.prefetch_ctrl, @@ -1621,6 +1676,7 @@ static const struct l2c_init_data of_tauros3_data __initconst = { .enable = l2c_enable, .save = tauros3_save, .configure = tauros3_configure, + .unlock = l2c_unlock, /* Tauros3 broadcasts L1 cache operations to L2 */ .outer_cache = { .resume = l2c_resume, diff --git a/kernel/arch/arm/mm/cache-uniphier.c b/kernel/arch/arm/mm/cache-uniphier.c new file mode 100644 index 000000000..0502ba17a --- /dev/null +++ b/kernel/arch/arm/mm/cache-uniphier.c @@ -0,0 +1,555 @@ +/* + * Copyright (C) 2015 Masahiro Yamada + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "uniphier: " fmt + +#include +#include +#include +#include +#include +#include +#include + +/* control registers */ +#define UNIPHIER_SSCC 0x0 /* Control Register */ +#define UNIPHIER_SSCC_BST BIT(20) /* UCWG burst read */ +#define UNIPHIER_SSCC_ACT BIT(19) /* Inst-Data separate */ +#define UNIPHIER_SSCC_WTG BIT(18) /* WT gathering on */ +#define UNIPHIER_SSCC_PRD BIT(17) /* enable pre-fetch */ +#define UNIPHIER_SSCC_ON BIT(0) /* enable cache */ +#define UNIPHIER_SSCLPDAWCR 0x30 /* Unified/Data Active Way Control */ +#define UNIPHIER_SSCLPIAWCR 0x34 /* Instruction Active Way Control */ + +/* revision registers */ +#define UNIPHIER_SSCID 0x0 /* ID Register */ + +/* operation registers */ +#define UNIPHIER_SSCOPE 0x244 /* Cache Operation Primitive Entry */ +#define UNIPHIER_SSCOPE_CM_INV 0x0 /* invalidate */ +#define UNIPHIER_SSCOPE_CM_CLEAN 0x1 /* clean */ +#define UNIPHIER_SSCOPE_CM_FLUSH 0x2 /* flush */ +#define UNIPHIER_SSCOPE_CM_SYNC 0x8 /* sync (drain bufs) */ +#define UNIPHIER_SSCOPE_CM_FLUSH_PREFETCH 0x9 /* flush p-fetch buf */ +#define UNIPHIER_SSCOQM 0x248 /* Cache Operation Queue Mode */ +#define UNIPHIER_SSCOQM_TID_MASK (0x3 << 21) +#define UNIPHIER_SSCOQM_TID_LRU_DATA (0x0 << 21) +#define UNIPHIER_SSCOQM_TID_LRU_INST (0x1 << 21) +#define UNIPHIER_SSCOQM_TID_WAY (0x2 << 21) +#define UNIPHIER_SSCOQM_S_MASK (0x3 << 17) +#define UNIPHIER_SSCOQM_S_RANGE (0x0 << 17) +#define UNIPHIER_SSCOQM_S_ALL (0x1 << 17) +#define UNIPHIER_SSCOQM_S_WAY (0x2 << 17) +#define UNIPHIER_SSCOQM_CE BIT(15) /* notify completion */ +#define UNIPHIER_SSCOQM_CM_INV 0x0 /* invalidate */ +#define UNIPHIER_SSCOQM_CM_CLEAN 0x1 /* clean */ +#define UNIPHIER_SSCOQM_CM_FLUSH 0x2 /* flush */ +#define UNIPHIER_SSCOQM_CM_PREFETCH 0x3 /* prefetch to cache */ +#define UNIPHIER_SSCOQM_CM_PREFETCH_BUF 0x4 /* prefetch to pf-buf */ +#define UNIPHIER_SSCOQM_CM_TOUCH 0x5 /* touch */ +#define UNIPHIER_SSCOQM_CM_TOUCH_ZERO 0x6 /* touch to zero */ +#define UNIPHIER_SSCOQM_CM_TOUCH_DIRTY 0x7 /* touch with dirty */ +#define UNIPHIER_SSCOQAD 0x24c /* Cache Operation Queue Address */ +#define UNIPHIER_SSCOQSZ 0x250 /* Cache Operation Queue Size */ +#define UNIPHIER_SSCOQMASK 0x254 /* Cache Operation Queue Address Mask */ +#define UNIPHIER_SSCOQWN 0x258 /* Cache Operation Queue Way Number */ +#define UNIPHIER_SSCOPPQSEF 0x25c /* Cache Operation Queue Set Complete*/ +#define UNIPHIER_SSCOPPQSEF_FE BIT(1) +#define UNIPHIER_SSCOPPQSEF_OE BIT(0) +#define UNIPHIER_SSCOLPQS 0x260 /* Cache Operation Queue Status */ +#define UNIPHIER_SSCOLPQS_EF BIT(2) +#define UNIPHIER_SSCOLPQS_EST BIT(1) +#define UNIPHIER_SSCOLPQS_QST BIT(0) + +/* Is the touch/pre-fetch destination specified by ways? */ +#define UNIPHIER_SSCOQM_TID_IS_WAY(op) \ + ((op & UNIPHIER_SSCOQM_TID_MASK) == UNIPHIER_SSCOQM_TID_WAY) +/* Is the operation region specified by address range? */ +#define UNIPHIER_SSCOQM_S_IS_RANGE(op) \ + ((op & UNIPHIER_SSCOQM_S_MASK) == UNIPHIER_SSCOQM_S_RANGE) + +/** + * uniphier_cache_data - UniPhier outer cache specific data + * + * @ctrl_base: virtual base address of control registers + * @rev_base: virtual base address of revision registers + * @op_base: virtual base address of operation registers + * @way_present_mask: each bit specifies if the way is present + * @way_locked_mask: each bit specifies if the way is locked + * @nsets: number of associativity sets + * @line_size: line size in bytes + * @range_op_max_size: max size that can be handled by a single range operation + * @list: list node to include this level in the whole cache hierarchy + */ +struct uniphier_cache_data { + void __iomem *ctrl_base; + void __iomem *rev_base; + void __iomem *op_base; + u32 way_present_mask; + u32 way_locked_mask; + u32 nsets; + u32 line_size; + u32 range_op_max_size; + struct list_head list; +}; + +/* + * List of the whole outer cache hierarchy. This list is only modified during + * the early boot stage, so no mutex is taken for the access to the list. + */ +static LIST_HEAD(uniphier_cache_list); + +/** + * __uniphier_cache_sync - perform a sync point for a particular cache level + * + * @data: cache controller specific data + */ +static void __uniphier_cache_sync(struct uniphier_cache_data *data) +{ + /* This sequence need not be atomic. Do not disable IRQ. */ + writel_relaxed(UNIPHIER_SSCOPE_CM_SYNC, + data->op_base + UNIPHIER_SSCOPE); + /* need a read back to confirm */ + readl_relaxed(data->op_base + UNIPHIER_SSCOPE); +} + +/** + * __uniphier_cache_maint_common - run a queue operation for a particular level + * + * @data: cache controller specific data + * @start: start address of range operation (don't care for "all" operation) + * @size: data size of range operation (don't care for "all" operation) + * @operation: flags to specify the desired cache operation + */ +static void __uniphier_cache_maint_common(struct uniphier_cache_data *data, + unsigned long start, + unsigned long size, + u32 operation) +{ + unsigned long flags; + + /* + * No spin lock is necessary here because: + * + * [1] This outer cache controller is able to accept maintenance + * operations from multiple CPUs at a time in an SMP system; if a + * maintenance operation is under way and another operation is issued, + * the new one is stored in the queue. The controller performs one + * operation after another. If the queue is full, the status register, + * UNIPHIER_SSCOPPQSEF, indicates that the queue registration has + * failed. The status registers, UNIPHIER_{SSCOPPQSEF, SSCOLPQS}, have + * different instances for each CPU, i.e. each CPU can track the status + * of the maintenance operations triggered by itself. + * + * [2] The cache command registers, UNIPHIER_{SSCOQM, SSCOQAD, SSCOQSZ, + * SSCOQWN}, are shared between multiple CPUs, but the hardware still + * guarantees the registration sequence is atomic; the write access to + * them are arbitrated by the hardware. The first accessor to the + * register, UNIPHIER_SSCOQM, holds the access right and it is released + * by reading the status register, UNIPHIER_SSCOPPQSEF. While one CPU + * is holding the access right, other CPUs fail to register operations. + * One CPU should not hold the access right for a long time, so local + * IRQs should be disabled while the following sequence. + */ + local_irq_save(flags); + + /* clear the complete notification flag */ + writel_relaxed(UNIPHIER_SSCOLPQS_EF, data->op_base + UNIPHIER_SSCOLPQS); + + do { + /* set cache operation */ + writel_relaxed(UNIPHIER_SSCOQM_CE | operation, + data->op_base + UNIPHIER_SSCOQM); + + /* set address range if needed */ + if (likely(UNIPHIER_SSCOQM_S_IS_RANGE(operation))) { + writel_relaxed(start, data->op_base + UNIPHIER_SSCOQAD); + writel_relaxed(size, data->op_base + UNIPHIER_SSCOQSZ); + } + + /* set target ways if needed */ + if (unlikely(UNIPHIER_SSCOQM_TID_IS_WAY(operation))) + writel_relaxed(data->way_locked_mask, + data->op_base + UNIPHIER_SSCOQWN); + } while (unlikely(readl_relaxed(data->op_base + UNIPHIER_SSCOPPQSEF) & + (UNIPHIER_SSCOPPQSEF_FE | UNIPHIER_SSCOPPQSEF_OE))); + + /* wait until the operation is completed */ + while (likely(readl_relaxed(data->op_base + UNIPHIER_SSCOLPQS) != + UNIPHIER_SSCOLPQS_EF)) + cpu_relax(); + + local_irq_restore(flags); +} + +static void __uniphier_cache_maint_all(struct uniphier_cache_data *data, + u32 operation) +{ + __uniphier_cache_maint_common(data, 0, 0, + UNIPHIER_SSCOQM_S_ALL | operation); + + __uniphier_cache_sync(data); +} + +static void __uniphier_cache_maint_range(struct uniphier_cache_data *data, + unsigned long start, unsigned long end, + u32 operation) +{ + unsigned long size; + + /* + * If the start address is not aligned, + * perform a cache operation for the first cache-line + */ + start = start & ~(data->line_size - 1); + + size = end - start; + + if (unlikely(size >= (unsigned long)(-data->line_size))) { + /* this means cache operation for all range */ + __uniphier_cache_maint_all(data, operation); + return; + } + + /* + * If the end address is not aligned, + * perform a cache operation for the last cache-line + */ + size = ALIGN(size, data->line_size); + + while (size) { + unsigned long chunk_size = min_t(unsigned long, size, + data->range_op_max_size); + + __uniphier_cache_maint_common(data, start, chunk_size, + UNIPHIER_SSCOQM_S_RANGE | operation); + + start += chunk_size; + size -= chunk_size; + } + + __uniphier_cache_sync(data); +} + +static void __uniphier_cache_enable(struct uniphier_cache_data *data, bool on) +{ + u32 val = 0; + + if (on) + val = UNIPHIER_SSCC_WTG | UNIPHIER_SSCC_PRD | UNIPHIER_SSCC_ON; + + writel_relaxed(val, data->ctrl_base + UNIPHIER_SSCC); +} + +static void __init __uniphier_cache_set_locked_ways( + struct uniphier_cache_data *data, + u32 way_mask) +{ + data->way_locked_mask = way_mask & data->way_present_mask; + + writel_relaxed(~data->way_locked_mask & data->way_present_mask, + data->ctrl_base + UNIPHIER_SSCLPDAWCR); +} + +static void uniphier_cache_maint_range(unsigned long start, unsigned long end, + u32 operation) +{ + struct uniphier_cache_data *data; + + list_for_each_entry(data, &uniphier_cache_list, list) + __uniphier_cache_maint_range(data, start, end, operation); +} + +static void uniphier_cache_maint_all(u32 operation) +{ + struct uniphier_cache_data *data; + + list_for_each_entry(data, &uniphier_cache_list, list) + __uniphier_cache_maint_all(data, operation); +} + +static void uniphier_cache_inv_range(unsigned long start, unsigned long end) +{ + uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_INV); +} + +static void uniphier_cache_clean_range(unsigned long start, unsigned long end) +{ + uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_CLEAN); +} + +static void uniphier_cache_flush_range(unsigned long start, unsigned long end) +{ + uniphier_cache_maint_range(start, end, UNIPHIER_SSCOQM_CM_FLUSH); +} + +static void __init uniphier_cache_inv_all(void) +{ + uniphier_cache_maint_all(UNIPHIER_SSCOQM_CM_INV); +} + +static void uniphier_cache_flush_all(void) +{ + uniphier_cache_maint_all(UNIPHIER_SSCOQM_CM_FLUSH); +} + +static void uniphier_cache_disable(void) +{ + struct uniphier_cache_data *data; + + list_for_each_entry_reverse(data, &uniphier_cache_list, list) + __uniphier_cache_enable(data, false); + + uniphier_cache_flush_all(); +} + +static void __init uniphier_cache_enable(void) +{ + struct uniphier_cache_data *data; + + uniphier_cache_inv_all(); + + list_for_each_entry(data, &uniphier_cache_list, list) { + __uniphier_cache_enable(data, true); + __uniphier_cache_set_locked_ways(data, 0); + } +} + +static void uniphier_cache_sync(void) +{ + struct uniphier_cache_data *data; + + list_for_each_entry(data, &uniphier_cache_list, list) + __uniphier_cache_sync(data); +} + +int __init uniphier_cache_l2_is_enabled(void) +{ + struct uniphier_cache_data *data; + + data = list_first_entry_or_null(&uniphier_cache_list, + struct uniphier_cache_data, list); + if (!data) + return 0; + + return !!(readl_relaxed(data->ctrl_base + UNIPHIER_SSCC) & + UNIPHIER_SSCC_ON); +} + +void __init uniphier_cache_l2_touch_range(unsigned long start, + unsigned long end) +{ + struct uniphier_cache_data *data; + + data = list_first_entry_or_null(&uniphier_cache_list, + struct uniphier_cache_data, list); + if (data) + __uniphier_cache_maint_range(data, start, end, + UNIPHIER_SSCOQM_TID_WAY | + UNIPHIER_SSCOQM_CM_TOUCH); +} + +void __init uniphier_cache_l2_set_locked_ways(u32 way_mask) +{ + struct uniphier_cache_data *data; + + data = list_first_entry_or_null(&uniphier_cache_list, + struct uniphier_cache_data, list); + if (data) + __uniphier_cache_set_locked_ways(data, way_mask); +} + +static const struct of_device_id uniphier_cache_match[] __initconst = { + { + .compatible = "socionext,uniphier-system-cache", + }, + { /* sentinel */ } +}; + +static struct device_node * __init uniphier_cache_get_next_level_node( + struct device_node *np) +{ + u32 phandle; + + if (of_property_read_u32(np, "next-level-cache", &phandle)) + return NULL; + + return of_find_node_by_phandle(phandle); +} + +static int __init __uniphier_cache_init(struct device_node *np, + unsigned int *cache_level) +{ + struct uniphier_cache_data *data; + u32 level, cache_size; + struct device_node *next_np; + int ret = 0; + + if (!of_match_node(uniphier_cache_match, np)) { + pr_err("L%d: not compatible with uniphier cache\n", + *cache_level); + return -EINVAL; + } + + if (of_property_read_u32(np, "cache-level", &level)) { + pr_err("L%d: cache-level is not specified\n", *cache_level); + return -EINVAL; + } + + if (level != *cache_level) { + pr_err("L%d: cache-level is unexpected value %d\n", + *cache_level, level); + return -EINVAL; + } + + if (!of_property_read_bool(np, "cache-unified")) { + pr_err("L%d: cache-unified is not specified\n", *cache_level); + return -EINVAL; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + if (of_property_read_u32(np, "cache-line-size", &data->line_size) || + !is_power_of_2(data->line_size)) { + pr_err("L%d: cache-line-size is unspecified or invalid\n", + *cache_level); + ret = -EINVAL; + goto err; + } + + if (of_property_read_u32(np, "cache-sets", &data->nsets) || + !is_power_of_2(data->nsets)) { + pr_err("L%d: cache-sets is unspecified or invalid\n", + *cache_level); + ret = -EINVAL; + goto err; + } + + if (of_property_read_u32(np, "cache-size", &cache_size) || + cache_size == 0 || cache_size % (data->nsets * data->line_size)) { + pr_err("L%d: cache-size is unspecified or invalid\n", + *cache_level); + ret = -EINVAL; + goto err; + } + + data->way_present_mask = + ((u32)1 << cache_size / data->nsets / data->line_size) - 1; + + data->ctrl_base = of_iomap(np, 0); + if (!data->ctrl_base) { + pr_err("L%d: failed to map control register\n", *cache_level); + ret = -ENOMEM; + goto err; + } + + data->rev_base = of_iomap(np, 1); + if (!data->rev_base) { + pr_err("L%d: failed to map revision register\n", *cache_level); + ret = -ENOMEM; + goto err; + } + + data->op_base = of_iomap(np, 2); + if (!data->op_base) { + pr_err("L%d: failed to map operation register\n", *cache_level); + ret = -ENOMEM; + goto err; + } + + if (*cache_level == 2) { + u32 revision = readl(data->rev_base + UNIPHIER_SSCID); + /* + * The size of range operation is limited to (1 << 22) or less + * for PH-sLD8 or older SoCs. + */ + if (revision <= 0x16) + data->range_op_max_size = (u32)1 << 22; + } + + data->range_op_max_size -= data->line_size; + + INIT_LIST_HEAD(&data->list); + list_add_tail(&data->list, &uniphier_cache_list); /* no mutex */ + + /* + * OK, this level has been successfully initialized. Look for the next + * level cache. Do not roll back even if the initialization of the + * next level cache fails because we want to continue with available + * cache levels. + */ + next_np = uniphier_cache_get_next_level_node(np); + if (next_np) { + (*cache_level)++; + ret = __uniphier_cache_init(next_np, cache_level); + } + of_node_put(next_np); + + return ret; +err: + iounmap(data->op_base); + iounmap(data->rev_base); + iounmap(data->ctrl_base); + kfree(data); + + return ret; +} + +int __init uniphier_cache_init(void) +{ + struct device_node *np = NULL; + unsigned int cache_level; + int ret = 0; + + /* look for level 2 cache */ + while ((np = of_find_matching_node(np, uniphier_cache_match))) + if (!of_property_read_u32(np, "cache-level", &cache_level) && + cache_level == 2) + break; + + if (!np) + return -ENODEV; + + ret = __uniphier_cache_init(np, &cache_level); + of_node_put(np); + + if (ret) { + /* + * Error out iif L2 initialization fails. Continue with any + * error on L3 or outer because they are optional. + */ + if (cache_level == 2) { + pr_err("failed to initialize L2 cache\n"); + return ret; + } + + cache_level--; + ret = 0; + } + + outer_cache.inv_range = uniphier_cache_inv_range; + outer_cache.clean_range = uniphier_cache_clean_range; + outer_cache.flush_range = uniphier_cache_flush_range; + outer_cache.flush_all = uniphier_cache_flush_all; + outer_cache.disable = uniphier_cache_disable; + outer_cache.sync = uniphier_cache_sync; + + uniphier_cache_enable(); + + pr_info("enabled outer cache (cache level: %d)\n", cache_level); + + return ret; +} diff --git a/kernel/arch/arm/mm/context.c b/kernel/arch/arm/mm/context.c index 845769e41..c8c8b9ed0 100644 --- a/kernel/arch/arm/mm/context.c +++ b/kernel/arch/arm/mm/context.c @@ -165,13 +165,28 @@ static void flush_context(unsigned int cpu) __flush_icache_all(); } -static int is_reserved_asid(u64 asid) +static bool check_update_reserved_asid(u64 asid, u64 newasid) { int cpu; - for_each_possible_cpu(cpu) - if (per_cpu(reserved_asids, cpu) == asid) - return 1; - return 0; + bool hit = false; + + /* + * Iterate over the set of reserved ASIDs looking for a match. + * If we find one, then we can update our mm to use newasid + * (i.e. the same ASID in the current generation) but we can't + * exit the loop early, since we need to ensure that all copies + * of the old ASID are updated to reflect the mm. Failure to do + * so could result in us missing the reserved ASID in a future + * generation. + */ + for_each_possible_cpu(cpu) { + if (per_cpu(reserved_asids, cpu) == asid) { + hit = true; + per_cpu(reserved_asids, cpu) = newasid; + } + } + + return hit; } static u64 new_context(struct mm_struct *mm, unsigned int cpu) @@ -181,12 +196,14 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) u64 generation = atomic64_read(&asid_generation); if (asid != 0) { + u64 newasid = generation | (asid & ~ASID_MASK); + /* * If our current ASID was active during a rollover, we * can continue to use it and this was just a false alarm. */ - if (is_reserved_asid(asid)) - return generation | (asid & ~ASID_MASK); + if (check_update_reserved_asid(asid, newasid)) + return newasid; /* * We had a valid ASID in a previous life, so try to re-use @@ -194,7 +211,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) */ asid &= ~ASID_MASK; if (!__test_and_set_bit(asid, asid_map)) - goto bump_gen; + return newasid; } /* @@ -216,11 +233,8 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu) __set_bit(asid, asid_map); cur_idx = asid; - -bump_gen: - asid |= generation; cpumask_clear(mm_cpumask(mm)); - return asid; + return asid | generation; } void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) diff --git a/kernel/arch/arm/mm/dma-mapping.c b/kernel/arch/arm/mm/dma-mapping.c index 6e4b9ff22..534a60ae2 100644 --- a/kernel/arch/arm/mm/dma-mapping.c +++ b/kernel/arch/arm/mm/dma-mapping.c @@ -39,6 +39,7 @@ #include #include +#include "dma.h" #include "mm.h" /* @@ -148,11 +149,14 @@ static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs); static void arm_coherent_dma_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t handle, struct dma_attrs *attrs); +static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs); struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, - .mmap = arm_dma_mmap, + .mmap = arm_coherent_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, .map_sg = arm_dma_map_sg, @@ -645,14 +649,18 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, size = PAGE_ALIGN(size); want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - if (is_coherent || nommu()) + if (nommu()) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM)) + addr = __alloc_from_contiguous(dev, size, prot, &page, + caller, want_vaddr); + else if (is_coherent) addr = __alloc_simple_buffer(dev, size, gfp, &page); - else if (!(gfp & __GFP_WAIT)) + else if (!gfpflags_allow_blocking(gfp)) addr = __alloc_from_pool(size, &page); - else if (!dev_get_cma_area(dev)) - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr); else - addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr); + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, + caller, want_vaddr); if (page) *handle = pfn_to_dma(dev, page_to_pfn(page)); @@ -668,10 +676,6 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); - void *memory; - - if (dma_alloc_from_coherent(dev, size, handle, &memory)) - return memory; return __dma_alloc(dev, size, handle, gfp, prot, false, attrs, __builtin_return_address(0)); @@ -680,20 +684,11 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); - void *memory; - - if (dma_alloc_from_coherent(dev, size, handle, &memory)) - return memory; - - return __dma_alloc(dev, size, handle, gfp, prot, true, + return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, attrs, __builtin_return_address(0)); } -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, +static int __arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs) { @@ -704,8 +699,6 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_pfn(dev, dma_addr); unsigned long off = vma->vm_pgoff; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); - if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -720,6 +713,26 @@ int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } +/* + * Create userspace mapping for the DMA-coherent memory. + */ +static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ + return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) +{ +#ifdef CONFIG_MMU + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); +#endif /* CONFIG_MMU */ + return __arm_dma_mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + /* * Free a buffer as defined by the above mapping. */ @@ -730,17 +743,14 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, struct page *page = pfn_to_page(dma_to_pfn(dev, handle)); bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - if (dma_release_from_coherent(dev, get_order(size), cpu_addr)) - return; - size = PAGE_ALIGN(size); - if (is_coherent || nommu()) { + if (nommu()) { __dma_free_buffer(page, size); - } else if (__free_from_pool(cpu_addr, size)) { + } else if (!is_coherent && __free_from_pool(cpu_addr, size)) { return; } else if (!dev_get_cma_area(dev)) { - if (want_vaddr) + if (want_vaddr && !is_coherent) __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { @@ -1239,7 +1249,7 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; dma_addr_t dma_addr, iova; - int i, ret = DMA_ERROR_CODE; + int i; dma_addr = __alloc_iova(mapping, size); if (dma_addr == DMA_ERROR_CODE) @@ -1247,6 +1257,8 @@ __iommu_create_mapping(struct device *dev, struct page **pages, size_t size) iova = dma_addr; for (i = 0; i < count; ) { + int ret; + unsigned int next_pfn = page_to_pfn(pages[i]) + 1; phys_addr_t phys = page_to_phys(pages[i]); unsigned int len, j; @@ -1351,7 +1363,7 @@ static void *arm_iommu_alloc_attrs(struct device *dev, size_t size, *handle = DMA_ERROR_CODE; size = PAGE_ALIGN(size); - if (!(gfp & __GFP_WAIT)) + if (!gfpflags_allow_blocking(gfp)) return __iommu_alloc_atomic(dev, size, handle); /* @@ -1395,12 +1407,19 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; struct page **pages = __iommu_get_pages(cpu_addr, attrs); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (!pages) return -ENXIO; + if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) + return -ENXIO; + + pages += off; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { diff --git a/kernel/arch/arm/mm/dma.h b/kernel/arch/arm/mm/dma.h new file mode 100644 index 000000000..70ea6852f --- /dev/null +++ b/kernel/arch/arm/mm/dma.h @@ -0,0 +1,32 @@ +#ifndef DMA_H +#define DMA_H + +#include + +#ifndef MULTI_CACHE +#define dmac_map_area __glue(_CACHE,_dma_map_area) +#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); + +#else + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +#define dmac_map_area cpu_cache.dma_map_area +#define dmac_unmap_area cpu_cache.dma_unmap_area + +#endif + +#endif diff --git a/kernel/arch/arm/mm/fault.c b/kernel/arch/arm/mm/fault.c index 62016e3e4..b8aa1e9ee 100644 --- a/kernel/arch/arm/mm/fault.c +++ b/kernel/arch/arm/mm/fault.c @@ -599,6 +599,28 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) arm_notify_die("", regs, &info, ifsr, 0); } +/* + * Abort handler to be used only during first unmasking of asynchronous aborts + * on the boot CPU. This makes sure that the machine will not die if the + * firmware/bootloader left an imprecise abort pending for us to trip over. + */ +static int __init early_abort_handler(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) +{ + pr_warn("Hit pending asynchronous external abort (FSR=0x%08x) during " + "first unmask, this is most likely caused by a " + "firmware/bootloader bug.\n", fsr); + + return 0; +} + +void __init early_abt_enable(void) +{ + fsr_info[22].fn = early_abort_handler; + local_abt_enable(); + fsr_info[22].fn = do_bad; +} + #ifndef CONFIG_ARM_LPAE static int __init exceptions_init(void) { diff --git a/kernel/arch/arm/mm/fault.h b/kernel/arch/arm/mm/fault.h index cf08bdfbe..05ec5e0df 100644 --- a/kernel/arch/arm/mm/fault.h +++ b/kernel/arch/arm/mm/fault.h @@ -24,5 +24,6 @@ static inline int fsr_fs(unsigned int fsr) void do_bad_area(unsigned long addr, unsigned int fsr, struct pt_regs *regs); unsigned long search_exception_table(unsigned long addr); +void early_abt_enable(void); #endif /* __ARCH_ARM_FAULT_H */ diff --git a/kernel/arch/arm/mm/flush.c b/kernel/arch/arm/mm/flush.c index 34b66af51..1ec8e7590 100644 --- a/kernel/arch/arm/mm/flush.c +++ b/kernel/arch/arm/mm/flush.c @@ -21,6 +21,21 @@ #include "mm.h" +#ifdef CONFIG_ARM_HEAVY_MB +void (*soc_mb)(void); + +void arm_heavy_mb(void) +{ +#ifdef CONFIG_OUTER_CACHE_SYNC + if (outer_cache.sync) + outer_cache.sync(); +#endif + if (soc_mb) + soc_mb(); +} +EXPORT_SYMBOL(arm_heavy_mb); +#endif + #ifdef CONFIG_CPU_CACHE_VIPT static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) diff --git a/kernel/arch/arm/mm/highmem.c b/kernel/arch/arm/mm/highmem.c index 4050e9d99..542692dbd 100644 --- a/kernel/arch/arm/mm/highmem.c +++ b/kernel/arch/arm/mm/highmem.c @@ -34,6 +34,11 @@ static inline pte_t get_fixmap_pte(unsigned long vaddr) return *ptep; } +static unsigned int fixmap_idx(int type) +{ + return FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); +} + void *kmap(struct page *page) { might_sleep(); @@ -80,7 +85,7 @@ void *kmap_atomic(struct page *page) type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* @@ -110,7 +115,7 @@ void __kunmap_atomic(void *kvaddr) if (kvaddr >= (void *)FIXADDR_START) { type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); @@ -146,7 +151,7 @@ void *kmap_atomic_pfn(unsigned long pfn) return page_address(page); type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = fixmap_idx(type); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); @@ -158,17 +163,6 @@ void *kmap_atomic_pfn(unsigned long pfn) return (void *)vaddr; } - -struct page *kmap_atomic_to_page(const void *ptr) -{ - unsigned long vaddr = (unsigned long)ptr; - - if (vaddr < FIXADDR_START) - return virt_to_page(ptr); - - return pte_page(get_fixmap_pte(vaddr)); -} - #if defined CONFIG_PREEMPT_RT_FULL void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) { @@ -178,7 +172,7 @@ void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) * Clear @prev's kmap_atomic mappings */ for (i = 0; i < prev_p->kmap_idx; i++) { - int idx = i + KM_TYPE_NR * smp_processor_id(); + int idx = fixmap_idx(i); set_fixmap_pte(idx, __pte(0)); } @@ -186,7 +180,7 @@ void switch_kmaps(struct task_struct *prev_p, struct task_struct *next_p) * Restore @next_p's kmap_atomic mappings */ for (i = 0; i < next_p->kmap_idx; i++) { - int idx = i + KM_TYPE_NR * smp_processor_id(); + int idx = fixmap_idx(i); if (!pte_none(next_p->kmap_pte[i])) set_fixmap_pte(idx, next_p->kmap_pte[i]); diff --git a/kernel/arch/arm/mm/hugetlbpage.c b/kernel/arch/arm/mm/hugetlbpage.c index c72412415..fcafb521f 100644 --- a/kernel/arch/arm/mm/hugetlbpage.c +++ b/kernel/arch/arm/mm/hugetlbpage.c @@ -41,11 +41,6 @@ int pud_huge(pud_t pud) return 0; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) -{ - return 0; -} - int pmd_huge(pmd_t pmd) { return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT); diff --git a/kernel/arch/arm/mm/init.c b/kernel/arch/arm/mm/init.c index be92fa0f2..7f8cd1b35 100644 --- a/kernel/arch/arm/mm/init.c +++ b/kernel/arch/arm/mm/init.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -268,6 +269,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); + early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ @@ -626,12 +628,10 @@ static struct section_perm ro_perms[] = { * safe to be called with preemption disabled, as under stop_machine(). */ static inline void section_update(unsigned long addr, pmdval_t mask, - pmdval_t prot) + pmdval_t prot, struct mm_struct *mm) { - struct mm_struct *mm; pmd_t *pmd; - mm = current->active_mm; pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr); #ifdef CONFIG_ARM_LPAE @@ -655,49 +655,82 @@ static inline bool arch_has_strict_perms(void) return !!(get_cr() & CR_XP); } -#define set_section_perms(perms, field) { \ - size_t i; \ - unsigned long addr; \ - \ - if (!arch_has_strict_perms()) \ - return; \ - \ - for (i = 0; i < ARRAY_SIZE(perms); i++) { \ - if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || \ - !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { \ - pr_err("BUG: section %lx-%lx not aligned to %lx\n", \ - perms[i].start, perms[i].end, \ - SECTION_SIZE); \ - continue; \ - } \ - \ - for (addr = perms[i].start; \ - addr < perms[i].end; \ - addr += SECTION_SIZE) \ - section_update(addr, perms[i].mask, \ - perms[i].field); \ - } \ +void set_section_perms(struct section_perm *perms, int n, bool set, + struct mm_struct *mm) +{ + size_t i; + unsigned long addr; + + if (!arch_has_strict_perms()) + return; + + for (i = 0; i < n; i++) { + if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) || + !IS_ALIGNED(perms[i].end, SECTION_SIZE)) { + pr_err("BUG: section %lx-%lx not aligned to %lx\n", + perms[i].start, perms[i].end, + SECTION_SIZE); + continue; + } + + for (addr = perms[i].start; + addr < perms[i].end; + addr += SECTION_SIZE) + section_update(addr, perms[i].mask, + set ? perms[i].prot : perms[i].clear, mm); + } + } -static inline void fix_kernmem_perms(void) +static void update_sections_early(struct section_perm perms[], int n) { - set_section_perms(nx_perms, prot); + struct task_struct *t, *s; + + read_lock(&tasklist_lock); + for_each_process(t) { + if (t->flags & PF_KTHREAD) + continue; + for_each_thread(t, s) + set_section_perms(perms, n, true, s->mm); + } + read_unlock(&tasklist_lock); + set_section_perms(perms, n, true, current->active_mm); + set_section_perms(perms, n, true, &init_mm); +} + +int __fix_kernmem_perms(void *unused) +{ + update_sections_early(nx_perms, ARRAY_SIZE(nx_perms)); + return 0; +} + +void fix_kernmem_perms(void) +{ + stop_machine(__fix_kernmem_perms, NULL, NULL); } #ifdef CONFIG_DEBUG_RODATA +int __mark_rodata_ro(void *unused) +{ + update_sections_early(ro_perms, ARRAY_SIZE(ro_perms)); + return 0; +} + void mark_rodata_ro(void) { - set_section_perms(ro_perms, prot); + stop_machine(__mark_rodata_ro, NULL, NULL); } void set_kernel_text_rw(void) { - set_section_perms(ro_perms, clear); + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false, + current->active_mm); } void set_kernel_text_ro(void) { - set_section_perms(ro_perms, prot); + set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true, + current->active_mm); } #endif /* CONFIG_DEBUG_RODATA */ diff --git a/kernel/arch/arm/mm/ioremap.c b/kernel/arch/arm/mm/ioremap.c index d1e5ad7ab..0c81056c1 100644 --- a/kernel/arch/arm/mm/ioremap.c +++ b/kernel/arch/arm/mm/ioremap.c @@ -255,7 +255,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, } #endif -void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, +static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype, void *caller) { const struct mem_type *type; @@ -363,7 +363,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, - __builtin_return_address(0)); + __builtin_return_address(0)); } EXPORT_SYMBOL(__arm_ioremap_pfn); @@ -371,13 +371,26 @@ void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; -void __iomem * -__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) { - return arch_ioremap_caller(phys_addr, size, mtype, - __builtin_return_address(0)); + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap_wc); /* * Remap an arbitrary physical address space into the kernel virtual @@ -431,11 +444,11 @@ void __iounmap(volatile void __iomem *io_addr) void (*arch_iounmap)(volatile void __iomem *) = __iounmap; -void __arm_iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *cookie) { - arch_iounmap(io_addr); + arch_iounmap(cookie); } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); #ifdef CONFIG_PCI static int pci_ioremap_mem_type = MT_DEVICE; diff --git a/kernel/arch/arm/mm/mmu.c b/kernel/arch/arm/mm/mmu.c index 718638267..4867f5daf 100644 --- a/kernel/arch/arm/mm/mmu.c +++ b/kernel/arch/arm/mm/mmu.c @@ -38,6 +38,7 @@ #include #include +#include "fault.h" #include "mm.h" #include "tcm.h" @@ -291,13 +292,13 @@ static struct mem_type mem_types[] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_HIGH_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_USER | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_MEMORY_RWX] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, @@ -357,6 +358,47 @@ const struct mem_type *get_mem_type(unsigned int type) } EXPORT_SYMBOL(get_mem_type); +static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr); + +static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata; + +static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr) +{ + return &bm_pte[pte_index(addr)]; +} + +static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr) +{ + return pte_offset_kernel(dir, addr); +} + +static inline pmd_t * __init fixmap_pmd(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pmd; +} + +void __init early_fixmap_init(void) +{ + pmd_t *pmd; + + /* + * The early fixmap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(__end_of_permanent_fixed_addresses) >> PMD_SHIFT) + != FIXADDR_TOP >> PMD_SHIFT); + + pmd = fixmap_pmd(FIXADDR_TOP); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + pte_offset_fixmap = pte_offset_early_fixmap; +} + /* * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). * As a result, this can only be called with preemption disabled, as under @@ -365,7 +407,7 @@ EXPORT_SYMBOL(get_mem_type); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long vaddr = __fix_to_virt(idx); - pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); /* Make sure fixmap region does not exceed available allocation. */ BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > @@ -855,7 +897,7 @@ static void __init create_mapping(struct map_desc *md) } if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && + md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", (long long)__pfn_to_phys((u64)md->pfn), md->virtual); @@ -1072,6 +1114,7 @@ void __init sanity_check_meminfo(void) int highmem = 0; phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; struct memblock_region *reg; + bool should_use_highmem = false; for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; @@ -1090,6 +1133,7 @@ void __init sanity_check_meminfo(void) pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", &block_start, &block_end); memblock_remove(reg->base, reg->size); + should_use_highmem = true; continue; } @@ -1100,6 +1144,7 @@ void __init sanity_check_meminfo(void) &block_start, &block_end, &vmalloc_limit); memblock_remove(vmalloc_limit, overlap_size); block_end = vmalloc_limit; + should_use_highmem = true; } } @@ -1134,6 +1179,9 @@ void __init sanity_check_meminfo(void) } } + if (should_use_highmem) + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + high_memory = __va(arm_lowmem_limit - 1) + 1; /* @@ -1213,10 +1261,10 @@ void __init arm_mm_memblock_reserve(void) /* * Set up the device mappings. Since we clear out the page tables for all - * mappings above VMALLOC_START, we will remove any debug device mappings. - * This means you have to be careful how you debug this function, or any - * called function. This means you can't use any function or debugging - * method which may touch any device, otherwise the kernel _will_ crash. + * mappings above VMALLOC_START, except early fixmap, we might remove debug + * device mappings. This means earlycon can be used to debug this function + * Any other function or debugging method which may touch any device _will_ + * crash the kernel. */ static void __init devicemaps_init(const struct machine_desc *mdesc) { @@ -1231,7 +1279,10 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) early_trap_init(vectors); - for (addr = VMALLOC_START; addr; addr += PMD_SIZE) + /* + * Clear page table except top pmd used by early fixmaps + */ + for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); /* @@ -1313,6 +1364,9 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) */ local_flush_tlb_all(); flush_cache_all(); + + /* Enable asynchronous aborts */ + early_abt_enable(); } static void __init kmap_init(void) @@ -1387,127 +1441,131 @@ static void __init map_lowmem(void) } } -#ifdef CONFIG_ARM_LPAE +#ifdef CONFIG_ARM_PV_FIXUP +extern unsigned long __atags_pointer; +typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +pgtables_remap lpae_pgtables_remap_asm; + /* * early_paging_init() recreates boot time page table setup, allowing machines * to switch over to a high (>4G) address space on LPAE systems */ -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) +void __init early_paging_init(const struct machine_desc *mdesc) { - pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags; - unsigned long map_start, map_end; - pgd_t *pgd0, *pgdk; - pud_t *pud0, *pudk, *pud_start; - pmd_t *pmd0, *pmdk; - phys_addr_t phys; - int i; + pgtables_remap *lpae_pgtables_remap; + unsigned long pa_pgd; + unsigned int cr, ttbcr; + long long offset; + void *boot_data; - if (!(mdesc->init_meminfo)) + if (!mdesc->pv_fixup) return; - /* remap kernel code and data */ - map_start = init_mm.start_code & PMD_MASK; - map_end = ALIGN(init_mm.brk, PMD_SIZE); + offset = mdesc->pv_fixup(); + if (offset == 0) + return; - /* get a handle on things... */ - pgd0 = pgd_offset_k(0); - pud_start = pud0 = pud_offset(pgd0, 0); - pmd0 = pmd_offset(pud0, 0); + /* + * Get the address of the remap function in the 1:1 identity + * mapping setup by the early page table assembly code. We + * must get this prior to the pv update. The following barrier + * ensures that this is complete before we fixup any P:V offsets. + */ + lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); + pa_pgd = __pa(swapper_pg_dir); + boot_data = __va(__atags_pointer); + barrier(); - pgdk = pgd_offset_k(map_start); - pudk = pud_offset(pgdk, map_start); - pmdk = pmd_offset(pudk, map_start); + pr_info("Switching physical address space to 0x%08llx\n", + (u64)PHYS_OFFSET + offset); - mdesc->init_meminfo(); + /* Re-set the phys pfn offset, and the pv offset */ + __pv_offset += offset; + __pv_phys_pfn_offset += PFN_DOWN(offset); /* Run the patch stub to update the constants */ fixup_pv_table(&__pv_table_begin, (&__pv_table_end - &__pv_table_begin) << 2); /* - * Cache cleaning operations for self-modifying code - * We should clean the entries by MVA but running a - * for loop over every pv_table entry pointer would - * just complicate the code. - */ - flush_cache_louis(); - dsb(ishst); - isb(); - - /* - * FIXME: This code is not architecturally compliant: we modify - * the mappings in-place, indeed while they are in use by this - * very same code. This may lead to unpredictable behaviour of - * the CPU. - * - * Even modifying the mappings in a separate page table does - * not resolve this. - * - * The architecture strongly recommends that when a mapping is - * changed, that it is changed by first going via an invalid - * mapping and back to the new mapping. This is to ensure that - * no TLB conflicts (caused by the TLB having more than one TLB - * entry match a translation) can occur. However, doing that - * here will result in unmapping the code we are running. - */ - pr_warn("WARNING: unsafe modification of in-place page tables - tainting kernel\n"); - add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); - - /* - * Remap level 1 table. This changes the physical addresses - * used to refer to the level 2 page tables to the high - * physical address alias, leaving everything else the same. - */ - for (i = 0; i < PTRS_PER_PGD; pud0++, i++) { - set_pud(pud0, - __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); - pmd0 += PTRS_PER_PMD; - } - - /* - * Remap the level 2 table, pointing the mappings at the high - * physical address alias of these pages. - */ - phys = __pa(map_start); - do { - *pmdk++ = __pmd(phys | pmdprot); - phys += PMD_SIZE; - } while (phys < map_end); - - /* - * Ensure that the above updates are flushed out of the cache. - * This is not strictly correct; on a system where the caches - * are coherent with each other, but the MMU page table walks - * may not be coherent, flush_cache_all() may be a no-op, and - * this will fail. + * We changing not only the virtual to physical mapping, but also + * the physical addresses used to access memory. We need to flush + * all levels of cache in the system with caching disabled to + * ensure that all data is written back, and nothing is prefetched + * into the caches. We also need to prevent the TLB walkers + * allocating into the caches too. Note that this is ARMv7 LPAE + * specific. */ + cr = get_cr(); + set_cr(cr & ~(CR_I | CR_C)); + asm("mrc p15, 0, %0, c2, c0, 2" : "=r" (ttbcr)); + asm volatile("mcr p15, 0, %0, c2, c0, 2" + : : "r" (ttbcr & ~(3 << 8 | 3 << 10))); flush_cache_all(); /* - * Re-write the TTBR values to point them at the high physical - * alias of the page tables. We expect __va() will work on - * cpu_get_pgd(), which returns the value of TTBR0. + * Fixup the page tables - this must be in the idmap region as + * we need to disable the MMU to do this safely, and hence it + * needs to be assembly. It's fairly simple, as we're using the + * temporary tables setup by the initial assembly code. */ - cpu_switch_mm(pgd0, &init_mm); - cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET); + lpae_pgtables_remap(offset, pa_pgd, boot_data); - /* Finally flush any stale TLB values. */ - local_flush_bp_all(); - local_flush_tlb_all(); + /* Re-enable the caches and cacheable TLB walks */ + asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); + set_cr(cr); } #else -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) +void __init early_paging_init(const struct machine_desc *mdesc) { - if (mdesc->init_meminfo) - mdesc->init_meminfo(); + long long offset; + + if (!mdesc->pv_fixup) + return; + + offset = mdesc->pv_fixup(); + if (offset == 0) + return; + + pr_crit("Physical address space modification is only to support Keystone2.\n"); + pr_crit("Please enable ARM_LPAE and ARM_PATCH_PHYS_VIRT support to use this\n"); + pr_crit("feature. Your kernel may crash now, have a good day.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); } #endif +static void __init early_fixmap_shutdown(void) +{ + int i; + unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1); + + pte_offset_fixmap = pte_offset_late_fixmap; + pmd_clear(fixmap_pmd(va)); + local_flush_tlb_kernel_page(va); + + for (i = 0; i < __end_of_permanent_fixed_addresses; i++) { + pte_t *pte; + struct map_desc map; + + map.virtual = fix_to_virt(i); + pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual); + + /* Only i/o device mappings are supported ATM */ + if (pte_none(*pte) || + (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED) + continue; + + map.pfn = pte_pfn(*pte); + map.type = MT_DEVICE; + map.length = PAGE_SIZE; + + create_mapping(&map); + } +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -1519,7 +1577,9 @@ void __init paging_init(const struct machine_desc *mdesc) build_mem_type_table(); prepare_page_table(); map_lowmem(); + memblock_set_current_limit(arm_lowmem_limit); dma_contiguous_remap(); + early_fixmap_shutdown(); devicemaps_init(mdesc); kmap_init(); tcm_init(); diff --git a/kernel/arch/arm/mm/nommu.c b/kernel/arch/arm/mm/nommu.c index a014dfacd..1dd10936d 100644 --- a/kernel/arch/arm/mm/nommu.c +++ b/kernel/arch/arm/mm/nommu.c @@ -303,15 +303,6 @@ void __init sanity_check_meminfo(void) memblock_set_current_limit(end); } -/* - * early_paging_init() recreates boot time page table setup, allowing machines - * to switch over to a high (>4G) address space on LPAE systems - */ -void __init early_paging_init(const struct machine_desc *mdesc, - struct proc_info_list *procinfo) -{ -} - /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -360,30 +351,43 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, - size_t size, unsigned int mtype, void *caller) +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - return __arm_ioremap_pfn(pfn, offset, size, mtype); + return (void __iomem *)phys_addr; } -void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, - unsigned int mtype) +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) { - return (void __iomem *)phys_addr; + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap); -void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); -void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iounmap(volatile void __iomem *addr) { - return __arm_ioremap(phys_addr, size, mtype); } +EXPORT_SYMBOL(__iounmap); void (*arch_iounmap)(volatile void __iomem *); -void __arm_iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *addr) { } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); diff --git a/kernel/arch/arm/mm/pgd.c b/kernel/arch/arm/mm/pgd.c index a3681f11d..e683db1b9 100644 --- a/kernel/arch/arm/mm/pgd.c +++ b/kernel/arch/arm/mm/pgd.c @@ -84,6 +84,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (!new_pte) goto no_pte; +#ifndef CONFIG_ARM_LPAE + /* + * Modify the PTE pointer to have the correct domain. This + * needs to be the vectors domain to avoid the low vectors + * being unmapped. + */ + pmd_val(*new_pmd) &= ~PMD_DOMAIN_MASK; + pmd_val(*new_pmd) |= PMD_DOMAIN(DOMAIN_VECTORS); +#endif + init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); diff --git a/kernel/arch/arm/mm/proc-v7-2level.S b/kernel/arch/arm/mm/proc-v7-2level.S index 10405b8d3..c6141a543 100644 --- a/kernel/arch/arm/mm/proc-v7-2level.S +++ b/kernel/arch/arm/mm/proc-v7-2level.S @@ -36,14 +36,16 @@ * * It is assumed that: * - we are not using split page tables + * + * Note that we always need to flush BTAC/BTB if IBE is set + * even on Cortex-A8 revisions not affected by 430973. + * If IBE is not set, the flush BTAC/BTB won't do anything. */ ENTRY(cpu_ca8_switch_mm) #ifdef CONFIG_MMU mov r2, #0 -#ifdef CONFIG_ARM_ERRATA_430973 mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB #endif -#endif ENTRY(cpu_v7_switch_mm) #ifdef CONFIG_MMU mmid r1, r1 @ get mm->context.id @@ -148,10 +150,10 @@ ENDPROC(cpu_v7_set_pte_ext) * Macro for setting up the TTBRx and TTBCR registers. * - \ttb0 and \ttb1 updated with the corresponding flags. */ - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp mcr p15, 0, \zero, c2, c0, 2 @ TTB control register - ALT_SMP(orr \ttbr0, \ttbr0, #TTB_FLAGS_SMP) - ALT_UP(orr \ttbr0, \ttbr0, #TTB_FLAGS_UP) + ALT_SMP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_SMP) + ALT_UP(orr \ttbr0l, \ttbr0l, #TTB_FLAGS_UP) ALT_SMP(orr \ttbr1, \ttbr1, #TTB_FLAGS_SMP) ALT_UP(orr \ttbr1, \ttbr1, #TTB_FLAGS_UP) mcr p15, 0, \ttbr1, c2, c0, 1 @ load TTB1 diff --git a/kernel/arch/arm/mm/proc-v7-3level.S b/kernel/arch/arm/mm/proc-v7-3level.S index d3daed0ae..5e5720e8b 100644 --- a/kernel/arch/arm/mm/proc-v7-3level.S +++ b/kernel/arch/arm/mm/proc-v7-3level.S @@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext) * Macro for setting up the TTBRx and TTBCR registers. * - \ttbr1 updated. */ - .macro v7_ttb_setup, zero, ttbr0, ttbr1, tmp + .macro v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp ldr \tmp, =swapper_pg_dir @ swapper_pg_dir virtual address - mov \tmp, \tmp, lsr #ARCH_PGD_SHIFT - cmp \ttbr1, \tmp @ PHYS_OFFSET > PAGE_OFFSET? - mrc p15, 0, \tmp, c2, c0, 2 @ TTB control register + cmp \ttbr1, \tmp, lsr #12 @ PHYS_OFFSET > PAGE_OFFSET? + mrc p15, 0, \tmp, c2, c0, 2 @ TTB control egister orr \tmp, \tmp, #TTB_EAE ALT_SMP(orr \tmp, \tmp, #TTB_FLAGS_SMP) ALT_UP(orr \tmp, \tmp, #TTB_FLAGS_UP) @@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext) */ orrls \tmp, \tmp, #TTBR1_SIZE @ TTBCR.T1SZ mcr p15, 0, \tmp, c2, c0, 2 @ TTBCR - mov \tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits - mov \ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT @ lower bits + mov \tmp, \ttbr1, lsr #20 + mov \ttbr1, \ttbr1, lsl #12 addls \ttbr1, \ttbr1, #TTBR1_OFFSET mcrr p15, 1, \ttbr1, \tmp, c2 @ load TTBR1 - mov \tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT) @ upper bits - mov \ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT @ lower bits - mcrr p15, 0, \ttbr0, \tmp, c2 @ load TTBR0 .endm /* diff --git a/kernel/arch/arm/mm/proc-v7.S b/kernel/arch/arm/mm/proc-v7.S index 7911f14c2..8e1ea433c 100644 --- a/kernel/arch/arm/mm/proc-v7.S +++ b/kernel/arch/arm/mm/proc-v7.S @@ -95,7 +95,7 @@ ENDPROC(cpu_v7_dcache_clean_area) .equ cpu_v7_suspend_size, 4 * 9 #ifdef CONFIG_ARM_CPU_SUSPEND ENTRY(cpu_v7_do_suspend) - stmfd sp!, {r4 - r10, lr} + stmfd sp!, {r4 - r11, lr} mrc p15, 0, r4, c13, c0, 0 @ FCSE/PID mrc p15, 0, r5, c13, c0, 3 @ User r/o thread ID stmia r0!, {r4 - r5} @@ -112,7 +112,7 @@ ENTRY(cpu_v7_do_suspend) mrc p15, 0, r9, c1, c0, 1 @ Auxiliary control register mrc p15, 0, r10, c1, c0, 2 @ Co-processor access control stmia r0, {r5 - r11} - ldmfd sp!, {r4 - r10, pc} + ldmfd sp!, {r4 - r11, pc} ENDPROC(cpu_v7_do_suspend) ENTRY(cpu_v7_do_resume) @@ -252,6 +252,12 @@ ENDPROC(cpu_pj4b_do_resume) * Initialise TLB, Caches, and MMU state ready to switch the MMU * on. Return in r0 the new CP15 C1 control register setting. * + * r1, r2, r4, r5, r9, r13 must be preserved - r13 is not a stack + * r4: TTBR0 (low word) + * r5: TTBR0 (high word if LPAE) + * r8: TTBR1 + * r9: Main ID register + * * This should be able to cover all ARMv7 cores. * * It is assumed that: @@ -282,6 +288,78 @@ __v7_ca17mp_setup: #endif b __v7_setup_cont +/* + * Errata: + * r0, r10 available for use + * r1, r2, r4, r5, r9, r13: must be preserved + * r3: contains MIDR rX number in bits 23-20 + * r6: contains MIDR rXpY as 8-bit XY number + * r9: MIDR + */ +__ca8_errata: +#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) + teq r3, #0x00100000 @ only present in r1p* + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 6) @ set IBE to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_458693 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 0, r0, c1, c0, 1 @ read aux control register + orreq r0, r0, #(1 << 5) @ set L1NEON to 1 + orreq r0, r0, #(1 << 9) @ set PLDNOP to 1 + mcreq p15, 0, r0, c1, c0, 1 @ write aux control register +#endif +#ifdef CONFIG_ARM_ERRATA_460075 + teq r6, #0x20 @ only present in r2p0 + mrceq p15, 1, r0, c9, c0, 2 @ read L2 cache aux ctrl register + tsteq r0, #1 << 22 + orreq r0, r0, #(1 << 22) @ set the Write Allocate disable bit + mcreq p15, 1, r0, c9, c0, 2 @ write the L2 cache aux ctrl register +#endif + b __errata_finish + +__ca9_errata: +#ifdef CONFIG_ARM_ERRATA_742230 + cmp r6, #0x22 @ only present up to r2p2 + mrcle p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrle r0, r0, #1 << 4 @ set bit #4 + mcrle p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_742231 + teq r6, #0x20 @ present in r2p0 + teqne r6, #0x21 @ present in r2p1 + teqne r6, #0x22 @ present in r2p2 + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 12 @ set bit #12 + orreq r0, r0, #1 << 22 @ set bit #22 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#ifdef CONFIG_ARM_ERRATA_743622 + teq r3, #0x00200000 @ only present in r2p* + mrceq p15, 0, r0, c15, c0, 1 @ read diagnostic register + orreq r0, r0, #1 << 6 @ set bit #6 + mcreq p15, 0, r0, c15, c0, 1 @ write diagnostic register +#endif +#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) + ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 + ALT_UP_B(1f) + mrclt p15, 0, r0, c15, c0, 1 @ read diagnostic register + orrlt r0, r0, #1 << 11 @ set bit #11 + mcrlt p15, 0, r0, c15, c0, 1 @ write diagnostic register +1: +#endif + b __errata_finish + +__ca15_errata: +#ifdef CONFIG_ARM_ERRATA_773022 + cmp r6, #0x4 @ only present up to r0p4 + mrcle p15, 0, r0, c1, c0, 1 @ read aux control register + orrle r0, r0, #1 << 1 @ disable loop buffer + mcrle p15, 0, r0, c1, c0, 1 @ write aux control register +#endif + b __errata_finish + __v7_pj4b_setup: #ifdef CONFIG_CPU_PJ4B @@ -343,96 +421,38 @@ __v7_setup: ldmia r12, {r0-r5, lr} __v7_setup_cont: - mrc p15, 0, r0, c0, c0, 0 @ read main ID register - and r10, r0, #0xff000000 @ ARM? - teq r10, #0x41000000 - bne 3f - and r5, r0, #0x00f00000 @ variant - and r6, r0, #0x0000000f @ revision - orr r6, r6, r5, lsr #20-4 @ combine variant and revision - ubfx r0, r0, #4, #12 @ primary part number + and r0, r9, #0xff000000 @ ARM? + teq r0, #0x41000000 + bne __errata_finish + and r3, r9, #0x00f00000 @ variant + and r6, r9, #0x0000000f @ revision + orr r6, r6, r3, lsr #20-4 @ combine variant and revision + ubfx r0, r9, #4, #12 @ primary part number /* Cortex-A8 Errata */ ldr r10, =0x00000c08 @ Cortex-A8 primary part number teq r0, r10 - bne 2f -#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM) - - teq r5, #0x00100000 @ only present in r1p* - mrceq p15, 0, r10, c1, c0, 1 @ read aux control register - orreq r10, r10, #(1 << 6) @ set IBE to 1 - mcreq p15, 0, r10, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_458693 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 0, r10, c1, c0, 1 @ read aux control register - orreq r10, r10, #(1 << 5) @ set L1NEON to 1 - orreq r10, r10, #(1 << 9) @ set PLDNOP to 1 - mcreq p15, 0, r10, c1, c0, 1 @ write aux control register -#endif -#ifdef CONFIG_ARM_ERRATA_460075 - teq r6, #0x20 @ only present in r2p0 - mrceq p15, 1, r10, c9, c0, 2 @ read L2 cache aux ctrl register - tsteq r10, #1 << 22 - orreq r10, r10, #(1 << 22) @ set the Write Allocate disable bit - mcreq p15, 1, r10, c9, c0, 2 @ write the L2 cache aux ctrl register -#endif - b 3f + beq __ca8_errata /* Cortex-A9 Errata */ -2: ldr r10, =0x00000c09 @ Cortex-A9 primary part number + ldr r10, =0x00000c09 @ Cortex-A9 primary part number teq r0, r10 - bne 3f -#ifdef CONFIG_ARM_ERRATA_742230 - cmp r6, #0x22 @ only present up to r2p2 - mrcle p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrle r10, r10, #1 << 4 @ set bit #4 - mcrle p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_742231 - teq r6, #0x20 @ present in r2p0 - teqne r6, #0x21 @ present in r2p1 - teqne r6, #0x22 @ present in r2p2 - mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register - orreq r10, r10, #1 << 12 @ set bit #12 - orreq r10, r10, #1 << 22 @ set bit #22 - mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#ifdef CONFIG_ARM_ERRATA_743622 - teq r5, #0x00200000 @ only present in r2p* - mrceq p15, 0, r10, c15, c0, 1 @ read diagnostic register - orreq r10, r10, #1 << 6 @ set bit #6 - mcreq p15, 0, r10, c15, c0, 1 @ write diagnostic register -#endif -#if defined(CONFIG_ARM_ERRATA_751472) && defined(CONFIG_SMP) - ALT_SMP(cmp r6, #0x30) @ present prior to r3p0 - ALT_UP_B(1f) - mrclt p15, 0, r10, c15, c0, 1 @ read diagnostic register - orrlt r10, r10, #1 << 11 @ set bit #11 - mcrlt p15, 0, r10, c15, c0, 1 @ write diagnostic register -1: -#endif + beq __ca9_errata /* Cortex-A15 Errata */ -3: ldr r10, =0x00000c0f @ Cortex-A15 primary part number + ldr r10, =0x00000c0f @ Cortex-A15 primary part number teq r0, r10 - bne 4f + beq __ca15_errata -#ifdef CONFIG_ARM_ERRATA_773022 - cmp r6, #0x4 @ only present up to r0p4 - mrcle p15, 0, r10, c1, c0, 1 @ read aux control register - orrle r10, r10, #1 << 1 @ disable loop buffer - mcrle p15, 0, r10, c1, c0, 1 @ write aux control register -#endif - -4: mov r10, #0 +__errata_finish: + mov r10, #0 mcr p15, 0, r10, c7, c5, 0 @ I+BTB cache invalidate #ifdef CONFIG_MMU mcr p15, 0, r10, c8, c7, 0 @ invalidate I + D TLBs - v7_ttb_setup r10, r4, r8, r5 @ TTBCR, TTBRx setup - ldr r5, =PRRR @ PRRR + v7_ttb_setup r10, r4, r5, r8, r3 @ TTBCR, TTBRx setup + ldr r3, =PRRR @ PRRR ldr r6, =NMRR @ NMRR - mcr p15, 0, r5, c10, c2, 0 @ write PRRR + mcr p15, 0, r3, c10, c2, 0 @ write PRRR mcr p15, 0, r6, c10, c2, 1 @ write NMRR #endif dsb @ Complete invalidations @@ -441,22 +461,22 @@ __v7_setup_cont: and r0, r0, #(0xf << 12) @ ThumbEE enabled field teq r0, #(1 << 12) @ check if ThumbEE is present bne 1f - mov r5, #0 - mcr p14, 6, r5, c1, c0, 0 @ Initialize TEEHBR to 0 + mov r3, #0 + mcr p14, 6, r3, c1, c0, 0 @ Initialize TEEHBR to 0 mrc p14, 6, r0, c0, c0, 0 @ load TEECR orr r0, r0, #1 @ set the 1st bit in order to mcr p14, 6, r0, c0, c0, 0 @ stop userspace TEEHBR access 1: #endif - adr r5, v7_crval - ldmia r5, {r5, r6} + adr r3, v7_crval + ldmia r3, {r3, r6} ARM_BE8(orr r6, r6, #1 << 25) @ big-endian page tables #ifdef CONFIG_SWP_EMULATE - orr r5, r5, #(1 << 10) @ set SW bit in "clear" + orr r3, r3, #(1 << 10) @ set SW bit in "clear" bic r6, r6, #(1 << 10) @ clear it in "mmuset" #endif mrc p15, 0, r0, c1, c0, 0 @ read control register - bic r0, r0, r5 @ clear bits them + bic r0, r0, r3 @ clear bits them orr r0, r0, r6 @ set them THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions ret lr @ return to head.S:__ret diff --git a/kernel/arch/arm/mm/proc-v7m.S b/kernel/arch/arm/mm/proc-v7m.S index e08e1f2ba..67d920907 100644 --- a/kernel/arch/arm/mm/proc-v7m.S +++ b/kernel/arch/arm/mm/proc-v7m.S @@ -98,7 +98,7 @@ __v7m_setup: str r5, [r0, V7M_SCB_SHPR3] @ set PendSV priority @ SVC to run the kernel in this mode - adr r1, BSYM(1f) + badr r1, 1f ldr r5, [r12, #11 * 4] @ read the SVC vector entry str r1, [r12, #11 * 4] @ write the temporary SVC vector entry mov r6, lr @ save LR diff --git a/kernel/arch/arm/mm/pv-fixup-asm.S b/kernel/arch/arm/mm/pv-fixup-asm.S new file mode 100644 index 000000000..1867f3e43 --- /dev/null +++ b/kernel/arch/arm/mm/pv-fixup-asm.S @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This assembly is required to safely remap the physical address space + * for Keystone 2 + */ +#include +#include +#include +#include +#include + + .section ".idmap.text", "ax" + +#define L1_ORDER 3 +#define L2_ORDER 3 + +ENTRY(lpae_pgtables_remap_asm) + stmfd sp!, {r4-r8, lr} + + mrc p15, 0, r8, c1, c0, 0 @ read control reg + bic ip, r8, #CR_M @ disable caches and MMU + mcr p15, 0, ip, c1, c0, 0 + dsb + isb + + /* Update level 2 entries covering the kernel */ + ldr r6, =(_end - 1) + add r7, r2, #0x1000 + add r6, r7, r6, lsr #SECTION_SHIFT - L2_ORDER + add r7, r7, #PAGE_OFFSET >> (SECTION_SHIFT - L2_ORDER) +1: ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L2_ORDER + cmp r7, r6 + bls 1b + + /* Update level 2 entries for the boot data */ + add r7, r2, #0x1000 + add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER + bic r7, r7, #(1 << L2_ORDER) - 1 + ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L2_ORDER + ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7] + + /* Update level 1 entries */ + mov r6, #4 + mov r7, r2 +2: ldrd r4, [r7] + adds r4, r4, r0 + adc r5, r5, r1 + strd r4, [r7], #1 << L1_ORDER + subs r6, r6, #1 + bne 2b + + mrrc p15, 0, r4, r5, c2 @ read TTBR0 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 0, r4, r5, c2 @ write back TTBR0 + mrrc p15, 1, r4, r5, c2 @ read TTBR1 + adds r4, r4, r0 @ update physical address + adc r5, r5, r1 + mcrr p15, 1, r4, r5, c2 @ write back TTBR1 + + dsb + + mov ip, #0 + mcr p15, 0, ip, c7, c5, 0 @ I+BTB cache invalidate + mcr p15, 0, ip, c8, c7, 0 @ local_flush_tlb_all() + dsb + isb + + mcr p15, 0, r8, c1, c0, 0 @ re-enable MMU + dsb + isb + + ldmfd sp!, {r4-r8, pc} +ENDPROC(lpae_pgtables_remap_asm) -- cgit 1.2.3-korg