summaryrefslogtreecommitdiffstats
path: root/kernel/arch/arc/mm
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/arch/arc/mm
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/arch/arc/mm')
-rw-r--r--kernel/arch/arc/mm/Makefile3
-rw-r--r--kernel/arch/arc/mm/cache.c (renamed from kernel/arch/arc/mm/cache_arc700.c)721
-rw-r--r--kernel/arch/arc/mm/dma.c46
-rw-r--r--kernel/arch/arc/mm/fault.c13
-rw-r--r--kernel/arch/arc/mm/highmem.c140
-rw-r--r--kernel/arch/arc/mm/init.c106
-rw-r--r--kernel/arch/arc/mm/tlb.c290
-rw-r--r--kernel/arch/arc/mm/tlbex.S99
8 files changed, 1073 insertions, 345 deletions
diff --git a/kernel/arch/arc/mm/Makefile b/kernel/arch/arc/mm/Makefile
index ac95cc239..3703a4969 100644
--- a/kernel/arch/arc/mm/Makefile
+++ b/kernel/arch/arc/mm/Makefile
@@ -7,4 +7,5 @@
#
obj-y := extable.o ioremap.o dma.o fault.o init.o
-obj-y += tlb.o tlbex.o cache_arc700.o mmap.o
+obj-y += tlb.o tlbex.o cache.o mmap.o
+obj-$(CONFIG_HIGHMEM) += highmem.o
diff --git a/kernel/arch/arc/mm/cache_arc700.c b/kernel/arch/arc/mm/cache.c
index 12b2100db..ff7ff6cbb 100644
--- a/kernel/arch/arc/mm/cache_arc700.c
+++ b/kernel/arch/arc/mm/cache.c
@@ -1,64 +1,12 @@
/*
- * ARC700 VIPT Cache Management
+ * ARC Cache Management
*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
* Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs
- * -flush_cache_dup_mm (fork)
- * -likewise for flush_cache_mm (exit/execve)
- * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break)
- *
- * vineetg: Apr 2011
- * -Now that MMU can support larger pg sz (16K), the determiniation of
- * aliasing shd not be based on assumption of 8k pg
- *
- * vineetg: Mar 2011
- * -optimised version of flush_icache_range( ) for making I/D coherent
- * when vaddr is available (agnostic of num of aliases)
- *
- * vineetg: Mar 2011
- * -Added documentation about I-cache aliasing on ARC700 and the way it
- * was handled up until MMU V2.
- * -Spotted a three year old bug when killing the 4 aliases, which needs
- * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03}
- * instead of paddr | {0x00, 0x01, 0x10, 0x11}
- * (Rajesh you owe me one now)
- *
- * vineetg: Dec 2010
- * -Off-by-one error when computing num_of_lines to flush
- * This broke signal handling with bionic which uses synthetic sigret stub
- *
- * vineetg: Mar 2010
- * -GCC can't generate ZOL for core cache flush loops.
- * Conv them into iterations based as opposed to while (start < end) types
- *
- * Vineetg: July 2009
- * -In I-cache flush routine we used to chk for aliasing for every line INV.
- * Instead now we setup routines per cache geometry and invoke them
- * via function pointers.
- *
- * Vineetg: Jan 2009
- * -Cache Line flush routines used to flush an extra line beyond end addr
- * because check was while (end >= start) instead of (end > start)
- * =Some call sites had to work around by doing -1, -4 etc to end param
- * =Some callers didnt care. This was spec bad in case of INV routines
- * which would discard valid data (cause of the horrible ext2 bug
- * in ARC IDE driver)
- *
- * vineetg: June 11th 2008: Fixed flush_icache_range( )
- * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need
- * to be flushed, which it was not doing.
- * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API,
- * however ARC cache maintenance OPs require PHY addr. Thus need to do
- * vmalloc_to_phy.
- * -Also added optimisation there, that for range > PAGE SIZE we flush the
- * entire cache in one shot rather than line by line. For e.g. a module
- * with Code sz 600k, old code flushed 600k worth of cache (line-by-line),
- * while cache is only 16 or 32k.
*/
#include <linux/module.h>
@@ -73,9 +21,21 @@
#include <asm/cachectl.h>
#include <asm/setup.h>
+static int l2_line_sz;
+int ioc_exists;
+volatile int slc_enable = 1, ioc_enable = 1;
+
+void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop);
+
+void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_inv)(unsigned long start, unsigned long sz);
+void (*__dma_cache_wback)(unsigned long start, unsigned long sz);
+
char *arc_cache_mumbojumbo(int c, char *buf, int len)
{
int n = 0;
+ struct cpuinfo_arc_cache *p;
#define PR_CACHE(p, cfg, str) \
if (!(p)->ver) \
@@ -86,11 +46,24 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
(p)->sz_k, (p)->assoc, (p)->line_len, \
(p)->vipt ? "VIPT" : "PIPT", \
(p)->alias ? " aliasing" : "", \
- IS_ENABLED(cfg) ? "" : " (not used)");
+ IS_USED_CFG(cfg));
PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
+ if (!is_isa_arcv2())
+ return buf;
+
+ p = &cpuinfo_arc700[c].slc;
+ if (p->ver)
+ n += scnprintf(buf + n, len - n,
+ "SLC\t\t: %uK, %uB Line%s\n",
+ p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
+
+ if (ioc_exists)
+ n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n",
+ IS_DISABLED_RUN(ioc_enable));
+
return buf;
}
@@ -99,6 +72,40 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
* the cpuinfo structure for later use.
* No Validation done here, simply read/convert the BCRs
*/
+static void read_decode_cache_bcr_arcv2(int cpu)
+{
+ struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+ struct bcr_generic sbcr;
+
+ struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+ unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+ } slc_cfg;
+
+ struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+ unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+ } cbcr;
+
+ READ_BCR(ARC_REG_SLC_BCR, sbcr);
+ if (sbcr.ver) {
+ READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
+ p_slc->ver = sbcr.ver;
+ p_slc->sz_k = 128 << slc_cfg.sz;
+ l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+ }
+
+ READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
+ if (cbcr.c && ioc_enable)
+ ioc_exists = 1;
+}
+
void read_decode_cache_bcr(void)
{
struct cpuinfo_arc_cache *p_ic, *p_dc;
@@ -117,8 +124,13 @@ void read_decode_cache_bcr(void)
if (!ibcr.ver)
goto dc_chk;
- BUG_ON(ibcr.config != 3);
- p_ic->assoc = 2; /* Fixed to 2w set assoc */
+ if (ibcr.ver <= 3) {
+ BUG_ON(ibcr.config != 3);
+ p_ic->assoc = 2; /* Fixed to 2w set assoc */
+ } else if (ibcr.ver >= 4) {
+ p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+ }
+
p_ic->line_len = 8 << ibcr.line_len;
p_ic->sz_k = 1 << (ibcr.sz - 1);
p_ic->ver = ibcr.ver;
@@ -130,94 +142,135 @@ dc_chk:
READ_BCR(ARC_REG_DC_BCR, dbcr);
if (!dbcr.ver)
- return;
+ goto slc_chk;
+
+ if (dbcr.ver <= 3) {
+ BUG_ON(dbcr.config != 2);
+ p_dc->assoc = 4; /* Fixed to 4w set assoc */
+ p_dc->vipt = 1;
+ p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
+ } else if (dbcr.ver >= 4) {
+ p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
+ p_dc->vipt = 0;
+ p_dc->alias = 0; /* PIPT so can't VIPT alias */
+ }
- BUG_ON(dbcr.config != 2);
- p_dc->assoc = 4; /* Fixed to 4w set assoc */
p_dc->line_len = 16 << dbcr.line_len;
p_dc->sz_k = 1 << (dbcr.sz - 1);
p_dc->ver = dbcr.ver;
- p_dc->vipt = 1;
- p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
+
+slc_chk:
+ if (is_isa_arcv2())
+ read_decode_cache_bcr_arcv2(cpu);
}
/*
- * 1. Validate the Cache Geomtery (compile time config matches hardware)
- * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn)
- * (aliasing D-cache configurations are not supported YET)
- * 3. Enable the Caches, setup default flush mode for D-Cache
- * 3. Calculate the SHMLBA used by user space
+ * Line Operation on {I,D}-Cache
*/
-void arc_cache_init(void)
-{
- unsigned int __maybe_unused cpu = smp_processor_id();
- char str[256];
-
- printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
- if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
- struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+#define OP_INV 0x1
+#define OP_FLUSH 0x2
+#define OP_FLUSH_N_INV 0x3
+#define OP_INV_IC 0x4
- if (!ic->ver)
- panic("cache support enabled but non-existent cache\n");
+/*
+ * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3)
+ *
+ * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
+ * The orig Cache Management Module "CDU" only required paddr to invalidate a
+ * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
+ * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
+ * the exact same line.
+ *
+ * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
+ * paddr alone could not be used to correctly index the cache.
+ *
+ * ------------------
+ * MMU v1/v2 (Fixed Page Size 8k)
+ * ------------------
+ * The solution was to provide CDU with these additonal vaddr bits. These
+ * would be bits [x:13], x would depend on cache-geometry, 13 comes from
+ * standard page size of 8k.
+ * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
+ * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
+ * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
+ * represent the offset within cache-line. The adv of using this "clumsy"
+ * interface for additional info was no new reg was needed in CDU programming
+ * model.
+ *
+ * 17:13 represented the max num of bits passable, actual bits needed were
+ * fewer, based on the num-of-aliases possible.
+ * -for 2 alias possibility, only bit 13 needed (32K cache)
+ * -for 4 alias possibility, bits 14:13 needed (64K cache)
+ *
+ * ------------------
+ * MMU v3
+ * ------------------
+ * This ver of MMU supports variable page sizes (1k-16k): although Linux will
+ * only support 8k (default), 16k and 4k.
+ * However from hardware perspective, smaller page sizes aggrevate aliasing
+ * meaning more vaddr bits needed to disambiguate the cache-line-op ;
+ * the existing scheme of piggybacking won't work for certain configurations.
+ * Two new registers IC_PTAG and DC_PTAG inttoduced.
+ * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
+ */
- if (ic->line_len != L1_CACHE_BYTES)
- panic("ICache line [%d] != kernel Config [%d]",
- ic->line_len, L1_CACHE_BYTES);
+static inline
+void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
+{
+ unsigned int aux_cmd;
+ int num_lines;
+ const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
- if (ic->ver != CONFIG_ARC_MMU_VER)
- panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
- ic->ver, CONFIG_ARC_MMU_VER);
+ if (op == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ } else {
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
}
- if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
- struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
- int handled;
-
- if (!dc->ver)
- panic("cache support enabled but non-existent cache\n");
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page sized since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!full_page) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ vaddr &= CACHE_LINE_MASK;
+ }
- if (dc->line_len != L1_CACHE_BYTES)
- panic("DCache line [%d] != kernel Config [%d]",
- dc->line_len, L1_CACHE_BYTES);
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
- /* check for D-Cache aliasing */
- handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
+ /* MMUv2 and before: paddr contains stuffed vaddrs bits */
+ paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
- if (dc->alias && !handled)
- panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- else if (!dc->alias && handled)
- panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ while (num_lines-- > 0) {
+ write_aux_reg(aux_cmd, paddr);
+ paddr += L1_CACHE_BYTES;
}
}
-#define OP_INV 0x1
-#define OP_FLUSH 0x2
-#define OP_FLUSH_N_INV 0x3
-#define OP_INV_IC 0x4
-
/*
- * Common Helper for Line Operations on {I,D}-Cache
+ * For ARC700 MMUv3 I-cache and D-cache flushes
+ * Also reused for HS38 aliasing I-cache configuration
*/
-static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
- unsigned long sz, const int cacheop)
+static inline
+void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
{
unsigned int aux_cmd, aux_tag;
int num_lines;
- const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+ const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE;
- if (cacheop == OP_INV_IC) {
+ if (op == OP_INV_IC) {
aux_cmd = ARC_REG_IC_IVIL;
-#if (CONFIG_ARC_MMU_VER > 2)
aux_tag = ARC_REG_IC_PTAG;
-#endif
- }
- else {
- /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
- aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
-#if (CONFIG_ARC_MMU_VER > 2)
+ } else {
+ aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
aux_tag = ARC_REG_DC_PTAG;
-#endif
}
/* Ensure we properly floor/ceil the non-line aligned/sized requests
@@ -226,177 +279,201 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr,
* -@paddr will be cache-line aligned already (being page aligned)
* -@sz will be integral multiple of line size (being page sized).
*/
- if (!full_page_op) {
+ if (!full_page) {
sz += paddr & ~CACHE_LINE_MASK;
paddr &= CACHE_LINE_MASK;
vaddr &= CACHE_LINE_MASK;
}
-
num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
-#if (CONFIG_ARC_MMU_VER <= 2)
- /* MMUv2 and before: paddr contains stuffed vaddrs bits */
- paddr |= (vaddr >> PAGE_SHIFT) & 0x1F;
-#else
- /* if V-P const for loop, PTAG can be written once outside loop */
- if (full_page_op)
+ /*
+ * MMUv3, cache ops require paddr in PTAG reg
+ * if V-P const for loop, PTAG can be written once outside loop
+ */
+ if (full_page)
write_aux_reg(aux_tag, paddr);
-#endif
+
+ /*
+ * This is technically for MMU v4, using the MMU v3 programming model
+ * Special work for HS38 aliasing I-cache configuratino with PAE40
+ * - upper 8 bits of paddr need to be written into PTAG_HI
+ * - (and needs to be written before the lower 32 bits)
+ * Note that PTAG_HI is hoisted outside the line loop
+ */
+ if (is_pae40_enabled() && op == OP_INV_IC)
+ write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
while (num_lines-- > 0) {
-#if (CONFIG_ARC_MMU_VER > 2)
- /* MMUv3, cache ops require paddr seperately */
- if (!full_page_op) {
+ if (!full_page) {
write_aux_reg(aux_tag, paddr);
paddr += L1_CACHE_BYTES;
}
write_aux_reg(aux_cmd, vaddr);
vaddr += L1_CACHE_BYTES;
-#else
+ }
+}
+
+/*
+ * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT
+ * Here's how cache ops are implemented
+ *
+ * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL)
+ * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL)
+ * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG
+ * respectively, similar to MMU v3 programming model, hence
+ * __cache_line_loop_v3() is used)
+ *
+ * If PAE40 is enabled, independent of aliasing considerations, the higher bits
+ * needs to be written into PTAG_HI
+ */
+static inline
+void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr,
+ unsigned long sz, const int cacheop)
+{
+ unsigned int aux_cmd;
+ int num_lines;
+ const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE;
+
+ if (cacheop == OP_INV_IC) {
+ aux_cmd = ARC_REG_IC_IVIL;
+ } else {
+ /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */
+ aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL;
+ }
+
+ /* Ensure we properly floor/ceil the non-line aligned/sized requests
+ * and have @paddr - aligned to cache line and integral @num_lines.
+ * This however can be avoided for page sized since:
+ * -@paddr will be cache-line aligned already (being page aligned)
+ * -@sz will be integral multiple of line size (being page sized).
+ */
+ if (!full_page_op) {
+ sz += paddr & ~CACHE_LINE_MASK;
+ paddr &= CACHE_LINE_MASK;
+ }
+
+ num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES);
+
+ /*
+ * For HS38 PAE40 configuration
+ * - upper 8 bits of paddr need to be written into PTAG_HI
+ * - (and needs to be written before the lower 32 bits)
+ */
+ if (is_pae40_enabled()) {
+ if (cacheop == OP_INV_IC)
+ /*
+ * Non aliasing I-cache in HS38,
+ * aliasing I-cache handled in __cache_line_loop_v3()
+ */
+ write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32);
+ else
+ write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32);
+ }
+
+ while (num_lines-- > 0) {
write_aux_reg(aux_cmd, paddr);
paddr += L1_CACHE_BYTES;
-#endif
}
}
+#if (CONFIG_ARC_MMU_VER < 3)
+#define __cache_line_loop __cache_line_loop_v2
+#elif (CONFIG_ARC_MMU_VER == 3)
+#define __cache_line_loop __cache_line_loop_v3
+#elif (CONFIG_ARC_MMU_VER > 3)
+#define __cache_line_loop __cache_line_loop_v4
+#endif
+
#ifdef CONFIG_ARC_HAS_DCACHE
/***************************************************************
* Machine specific helpers for Entire D-Cache or Per Line ops
*/
-static inline unsigned int __before_dc_op(const int op)
+static inline void __before_dc_op(const int op)
{
- unsigned int reg = reg;
-
if (op == OP_FLUSH_N_INV) {
/* Dcache provides 2 cmd: FLUSH or INV
* INV inturn has sub-modes: DISCARD or FLUSH-BEFORE
* flush-n-inv is achieved by INV cmd but with IM=1
* So toggle INV sub-mode depending on op request and default
*/
- reg = read_aux_reg(ARC_REG_DC_CTRL);
- write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH)
- ;
+ const unsigned int ctl = ARC_REG_DC_CTRL;
+ write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH);
}
-
- return reg;
}
-static inline void __after_dc_op(const int op, unsigned int reg)
+static inline void __after_dc_op(const int op)
{
- if (op & OP_FLUSH) /* flush / flush-n-inv both wait */
- while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS);
+ if (op & OP_FLUSH) {
+ const unsigned int ctl = ARC_REG_DC_CTRL;
+ unsigned int reg;
+
+ /* flush / flush-n-inv both wait */
+ while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS)
+ ;
- /* Switch back to default Invalidate mode */
- if (op == OP_FLUSH_N_INV)
- write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH);
+ /* Switch back to default Invalidate mode */
+ if (op == OP_FLUSH_N_INV)
+ write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH);
+ }
}
/*
* Operation on Entire D-Cache
- * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
+ * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV}
* Note that constant propagation ensures all the checks are gone
* in generated code
*/
-static inline void __dc_entire_op(const int cacheop)
+static inline void __dc_entire_op(const int op)
{
- unsigned int ctrl_reg;
int aux;
- ctrl_reg = __before_dc_op(cacheop);
+ __before_dc_op(op);
- if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */
+ if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */
aux = ARC_REG_DC_IVDC;
else
aux = ARC_REG_DC_FLSH;
write_aux_reg(aux, 0x1);
- __after_dc_op(cacheop, ctrl_reg);
+ __after_dc_op(op);
}
/* For kernel mappings cache operation: index is same as paddr */
#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op)
/*
- * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback)
+ * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback)
*/
-static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr,
- unsigned long sz, const int cacheop)
+static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr,
+ unsigned long sz, const int op)
{
unsigned long flags;
- unsigned int ctrl_reg;
local_irq_save(flags);
- ctrl_reg = __before_dc_op(cacheop);
+ __before_dc_op(op);
- __cache_line_loop(paddr, vaddr, sz, cacheop);
+ __cache_line_loop(paddr, vaddr, sz, op);
- __after_dc_op(cacheop, ctrl_reg);
+ __after_dc_op(op);
local_irq_restore(flags);
}
#else
-#define __dc_entire_op(cacheop)
-#define __dc_line_op(paddr, vaddr, sz, cacheop)
-#define __dc_line_op_k(paddr, sz, cacheop)
+#define __dc_entire_op(op)
+#define __dc_line_op(paddr, vaddr, sz, op)
+#define __dc_line_op_k(paddr, sz, op)
#endif /* CONFIG_ARC_HAS_DCACHE */
-
#ifdef CONFIG_ARC_HAS_ICACHE
-/*
- * I-Cache Aliasing in ARC700 VIPT caches
- *
- * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag.
- * The orig Cache Management Module "CDU" only required paddr to invalidate a
- * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry.
- * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching
- * the exact same line.
- *
- * However for larger Caches (way-size > page-size) - i.e. in Aliasing config,
- * paddr alone could not be used to correctly index the cache.
- *
- * ------------------
- * MMU v1/v2 (Fixed Page Size 8k)
- * ------------------
- * The solution was to provide CDU with these additonal vaddr bits. These
- * would be bits [x:13], x would depend on cache-geometry, 13 comes from
- * standard page size of 8k.
- * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits
- * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the
- * orig 5 bits of paddr were anyways ignored by CDU line ops, as they
- * represent the offset within cache-line. The adv of using this "clumsy"
- * interface for additional info was no new reg was needed in CDU programming
- * model.
- *
- * 17:13 represented the max num of bits passable, actual bits needed were
- * fewer, based on the num-of-aliases possible.
- * -for 2 alias possibility, only bit 13 needed (32K cache)
- * -for 4 alias possibility, bits 14:13 needed (64K cache)
- *
- * ------------------
- * MMU v3
- * ------------------
- * This ver of MMU supports variable page sizes (1k-16k): although Linux will
- * only support 8k (default), 16k and 4k.
- * However from hardware perspective, smaller page sizes aggrevate aliasing
- * meaning more vaddr bits needed to disambiguate the cache-line-op ;
- * the existing scheme of piggybacking won't work for certain configurations.
- * Two new registers IC_PTAG and DC_PTAG inttoduced.
- * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs
- */
-
-/***********************************************************
- * Machine specific helper for per line I-Cache invalidate.
- */
-
static inline void __ic_entire_inv(void)
{
write_aux_reg(ARC_REG_IC_IVIC, 1);
@@ -404,13 +481,13 @@ static inline void __ic_entire_inv(void)
}
static inline void
-__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
+__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz)
{
unsigned long flags;
local_irq_save(flags);
- __cache_line_loop(paddr, vaddr, sz, OP_INV_IC);
+ (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC);
local_irq_restore(flags);
}
@@ -421,7 +498,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr,
#else
struct ic_inv_args {
- unsigned long paddr, vaddr;
+ phys_addr_t paddr, vaddr;
int sz;
};
@@ -432,7 +509,7 @@ static void __ic_line_inv_vaddr_helper(void *info)
__ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz);
}
-static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
+static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz)
{
struct ic_inv_args ic_inv = {
@@ -453,6 +530,56 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr,
#endif /* CONFIG_ARC_HAS_ICACHE */
+noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op)
+{
+#ifdef CONFIG_ISA_ARCV2
+ /*
+ * SLC is shared between all cores and concurrent aux operations from
+ * multiple cores need to be serialized using a spinlock
+ * A concurrent operation can be silently ignored and/or the old/new
+ * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop
+ * below)
+ */
+ static DEFINE_SPINLOCK(lock);
+ unsigned long flags;
+ unsigned int ctrl;
+
+ spin_lock_irqsave(&lock, flags);
+
+ /*
+ * The Region Flush operation is specified by CTRL.RGN_OP[11..9]
+ * - b'000 (default) is Flush,
+ * - b'001 is Invalidate if CTRL.IM == 0
+ * - b'001 is Flush-n-Invalidate if CTRL.IM == 1
+ */
+ ctrl = read_aux_reg(ARC_REG_SLC_CTRL);
+
+ /* Don't rely on default value of IM bit */
+ if (!(op & OP_FLUSH)) /* i.e. OP_INV */
+ ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */
+ else
+ ctrl |= SLC_CTRL_IM;
+
+ if (op & OP_INV)
+ ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */
+ else
+ ctrl &= ~SLC_CTRL_RGN_OP_INV;
+
+ write_aux_reg(ARC_REG_SLC_CTRL, ctrl);
+
+ /*
+ * Lower bits are ignored, no need to clip
+ * END needs to be setup before START (latter triggers the operation)
+ * END can't be same as START, so add (l2_line_sz - 1) to sz
+ */
+ write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1));
+ write_aux_reg(ARC_REG_SLC_RGN_START, paddr);
+
+ while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+
+ spin_unlock_irqrestore(&lock, flags);
+#endif
+}
/***********************************************************
* Exported APIs
@@ -493,7 +620,7 @@ void flush_dcache_page(struct page *page)
} else if (page_mapped(page)) {
/* kernel reading from page with U-mapping */
- void *paddr = page_address(page);
+ phys_addr_t paddr = (unsigned long)page_address(page);
unsigned long vaddr = page->index << PAGE_CACHE_SHIFT;
if (addr_not_cache_congruent(paddr, vaddr))
@@ -502,22 +629,74 @@ void flush_dcache_page(struct page *page)
}
EXPORT_SYMBOL(flush_dcache_page);
+/*
+ * DMA ops for systems with L1 cache only
+ * Make memory coherent with L1 cache by flushing/invalidating L1 lines
+ */
+static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+}
-void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+static void __dma_cache_inv_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_l1(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with both L1 and L2 caches, but without IOC
+ * Both L1 and L2 lines need to be explicity flushed/invalidated
+ */
+static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz)
{
__dc_line_op_k(start, sz, OP_FLUSH_N_INV);
+ slc_op(start, sz, OP_FLUSH_N_INV);
+}
+
+static void __dma_cache_inv_slc(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_INV);
+ slc_op(start, sz, OP_INV);
+}
+
+static void __dma_cache_wback_slc(unsigned long start, unsigned long sz)
+{
+ __dc_line_op_k(start, sz, OP_FLUSH);
+ slc_op(start, sz, OP_FLUSH);
+}
+
+/*
+ * DMA ops for systems with IOC
+ * IOC hardware snoops all DMA traffic keeping the caches consistent with
+ * memory - eliding need for any explicit cache maintenance of DMA buffers
+ */
+static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {}
+static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {}
+
+/*
+ * Exported DMA API
+ */
+void dma_cache_wback_inv(unsigned long start, unsigned long sz)
+{
+ __dma_cache_wback_inv(start, sz);
}
EXPORT_SYMBOL(dma_cache_wback_inv);
void dma_cache_inv(unsigned long start, unsigned long sz)
{
- __dc_line_op_k(start, sz, OP_INV);
+ __dma_cache_inv(start, sz);
}
EXPORT_SYMBOL(dma_cache_inv);
void dma_cache_wback(unsigned long start, unsigned long sz)
{
- __dc_line_op_k(start, sz, OP_FLUSH);
+ __dma_cache_wback(start, sz);
}
EXPORT_SYMBOL(dma_cache_wback);
@@ -589,14 +768,14 @@ EXPORT_SYMBOL(flush_icache_range);
* builtin kernel page will not have any virtual mappings.
* kprobe on loadable module will be kernel vaddr.
*/
-void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len)
+void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
{
__dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV);
__ic_line_inv_vaddr(paddr, vaddr, len);
}
/* wrapper to compile time eliminate alignment checks in flush loop */
-void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
+void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
{
__ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
}
@@ -605,7 +784,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr)
* wrapper to clearout kernel or userspace mappings of a page
* For kernel mappings @vaddr == @paddr
*/
-void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr)
+void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
{
__dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
}
@@ -637,7 +816,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
u_vaddr &= PAGE_MASK;
- ___flush_dcache_page(paddr, u_vaddr);
+ __flush_dcache_page(paddr, u_vaddr);
if (vma->vm_flags & VM_EXEC)
__inv_icache_page(paddr, u_vaddr);
@@ -663,8 +842,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
void copy_user_highpage(struct page *to, struct page *from,
unsigned long u_vaddr, struct vm_area_struct *vma)
{
- void *kfrom = page_address(from);
- void *kto = page_address(to);
+ void *kfrom = kmap_atomic(from);
+ void *kto = kmap_atomic(to);
int clean_src_k_mappings = 0;
/*
@@ -674,9 +853,12 @@ void copy_user_highpage(struct page *to, struct page *from,
*
* Note that while @u_vaddr refers to DST page's userspace vaddr, it is
* equally valid for SRC page as well
+ *
+ * For !VIPT cache, all of this gets compiled out as
+ * addr_not_cache_congruent() is 0
*/
if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
- __flush_dcache_page(kfrom, u_vaddr);
+ __flush_dcache_page((unsigned long)kfrom, u_vaddr);
clean_src_k_mappings = 1;
}
@@ -697,11 +879,14 @@ void copy_user_highpage(struct page *to, struct page *from,
* sync the kernel mapping back to physical page
*/
if (clean_src_k_mappings) {
- __flush_dcache_page(kfrom, kfrom);
+ __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
set_bit(PG_dc_clean, &from->flags);
} else {
clear_bit(PG_dc_clean, &from->flags);
}
+
+ kunmap_atomic(kto);
+ kunmap_atomic(kfrom);
}
void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
@@ -721,3 +906,93 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags)
flush_cache_all();
return 0;
}
+
+void arc_cache_init(void)
+{
+ unsigned int __maybe_unused cpu = smp_processor_id();
+ char str[256];
+
+ printk(arc_cache_mumbojumbo(0, str, sizeof(str)));
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
+ struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+
+ if (!ic->ver)
+ panic("cache support enabled but non-existent cache\n");
+
+ if (ic->line_len != L1_CACHE_BYTES)
+ panic("ICache line [%d] != kernel Config [%d]",
+ ic->line_len, L1_CACHE_BYTES);
+
+ if (ic->ver != CONFIG_ARC_MMU_VER)
+ panic("Cache ver [%d] doesn't match MMU ver [%d]\n",
+ ic->ver, CONFIG_ARC_MMU_VER);
+
+ /*
+ * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG
+ * pair to provide vaddr/paddr respectively, just as in MMU v3
+ */
+ if (is_isa_arcv2() && ic->alias)
+ _cache_line_loop_ic_fn = __cache_line_loop_v3;
+ else
+ _cache_line_loop_ic_fn = __cache_line_loop;
+ }
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
+ struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+
+ if (!dc->ver)
+ panic("cache support enabled but non-existent cache\n");
+
+ if (dc->line_len != L1_CACHE_BYTES)
+ panic("DCache line [%d] != kernel Config [%d]",
+ dc->line_len, L1_CACHE_BYTES);
+
+ /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
+ if (is_isa_arcompact()) {
+ int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
+
+ if (dc->alias && !handled)
+ panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ else if (!dc->alias && handled)
+ panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
+ }
+ }
+
+ if (is_isa_arcv2() && l2_line_sz && !slc_enable) {
+
+ /* IM set : flush before invalidate */
+ write_aux_reg(ARC_REG_SLC_CTRL,
+ read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM);
+
+ write_aux_reg(ARC_REG_SLC_INVALIDATE, 1);
+
+ /* Important to wait for flush to complete */
+ while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY);
+ write_aux_reg(ARC_REG_SLC_CTRL,
+ read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE);
+ }
+
+ if (is_isa_arcv2() && ioc_exists) {
+ /* IO coherency base - 0x8z */
+ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000);
+ /* IO coherency aperture size - 512Mb: 0x8z-0xAz */
+ write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11);
+ /* Enable partial writes */
+ write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1);
+ /* Enable IO coherency */
+ write_aux_reg(ARC_REG_IO_COH_ENABLE, 1);
+
+ __dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
+ __dma_cache_inv = __dma_cache_inv_ioc;
+ __dma_cache_wback = __dma_cache_wback_ioc;
+ } else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+ __dma_cache_wback_inv = __dma_cache_wback_inv_slc;
+ __dma_cache_inv = __dma_cache_inv_slc;
+ __dma_cache_wback = __dma_cache_wback_slc;
+ } else {
+ __dma_cache_wback_inv = __dma_cache_wback_inv_l1;
+ __dma_cache_inv = __dma_cache_inv_l1;
+ __dma_cache_wback = __dma_cache_wback_l1;
+ }
+}
diff --git a/kernel/arch/arc/mm/dma.c b/kernel/arch/arc/mm/dma.c
index 12cc6485b..29a46bb19 100644
--- a/kernel/arch/arc/mm/dma.c
+++ b/kernel/arch/arc/mm/dma.c
@@ -14,13 +14,12 @@
* Cache bit off in the TLB entry.
*
* The default DMA address == Phy address which is 0x8000_0000 based.
- * A platform/device can make it zero based, by over-riding
- * plat_{dma,kernel}_addr_to_{kernel,dma}
*/
#include <linux/dma-mapping.h>
#include <linux/dma-debug.h>
#include <linux/export.h>
+#include <asm/cache.h>
#include <asm/cacheflush.h>
/*
@@ -37,7 +36,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size,
return NULL;
/* This is bus address, platform dependent */
- *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+ *dma_handle = (dma_addr_t)paddr;
return paddr;
}
@@ -46,8 +45,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent);
void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle)
{
- free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
- size);
+ free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_noncoherent);
@@ -56,6 +54,20 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
{
void *paddr, *kvaddr;
+ /*
+ * IOC relies on all data (even coherent DMA data) being in cache
+ * Thus allocate normal cached memory
+ *
+ * The gains with IOC are two pronged:
+ * -For streaming data, elides needs for cache maintenance, saving
+ * cycles in flush code, and bus bandwidth as all the lines of a
+ * buffer need to be flushed out to memory
+ * -For coherent data, Read/Write to buffers terminate early in cache
+ * (vs. always going to memory - thus are faster)
+ */
+ if (is_isa_arcv2() && ioc_exists)
+ return dma_alloc_noncoherent(dev, size, dma_handle, gfp);
+
/* This is linear addr (0x8000_0000 based) */
paddr = alloc_pages_exact(size, gfp);
if (!paddr)
@@ -63,11 +75,23 @@ void *dma_alloc_coherent(struct device *dev, size_t size,
/* This is kernel Virtual address (0x7000_0000 based) */
kvaddr = ioremap_nocache((unsigned long)paddr, size);
- if (kvaddr != NULL)
- memset(kvaddr, 0, size);
+ if (kvaddr == NULL)
+ return NULL;
/* This is bus address, platform dependent */
- *dma_handle = plat_kernel_addr_to_dma(dev, paddr);
+ *dma_handle = (dma_addr_t)paddr;
+
+ /*
+ * Evict any existing L1 and/or L2 lines for the backing page
+ * in case it was used earlier as a normal "cached" page.
+ * Yeah this bit us - STAR 9000898266
+ *
+ * Although core does call flush_cache_vmap(), it gets kvaddr hence
+ * can't be used to efficiently flush L1 and/or L2 which need paddr
+ * Currently flush_cache_vmap nukes the L1 cache completely which
+ * will be optimized as a separate commit
+ */
+ dma_cache_wback_inv((unsigned long)paddr, size);
return kvaddr;
}
@@ -76,10 +100,12 @@ EXPORT_SYMBOL(dma_alloc_coherent);
void dma_free_coherent(struct device *dev, size_t size, void *kvaddr,
dma_addr_t dma_handle)
{
+ if (is_isa_arcv2() && ioc_exists)
+ return dma_free_noncoherent(dev, size, kvaddr, dma_handle);
+
iounmap((void __force __iomem *)kvaddr);
- free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle),
- size);
+ free_pages_exact((void *)dma_handle, size);
}
EXPORT_SYMBOL(dma_free_coherent);
diff --git a/kernel/arch/arc/mm/fault.c b/kernel/arch/arc/mm/fault.c
index d948e4e9d..af63f4a13 100644
--- a/kernel/arch/arc/mm/fault.c
+++ b/kernel/arch/arc/mm/fault.c
@@ -18,7 +18,14 @@
#include <asm/pgalloc.h>
#include <asm/mmu.h>
-static int handle_vmalloc_fault(unsigned long address)
+/*
+ * kernel virtual address is required to implement vmalloc/pkmap/fixmap
+ * Refer to asm/processor.h for System Memory Map
+ *
+ * It simply copies the PMD entry (pointer to 2nd level page table or hugepage)
+ * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared
+ */
+noinline static int handle_kernel_vaddr_fault(unsigned long address)
{
/*
* Synchronize this task's top level page-table
@@ -72,8 +79,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
* only copy the information from the master page table,
* nothing more.
*/
- if (address >= VMALLOC_START && address <= VMALLOC_END) {
- ret = handle_vmalloc_fault(address);
+ if (address >= VMALLOC_START) {
+ ret = handle_kernel_vaddr_fault(address);
if (unlikely(ret))
goto bad_area_nosemaphore;
else
diff --git a/kernel/arch/arc/mm/highmem.c b/kernel/arch/arc/mm/highmem.c
new file mode 100644
index 000000000..92dd92cad
--- /dev/null
+++ b/kernel/arch/arc/mm/highmem.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bootmem.h>
+#include <linux/export.h>
+#include <linux/highmem.h>
+#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+
+/*
+ * HIGHMEM API:
+ *
+ * kmap() API provides sleep semantics hence refered to as "permanent maps"
+ * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor
+ * for book-keeping
+ *
+ * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides
+ * shortlived ala "temporary mappings" which historically were implemented as
+ * fixmaps (compile time addr etc). Their book-keeping is done per cpu.
+ *
+ * Both these facts combined (preemption disabled and per-cpu allocation)
+ * means the total number of concurrent fixmaps will be limited to max
+ * such allocations in a single control path. Thus KM_TYPE_NR (another
+ * historic relic) is a small'ish number which caps max percpu fixmaps
+ *
+ * ARC HIGHMEM Details
+ *
+ * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module)
+ * is now shared between vmalloc and kmap (non overlapping though)
+ *
+ * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD
+ * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means
+ * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split)
+ *
+ * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE
+ * slots across NR_CPUS would be more than sufficient (generic code defines
+ * KM_TYPE_NR as 20).
+ *
+ * - pkmap being preemptible, in theory could do with more than 256 concurrent
+ * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse
+ * the PGD and only works with a single page table @pkmap_page_table, hence
+ * sets the limit
+ */
+
+extern pte_t * pkmap_page_table;
+static pte_t * fixmap_page_table;
+
+void *kmap(struct page *page)
+{
+ BUG_ON(in_interrupt());
+ if (!PageHighMem(page))
+ return page_address(page);
+
+ return kmap_high(page);
+}
+
+void *kmap_atomic(struct page *page)
+{
+ int idx, cpu_idx;
+ unsigned long vaddr;
+
+ preempt_disable();
+ pagefault_disable();
+ if (!PageHighMem(page))
+ return page_address(page);
+
+ cpu_idx = kmap_atomic_idx_push();
+ idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
+ vaddr = FIXMAP_ADDR(idx);
+
+ set_pte_at(&init_mm, vaddr, fixmap_page_table + idx,
+ mk_pte(page, kmap_prot));
+
+ return (void *)vaddr;
+}
+EXPORT_SYMBOL(kmap_atomic);
+
+void __kunmap_atomic(void *kv)
+{
+ unsigned long kvaddr = (unsigned long)kv;
+
+ if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) {
+
+ /*
+ * Because preemption is disabled, this vaddr can be associated
+ * with the current allocated index.
+ * But in case of multiple live kmap_atomic(), it still relies on
+ * callers to unmap in right order.
+ */
+ int cpu_idx = kmap_atomic_idx();
+ int idx = cpu_idx + KM_TYPE_NR * smp_processor_id();
+
+ WARN_ON(kvaddr != FIXMAP_ADDR(idx));
+
+ pte_clear(&init_mm, kvaddr, fixmap_page_table + idx);
+ local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE);
+
+ kmap_atomic_idx_pop();
+ }
+
+ pagefault_enable();
+ preempt_enable();
+}
+EXPORT_SYMBOL(__kunmap_atomic);
+
+static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
+{
+ pgd_t *pgd_k;
+ pud_t *pud_k;
+ pmd_t *pmd_k;
+ pte_t *pte_k;
+
+ pgd_k = pgd_offset_k(kvaddr);
+ pud_k = pud_offset(pgd_k, kvaddr);
+ pmd_k = pmd_offset(pud_k, kvaddr);
+
+ pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ pmd_populate_kernel(&init_mm, pmd_k, pte_k);
+ return pte_k;
+}
+
+void __init kmap_init(void)
+{
+ /* Due to recursive include hell, we can't do this in processor.h */
+ BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE));
+
+ BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE);
+ pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE);
+
+ BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE);
+ fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE);
+}
diff --git a/kernel/arch/arc/mm/init.c b/kernel/arch/arc/mm/init.c
index d44eedd8c..7d2c4fbf4 100644
--- a/kernel/arch/arc/mm/init.c
+++ b/kernel/arch/arc/mm/init.c
@@ -15,6 +15,7 @@
#endif
#include <linux/swap.h>
#include <linux/module.h>
+#include <linux/highmem.h>
#include <asm/page.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
@@ -24,16 +25,22 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE);
EXPORT_SYMBOL(empty_zero_page);
-/* Default tot mem from .config */
-static unsigned long arc_mem_sz = 0x20000000; /* some default */
+static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE;
+static unsigned long low_mem_sz;
+
+#ifdef CONFIG_HIGHMEM
+static unsigned long min_high_pfn;
+static u64 high_mem_start;
+static u64 high_mem_sz;
+#endif
/* User can over-ride above with "mem=nnn[KkMm]" in cmdline */
static int __init setup_mem_sz(char *str)
{
- arc_mem_sz = memparse(str, NULL) & PAGE_MASK;
+ low_mem_sz = memparse(str, NULL) & PAGE_MASK;
/* early console might not be setup yet - it will show up later */
- pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz));
+ pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz));
return 0;
}
@@ -41,8 +48,24 @@ early_param("mem", setup_mem_sz);
void __init early_init_dt_add_memory_arch(u64 base, u64 size)
{
- arc_mem_sz = size & PAGE_MASK;
- pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz));
+ int in_use = 0;
+
+ if (!low_mem_sz) {
+ if (base != low_mem_start)
+ panic("CONFIG_LINUX_LINK_BASE != DT memory { }");
+
+ low_mem_sz = size;
+ in_use = 1;
+ } else {
+#ifdef CONFIG_HIGHMEM
+ high_mem_start = base;
+ high_mem_sz = size;
+ in_use = 1;
+#endif
+ }
+
+ pr_info("Memory @ %llx [%lldM] %s\n",
+ base, TO_MB(size), !in_use ? "Not used":"");
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -72,46 +95,62 @@ early_param("initrd", early_initrd);
void __init setup_arch_memory(void)
{
unsigned long zones_size[MAX_NR_ZONES];
- unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz;
+ unsigned long zones_holes[MAX_NR_ZONES];
init_mm.start_code = (unsigned long)_text;
init_mm.end_code = (unsigned long)_etext;
init_mm.end_data = (unsigned long)_edata;
init_mm.brk = (unsigned long)_end;
- /*
- * We do it here, so that memory is correctly instantiated
- * even if "mem=xxx" cmline over-ride is given and/or
- * DT has memory node. Each causes an update to @arc_mem_sz
- * and we finally add memory one here
- */
- memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz);
-
- /*------------- externs in mm need setting up ---------------*/
-
/* first page of system - kernel .vector starts here */
min_low_pfn = ARCH_PFN_OFFSET;
- /* Last usable page of low mem (no HIGHMEM yet for ARC port) */
- max_low_pfn = max_pfn = PFN_DOWN(end_mem);
+ /* Last usable page of low mem */
+ max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz);
- max_mapnr = max_low_pfn - min_low_pfn;
+#ifdef CONFIG_HIGHMEM
+ min_high_pfn = PFN_DOWN(high_mem_start);
+ max_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
+#endif
+
+ max_mapnr = max_pfn - min_low_pfn;
- /*------------- reserve kernel image -----------------------*/
- memblock_reserve(CONFIG_LINUX_LINK_BASE,
- __pa(_end) - CONFIG_LINUX_LINK_BASE);
+ /*------------- bootmem allocator setup -----------------------*/
+
+ /*
+ * seed the bootmem allocator after any DT memory node parsing or
+ * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz
+ *
+ * Only low mem is added, otherwise we have crashes when allocating
+ * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of
+ * avail memory, ending in highmem with a > 32-bit address. However
+ * it then tries to memset it with a truncaed 32-bit handle, causing
+ * the crash
+ */
+
+ memblock_add(low_mem_start, low_mem_sz);
+ memblock_reserve(low_mem_start, __pa(_end) - low_mem_start);
#ifdef CONFIG_BLK_DEV_INITRD
- /*------------- reserve initrd image -----------------------*/
if (initrd_start)
memblock_reserve(__pa(initrd_start), initrd_end - initrd_start);
#endif
memblock_dump_all();
- /*-------------- node setup --------------------------------*/
+ /*----------------- node/zones setup --------------------------*/
memset(zones_size, 0, sizeof(zones_size));
- zones_size[ZONE_NORMAL] = max_mapnr;
+ memset(zones_holes, 0, sizeof(zones_holes));
+
+ zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn;
+ zones_holes[ZONE_NORMAL] = 0;
+
+#ifdef CONFIG_HIGHMEM
+ zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn;
+
+ /* This handles the peripheral address space hole */
+ zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn;
+#endif
/*
* We can't use the helper free_area_init(zones[]) because it uses
@@ -122,9 +161,12 @@ void __init setup_arch_memory(void)
free_area_init_node(0, /* node-id */
zones_size, /* num pages per zone */
min_low_pfn, /* first pfn of node */
- NULL); /* NO holes */
+ zones_holes); /* holes */
- high_memory = (void *)end_mem;
+#ifdef CONFIG_HIGHMEM
+ high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
+ kmap_init();
+#endif
}
/*
@@ -135,6 +177,14 @@ void __init setup_arch_memory(void)
*/
void __init mem_init(void)
{
+#ifdef CONFIG_HIGHMEM
+ unsigned long tmp;
+
+ reset_all_zones_managed_pages();
+ for (tmp = min_high_pfn; tmp < max_pfn; tmp++)
+ free_highmem_page(pfn_to_page(tmp));
+#endif
+
free_all_bootmem();
mem_init_print_info(NULL);
}
diff --git a/kernel/arch/arc/mm/tlb.c b/kernel/arch/arc/mm/tlb.c
index 7f47d2a56..daf2bf52b 100644
--- a/kernel/arch/arc/mm/tlb.c
+++ b/kernel/arch/arc/mm/tlb.c
@@ -109,10 +109,16 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
static inline void __tlb_entry_erase(void)
{
write_aux_reg(ARC_REG_TLBPD1, 0);
+
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
write_aux_reg(ARC_REG_TLBPD0, 0);
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+#if (CONFIG_ARC_MMU_VER < 4)
+
static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid)
{
unsigned int idx;
@@ -180,7 +186,7 @@ static void utlb_invalidate(void)
}
-static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
{
unsigned int idx;
@@ -210,28 +216,71 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1)
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+#else /* CONFIG_ARC_MMU_VER >= 4) */
+
+static void utlb_invalidate(void)
+{
+ /* No need since uTLB is always in sync with JTLB */
+}
+
+static void tlb_entry_erase(unsigned int vaddr_n_asid)
+{
+ write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry);
+}
+
+static void tlb_entry_insert(unsigned int pd0, pte_t pd1)
+{
+ write_aux_reg(ARC_REG_TLBPD0, pd0);
+ write_aux_reg(ARC_REG_TLBPD1, pd1);
+
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32);
+
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry);
+}
+
+#endif
+
/*
* Un-conditionally (without lookup) erase the entire MMU contents
*/
noinline void local_flush_tlb_all(void)
{
+ struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
unsigned long flags;
unsigned int entry;
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+ int num_tlb = mmu->sets * mmu->ways;
local_irq_save(flags);
/* Load PD0 and PD1 with template for a Blank Entry */
write_aux_reg(ARC_REG_TLBPD1, 0);
+
+ if (is_pae40_enabled())
+ write_aux_reg(ARC_REG_TLBPD1HI, 0);
+
write_aux_reg(ARC_REG_TLBPD0, 0);
- for (entry = 0; entry < mmu->num_tlb; entry++) {
+ for (entry = 0; entry < num_tlb; entry++) {
/* write this entry to the TLB */
write_aux_reg(ARC_REG_TLBINDEX, entry);
write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
}
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
+ const int stlb_idx = 0x800;
+
+ /* Blank sTLB entry */
+ write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ);
+
+ for (entry = stlb_idx; entry < stlb_idx + 16; entry++) {
+ write_aux_reg(ARC_REG_TLBINDEX, entry);
+ write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite);
+ }
+ }
+
utlb_invalidate();
local_irq_restore(flags);
@@ -385,6 +434,15 @@ static inline void ipi_flush_tlb_range(void *arg)
local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static inline void ipi_flush_pmd_tlb_range(void *arg)
+{
+ struct tlb_args *ta = arg;
+
+ local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end);
+}
+#endif
+
static inline void ipi_flush_tlb_kernel_range(void *arg)
{
struct tlb_args *ta = (struct tlb_args *)arg;
@@ -425,6 +483,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1);
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ struct tlb_args ta = {
+ .ta_vma = vma,
+ .ta_start = start,
+ .ta_end = end
+ };
+
+ on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1);
+}
+#endif
+
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
struct tlb_args ta = {
@@ -439,11 +511,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
/*
* Routine to create a TLB entry
*/
-void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
+void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
{
unsigned long flags;
unsigned int asid_or_sasid, rwx;
- unsigned long pd0, pd1;
+ unsigned long pd0;
+ pte_t pd1;
/*
* create_tlb() assumes that current->mm == vma->mm, since
@@ -475,9 +548,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
local_irq_save(flags);
- tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address);
+ tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr);
- address &= PAGE_MASK;
+ vaddr &= PAGE_MASK;
/* update this PTE credentials */
pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED);
@@ -487,7 +560,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep)
/* ASID for this task */
asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff;
- pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
+ pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0);
/*
* ARC MMU provides fully orthogonal access bits for K/U mode,
@@ -523,7 +596,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
pte_t *ptep)
{
unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
- unsigned long paddr = pte_val(*ptep) & PAGE_MASK;
+ phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
struct page *page = pfn_to_page(pte_pfn(*ptep));
create_tlb(vma, vaddr, ptep);
@@ -546,16 +619,105 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
if (dirty) {
- /* wback + inv dcache lines */
+ /* wback + inv dcache lines (K-mapping) */
__flush_dcache_page(paddr, paddr);
- /* invalidate any existing icache lines */
+ /* invalidate any existing icache lines (U-mapping) */
if (vma->vm_flags & VM_EXEC)
__inv_icache_page(paddr, vaddr);
}
}
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+/*
+ * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP
+ * support.
+ *
+ * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a
+ * new bit "SZ" in TLB page desciptor to distinguish between them.
+ * Super Page size is configurable in hardware (4K to 16M), but fixed once
+ * RTL builds.
+ *
+ * The exact THP size a Linx configuration will support is a function of:
+ * - MMU page size (typical 8K, RTL fixed)
+ * - software page walker address split between PGD:PTE:PFN (typical
+ * 11:8:13, but can be changed with 1 line)
+ * So for above default, THP size supported is 8K * (2^8) = 2M
+ *
+ * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime
+ * reduces to 1 level (as PTE is folded into PGD and canonically referred
+ * to as PMD).
+ * Thus THP PMD accessors are implemented in terms of PTE (just like sparc)
+ */
+
+void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd)
+{
+ pte_t pte = __pte(pmd_val(*pmd));
+ update_mmu_cache(vma, addr, &pte);
+}
+
+void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+ pgtable_t pgtable)
+{
+ struct list_head *lh = (struct list_head *) pgtable;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
+}
+
+pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
+{
+ struct list_head *lh;
+ pgtable_t pgtable;
+
+ assert_spin_locked(&mm->page_table_lock);
+
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+
+ pte_val(pgtable[0]) = 0;
+ pte_val(pgtable[1]) = 0;
+
+ return pgtable;
+}
+
+void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ unsigned int cpu;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ cpu = smp_processor_id();
+
+ if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) {
+ unsigned int asid = hw_pid(vma->vm_mm, cpu);
+
+ /* No need to loop here: this will always be for 1 Huge Page */
+ tlb_entry_erase(start | _PAGE_HW_SZ | asid);
+ }
+
+ local_irq_restore(flags);
+}
+
+#endif
+
/* Read the Cache Build Confuration Registers, Decode them and save into
* the cpuinfo structure for later use.
* No Validation is done here, simply read/convert the BCRs
@@ -574,47 +736,73 @@ void read_decode_mmu_bcr(void)
struct bcr_mmu_3 {
#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4,
+ unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
u_itlb:4, u_dtlb:4;
#else
- unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4,
+ unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
ways:4, ver:8;
#endif
} *mmu3;
+ struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+ n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+ /* DTLB ITLB JES JE JA */
+ unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+ pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+ } *mmu4;
+
tmp = read_aux_reg(ARC_REG_MMU_BCR);
mmu->ver = (tmp >> 24);
if (mmu->ver <= 2) {
mmu2 = (struct bcr_mmu_1_2 *)&tmp;
- mmu->pg_sz = PAGE_SIZE;
+ mmu->pg_sz_k = TO_KB(0x2000);
mmu->sets = 1 << mmu2->sets;
mmu->ways = 1 << mmu2->ways;
mmu->u_dtlb = mmu2->u_dtlb;
mmu->u_itlb = mmu2->u_itlb;
- } else {
+ } else if (mmu->ver == 3) {
mmu3 = (struct bcr_mmu_3 *)&tmp;
- mmu->pg_sz = 512 << mmu3->pg_sz;
+ mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
mmu->sets = 1 << mmu3->sets;
mmu->ways = 1 << mmu3->ways;
mmu->u_dtlb = mmu3->u_dtlb;
mmu->u_itlb = mmu3->u_itlb;
+ mmu->sasid = mmu3->sasid;
+ } else {
+ mmu4 = (struct bcr_mmu_4 *)&tmp;
+ mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
+ mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
+ mmu->sets = 64 << mmu4->n_entry;
+ mmu->ways = mmu4->n_ways * 2;
+ mmu->u_dtlb = mmu4->u_dtlb * 4;
+ mmu->u_itlb = mmu4->u_itlb * 4;
+ mmu->sasid = mmu4->sasid;
+ mmu->pae = mmu4->pae;
}
-
- mmu->num_tlb = mmu->sets * mmu->ways;
}
char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
{
int n = 0;
struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
+ char super_pg[64] = "";
+
+ if (p_mmu->s_pg_sz_m)
+ scnprintf(super_pg, 64, "%dM Super Page%s, ",
+ p_mmu->s_pg_sz_m,
+ IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
n += scnprintf(buf + n, len - n,
- "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n",
- p_mmu->ver, TO_KB(p_mmu->pg_sz),
- p_mmu->num_tlb, p_mmu->sets, p_mmu->ways,
+ "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n",
+ p_mmu->ver, p_mmu->pg_sz_k, super_pg,
+ p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
p_mmu->u_dtlb, p_mmu->u_itlb,
- IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : "");
+ IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40));
return buf;
}
@@ -639,9 +827,17 @@ void arc_mmu_init(void)
mmu->ver, CONFIG_ARC_MMU_VER);
}
- if (mmu->pg_sz != PAGE_SIZE)
+ if (mmu->pg_sz_k != TO_KB(PAGE_SIZE))
panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE));
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+ mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE))
+ panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n",
+ (unsigned long)TO_MB(HPAGE_PMD_SIZE));
+
+ if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae)
+ panic("Hardware doesn't support PAE40\n");
+
/* Enable the MMU */
write_aux_reg(ARC_REG_PID, MMU_ENABLE);
@@ -677,15 +873,15 @@ void arc_mmu_init(void)
* the duplicate one.
* -Knob to be verbose abt it.(TODO: hook them up to debugfs)
*/
-volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */
+volatile int dup_pd_silent; /* Be slient abt it or complain (default) */
void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
struct pt_regs *regs)
{
- int set, way, n;
- unsigned long flags, is_valid;
struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
- unsigned int pd0[mmu->ways], pd1[mmu->ways];
+ unsigned int pd0[mmu->ways];
+ unsigned long flags;
+ int set;
local_irq_save(flags);
@@ -695,14 +891,16 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
/* loop thru all sets of TLB */
for (set = 0; set < mmu->sets; set++) {
+ int is_valid, way;
+
/* read out all the ways of current set */
for (way = 0, is_valid = 0; way < mmu->ways; way++) {
write_aux_reg(ARC_REG_TLBINDEX,
SET_WAY_TO_IDX(mmu, set, way));
write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead);
pd0[way] = read_aux_reg(ARC_REG_TLBPD0);
- pd1[way] = read_aux_reg(ARC_REG_TLBPD1);
is_valid |= pd0[way] & _PAGE_PRESENT;
+ pd0[way] &= PAGE_MASK;
}
/* If all the WAYS in SET are empty, skip to next SET */
@@ -711,30 +909,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
/* Scan the set for duplicate ways: needs a nested loop */
for (way = 0; way < mmu->ways - 1; way++) {
+
+ int n;
+
if (!pd0[way])
continue;
for (n = way + 1; n < mmu->ways; n++) {
- if ((pd0[way] & PAGE_MASK) ==
- (pd0[n] & PAGE_MASK)) {
-
- if (dup_pd_verbose) {
- pr_info("Duplicate PD's @"
- "[%d:%d]/[%d:%d]\n",
- set, way, set, n);
- pr_info("TLBPD0[%u]: %08x\n",
- way, pd0[way]);
- }
-
- /*
- * clear entry @way and not @n. This is
- * critical to our optimised loop
- */
- pd0[way] = pd1[way] = 0;
- write_aux_reg(ARC_REG_TLBINDEX,
+ if (pd0[way] != pd0[n])
+ continue;
+
+ if (!dup_pd_silent)
+ pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n",
+ pd0[way], set, way, n);
+
+ /*
+ * clear entry @way and not @n.
+ * This is critical to our optimised loop
+ */
+ pd0[way] = 0;
+ write_aux_reg(ARC_REG_TLBINDEX,
SET_WAY_TO_IDX(mmu, set, way));
- __tlb_entry_erase();
- }
+ __tlb_entry_erase();
}
}
}
diff --git a/kernel/arch/arc/mm/tlbex.S b/kernel/arch/arc/mm/tlbex.S
index d572f1c2c..f1967eeb3 100644
--- a/kernel/arch/arc/mm/tlbex.S
+++ b/kernel/arch/arc/mm/tlbex.S
@@ -35,8 +35,6 @@
* Rahul Trivedi, Amit Bhor: Codito Technologies 2004
*/
- .cpu A7
-
#include <linux/linkage.h>
#include <asm/entry.h>
#include <asm/mmu.h>
@@ -46,6 +44,7 @@
#include <asm/processor.h>
#include <asm/tlb-mmu1.h>
+#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
; ARC700 Exception Handling doesn't auto-switch stack and it only provides
; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
@@ -89,7 +88,7 @@ ex_saved_reg1:
#ifdef CONFIG_SMP
sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with
GET_CPU_ID r0 ; get to per cpu scratch mem,
- lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu
+ asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu
add r0, @ex_saved_reg1, r0
#else
st r0, [@ex_saved_reg1]
@@ -108,7 +107,7 @@ ex_saved_reg1:
.macro TLBMISS_RESTORE_REGS
#ifdef CONFIG_SMP
GET_CPU_ID r0 ; get to per cpu scratch mem
- lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide
+ asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide
add r0, @ex_saved_reg1, r0
ld_s r3, [r0,12]
ld_s r2, [r0, 8]
@@ -123,6 +122,24 @@ ex_saved_reg1:
#endif
.endm
+#else /* ARCv2 */
+
+.macro TLBMISS_FREEUP_REGS
+ PUSH r0
+ PUSH r1
+ PUSH r2
+ PUSH r3
+.endm
+
+.macro TLBMISS_RESTORE_REGS
+ POP r3
+ POP r2
+ POP r1
+ POP r0
+.endm
+
+#endif
+
;============================================================================
; Troubleshooting Stuff
;============================================================================
@@ -188,20 +205,38 @@ ex_saved_reg1:
#endif
lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD
- ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr
- and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags
- ; contains Ptr to Page Table
- bz.d do_slow_path_pf ; if no Page Table, do page fault
+ ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr
+ tst r3, r3
+ bz do_slow_path_pf ; if no Page Table, do page fault
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp)
+ add2.nz r1, r1, r0
+ bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk
+ mov.nz r0, r3
+
+#endif
+ and r1, r3, PAGE_MASK
; Get the PTE entry: The idea is
; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr
; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
- ; (3) z = pgtbl[y]
- ; To avoid the multiply by in end, we do the -2, <<2 below
+ ; (3) z = (pgtbl + y * 4)
+
+#ifdef CONFIG_ARC_HAS_PAE40
+#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */
+#else
+#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */
+#endif
+
+ ; multiply in step (3) above avoided by shifting lesser in step (1)
+ lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG )
+ and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG )
+ ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40)
+ ; r1: PTE ptr
+
+2:
- lsr r0, r2, (PAGE_SHIFT - 2)
- and r0, r0, ( (PTRS_PER_PTE - 1) << 2)
- ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr
#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT
and.f 0, r0, _PAGE_PRESENT
bz 1f
@@ -216,18 +251,23 @@ ex_saved_reg1:
;-----------------------------------------------------------------
; Convert Linux PTE entry into TLB entry
; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
+; (for PAE40, two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI])
; IN: r0 = PTE, r1 = ptr to PTE
.macro CONV_PTE_TO_TLB
- and r3, r0, PTE_BITS_RWX ; r w x
- lsl r2, r3, 3 ; r w x 0 0 0 (GLOBAL, kernel only)
+ and r3, r0, PTE_BITS_RWX ; r w x
+ asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only)
and.f 0, r0, _PAGE_GLOBAL
- or.z r2, r2, r3 ; r w x r w x (!GLOBAL, user page)
+ or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page)
and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
or r3, r3, r2
- sr r3, [ARC_REG_TLBPD1] ; these go in PD1
+ sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C
+#ifdef CONFIG_ARC_HAS_PAE40
+ ld r3, [r1, 4] ; paddr[39..32]
+ sr r3, [ARC_REG_TLBPD1HI]
+#endif
and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb
@@ -241,6 +281,7 @@ ex_saved_reg1:
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
+#if (CONFIG_ARC_MMU_VER < 4)
/* Get free TLB slot: Set = computed from vaddr, way = random */
sr TLBGetIndex, [ARC_REG_TLBCOMMAND]
@@ -251,6 +292,10 @@ ex_saved_reg1:
#else
sr TLBWrite, [ARC_REG_TLBCOMMAND]
#endif
+
+#else
+ sr TLBInsertEntry, [ARC_REG_TLBCOMMAND]
+#endif
.endm
@@ -291,6 +336,7 @@ ENTRY(EV_TLBMissI)
CONV_PTE_TO_TLB
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
+EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation
rtie
END(EV_TLBMissI)
@@ -342,7 +388,7 @@ ENTRY(EV_TLBMissD)
lr r3, [ecr]
or r0, r0, _PAGE_ACCESSED ; Accessed bit always
btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ?
- or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well
+ or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well
st_s r0, [r1] ; Write back PTE
CONV_PTE_TO_TLB
@@ -356,6 +402,7 @@ ENTRY(EV_TLBMissD)
COMMIT_ENTRY_TO_MMU
TLBMISS_RESTORE_REGS
+EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation
rtie
;-------- Common routine to call Linux Page Fault Handler -----------
@@ -366,19 +413,5 @@ do_slow_path_pf:
; Slow path TLB Miss handled as a regular ARC Exception
; (stack switching / save the complete reg-file).
- EXCEPTION_PROLOGUE
-
- ; ------- setup args for Linux Page fault Hanlder ---------
- mov_s r1, sp
- lr r0, [efa]
-
- ; We don't want exceptions to be disabled while the fault is handled.
- ; Now that we have saved the context we return from exception hence
- ; exceptions get re-enable
-
- FAKE_RET_FROM_EXCPN r9
-
- bl do_page_fault
- b ret_from_exception
-
+ b call_do_page_fault
END(EV_TLBMissD)