Diffstat (limited to 'kernel/arch/arc/mm')
-rw-r--r--   kernel/arch/arc/mm/Makefile                                                   |   3
-rw-r--r--   kernel/arch/arc/mm/cache.c (renamed from kernel/arch/arc/mm/cache_arc700.c)   | 721
-rw-r--r--   kernel/arch/arc/mm/dma.c                                                      |  46
-rw-r--r--   kernel/arch/arc/mm/fault.c                                                    |  13
-rw-r--r--   kernel/arch/arc/mm/highmem.c                                                  | 140
-rw-r--r--   kernel/arch/arc/mm/init.c                                                     | 106
-rw-r--r--   kernel/arch/arc/mm/tlb.c                                                      | 290
-rw-r--r--   kernel/arch/arc/mm/tlbex.S                                                    |  99
8 files changed, 1073 insertions, 345 deletions
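
The bulk of the cache.c churn in the patch below replaces compile-time DMA cache maintenance with function pointers (__dma_cache_wback_inv and friends) that arc_cache_init() binds once at boot, depending on whether the SoC has an IO-Coherency block (no maintenance needed), an SLC on top of L1 (both levels must be flushed), or only an L1 cache. What follows is a minimal, self-contained userspace sketch of that dispatch pattern, reusing the flag names from the diff (ioc_exists, slc_enable, l2_line_sz); it is an illustration of the idea, not the kernel implementation.

#include <stdio.h>

/* Runtime topology flags, normally filled in from the cache BCRs */
static int ioc_exists;               /* IO-Coherency block present          */
static int slc_enable = 1;           /* System Level Cache allowed          */
static int l2_line_sz;               /* non-zero once an SLC has been found */

/* Backend selected once at init, called for every DMA buffer afterwards */
static void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz);

static void wback_inv_ioc(unsigned long s, unsigned long sz)
{
	/* IOC snoops all DMA traffic: nothing to maintain */
	(void)s; (void)sz;
}

static void wback_inv_slc(unsigned long s, unsigned long sz)
{
	printf("flush+inv L1 then SLC for [%#lx, +%#lx)\n", s, sz);
}

static void wback_inv_l1(unsigned long s, unsigned long sz)
{
	printf("flush+inv L1 only for [%#lx, +%#lx)\n", s, sz);
}

/* Mirrors the tail of arc_cache_init() in the diff (ARCv2 checks omitted) */
static void cache_init(void)
{
	if (ioc_exists)
		__dma_cache_wback_inv = wback_inv_ioc;
	else if (l2_line_sz && slc_enable)
		__dma_cache_wback_inv = wback_inv_slc;
	else
		__dma_cache_wback_inv = wback_inv_l1;
}

/* The exported API stays a thin trampoline, as in the patched dma_cache_wback_inv() */
void dma_cache_wback_inv(unsigned long start, unsigned long sz)
{
	__dma_cache_wback_inv(start, sz);
}

int main(void)
{
	l2_line_sz = 64;                 /* pretend an SLC with 64B lines was probed */
	cache_init();
	dma_cache_wback_inv(0x80000000UL, 0x1000);
	return 0;
}

Binding the backend once at init keeps the hot path a single indirect call and lets the IOC case degrade to a no-op without any per-call conditionals, which is exactly what the exported dma_cache_* wrappers in the patch do.
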
diff --git a/kernel/arch/arc/mm/Makefile b/kernel/arch/arc/mm/Makefile index ac95cc239..3703a4969 100644 --- a/kernel/arch/arc/mm/Makefile +++ b/kernel/arch/arc/mm/Makefile @@ -7,4 +7,5 @@ # obj-y := extable.o ioremap.o dma.o fault.o init.o -obj-y += tlb.o tlbex.o cache_arc700.o mmap.o +obj-y += tlb.o tlbex.o cache.o mmap.o +obj-$(CONFIG_HIGHMEM) += highmem.o diff --git a/kernel/arch/arc/mm/cache_arc700.c b/kernel/arch/arc/mm/cache.c index 12b2100db..ff7ff6cbb 100644 --- a/kernel/arch/arc/mm/cache_arc700.c +++ b/kernel/arch/arc/mm/cache.c @@ -1,64 +1,12 @@ /* - * ARC700 VIPT Cache Management + * ARC Cache Management * + * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs - * -flush_cache_dup_mm (fork) - * -likewise for flush_cache_mm (exit/execve) - * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) - * - * vineetg: Apr 2011 - * -Now that MMU can support larger pg sz (16K), the determiniation of - * aliasing shd not be based on assumption of 8k pg - * - * vineetg: Mar 2011 - * -optimised version of flush_icache_range( ) for making I/D coherent - * when vaddr is available (agnostic of num of aliases) - * - * vineetg: Mar 2011 - * -Added documentation about I-cache aliasing on ARC700 and the way it - * was handled up until MMU V2. - * -Spotted a three year old bug when killing the 4 aliases, which needs - * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} - * instead of paddr | {0x00, 0x01, 0x10, 0x11} - * (Rajesh you owe me one now) - * - * vineetg: Dec 2010 - * -Off-by-one error when computing num_of_lines to flush - * This broke signal handling with bionic which uses synthetic sigret stub - * - * vineetg: Mar 2010 - * -GCC can't generate ZOL for core cache flush loops. - * Conv them into iterations based as opposed to while (start < end) types - * - * Vineetg: July 2009 - * -In I-cache flush routine we used to chk for aliasing for every line INV. - * Instead now we setup routines per cache geometry and invoke them - * via function pointers. - * - * Vineetg: Jan 2009 - * -Cache Line flush routines used to flush an extra line beyond end addr - * because check was while (end >= start) instead of (end > start) - * =Some call sites had to work around by doing -1, -4 etc to end param - * =Some callers didnt care. This was spec bad in case of INV routines - * which would discard valid data (cause of the horrible ext2 bug - * in ARC IDE driver) - * - * vineetg: June 11th 2008: Fixed flush_icache_range( ) - * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need - * to be flushed, which it was not doing. - * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, - * however ARC cache maintenance OPs require PHY addr. Thus need to do - * vmalloc_to_phy. - * -Also added optimisation there, that for range > PAGE SIZE we flush the - * entire cache in one shot rather than line by line. For e.g. a module - * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), - * while cache is only 16 or 32k. 
*/ #include <linux/module.h> @@ -73,9 +21,21 @@ #include <asm/cachectl.h> #include <asm/setup.h> +static int l2_line_sz; +int ioc_exists; +volatile int slc_enable = 1, ioc_enable = 1; + +void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int cacheop); + +void (*__dma_cache_wback_inv)(unsigned long start, unsigned long sz); +void (*__dma_cache_inv)(unsigned long start, unsigned long sz); +void (*__dma_cache_wback)(unsigned long start, unsigned long sz); + char *arc_cache_mumbojumbo(int c, char *buf, int len) { int n = 0; + struct cpuinfo_arc_cache *p; #define PR_CACHE(p, cfg, str) \ if (!(p)->ver) \ @@ -86,11 +46,24 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) (p)->sz_k, (p)->assoc, (p)->line_len, \ (p)->vipt ? "VIPT" : "PIPT", \ (p)->alias ? " aliasing" : "", \ - IS_ENABLED(cfg) ? "" : " (not used)"); + IS_USED_CFG(cfg)); PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + if (!is_isa_arcv2()) + return buf; + + p = &cpuinfo_arc700[c].slc; + if (p->ver) + n += scnprintf(buf + n, len - n, + "SLC\t\t: %uK, %uB Line%s\n", + p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); + + if (ioc_exists) + n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", + IS_DISABLED_RUN(ioc_enable)); + return buf; } @@ -99,6 +72,40 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) * the cpuinfo structure for later use. * No Validation done here, simply read/convert the BCRs */ +static void read_decode_cache_bcr_arcv2(int cpu) +{ + struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc; + struct bcr_generic sbcr; + + struct bcr_slc_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:24, way:2, lsz:2, sz:4; +#else + unsigned int sz:4, lsz:2, way:2, pad:24; +#endif + } slc_cfg; + + struct bcr_clust_cfg { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8; +#else + unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7; +#endif + } cbcr; + + READ_BCR(ARC_REG_SLC_BCR, sbcr); + if (sbcr.ver) { + READ_BCR(ARC_REG_SLC_CFG, slc_cfg); + p_slc->ver = sbcr.ver; + p_slc->sz_k = 128 << slc_cfg.sz; + l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 
128 : 64; + } + + READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); + if (cbcr.c && ioc_enable) + ioc_exists = 1; +} + void read_decode_cache_bcr(void) { struct cpuinfo_arc_cache *p_ic, *p_dc; @@ -117,8 +124,13 @@ void read_decode_cache_bcr(void) if (!ibcr.ver) goto dc_chk; - BUG_ON(ibcr.config != 3); - p_ic->assoc = 2; /* Fixed to 2w set assoc */ + if (ibcr.ver <= 3) { + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + } else if (ibcr.ver >= 4) { + p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */ + } + p_ic->line_len = 8 << ibcr.line_len; p_ic->sz_k = 1 << (ibcr.sz - 1); p_ic->ver = ibcr.ver; @@ -130,94 +142,135 @@ dc_chk: READ_BCR(ARC_REG_DC_BCR, dbcr); if (!dbcr.ver) - return; + goto slc_chk; + + if (dbcr.ver <= 3) { + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + } else if (dbcr.ver >= 4) { + p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */ + p_dc->vipt = 0; + p_dc->alias = 0; /* PIPT so can't VIPT alias */ + } - BUG_ON(dbcr.config != 2); - p_dc->assoc = 4; /* Fixed to 4w set assoc */ p_dc->line_len = 16 << dbcr.line_len; p_dc->sz_k = 1 << (dbcr.sz - 1); p_dc->ver = dbcr.ver; - p_dc->vipt = 1; - p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; + +slc_chk: + if (is_isa_arcv2()) + read_decode_cache_bcr_arcv2(cpu); } /* - * 1. Validate the Cache Geomtery (compile time config matches hardware) - * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) - * (aliasing D-cache configurations are not supported YET) - * 3. Enable the Caches, setup default flush mode for D-Cache - * 3. Calculate the SHMLBA used by user space + * Line Operation on {I,D}-Cache */ -void arc_cache_init(void) -{ - unsigned int __maybe_unused cpu = smp_processor_id(); - char str[256]; - - printk(arc_cache_mumbojumbo(0, str, sizeof(str))); - if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { - struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 - if (!ic->ver) - panic("cache support enabled but non-existent cache\n"); +/* + * I-Cache Aliasing in ARC700 VIPT caches (MMU v1-v3) + * + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. + * + * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, + * paddr alone could not be used to correctly index the cache. + * + * ------------------ + * MMU v1/v2 (Fixed Page Size 8k) + * ------------------ + * The solution was to provide CDU with these additonal vaddr bits. These + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. + * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits + * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the + * orig 5 bits of paddr were anyways ignored by CDU line ops, as they + * represent the offset within cache-line. The adv of using this "clumsy" + * interface for additional info was no new reg was needed in CDU programming + * model. + * + * 17:13 represented the max num of bits passable, actual bits needed were + * fewer, based on the num-of-aliases possible. 
+ * -for 2 alias possibility, only bit 13 needed (32K cache) + * -for 4 alias possibility, bits 14:13 needed (64K cache) + * + * ------------------ + * MMU v3 + * ------------------ + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. + * However from hardware perspective, smaller page sizes aggrevate aliasing + * meaning more vaddr bits needed to disambiguate the cache-line-op ; + * the existing scheme of piggybacking won't work for certain configurations. + * Two new registers IC_PTAG and DC_PTAG inttoduced. + * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + */ - if (ic->line_len != L1_CACHE_BYTES) - panic("ICache line [%d] != kernel Config [%d]", - ic->line_len, L1_CACHE_BYTES); +static inline +void __cache_line_loop_v2(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op) +{ + unsigned int aux_cmd; + int num_lines; + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; - if (ic->ver != CONFIG_ARC_MMU_VER) - panic("Cache ver [%d] doesn't match MMU ver [%d]\n", - ic->ver, CONFIG_ARC_MMU_VER); + if (op == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; } - if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { - struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; - int handled; - - if (!dc->ver) - panic("cache support enabled but non-existent cache\n"); + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). + */ + if (!full_page) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } - if (dc->line_len != L1_CACHE_BYTES) - panic("DCache line [%d] != kernel Config [%d]", - dc->line_len, L1_CACHE_BYTES); + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); - /* check for D-Cache aliasing */ - handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; - if (dc->alias && !handled) - panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); - else if (!dc->alias && handled) - panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + while (num_lines-- > 0) { + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; } } -#define OP_INV 0x1 -#define OP_FLUSH 0x2 -#define OP_FLUSH_N_INV 0x3 -#define OP_INV_IC 0x4 - /* - * Common Helper for Line Operations on {I,D}-Cache + * For ARC700 MMUv3 I-cache and D-cache flushes + * Also reused for HS38 aliasing I-cache configuration */ -static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) +static inline +void __cache_line_loop_v3(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int op) { unsigned int aux_cmd, aux_tag; int num_lines; - const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + const int full_page = __builtin_constant_p(sz) && sz == PAGE_SIZE; - if (cacheop == OP_INV_IC) { + if (op == OP_INV_IC) { aux_cmd = ARC_REG_IC_IVIL; -#if (CONFIG_ARC_MMU_VER > 2) aux_tag = ARC_REG_IC_PTAG; -#endif - } - else { - /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ - aux_cmd = cacheop & OP_INV ? 
ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; -#if (CONFIG_ARC_MMU_VER > 2) + } else { + aux_cmd = op & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; aux_tag = ARC_REG_DC_PTAG; -#endif } /* Ensure we properly floor/ceil the non-line aligned/sized requests @@ -226,177 +279,201 @@ static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, * -@paddr will be cache-line aligned already (being page aligned) * -@sz will be integral multiple of line size (being page sized). */ - if (!full_page_op) { + if (!full_page) { sz += paddr & ~CACHE_LINE_MASK; paddr &= CACHE_LINE_MASK; vaddr &= CACHE_LINE_MASK; } - num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); -#if (CONFIG_ARC_MMU_VER <= 2) - /* MMUv2 and before: paddr contains stuffed vaddrs bits */ - paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; -#else - /* if V-P const for loop, PTAG can be written once outside loop */ - if (full_page_op) + /* + * MMUv3, cache ops require paddr in PTAG reg + * if V-P const for loop, PTAG can be written once outside loop + */ + if (full_page) write_aux_reg(aux_tag, paddr); -#endif + + /* + * This is technically for MMU v4, using the MMU v3 programming model + * Special work for HS38 aliasing I-cache configuratino with PAE40 + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + * Note that PTAG_HI is hoisted outside the line loop + */ + if (is_pae40_enabled() && op == OP_INV_IC) + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); while (num_lines-- > 0) { -#if (CONFIG_ARC_MMU_VER > 2) - /* MMUv3, cache ops require paddr seperately */ - if (!full_page_op) { + if (!full_page) { write_aux_reg(aux_tag, paddr); paddr += L1_CACHE_BYTES; } write_aux_reg(aux_cmd, vaddr); vaddr += L1_CACHE_BYTES; -#else + } +} + +/* + * In HS38x (MMU v4), I-cache is VIPT (can alias), D-cache is PIPT + * Here's how cache ops are implemented + * + * - D-cache: only paddr needed (in DC_IVDL/DC_FLDL) + * - I-cache Non Aliasing: Despite VIPT, only paddr needed (in IC_IVIL) + * - I-cache Aliasing: Both vaddr and paddr needed (in IC_IVIL, IC_PTAG + * respectively, similar to MMU v3 programming model, hence + * __cache_line_loop_v3() is used) + * + * If PAE40 is enabled, independent of aliasing considerations, the higher bits + * needs to be written into PTAG_HI + */ +static inline +void __cache_line_loop_v4(phys_addr_t paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; + } else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + + /* + * For HS38 PAE40 configuration + * - upper 8 bits of paddr need to be written into PTAG_HI + * - (and needs to be written before the lower 32 bits) + */ + if (is_pae40_enabled()) { + if (cacheop == OP_INV_IC) + /* + * Non aliasing I-cache in HS38, + * aliasing I-cache handled in __cache_line_loop_v3() + */ + write_aux_reg(ARC_REG_IC_PTAG_HI, (u64)paddr >> 32); + else + write_aux_reg(ARC_REG_DC_PTAG_HI, (u64)paddr >> 32); + } + + while (num_lines-- > 0) { write_aux_reg(aux_cmd, paddr); paddr += L1_CACHE_BYTES; -#endif } } +#if (CONFIG_ARC_MMU_VER < 3) +#define __cache_line_loop __cache_line_loop_v2 +#elif (CONFIG_ARC_MMU_VER == 3) +#define __cache_line_loop __cache_line_loop_v3 +#elif (CONFIG_ARC_MMU_VER > 3) +#define __cache_line_loop __cache_line_loop_v4 +#endif + #ifdef CONFIG_ARC_HAS_DCACHE /*************************************************************** * Machine specific helpers for Entire D-Cache or Per Line ops */ -static inline unsigned int __before_dc_op(const int op) +static inline void __before_dc_op(const int op) { - unsigned int reg = reg; - if (op == OP_FLUSH_N_INV) { /* Dcache provides 2 cmd: FLUSH or INV * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE * flush-n-inv is achieved by INV cmd but with IM=1 * So toggle INV sub-mode depending on op request and default */ - reg = read_aux_reg(ARC_REG_DC_CTRL); - write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH) - ; + const unsigned int ctl = ARC_REG_DC_CTRL; + write_aux_reg(ctl, read_aux_reg(ctl) | DC_CTRL_INV_MODE_FLUSH); } - - return reg; } -static inline void __after_dc_op(const int op, unsigned int reg) +static inline void __after_dc_op(const int op) { - if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ - while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); + if (op & OP_FLUSH) { + const unsigned int ctl = ARC_REG_DC_CTRL; + unsigned int reg; + + /* flush / flush-n-inv both wait */ + while ((reg = read_aux_reg(ctl)) & DC_CTRL_FLUSH_STATUS) + ; - /* Switch back to default Invalidate mode */ - if (op == OP_FLUSH_N_INV) - write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ctl, reg & ~DC_CTRL_INV_MODE_FLUSH); + } } /* * Operation on Entire D-Cache - * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * @op = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} * Note that constant propagation ensures all the checks are gone * in generated code */ -static inline void __dc_entire_op(const int cacheop) +static inline void __dc_entire_op(const int op) { - unsigned int ctrl_reg; int aux; - ctrl_reg = __before_dc_op(cacheop); + __before_dc_op(op); - if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + if (op & OP_INV) /* Inv or flush-n-inv use same cmd reg */ aux = ARC_REG_DC_IVDC; else aux = ARC_REG_DC_FLSH; write_aux_reg(aux, 0x1); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(op); } /* For kernel mappings cache operation: index is same as paddr */ #define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) /* - * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) + * D-Cache Line ops: Per Line INV (discard or wback+discard) or FLUSH (wback) */ -static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, - unsigned long sz, const int cacheop) +static inline void __dc_line_op(phys_addr_t paddr, unsigned long vaddr, + unsigned 
long sz, const int op) { unsigned long flags; - unsigned int ctrl_reg; local_irq_save(flags); - ctrl_reg = __before_dc_op(cacheop); + __before_dc_op(op); - __cache_line_loop(paddr, vaddr, sz, cacheop); + __cache_line_loop(paddr, vaddr, sz, op); - __after_dc_op(cacheop, ctrl_reg); + __after_dc_op(op); local_irq_restore(flags); } #else -#define __dc_entire_op(cacheop) -#define __dc_line_op(paddr, vaddr, sz, cacheop) -#define __dc_line_op_k(paddr, sz, cacheop) +#define __dc_entire_op(op) +#define __dc_line_op(paddr, vaddr, sz, op) +#define __dc_line_op_k(paddr, sz, op) #endif /* CONFIG_ARC_HAS_DCACHE */ - #ifdef CONFIG_ARC_HAS_ICACHE -/* - * I-Cache Aliasing in ARC700 VIPT caches - * - * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. - * The orig Cache Management Module "CDU" only required paddr to invalidate a - * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. - * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching - * the exact same line. - * - * However for larger Caches (way-size > page-size) - i.e. in Aliasing config, - * paddr alone could not be used to correctly index the cache. - * - * ------------------ - * MMU v1/v2 (Fixed Page Size 8k) - * ------------------ - * The solution was to provide CDU with these additonal vaddr bits. These - * would be bits [x:13], x would depend on cache-geometry, 13 comes from - * standard page size of 8k. - * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits - * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the - * orig 5 bits of paddr were anyways ignored by CDU line ops, as they - * represent the offset within cache-line. The adv of using this "clumsy" - * interface for additional info was no new reg was needed in CDU programming - * model. - * - * 17:13 represented the max num of bits passable, actual bits needed were - * fewer, based on the num-of-aliases possible. - * -for 2 alias possibility, only bit 13 needed (32K cache) - * -for 4 alias possibility, bits 14:13 needed (64K cache) - * - * ------------------ - * MMU v3 - * ------------------ - * This ver of MMU supports variable page sizes (1k-16k): although Linux will - * only support 8k (default), 16k and 4k. - * However from hardware perspective, smaller page sizes aggrevate aliasing - * meaning more vaddr bits needed to disambiguate the cache-line-op ; - * the existing scheme of piggybacking won't work for certain configurations. - * Two new registers IC_PTAG and DC_PTAG inttoduced. - * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs - */ - -/*********************************************************** - * Machine specific helper for per line I-Cache invalidate. 
- */ - static inline void __ic_entire_inv(void) { write_aux_reg(ARC_REG_IC_IVIC, 1); @@ -404,13 +481,13 @@ static inline void __ic_entire_inv(void) } static inline void -__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, +__ic_line_inv_vaddr_local(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { unsigned long flags; local_irq_save(flags); - __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); + (*_cache_line_loop_ic_fn)(paddr, vaddr, sz, OP_INV_IC); local_irq_restore(flags); } @@ -421,7 +498,7 @@ __ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, #else struct ic_inv_args { - unsigned long paddr, vaddr; + phys_addr_t paddr, vaddr; int sz; }; @@ -432,7 +509,7 @@ static void __ic_line_inv_vaddr_helper(void *info) __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); } -static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, +static void __ic_line_inv_vaddr(phys_addr_t paddr, unsigned long vaddr, unsigned long sz) { struct ic_inv_args ic_inv = { @@ -453,6 +530,56 @@ static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, #endif /* CONFIG_ARC_HAS_ICACHE */ +noinline void slc_op(phys_addr_t paddr, unsigned long sz, const int op) +{ +#ifdef CONFIG_ISA_ARCV2 + /* + * SLC is shared between all cores and concurrent aux operations from + * multiple cores need to be serialized using a spinlock + * A concurrent operation can be silently ignored and/or the old/new + * operation can remain incomplete forever (lockup in SLC_CTRL_BUSY loop + * below) + */ + static DEFINE_SPINLOCK(lock); + unsigned long flags; + unsigned int ctrl; + + spin_lock_irqsave(&lock, flags); + + /* + * The Region Flush operation is specified by CTRL.RGN_OP[11..9] + * - b'000 (default) is Flush, + * - b'001 is Invalidate if CTRL.IM == 0 + * - b'001 is Flush-n-Invalidate if CTRL.IM == 1 + */ + ctrl = read_aux_reg(ARC_REG_SLC_CTRL); + + /* Don't rely on default value of IM bit */ + if (!(op & OP_FLUSH)) /* i.e. 
OP_INV */ + ctrl &= ~SLC_CTRL_IM; /* clear IM: Disable flush before Inv */ + else + ctrl |= SLC_CTRL_IM; + + if (op & OP_INV) + ctrl |= SLC_CTRL_RGN_OP_INV; /* Inv or flush-n-inv */ + else + ctrl &= ~SLC_CTRL_RGN_OP_INV; + + write_aux_reg(ARC_REG_SLC_CTRL, ctrl); + + /* + * Lower bits are ignored, no need to clip + * END needs to be setup before START (latter triggers the operation) + * END can't be same as START, so add (l2_line_sz - 1) to sz + */ + write_aux_reg(ARC_REG_SLC_RGN_END, (paddr + sz + l2_line_sz - 1)); + write_aux_reg(ARC_REG_SLC_RGN_START, paddr); + + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + + spin_unlock_irqrestore(&lock, flags); +#endif +} /*********************************************************** * Exported APIs @@ -493,7 +620,7 @@ void flush_dcache_page(struct page *page) } else if (page_mapped(page)) { /* kernel reading from page with U-mapping */ - void *paddr = page_address(page); + phys_addr_t paddr = (unsigned long)page_address(page); unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; if (addr_not_cache_congruent(paddr, vaddr)) @@ -502,22 +629,74 @@ void flush_dcache_page(struct page *page) } EXPORT_SYMBOL(flush_dcache_page); +/* + * DMA ops for systems with L1 cache only + * Make memory coherent with L1 cache by flushing/invalidating L1 lines + */ +static void __dma_cache_wback_inv_l1(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); +} -void dma_cache_wback_inv(unsigned long start, unsigned long sz) +static void __dma_cache_inv_l1(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); +} + +static void __dma_cache_wback_l1(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); +} + +/* + * DMA ops for systems with both L1 and L2 caches, but without IOC + * Both L1 and L2 lines need to be explicity flushed/invalidated + */ +static void __dma_cache_wback_inv_slc(unsigned long start, unsigned long sz) { __dc_line_op_k(start, sz, OP_FLUSH_N_INV); + slc_op(start, sz, OP_FLUSH_N_INV); +} + +static void __dma_cache_inv_slc(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); + slc_op(start, sz, OP_INV); +} + +static void __dma_cache_wback_slc(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); + slc_op(start, sz, OP_FLUSH); +} + +/* + * DMA ops for systems with IOC + * IOC hardware snoops all DMA traffic keeping the caches consistent with + * memory - eliding need for any explicit cache maintenance of DMA buffers + */ +static void __dma_cache_wback_inv_ioc(unsigned long start, unsigned long sz) {} +static void __dma_cache_inv_ioc(unsigned long start, unsigned long sz) {} +static void __dma_cache_wback_ioc(unsigned long start, unsigned long sz) {} + +/* + * Exported DMA API + */ +void dma_cache_wback_inv(unsigned long start, unsigned long sz) +{ + __dma_cache_wback_inv(start, sz); } EXPORT_SYMBOL(dma_cache_wback_inv); void dma_cache_inv(unsigned long start, unsigned long sz) { - __dc_line_op_k(start, sz, OP_INV); + __dma_cache_inv(start, sz); } EXPORT_SYMBOL(dma_cache_inv); void dma_cache_wback(unsigned long start, unsigned long sz) { - __dc_line_op_k(start, sz, OP_FLUSH); + __dma_cache_wback(start, sz); } EXPORT_SYMBOL(dma_cache_wback); @@ -589,14 +768,14 @@ EXPORT_SYMBOL(flush_icache_range); * builtin kernel page will not have any virtual mappings. * kprobe on loadable module will be kernel vaddr. 
*/ -void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) +void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len) { __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); __ic_line_inv_vaddr(paddr, vaddr, len); } /* wrapper to compile time eliminate alignment checks in flush loop */ -void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr) { __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); } @@ -605,7 +784,7 @@ void __inv_icache_page(unsigned long paddr, unsigned long vaddr) * wrapper to clearout kernel or userspace mappings of a page * For kernel mappings @vaddr == @paddr */ -void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) +void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr) { __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); } @@ -637,7 +816,7 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, u_vaddr &= PAGE_MASK; - ___flush_dcache_page(paddr, u_vaddr); + __flush_dcache_page(paddr, u_vaddr); if (vma->vm_flags & VM_EXEC) __inv_icache_page(paddr, u_vaddr); @@ -663,8 +842,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, void copy_user_highpage(struct page *to, struct page *from, unsigned long u_vaddr, struct vm_area_struct *vma) { - void *kfrom = page_address(from); - void *kto = page_address(to); + void *kfrom = kmap_atomic(from); + void *kto = kmap_atomic(to); int clean_src_k_mappings = 0; /* @@ -674,9 +853,12 @@ void copy_user_highpage(struct page *to, struct page *from, * * Note that while @u_vaddr refers to DST page's userspace vaddr, it is * equally valid for SRC page as well + * + * For !VIPT cache, all of this gets compiled out as + * addr_not_cache_congruent() is 0 */ if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { - __flush_dcache_page(kfrom, u_vaddr); + __flush_dcache_page((unsigned long)kfrom, u_vaddr); clean_src_k_mappings = 1; } @@ -697,11 +879,14 @@ void copy_user_highpage(struct page *to, struct page *from, * sync the kernel mapping back to physical page */ if (clean_src_k_mappings) { - __flush_dcache_page(kfrom, kfrom); + __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom); set_bit(PG_dc_clean, &from->flags); } else { clear_bit(PG_dc_clean, &from->flags); } + + kunmap_atomic(kto); + kunmap_atomic(kfrom); } void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) @@ -721,3 +906,93 @@ SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) flush_cache_all(); return 0; } + +void arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->ver) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + + /* + * In MMU v4 (HS38x) the alising icache config uses IVIL/PTAG + * pair to provide vaddr/paddr respectively, just as in MMU v3 + */ + if (is_isa_arcv2() && ic->alias) + _cache_line_loop_ic_fn = __cache_line_loop_v3; + else + _cache_line_loop_ic_fn = __cache_line_loop; + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = 
&cpuinfo_arc700[cpu].dcache; + + if (!dc->ver) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */ + if (is_isa_arcompact()) { + int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } + } + + if (is_isa_arcv2() && l2_line_sz && !slc_enable) { + + /* IM set : flush before invalidate */ + write_aux_reg(ARC_REG_SLC_CTRL, + read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_IM); + + write_aux_reg(ARC_REG_SLC_INVALIDATE, 1); + + /* Important to wait for flush to complete */ + while (read_aux_reg(ARC_REG_SLC_CTRL) & SLC_CTRL_BUSY); + write_aux_reg(ARC_REG_SLC_CTRL, + read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE); + } + + if (is_isa_arcv2() && ioc_exists) { + /* IO coherency base - 0x8z */ + write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000); + /* IO coherency aperture size - 512Mb: 0x8z-0xAz */ + write_aux_reg(ARC_REG_IO_COH_AP0_SIZE, 0x11); + /* Enable partial writes */ + write_aux_reg(ARC_REG_IO_COH_PARTIAL, 1); + /* Enable IO coherency */ + write_aux_reg(ARC_REG_IO_COH_ENABLE, 1); + + __dma_cache_wback_inv = __dma_cache_wback_inv_ioc; + __dma_cache_inv = __dma_cache_inv_ioc; + __dma_cache_wback = __dma_cache_wback_ioc; + } else if (is_isa_arcv2() && l2_line_sz && slc_enable) { + __dma_cache_wback_inv = __dma_cache_wback_inv_slc; + __dma_cache_inv = __dma_cache_inv_slc; + __dma_cache_wback = __dma_cache_wback_slc; + } else { + __dma_cache_wback_inv = __dma_cache_wback_inv_l1; + __dma_cache_inv = __dma_cache_inv_l1; + __dma_cache_wback = __dma_cache_wback_l1; + } +} diff --git a/kernel/arch/arc/mm/dma.c b/kernel/arch/arc/mm/dma.c index 12cc6485b..29a46bb19 100644 --- a/kernel/arch/arc/mm/dma.c +++ b/kernel/arch/arc/mm/dma.c @@ -14,13 +14,12 @@ * Cache bit off in the TLB entry. * * The default DMA address == Phy address which is 0x8000_0000 based. - * A platform/device can make it zero based, by over-riding - * plat_{dma,kernel}_addr_to_{kernel,dma} */ #include <linux/dma-mapping.h> #include <linux/dma-debug.h> #include <linux/export.h> +#include <asm/cache.h> #include <asm/cacheflush.h> /* @@ -37,7 +36,7 @@ void *dma_alloc_noncoherent(struct device *dev, size_t size, return NULL; /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + *dma_handle = (dma_addr_t)paddr; return paddr; } @@ -46,8 +45,7 @@ EXPORT_SYMBOL(dma_alloc_noncoherent); void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + free_pages_exact((void *)dma_handle, size); } EXPORT_SYMBOL(dma_free_noncoherent); @@ -56,6 +54,20 @@ void *dma_alloc_coherent(struct device *dev, size_t size, { void *paddr, *kvaddr; + /* + * IOC relies on all data (even coherent DMA data) being in cache + * Thus allocate normal cached memory + * + * The gains with IOC are two pronged: + * -For streaming data, elides needs for cache maintenance, saving + * cycles in flush code, and bus bandwidth as all the lines of a + * buffer need to be flushed out to memory + * -For coherent data, Read/Write to buffers terminate early in cache + * (vs. 
always going to memory - thus are faster) + */ + if (is_isa_arcv2() && ioc_exists) + return dma_alloc_noncoherent(dev, size, dma_handle, gfp); + /* This is linear addr (0x8000_0000 based) */ paddr = alloc_pages_exact(size, gfp); if (!paddr) @@ -63,11 +75,23 @@ void *dma_alloc_coherent(struct device *dev, size_t size, /* This is kernel Virtual address (0x7000_0000 based) */ kvaddr = ioremap_nocache((unsigned long)paddr, size); - if (kvaddr != NULL) - memset(kvaddr, 0, size); + if (kvaddr == NULL) + return NULL; /* This is bus address, platform dependent */ - *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + *dma_handle = (dma_addr_t)paddr; + + /* + * Evict any existing L1 and/or L2 lines for the backing page + * in case it was used earlier as a normal "cached" page. + * Yeah this bit us - STAR 9000898266 + * + * Although core does call flush_cache_vmap(), it gets kvaddr hence + * can't be used to efficiently flush L1 and/or L2 which need paddr + * Currently flush_cache_vmap nukes the L1 cache completely which + * will be optimized as a separate commit + */ + dma_cache_wback_inv((unsigned long)paddr, size); return kvaddr; } @@ -76,10 +100,12 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *kvaddr, dma_addr_t dma_handle) { + if (is_isa_arcv2() && ioc_exists) + return dma_free_noncoherent(dev, size, kvaddr, dma_handle); + iounmap((void __force __iomem *)kvaddr); - free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), - size); + free_pages_exact((void *)dma_handle, size); } EXPORT_SYMBOL(dma_free_coherent); diff --git a/kernel/arch/arc/mm/fault.c b/kernel/arch/arc/mm/fault.c index d948e4e9d..af63f4a13 100644 --- a/kernel/arch/arc/mm/fault.c +++ b/kernel/arch/arc/mm/fault.c @@ -18,7 +18,14 @@ #include <asm/pgalloc.h> #include <asm/mmu.h> -static int handle_vmalloc_fault(unsigned long address) +/* + * kernel virtual address is required to implement vmalloc/pkmap/fixmap + * Refer to asm/processor.h for System Memory Map + * + * It simply copies the PMD entry (pointer to 2nd level page table or hugepage) + * from swapper pgdir to task pgdir. The 2nd level table/page is thus shared + */ +noinline static int handle_kernel_vaddr_fault(unsigned long address) { /* * Synchronize this task's top level page-table @@ -72,8 +79,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) * only copy the information from the master page table, * nothing more. */ - if (address >= VMALLOC_START && address <= VMALLOC_END) { - ret = handle_vmalloc_fault(address); + if (address >= VMALLOC_START) { + ret = handle_kernel_vaddr_fault(address); if (unlikely(ret)) goto bad_area_nosemaphore; else diff --git a/kernel/arch/arc/mm/highmem.c b/kernel/arch/arc/mm/highmem.c new file mode 100644 index 000000000..92dd92cad --- /dev/null +++ b/kernel/arch/arc/mm/highmem.c @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include <linux/bootmem.h> +#include <linux/export.h> +#include <linux/highmem.h> +#include <asm/processor.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/tlbflush.h> + +/* + * HIGHMEM API: + * + * kmap() API provides sleep semantics hence refered to as "permanent maps" + * It allows mapping LAST_PKMAP pages, using @last_pkmap_nr as the cursor + * for book-keeping + * + * kmap_atomic() can't sleep (calls pagefault_disable()), thus it provides + * shortlived ala "temporary mappings" which historically were implemented as + * fixmaps (compile time addr etc). Their book-keeping is done per cpu. + * + * Both these facts combined (preemption disabled and per-cpu allocation) + * means the total number of concurrent fixmaps will be limited to max + * such allocations in a single control path. Thus KM_TYPE_NR (another + * historic relic) is a small'ish number which caps max percpu fixmaps + * + * ARC HIGHMEM Details + * + * - the kernel vaddr space from 0x7z to 0x8z (currently used by vmalloc/module) + * is now shared between vmalloc and kmap (non overlapping though) + * + * - Both fixmap/pkmap use a dedicated page table each, hooked up to swapper PGD + * This means each only has 1 PGDIR_SIZE worth of kvaddr mappings, which means + * 2M of kvaddr space for typical config (8K page and 11:8:13 traversal split) + * + * - fixmap anyhow needs a limited number of mappings. So 2M kvaddr == 256 PTE + * slots across NR_CPUS would be more than sufficient (generic code defines + * KM_TYPE_NR as 20). + * + * - pkmap being preemptible, in theory could do with more than 256 concurrent + * mappings. However, generic pkmap code: map_new_virtual(), doesn't traverse + * the PGD and only works with a single page table @pkmap_page_table, hence + * sets the limit + */ + +extern pte_t * pkmap_page_table; +static pte_t * fixmap_page_table; + +void *kmap(struct page *page) +{ + BUG_ON(in_interrupt()); + if (!PageHighMem(page)) + return page_address(page); + + return kmap_high(page); +} + +void *kmap_atomic(struct page *page) +{ + int idx, cpu_idx; + unsigned long vaddr; + + preempt_disable(); + pagefault_disable(); + if (!PageHighMem(page)) + return page_address(page); + + cpu_idx = kmap_atomic_idx_push(); + idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + vaddr = FIXMAP_ADDR(idx); + + set_pte_at(&init_mm, vaddr, fixmap_page_table + idx, + mk_pte(page, kmap_prot)); + + return (void *)vaddr; +} +EXPORT_SYMBOL(kmap_atomic); + +void __kunmap_atomic(void *kv) +{ + unsigned long kvaddr = (unsigned long)kv; + + if (kvaddr >= FIXMAP_BASE && kvaddr < (FIXMAP_BASE + FIXMAP_SIZE)) { + + /* + * Because preemption is disabled, this vaddr can be associated + * with the current allocated index. + * But in case of multiple live kmap_atomic(), it still relies on + * callers to unmap in right order. 
+ */ + int cpu_idx = kmap_atomic_idx(); + int idx = cpu_idx + KM_TYPE_NR * smp_processor_id(); + + WARN_ON(kvaddr != FIXMAP_ADDR(idx)); + + pte_clear(&init_mm, kvaddr, fixmap_page_table + idx); + local_flush_tlb_kernel_range(kvaddr, kvaddr + PAGE_SIZE); + + kmap_atomic_idx_pop(); + } + + pagefault_enable(); + preempt_enable(); +} +EXPORT_SYMBOL(__kunmap_atomic); + +static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) +{ + pgd_t *pgd_k; + pud_t *pud_k; + pmd_t *pmd_k; + pte_t *pte_k; + + pgd_k = pgd_offset_k(kvaddr); + pud_k = pud_offset(pgd_k, kvaddr); + pmd_k = pmd_offset(pud_k, kvaddr); + + pte_k = (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pmd_k, pte_k); + return pte_k; +} + +void __init kmap_init(void) +{ + /* Due to recursive include hell, we can't do this in processor.h */ + BUILD_BUG_ON(PAGE_OFFSET < (VMALLOC_END + FIXMAP_SIZE + PKMAP_SIZE)); + + BUILD_BUG_ON(KM_TYPE_NR > PTRS_PER_PTE); + pkmap_page_table = alloc_kmap_pgtable(PKMAP_BASE); + + BUILD_BUG_ON(LAST_PKMAP > PTRS_PER_PTE); + fixmap_page_table = alloc_kmap_pgtable(FIXMAP_BASE); +} diff --git a/kernel/arch/arc/mm/init.c b/kernel/arch/arc/mm/init.c index d44eedd8c..7d2c4fbf4 100644 --- a/kernel/arch/arc/mm/init.c +++ b/kernel/arch/arc/mm/init.c @@ -15,6 +15,7 @@ #endif #include <linux/swap.h> #include <linux/module.h> +#include <linux/highmem.h> #include <asm/page.h> #include <asm/pgalloc.h> #include <asm/sections.h> @@ -24,16 +25,22 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); EXPORT_SYMBOL(empty_zero_page); -/* Default tot mem from .config */ -static unsigned long arc_mem_sz = 0x20000000; /* some default */ +static const unsigned long low_mem_start = CONFIG_LINUX_LINK_BASE; +static unsigned long low_mem_sz; + +#ifdef CONFIG_HIGHMEM +static unsigned long min_high_pfn; +static u64 high_mem_start; +static u64 high_mem_sz; +#endif /* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ static int __init setup_mem_sz(char *str) { - arc_mem_sz = memparse(str, NULL) & PAGE_MASK; + low_mem_sz = memparse(str, NULL) & PAGE_MASK; /* early console might not be setup yet - it will show up later */ - pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz)); + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(low_mem_sz)); return 0; } @@ -41,8 +48,24 @@ early_param("mem", setup_mem_sz); void __init early_init_dt_add_memory_arch(u64 base, u64 size) { - arc_mem_sz = size & PAGE_MASK; - pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); + int in_use = 0; + + if (!low_mem_sz) { + if (base != low_mem_start) + panic("CONFIG_LINUX_LINK_BASE != DT memory { }"); + + low_mem_sz = size; + in_use = 1; + } else { +#ifdef CONFIG_HIGHMEM + high_mem_start = base; + high_mem_sz = size; + in_use = 1; +#endif + } + + pr_info("Memory @ %llx [%lldM] %s\n", + base, TO_MB(size), !in_use ? "Not used":""); } #ifdef CONFIG_BLK_DEV_INITRD @@ -72,46 +95,62 @@ early_param("initrd", early_initrd); void __init setup_arch_memory(void) { unsigned long zones_size[MAX_NR_ZONES]; - unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; + unsigned long zones_holes[MAX_NR_ZONES]; init_mm.start_code = (unsigned long)_text; init_mm.end_code = (unsigned long)_etext; init_mm.end_data = (unsigned long)_edata; init_mm.brk = (unsigned long)_end; - /* - * We do it here, so that memory is correctly instantiated - * even if "mem=xxx" cmline over-ride is given and/or - * DT has memory node. 
Each causes an update to @arc_mem_sz - * and we finally add memory one here - */ - memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz); - - /*------------- externs in mm need setting up ---------------*/ - /* first page of system - kernel .vector starts here */ min_low_pfn = ARCH_PFN_OFFSET; - /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ - max_low_pfn = max_pfn = PFN_DOWN(end_mem); + /* Last usable page of low mem */ + max_low_pfn = max_pfn = PFN_DOWN(low_mem_start + low_mem_sz); - max_mapnr = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + min_high_pfn = PFN_DOWN(high_mem_start); + max_pfn = PFN_DOWN(high_mem_start + high_mem_sz); +#endif + + max_mapnr = max_pfn - min_low_pfn; - /*------------- reserve kernel image -----------------------*/ - memblock_reserve(CONFIG_LINUX_LINK_BASE, - __pa(_end) - CONFIG_LINUX_LINK_BASE); + /*------------- bootmem allocator setup -----------------------*/ + + /* + * seed the bootmem allocator after any DT memory node parsing or + * "mem=xxx" cmdline overrides have potentially updated @arc_mem_sz + * + * Only low mem is added, otherwise we have crashes when allocating + * mem_map[] itself. NO_BOOTMEM allocates mem_map[] at the end of + * avail memory, ending in highmem with a > 32-bit address. However + * it then tries to memset it with a truncaed 32-bit handle, causing + * the crash + */ + + memblock_add(low_mem_start, low_mem_sz); + memblock_reserve(low_mem_start, __pa(_end) - low_mem_start); #ifdef CONFIG_BLK_DEV_INITRD - /*------------- reserve initrd image -----------------------*/ if (initrd_start) memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); #endif memblock_dump_all(); - /*-------------- node setup --------------------------------*/ + /*----------------- node/zones setup --------------------------*/ memset(zones_size, 0, sizeof(zones_size)); - zones_size[ZONE_NORMAL] = max_mapnr; + memset(zones_holes, 0, sizeof(zones_holes)); + + zones_size[ZONE_NORMAL] = max_low_pfn - min_low_pfn; + zones_holes[ZONE_NORMAL] = 0; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = max_pfn - max_low_pfn; + + /* This handles the peripheral address space hole */ + zones_holes[ZONE_HIGHMEM] = min_high_pfn - max_low_pfn; +#endif /* * We can't use the helper free_area_init(zones[]) because it uses @@ -122,9 +161,12 @@ void __init setup_arch_memory(void) free_area_init_node(0, /* node-id */ zones_size, /* num pages per zone */ min_low_pfn, /* first pfn of node */ - NULL); /* NO holes */ + zones_holes); /* holes */ - high_memory = (void *)end_mem; +#ifdef CONFIG_HIGHMEM + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + kmap_init(); +#endif } /* @@ -135,6 +177,14 @@ void __init setup_arch_memory(void) */ void __init mem_init(void) { +#ifdef CONFIG_HIGHMEM + unsigned long tmp; + + reset_all_zones_managed_pages(); + for (tmp = min_high_pfn; tmp < max_pfn; tmp++) + free_highmem_page(pfn_to_page(tmp)); +#endif + free_all_bootmem(); mem_init_print_info(NULL); } diff --git a/kernel/arch/arc/mm/tlb.c b/kernel/arch/arc/mm/tlb.c index 7f47d2a56..daf2bf52b 100644 --- a/kernel/arch/arc/mm/tlb.c +++ b/kernel/arch/arc/mm/tlb.c @@ -109,10 +109,16 @@ DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; static inline void __tlb_entry_erase(void) { write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#if (CONFIG_ARC_MMU_VER < 4) + static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) { 
unsigned int idx; @@ -180,7 +186,7 @@ static void utlb_invalidate(void) } -static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) { unsigned int idx; @@ -210,28 +216,71 @@ static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } +#else /* CONFIG_ARC_MMU_VER >= 4) */ + +static void utlb_invalidate(void) +{ + /* No need since uTLB is always in sync with JTLB */ +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid | _PAGE_PRESENT); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBDeleteEntry); +} + +static void tlb_entry_insert(unsigned int pd0, pte_t pd1) +{ + write_aux_reg(ARC_REG_TLBPD0, pd0); + write_aux_reg(ARC_REG_TLBPD1, pd1); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, (u64)pd1 >> 32); + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBInsertEntry); +} + +#endif + /* * Un-conditionally (without lookup) erase the entire MMU contents */ noinline void local_flush_tlb_all(void) { + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; unsigned long flags; unsigned int entry; - struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + int num_tlb = mmu->sets * mmu->ways; local_irq_save(flags); /* Load PD0 and PD1 with template for a Blank Entry */ write_aux_reg(ARC_REG_TLBPD1, 0); + + if (is_pae40_enabled()) + write_aux_reg(ARC_REG_TLBPD1HI, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); - for (entry = 0; entry < mmu->num_tlb; entry++) { + for (entry = 0; entry < num_tlb; entry++) { /* write this entry to the TLB */ write_aux_reg(ARC_REG_TLBINDEX, entry); write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); } + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { + const int stlb_idx = 0x800; + + /* Blank sTLB entry */ + write_aux_reg(ARC_REG_TLBPD0, _PAGE_HW_SZ); + + for (entry = stlb_idx; entry < stlb_idx + 16; entry++) { + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + } + utlb_invalidate(); local_irq_restore(flags); @@ -385,6 +434,15 @@ static inline void ipi_flush_tlb_range(void *arg) local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void ipi_flush_pmd_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_pmd_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} +#endif + static inline void ipi_flush_tlb_kernel_range(void *arg) { struct tlb_args *ta = (struct tlb_args *)arg; @@ -425,6 +483,20 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_pmd_tlb_range, &ta, 1); +} +#endif + void flush_tlb_kernel_range(unsigned long start, unsigned long end) { struct tlb_args ta = { @@ -439,11 +511,12 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) /* * Routine to create a TLB entry */ -void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep) { unsigned long flags; unsigned int asid_or_sasid, rwx; - unsigned long pd0, pd1; + unsigned long pd0; + pte_t pd1; /* * create_tlb() assumes that current->mm == 
vma->mm, since @@ -475,9 +548,9 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) local_irq_save(flags); - tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), vaddr); - address &= PAGE_MASK; + vaddr &= PAGE_MASK; /* update this PTE credentials */ pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); @@ -487,7 +560,7 @@ void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) /* ASID for this task */ asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; - pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + pd0 = vaddr | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); /* * ARC MMU provides fully orthogonal access bits for K/U mode, @@ -523,7 +596,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); @@ -546,16 +619,105 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); if (dirty) { - /* wback + inv dcache lines */ + /* wback + inv dcache lines (K-mapping) */ __flush_dcache_page(paddr, paddr); - /* invalidate any existing icache lines */ + /* invalidate any existing icache lines (U-mapping) */ if (vma->vm_flags & VM_EXEC) __inv_icache_page(paddr, vaddr); } } } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +/* + * MMUv4 in HS38x cores supports Super Pages which are basis for Linux THP + * support. + * + * Normal and Super pages can co-exist (ofcourse not overlap) in TLB with a + * new bit "SZ" in TLB page desciptor to distinguish between them. + * Super Page size is configurable in hardware (4K to 16M), but fixed once + * RTL builds. + * + * The exact THP size a Linx configuration will support is a function of: + * - MMU page size (typical 8K, RTL fixed) + * - software page walker address split between PGD:PTE:PFN (typical + * 11:8:13, but can be changed with 1 line) + * So for above default, THP size supported is 8K * (2^8) = 2M + * + * Default Page Walker is 2 levels, PGD:PTE:PFN, which in THP regime + * reduces to 1 level (as PTE is folded into PGD and canonically referred + * to as PMD). 
+ * Thus THP PMD accessors are implemented in terms of PTE (just like sparc) + */ + +void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd) +{ + pte_t pte = __pte(pmd_val(*pmd)); + update_mmu_cache(vma, addr, &pte); +} + +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable) +{ + struct list_head *lh = (struct list_head *) pgtable; + + assert_spin_locked(&mm->page_table_lock); + + /* FIFO */ + if (!pmd_huge_pte(mm, pmdp)) + INIT_LIST_HEAD(lh); + else + list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp)); + pmd_huge_pte(mm, pmdp) = pgtable; +} + +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) +{ + struct list_head *lh; + pgtable_t pgtable; + + assert_spin_locked(&mm->page_table_lock); + + pgtable = pmd_huge_pte(mm, pmdp); + lh = (struct list_head *) pgtable; + if (list_empty(lh)) + pmd_huge_pte(mm, pmdp) = NULL; + else { + pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next; + list_del(lh); + } + + pte_val(pgtable[0]) = 0; + pte_val(pgtable[1]) = 0; + + return pgtable; +} + +void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + unsigned int cpu; + unsigned long flags; + + local_irq_save(flags); + + cpu = smp_processor_id(); + + if (likely(asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID)) { + unsigned int asid = hw_pid(vma->vm_mm, cpu); + + /* No need to loop here: this will always be for 1 Huge Page */ + tlb_entry_erase(start | _PAGE_HW_SZ | asid); + } + + local_irq_restore(flags); +} + +#endif + /* Read the Cache Build Confuration Registers, Decode them and save into * the cpuinfo structure for later use. * No Validation is done here, simply read/convert the BCRs @@ -574,47 +736,73 @@ void read_decode_mmu_bcr(void) struct bcr_mmu_3 { #ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4, u_itlb:4, u_dtlb:4; #else - unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4, ways:4, ver:8; #endif } *mmu3; + struct bcr_mmu_4 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1, + n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3; +#else + /* DTLB ITLB JES JE JA */ + unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2, + pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8; +#endif + } *mmu4; + tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); if (mmu->ver <= 2) { mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz = PAGE_SIZE; + mmu->pg_sz_k = TO_KB(0x2000); mmu->sets = 1 << mmu2->sets; mmu->ways = 1 << mmu2->ways; mmu->u_dtlb = mmu2->u_dtlb; mmu->u_itlb = mmu2->u_itlb; - } else { + } else if (mmu->ver == 3) { mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz = 512 << mmu3->pg_sz; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); mmu->sets = 1 << mmu3->sets; mmu->ways = 1 << mmu3->ways; mmu->u_dtlb = mmu3->u_dtlb; mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; + } else { + mmu4 = (struct bcr_mmu_4 *)&tmp; + mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); + mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11); + mmu->sets = 64 << mmu4->n_entry; + mmu->ways = mmu4->n_ways * 2; + mmu->u_dtlb = mmu4->u_dtlb * 4; + mmu->u_itlb = mmu4->u_itlb * 4; + mmu->sasid = mmu4->sasid; + mmu->pae = mmu4->pae; } - - mmu->num_tlb = mmu->sets * mmu->ways; } char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) { int n = 0; struct cpuinfo_arc_mmu *p_mmu = 
&cpuinfo_arc700[cpu_id].mmu; + char super_pg[64] = ""; + + if (p_mmu->s_pg_sz_m) + scnprintf(super_pg, 64, "%dM Super Page%s, ", + p_mmu->s_pg_sz_m, + IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", - p_mmu->ver, TO_KB(p_mmu->pg_sz), - p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", + p_mmu->ver, p_mmu->pg_sz_k, super_pg, + p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); + IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } @@ -639,9 +827,17 @@ void arc_mmu_init(void) mmu->ver, CONFIG_ARC_MMU_VER); } - if (mmu->pg_sz != PAGE_SIZE) + if (mmu->pg_sz_k != TO_KB(PAGE_SIZE)) panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && + mmu->s_pg_sz_m != TO_MB(HPAGE_PMD_SIZE)) + panic("MMU Super pg size != Linux HPAGE_PMD_SIZE (%luM)\n", + (unsigned long)TO_MB(HPAGE_PMD_SIZE)); + + if (IS_ENABLED(CONFIG_ARC_HAS_PAE40) && !mmu->pae) + panic("Hardware doesn't support PAE40\n"); + /* Enable the MMU */ write_aux_reg(ARC_REG_PID, MMU_ENABLE); @@ -677,15 +873,15 @@ void arc_mmu_init(void) * the duplicate one. * -Knob to be verbose abt it.(TODO: hook them up to debugfs) */ -volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ +volatile int dup_pd_silent; /* Be slient abt it or complain (default) */ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, struct pt_regs *regs) { - int set, way, n; - unsigned long flags, is_valid; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; - unsigned int pd0[mmu->ways], pd1[mmu->ways]; + unsigned int pd0[mmu->ways]; + unsigned long flags; + int set; local_irq_save(flags); @@ -695,14 +891,16 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* loop thru all sets of TLB */ for (set = 0; set < mmu->sets; set++) { + int is_valid, way; + /* read out all the ways of current set */ for (way = 0, is_valid = 0; way < mmu->ways; way++) { write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); pd0[way] = read_aux_reg(ARC_REG_TLBPD0); - pd1[way] = read_aux_reg(ARC_REG_TLBPD1); is_valid |= pd0[way] & _PAGE_PRESENT; + pd0[way] &= PAGE_MASK; } /* If all the WAYS in SET are empty, skip to next SET */ @@ -711,30 +909,28 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address, /* Scan the set for duplicate ways: needs a nested loop */ for (way = 0; way < mmu->ways - 1; way++) { + + int n; + if (!pd0[way]) continue; for (n = way + 1; n < mmu->ways; n++) { - if ((pd0[way] & PAGE_MASK) == - (pd0[n] & PAGE_MASK)) { - - if (dup_pd_verbose) { - pr_info("Duplicate PD's @" - "[%d:%d]/[%d:%d]\n", - set, way, set, n); - pr_info("TLBPD0[%u]: %08x\n", - way, pd0[way]); - } - - /* - * clear entry @way and not @n. This is - * critical to our optimised loop - */ - pd0[way] = pd1[way] = 0; - write_aux_reg(ARC_REG_TLBINDEX, + if (pd0[way] != pd0[n]) + continue; + + if (!dup_pd_silent) + pr_info("Dup TLB PD0 %08x @ set %d ways %d,%d\n", + pd0[way], set, way, n); + + /* + * clear entry @way and not @n. 
+ * This is critical to our optimised loop + */ + pd0[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, SET_WAY_TO_IDX(mmu, set, way)); - __tlb_entry_erase(); - } + __tlb_entry_erase(); } } } diff --git a/kernel/arch/arc/mm/tlbex.S b/kernel/arch/arc/mm/tlbex.S index d572f1c2c..f1967eeb3 100644 --- a/kernel/arch/arc/mm/tlbex.S +++ b/kernel/arch/arc/mm/tlbex.S @@ -35,8 +35,6 @@ * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 */ - .cpu A7 - #include <linux/linkage.h> #include <asm/entry.h> #include <asm/mmu.h> @@ -46,6 +44,7 @@ #include <asm/processor.h> #include <asm/tlb-mmu1.h> +#ifdef CONFIG_ISA_ARCOMPACT ;----------------------------------------------------------------- ; ARC700 Exception Handling doesn't auto-switch stack and it only provides ; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" @@ -89,7 +88,7 @@ ex_saved_reg1: #ifdef CONFIG_SMP sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with GET_CPU_ID r0 ; get to per cpu scratch mem, - lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + asl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu add r0, @ex_saved_reg1, r0 #else st r0, [@ex_saved_reg1] @@ -108,7 +107,7 @@ ex_saved_reg1: .macro TLBMISS_RESTORE_REGS #ifdef CONFIG_SMP GET_CPU_ID r0 ; get to per cpu scratch mem - lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + asl r0, r0, L1_CACHE_SHIFT ; each is cache line wide add r0, @ex_saved_reg1, r0 ld_s r3, [r0,12] ld_s r2, [r0, 8] @@ -123,6 +122,24 @@ ex_saved_reg1: #endif .endm +#else /* ARCv2 */ + +.macro TLBMISS_FREEUP_REGS + PUSH r0 + PUSH r1 + PUSH r2 + PUSH r3 +.endm + +.macro TLBMISS_RESTORE_REGS + POP r3 + POP r2 + POP r1 + POP r0 +.endm + +#endif + ;============================================================================ ; Troubleshooting Stuff ;============================================================================ @@ -188,20 +205,38 @@ ex_saved_reg1: #endif lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD - ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr - and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags - ; contains Ptr to Page Table - bz.d do_slow_path_pf ; if no Page Table, do page fault + ld.as r3, [r1, r0] ; PGD entry corresp to faulting addr + tst r3, r3 + bz do_slow_path_pf ; if no Page Table, do page fault + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + and.f 0, r3, _PAGE_HW_SZ ; Is this Huge PMD (thp) + add2.nz r1, r1, r0 + bnz.d 2f ; YES: PGD == PMD has THP PTE: stop pgd walk + mov.nz r0, r3 + +#endif + and r1, r3, PAGE_MASK ; Get the PTE entry: The idea is ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index - ; (3) z = pgtbl[y] - ; To avoid the multiply by in end, we do the -2, <<2 below + ; (3) z = (pgtbl + y * 4) + +#ifdef CONFIG_ARC_HAS_PAE40 +#define PTE_SIZE_LOG 3 /* 8 == 2 ^ 3 */ +#else +#define PTE_SIZE_LOG 2 /* 4 == 2 ^ 2 */ +#endif + + ; multiply in step (3) above avoided by shifting lesser in step (1) + lsr r0, r2, ( PAGE_SHIFT - PTE_SIZE_LOG ) + and r0, r0, ( (PTRS_PER_PTE - 1) << PTE_SIZE_LOG ) + ld.aw r0, [r1, r0] ; r0: PTE (lower word only for PAE40) + ; r1: PTE ptr + +2: - lsr r0, r2, (PAGE_SHIFT - 2) - and r0, r0, ( (PTRS_PER_PTE - 1) << 2) - ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr #ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT and.f 0, r0, _PAGE_PRESENT bz 1f @@ -216,18 +251,23 @@ ex_saved_reg1: ;----------------------------------------------------------------- ; Convert Linux PTE entry into TLB entry ; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; (for PAE40, 
two-words PTE, while three-word TLB Entry [PD0:PD1:PD1HI]) ; IN: r0 = PTE, r1 = ptr to PTE .macro CONV_PTE_TO_TLB - and r3, r0, PTE_BITS_RWX ; r w x - lsl r2, r3, 3 ; r w x 0 0 0 (GLOBAL, kernel only) + and r3, r0, PTE_BITS_RWX ; r w x + asl r2, r3, 3 ; Kr Kw Kx 0 0 0 (GLOBAL, kernel only) and.f 0, r0, _PAGE_GLOBAL - or.z r2, r2, r3 ; r w x r w x (!GLOBAL, user page) + or.z r2, r2, r3 ; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page) and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE or r3, r3, r2 - sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + sr r3, [ARC_REG_TLBPD1] ; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C +#ifdef CONFIG_ARC_HAS_PAE40 + ld r3, [r1, 4] ; paddr[39..32] + sr r3, [ARC_REG_TLBPD1HI] +#endif and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb @@ -241,6 +281,7 @@ ex_saved_reg1: ; Commit the TLB entry into MMU .macro COMMIT_ENTRY_TO_MMU +#if (CONFIG_ARC_MMU_VER < 4) /* Get free TLB slot: Set = computed from vaddr, way = random */ sr TLBGetIndex, [ARC_REG_TLBCOMMAND] @@ -251,6 +292,10 @@ ex_saved_reg1: #else sr TLBWrite, [ARC_REG_TLBCOMMAND] #endif + +#else + sr TLBInsertEntry, [ARC_REG_TLBCOMMAND] +#endif .endm @@ -291,6 +336,7 @@ ENTRY(EV_TLBMissI) CONV_PTE_TO_TLB COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissI_fast_ret: ; additional label for VDK OS-kit instrumentation rtie END(EV_TLBMissI) @@ -342,7 +388,7 @@ ENTRY(EV_TLBMissD) lr r3, [ecr] or r0, r0, _PAGE_ACCESSED ; Accessed bit always btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? - or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + or.nz r0, r0, _PAGE_DIRTY ; if Write, set Dirty bit as well st_s r0, [r1] ; Write back PTE CONV_PTE_TO_TLB @@ -356,6 +402,7 @@ ENTRY(EV_TLBMissD) COMMIT_ENTRY_TO_MMU TLBMISS_RESTORE_REGS +EV_TLBMissD_fast_ret: ; additional label for VDK OS-kit instrumentation rtie ;-------- Common routine to call Linux Page Fault Handler ----------- @@ -366,19 +413,5 @@ do_slow_path_pf: ; Slow path TLB Miss handled as a regular ARC Exception ; (stack switching / save the complete reg-file). - EXCEPTION_PROLOGUE - - ; ------- setup args for Linux Page fault Hanlder --------- - mov_s r1, sp - lr r0, [efa] - - ; We don't want exceptions to be disabled while the fault is handled. - ; Now that we have saved the context we return from exception hence - ; exceptions get re-enable - - FAKE_RET_FROM_EXCPN r9 - - bl do_page_fault - b ret_from_exception - + b call_do_page_fault END(EV_TLBMissD) |
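
Editor's note: the read_decode_mmu_bcr() hunk above derives the MMUv4 geometry purely from BCR bitfields (pg_sz_k = 1 << (sz0 - 1), s_pg_sz_m = 1 << (sz1 - 11), sets = 64 << n_entry, ways = n_ways * 2, and the uTLB counts scaled by 4). Below is a minimal user-space C sketch of that arithmetic, using made-up sample field values rather than a real ARC_REG_MMU_BCR read.

/* Illustration only: assumed sample MMUv4 BCR field values, not hardware. */
#include <stdio.h>

int main(void)
{
	/* hypothetical field values for an HS38-class build */
	unsigned sz0 = 4, sz1 = 12, n_entry = 2, n_ways = 2;
	unsigned u_dtlb = 2, u_itlb = 1;

	unsigned pg_sz_k   = 1 << (sz0 - 1);	/* 8K normal page */
	unsigned s_pg_sz_m = 1 << (sz1 - 11);	/* 2M super page  */
	unsigned sets      = 64 << n_entry;	/* 256 sets       */
	unsigned ways      = n_ways * 2;	/* 4 ways         */

	printf("JTLB %u (%ux%u), uDTLB %u, uITLB %u, page %uK + %uM super\n",
	       sets * ways, sets, ways, u_dtlb * 4, u_itlb * 4,
	       pg_sz_k, s_pg_sz_m);
	return 0;
}

With these sample values the super page comes out at 2M, consistent with the THP comment earlier in the hunk (8K page x 2^8 PTEs per table).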
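
The page-walk rewrite in the EV_TLBMiss fast path avoids an explicit "index * sizeof(pte)" multiply by shifting the faulting address right by (PAGE_SHIFT - PTE_SIZE_LOG) and widening the mask accordingly. The following stand-alone C check of that equivalence assumes the defaults described in the hunk (8K pages, 11:8:13 split, 4-byte PTE without PAE40).

/* Sketch, not kernel code: both forms yield the same byte offset into the PTE table. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	13		/* assumed 8K pages */
#define PTRS_PER_PTE	256		/* assumed 11:8:13 address split */
#define PTE_SIZE_LOG	2		/* 4-byte PTE, i.e. no PAE40 */

static uintptr_t pte_off_naive(uintptr_t vaddr)
{
	uintptr_t idx = (vaddr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
	return idx << PTE_SIZE_LOG;	/* index * sizeof(pte) */
}

static uintptr_t pte_off_fastpath(uintptr_t vaddr)
{
	/* lesser right shift + wider mask: the multiply is already folded in */
	return (vaddr >> (PAGE_SHIFT - PTE_SIZE_LOG)) &
	       ((PTRS_PER_PTE - 1) << PTE_SIZE_LOG);
}

int main(void)
{
	for (uintptr_t v = 0; v < (1u << 24); v += 0x1357)
		assert(pte_off_naive(v) == pte_off_fastpath(v));
	printf("offsets match\n");
	return 0;
}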
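
CONV_PTE_TO_TLB expands the single r/w/x triple carried in the PTE into separate kernel and user triples for PD1: the kernel copy is the triple shifted left by 3, and the user copy is OR-ed in only when the page is not _PAGE_GLOBAL. A hedged C sketch of just that bit manipulation follows; the bit positions are assumptions for illustration, not the real ARC encodings.

/* Sketch of the permission expansion only; flag positions are assumed. */
#include <stdio.h>

#define PTE_BITS_RWX	0x7	/* assumed: x = bit0, w = bit1, r = bit2 */
#define PTE_GLOBAL	0x100	/* assumed position of the Global bit    */

static unsigned conv_rwx(unsigned pte)
{
	unsigned rwx  = pte & PTE_BITS_RWX;
	unsigned perm = rwx << 3;	/* Kr Kw Kx 0 0 0: kernel only */

	if (!(pte & PTE_GLOBAL))
		perm |= rwx;		/* Kr Kw Kx Ur Uw Ux: user page */

	return perm;
}

int main(void)
{
	printf("global r-x -> %02x\n", conv_rwx(PTE_GLOBAL | 0x5));
	printf("user   rw- -> %02x\n", conv_rwx(0x6));
	return 0;
}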
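
For PAE40 the same macro additionally loads paddr[39..32] from the second PTE word and programs it into TLBPD1HI. A minimal sketch of the address split only (permission/cache bits omitted, sample address made up):

/* Illustration: splitting a 40-bit physical address for PD1/PD1HI. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t paddr = 0x234567e000ull;	/* made-up 40-bit-capable paddr, 8K aligned */

	uint32_t pd1   = (uint32_t)paddr;	/* paddr[31..0]; the real PD1 also carries perm/cache bits */
	uint32_t pd1hi = paddr >> 32;		/* paddr[39..32] */

	printf("PD1 %08" PRIx32 "  PD1HI %02" PRIx32 "\n", pd1, pd1hi);
	return 0;
}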