| author | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 12:17:53 -0700 |
|---|---|---|
| committer | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 15:44:42 -0700 |
| commit | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 | |
| tree | 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/arch/arc/mm | |
| parent | 98260f3884f4a202f9ca5eabed40b1354c489b29 | |
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base.
It is taken from the linux-4.1.y-rt branch of git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git,
and the base commit is:
commit 0917f823c59692d751951bf5ea699a2d1e2f26a2
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat Jul 25 12:13:34 2015 +0200
Prepare v4.1.3-rt3
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Importing this way loses all the git history, which is not ideal. We
should switch to another OPNFV project repo in the future.
Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/arch/arc/mm')
-rw-r--r-- | kernel/arch/arc/mm/Makefile | 10 |
-rw-r--r-- | kernel/arch/arc/mm/cache_arc700.c | 723 |
-rw-r--r-- | kernel/arch/arc/mm/dma.c | 94 |
-rw-r--r-- | kernel/arch/arc/mm/extable.c | 63 |
-rw-r--r-- | kernel/arch/arc/mm/fault.c | 235 |
-rw-r--r-- | kernel/arch/arc/mm/init.c | 155 |
-rw-r--r-- | kernel/arch/arc/mm/ioremap.c | 91 |
-rw-r--r-- | kernel/arch/arc/mm/mmap.c | 78 |
-rw-r--r-- | kernel/arch/arc/mm/tlb.c | 780 |
-rw-r--r-- | kernel/arch/arc/mm/tlbex.S | 384 |
10 files changed, 2613 insertions, 0 deletions
diff --git a/kernel/arch/arc/mm/Makefile b/kernel/arch/arc/mm/Makefile new file mode 100644 index 000000000..ac95cc239 --- /dev/null +++ b/kernel/arch/arc/mm/Makefile @@ -0,0 +1,10 @@ +# +# Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. +# + +obj-y := extable.o ioremap.o dma.o fault.o init.o +obj-y += tlb.o tlbex.o cache_arc700.o mmap.o diff --git a/kernel/arch/arc/mm/cache_arc700.c b/kernel/arch/arc/mm/cache_arc700.c new file mode 100644 index 000000000..12b2100db --- /dev/null +++ b/kernel/arch/arc/mm/cache_arc700.c @@ -0,0 +1,723 @@ +/* + * ARC700 VIPT Cache Management + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: May 2011: for Non-aliasing VIPT D-cache following can be NOPs + * -flush_cache_dup_mm (fork) + * -likewise for flush_cache_mm (exit/execve) + * -likewise for flush_cache_range,flush_cache_page (munmap, exit, COW-break) + * + * vineetg: Apr 2011 + * -Now that MMU can support larger pg sz (16K), the determiniation of + * aliasing shd not be based on assumption of 8k pg + * + * vineetg: Mar 2011 + * -optimised version of flush_icache_range( ) for making I/D coherent + * when vaddr is available (agnostic of num of aliases) + * + * vineetg: Mar 2011 + * -Added documentation about I-cache aliasing on ARC700 and the way it + * was handled up until MMU V2. + * -Spotted a three year old bug when killing the 4 aliases, which needs + * bottom 2 bits, so we need to do paddr | {0x00, 0x01, 0x02, 0x03} + * instead of paddr | {0x00, 0x01, 0x10, 0x11} + * (Rajesh you owe me one now) + * + * vineetg: Dec 2010 + * -Off-by-one error when computing num_of_lines to flush + * This broke signal handling with bionic which uses synthetic sigret stub + * + * vineetg: Mar 2010 + * -GCC can't generate ZOL for core cache flush loops. + * Conv them into iterations based as opposed to while (start < end) types + * + * Vineetg: July 2009 + * -In I-cache flush routine we used to chk for aliasing for every line INV. + * Instead now we setup routines per cache geometry and invoke them + * via function pointers. + * + * Vineetg: Jan 2009 + * -Cache Line flush routines used to flush an extra line beyond end addr + * because check was while (end >= start) instead of (end > start) + * =Some call sites had to work around by doing -1, -4 etc to end param + * =Some callers didnt care. This was spec bad in case of INV routines + * which would discard valid data (cause of the horrible ext2 bug + * in ARC IDE driver) + * + * vineetg: June 11th 2008: Fixed flush_icache_range( ) + * -Since ARC700 caches are not coherent (I$ doesnt snoop D$) both need + * to be flushed, which it was not doing. + * -load_module( ) passes vmalloc addr (Kernel Virtual Addr) to the API, + * however ARC cache maintenance OPs require PHY addr. Thus need to do + * vmalloc_to_phy. + * -Also added optimisation there, that for range > PAGE SIZE we flush the + * entire cache in one shot rather than line by line. For e.g. a module + * with Code sz 600k, old code flushed 600k worth of cache (line-by-line), + * while cache is only 16 or 32k. 
+ */ + +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/cache.h> +#include <linux/mmu_context.h> +#include <linux/syscalls.h> +#include <linux/uaccess.h> +#include <linux/pagemap.h> +#include <asm/cacheflush.h> +#include <asm/cachectl.h> +#include <asm/setup.h> + +char *arc_cache_mumbojumbo(int c, char *buf, int len) +{ + int n = 0; + +#define PR_CACHE(p, cfg, str) \ + if (!(p)->ver) \ + n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \ + else \ + n += scnprintf(buf + n, len - n, \ + str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \ + (p)->sz_k, (p)->assoc, (p)->line_len, \ + (p)->vipt ? "VIPT" : "PIPT", \ + (p)->alias ? " aliasing" : "", \ + IS_ENABLED(cfg) ? "" : " (not used)"); + + PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); + PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); + + return buf; +} + +/* + * Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. + * No Validation done here, simply read/convert the BCRs + */ +void read_decode_cache_bcr(void) +{ + struct cpuinfo_arc_cache *p_ic, *p_dc; + unsigned int cpu = smp_processor_id(); + struct bcr_cache { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad:12, line_len:4, sz:4, config:4, ver:8; +#else + unsigned int ver:8, config:4, sz:4, line_len:4, pad:12; +#endif + } ibcr, dbcr; + + p_ic = &cpuinfo_arc700[cpu].icache; + READ_BCR(ARC_REG_IC_BCR, ibcr); + + if (!ibcr.ver) + goto dc_chk; + + BUG_ON(ibcr.config != 3); + p_ic->assoc = 2; /* Fixed to 2w set assoc */ + p_ic->line_len = 8 << ibcr.line_len; + p_ic->sz_k = 1 << (ibcr.sz - 1); + p_ic->ver = ibcr.ver; + p_ic->vipt = 1; + p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1; + +dc_chk: + p_dc = &cpuinfo_arc700[cpu].dcache; + READ_BCR(ARC_REG_DC_BCR, dbcr); + + if (!dbcr.ver) + return; + + BUG_ON(dbcr.config != 2); + p_dc->assoc = 4; /* Fixed to 4w set assoc */ + p_dc->line_len = 16 << dbcr.line_len; + p_dc->sz_k = 1 << (dbcr.sz - 1); + p_dc->ver = dbcr.ver; + p_dc->vipt = 1; + p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1; +} + +/* + * 1. Validate the Cache Geomtery (compile time config matches hardware) + * 2. If I-cache suffers from aliasing, setup work arounds (difft flush rtn) + * (aliasing D-cache configurations are not supported YET) + * 3. Enable the Caches, setup default flush mode for D-Cache + * 3. 
Calculate the SHMLBA used by user space + */ +void arc_cache_init(void) +{ + unsigned int __maybe_unused cpu = smp_processor_id(); + char str[256]; + + printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { + struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; + + if (!ic->ver) + panic("cache support enabled but non-existent cache\n"); + + if (ic->line_len != L1_CACHE_BYTES) + panic("ICache line [%d] != kernel Config [%d]", + ic->line_len, L1_CACHE_BYTES); + + if (ic->ver != CONFIG_ARC_MMU_VER) + panic("Cache ver [%d] doesn't match MMU ver [%d]\n", + ic->ver, CONFIG_ARC_MMU_VER); + } + + if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) { + struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache; + int handled; + + if (!dc->ver) + panic("cache support enabled but non-existent cache\n"); + + if (dc->line_len != L1_CACHE_BYTES) + panic("DCache line [%d] != kernel Config [%d]", + dc->line_len, L1_CACHE_BYTES); + + /* check for D-Cache aliasing */ + handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING); + + if (dc->alias && !handled) + panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + else if (!dc->alias && handled) + panic("Don't need CONFIG_ARC_CACHE_VIPT_ALIASING\n"); + } +} + +#define OP_INV 0x1 +#define OP_FLUSH 0x2 +#define OP_FLUSH_N_INV 0x3 +#define OP_INV_IC 0x4 + +/* + * Common Helper for Line Operations on {I,D}-Cache + */ +static inline void __cache_line_loop(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned int aux_cmd, aux_tag; + int num_lines; + const int full_page_op = __builtin_constant_p(sz) && sz == PAGE_SIZE; + + if (cacheop == OP_INV_IC) { + aux_cmd = ARC_REG_IC_IVIL; +#if (CONFIG_ARC_MMU_VER > 2) + aux_tag = ARC_REG_IC_PTAG; +#endif + } + else { + /* d$ cmd: INV (discard or wback-n-discard) OR FLUSH (wback) */ + aux_cmd = cacheop & OP_INV ? ARC_REG_DC_IVDL : ARC_REG_DC_FLDL; +#if (CONFIG_ARC_MMU_VER > 2) + aux_tag = ARC_REG_DC_PTAG; +#endif + } + + /* Ensure we properly floor/ceil the non-line aligned/sized requests + * and have @paddr - aligned to cache line and integral @num_lines. + * This however can be avoided for page sized since: + * -@paddr will be cache-line aligned already (being page aligned) + * -@sz will be integral multiple of line size (being page sized). 
+ */ + if (!full_page_op) { + sz += paddr & ~CACHE_LINE_MASK; + paddr &= CACHE_LINE_MASK; + vaddr &= CACHE_LINE_MASK; + } + + num_lines = DIV_ROUND_UP(sz, L1_CACHE_BYTES); + +#if (CONFIG_ARC_MMU_VER <= 2) + /* MMUv2 and before: paddr contains stuffed vaddrs bits */ + paddr |= (vaddr >> PAGE_SHIFT) & 0x1F; +#else + /* if V-P const for loop, PTAG can be written once outside loop */ + if (full_page_op) + write_aux_reg(aux_tag, paddr); +#endif + + while (num_lines-- > 0) { +#if (CONFIG_ARC_MMU_VER > 2) + /* MMUv3, cache ops require paddr seperately */ + if (!full_page_op) { + write_aux_reg(aux_tag, paddr); + paddr += L1_CACHE_BYTES; + } + + write_aux_reg(aux_cmd, vaddr); + vaddr += L1_CACHE_BYTES; +#else + write_aux_reg(aux_cmd, paddr); + paddr += L1_CACHE_BYTES; +#endif + } +} + +#ifdef CONFIG_ARC_HAS_DCACHE + +/*************************************************************** + * Machine specific helpers for Entire D-Cache or Per Line ops + */ + +static inline unsigned int __before_dc_op(const int op) +{ + unsigned int reg = reg; + + if (op == OP_FLUSH_N_INV) { + /* Dcache provides 2 cmd: FLUSH or INV + * INV inturn has sub-modes: DISCARD or FLUSH-BEFORE + * flush-n-inv is achieved by INV cmd but with IM=1 + * So toggle INV sub-mode depending on op request and default + */ + reg = read_aux_reg(ARC_REG_DC_CTRL); + write_aux_reg(ARC_REG_DC_CTRL, reg | DC_CTRL_INV_MODE_FLUSH) + ; + } + + return reg; +} + +static inline void __after_dc_op(const int op, unsigned int reg) +{ + if (op & OP_FLUSH) /* flush / flush-n-inv both wait */ + while (read_aux_reg(ARC_REG_DC_CTRL) & DC_CTRL_FLUSH_STATUS); + + /* Switch back to default Invalidate mode */ + if (op == OP_FLUSH_N_INV) + write_aux_reg(ARC_REG_DC_CTRL, reg & ~DC_CTRL_INV_MODE_FLUSH); +} + +/* + * Operation on Entire D-Cache + * @cacheop = {OP_INV, OP_FLUSH, OP_FLUSH_N_INV} + * Note that constant propagation ensures all the checks are gone + * in generated code + */ +static inline void __dc_entire_op(const int cacheop) +{ + unsigned int ctrl_reg; + int aux; + + ctrl_reg = __before_dc_op(cacheop); + + if (cacheop & OP_INV) /* Inv or flush-n-inv use same cmd reg */ + aux = ARC_REG_DC_IVDC; + else + aux = ARC_REG_DC_FLSH; + + write_aux_reg(aux, 0x1); + + __after_dc_op(cacheop, ctrl_reg); +} + +/* For kernel mappings cache operation: index is same as paddr */ +#define __dc_line_op_k(p, sz, op) __dc_line_op(p, p, sz, op) + +/* + * D-Cache : Per Line INV (discard or wback+discard) or FLUSH (wback) + */ +static inline void __dc_line_op(unsigned long paddr, unsigned long vaddr, + unsigned long sz, const int cacheop) +{ + unsigned long flags; + unsigned int ctrl_reg; + + local_irq_save(flags); + + ctrl_reg = __before_dc_op(cacheop); + + __cache_line_loop(paddr, vaddr, sz, cacheop); + + __after_dc_op(cacheop, ctrl_reg); + + local_irq_restore(flags); +} + +#else + +#define __dc_entire_op(cacheop) +#define __dc_line_op(paddr, vaddr, sz, cacheop) +#define __dc_line_op_k(paddr, sz, cacheop) + +#endif /* CONFIG_ARC_HAS_DCACHE */ + + +#ifdef CONFIG_ARC_HAS_ICACHE + +/* + * I-Cache Aliasing in ARC700 VIPT caches + * + * ARC VIPT I-cache uses vaddr to index into cache and paddr to match the tag. + * The orig Cache Management Module "CDU" only required paddr to invalidate a + * certain line since it sufficed as index in Non-Aliasing VIPT cache-geometry. + * Infact for distinct V1,V2,P: all of {V1-P},{V2-P},{P-P} would end up fetching + * the exact same line. + * + * However for larger Caches (way-size > page-size) - i.e. 
in Aliasing config, + * paddr alone could not be used to correctly index the cache. + * + * ------------------ + * MMU v1/v2 (Fixed Page Size 8k) + * ------------------ + * The solution was to provide CDU with these additonal vaddr bits. These + * would be bits [x:13], x would depend on cache-geometry, 13 comes from + * standard page size of 8k. + * H/w folks chose [17:13] to be a future safe range, and moreso these 5 bits + * of vaddr could easily be "stuffed" in the paddr as bits [4:0] since the + * orig 5 bits of paddr were anyways ignored by CDU line ops, as they + * represent the offset within cache-line. The adv of using this "clumsy" + * interface for additional info was no new reg was needed in CDU programming + * model. + * + * 17:13 represented the max num of bits passable, actual bits needed were + * fewer, based on the num-of-aliases possible. + * -for 2 alias possibility, only bit 13 needed (32K cache) + * -for 4 alias possibility, bits 14:13 needed (64K cache) + * + * ------------------ + * MMU v3 + * ------------------ + * This ver of MMU supports variable page sizes (1k-16k): although Linux will + * only support 8k (default), 16k and 4k. + * However from hardware perspective, smaller page sizes aggrevate aliasing + * meaning more vaddr bits needed to disambiguate the cache-line-op ; + * the existing scheme of piggybacking won't work for certain configurations. + * Two new registers IC_PTAG and DC_PTAG inttoduced. + * "tag" bits are provided in PTAG, index bits in existing IVIL/IVDL/FLDL regs + */ + +/*********************************************************** + * Machine specific helper for per line I-Cache invalidate. + */ + +static inline void __ic_entire_inv(void) +{ + write_aux_reg(ARC_REG_IC_IVIC, 1); + read_aux_reg(ARC_REG_IC_CTRL); /* blocks */ +} + +static inline void +__ic_line_inv_vaddr_local(unsigned long paddr, unsigned long vaddr, + unsigned long sz) +{ + unsigned long flags; + + local_irq_save(flags); + __cache_line_loop(paddr, vaddr, sz, OP_INV_IC); + local_irq_restore(flags); +} + +#ifndef CONFIG_SMP + +#define __ic_line_inv_vaddr(p, v, s) __ic_line_inv_vaddr_local(p, v, s) + +#else + +struct ic_inv_args { + unsigned long paddr, vaddr; + int sz; +}; + +static void __ic_line_inv_vaddr_helper(void *info) +{ + struct ic_inv_args *ic_inv = info; + + __ic_line_inv_vaddr_local(ic_inv->paddr, ic_inv->vaddr, ic_inv->sz); +} + +static void __ic_line_inv_vaddr(unsigned long paddr, unsigned long vaddr, + unsigned long sz) +{ + struct ic_inv_args ic_inv = { + .paddr = paddr, + .vaddr = vaddr, + .sz = sz + }; + + on_each_cpu(__ic_line_inv_vaddr_helper, &ic_inv, 1); +} + +#endif /* CONFIG_SMP */ + +#else /* !CONFIG_ARC_HAS_ICACHE */ + +#define __ic_entire_inv() +#define __ic_line_inv_vaddr(pstart, vstart, sz) + +#endif /* CONFIG_ARC_HAS_ICACHE */ + + +/*********************************************************** + * Exported APIs + */ + +/* + * Handle cache congruency of kernel and userspace mappings of page when kernel + * writes-to/reads-from + * + * The idea is to defer flushing of kernel mapping after a WRITE, possible if: + * -dcache is NOT aliasing, hence any U/K-mappings of page are congruent + * -U-mapping doesn't exist yet for page (finalised in update_mmu_cache) + * -In SMP, if hardware caches are coherent + * + * There's a corollary case, where kernel READs from a userspace mapped page. + * If the U-mapping is not congruent to to K-mapping, former needs flushing. 
+ */ +void flush_dcache_page(struct page *page) +{ + struct address_space *mapping; + + if (!cache_is_vipt_aliasing()) { + clear_bit(PG_dc_clean, &page->flags); + return; + } + + /* don't handle anon pages here */ + mapping = page_mapping(page); + if (!mapping) + return; + + /* + * pagecache page, file not yet mapped to userspace + * Make a note that K-mapping is dirty + */ + if (!mapping_mapped(mapping)) { + clear_bit(PG_dc_clean, &page->flags); + } else if (page_mapped(page)) { + + /* kernel reading from page with U-mapping */ + void *paddr = page_address(page); + unsigned long vaddr = page->index << PAGE_CACHE_SHIFT; + + if (addr_not_cache_congruent(paddr, vaddr)) + __flush_dcache_page(paddr, vaddr); + } +} +EXPORT_SYMBOL(flush_dcache_page); + + +void dma_cache_wback_inv(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH_N_INV); +} +EXPORT_SYMBOL(dma_cache_wback_inv); + +void dma_cache_inv(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_INV); +} +EXPORT_SYMBOL(dma_cache_inv); + +void dma_cache_wback(unsigned long start, unsigned long sz) +{ + __dc_line_op_k(start, sz, OP_FLUSH); +} +EXPORT_SYMBOL(dma_cache_wback); + +/* + * This is API for making I/D Caches consistent when modifying + * kernel code (loadable modules, kprobes, kgdb...) + * This is called on insmod, with kernel virtual address for CODE of + * the module. ARC cache maintenance ops require PHY address thus we + * need to convert vmalloc addr to PHY addr + */ +void flush_icache_range(unsigned long kstart, unsigned long kend) +{ + unsigned int tot_sz; + + WARN(kstart < TASK_SIZE, "%s() can't handle user vaddr", __func__); + + /* Shortcut for bigger flush ranges. + * Here we don't care if this was kernel virtual or phy addr + */ + tot_sz = kend - kstart; + if (tot_sz > PAGE_SIZE) { + flush_cache_all(); + return; + } + + /* Case: Kernel Phy addr (0x8000_0000 onwards) */ + if (likely(kstart > PAGE_OFFSET)) { + /* + * The 2nd arg despite being paddr will be used to index icache + * This is OK since no alternate virtual mappings will exist + * given the callers for this case: kprobe/kgdb in built-in + * kernel code only. + */ + __sync_icache_dcache(kstart, kstart, kend - kstart); + return; + } + + /* + * Case: Kernel Vaddr (0x7000_0000 to 0x7fff_ffff) + * (1) ARC Cache Maintenance ops only take Phy addr, hence special + * handling of kernel vaddr. + * + * (2) Despite @tot_sz being < PAGE_SIZE (bigger cases handled already), + * it still needs to handle a 2 page scenario, where the range + * straddles across 2 virtual pages and hence need for loop + */ + while (tot_sz > 0) { + unsigned int off, sz; + unsigned long phy, pfn; + + off = kstart % PAGE_SIZE; + pfn = vmalloc_to_pfn((void *)kstart); + phy = (pfn << PAGE_SHIFT) + off; + sz = min_t(unsigned int, tot_sz, PAGE_SIZE - off); + __sync_icache_dcache(phy, kstart, sz); + kstart += sz; + tot_sz -= sz; + } +} +EXPORT_SYMBOL(flush_icache_range); + +/* + * General purpose helper to make I and D cache lines consistent. + * @paddr is phy addr of region + * @vaddr is typically user vaddr (breakpoint) or kernel vaddr (vmalloc) + * However in one instance, when called by kprobe (for a breakpt in + * builtin kernel code) @vaddr will be paddr only, meaning CDU operation will + * use a paddr to index the cache (despite VIPT). This is fine since since a + * builtin kernel page will not have any virtual mappings. + * kprobe on loadable module will be kernel vaddr. 
+ */ +void __sync_icache_dcache(unsigned long paddr, unsigned long vaddr, int len) +{ + __dc_line_op(paddr, vaddr, len, OP_FLUSH_N_INV); + __ic_line_inv_vaddr(paddr, vaddr, len); +} + +/* wrapper to compile time eliminate alignment checks in flush loop */ +void __inv_icache_page(unsigned long paddr, unsigned long vaddr) +{ + __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE); +} + +/* + * wrapper to clearout kernel or userspace mappings of a page + * For kernel mappings @vaddr == @paddr + */ +void ___flush_dcache_page(unsigned long paddr, unsigned long vaddr) +{ + __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV); +} + +noinline void flush_cache_all(void) +{ + unsigned long flags; + + local_irq_save(flags); + + __ic_entire_inv(); + __dc_entire_op(OP_FLUSH_N_INV); + + local_irq_restore(flags); + +} + +#ifdef CONFIG_ARC_CACHE_VIPT_ALIASING + +void flush_cache_mm(struct mm_struct *mm) +{ + flush_cache_all(); +} + +void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr, + unsigned long pfn) +{ + unsigned int paddr = pfn << PAGE_SHIFT; + + u_vaddr &= PAGE_MASK; + + ___flush_dcache_page(paddr, u_vaddr); + + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, u_vaddr); +} + +void flush_cache_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_cache_all(); +} + +void flush_anon_page(struct vm_area_struct *vma, struct page *page, + unsigned long u_vaddr) +{ + /* TBD: do we really need to clear the kernel mapping */ + __flush_dcache_page(page_address(page), u_vaddr); + __flush_dcache_page(page_address(page), page_address(page)); + +} + +#endif + +void copy_user_highpage(struct page *to, struct page *from, + unsigned long u_vaddr, struct vm_area_struct *vma) +{ + void *kfrom = page_address(from); + void *kto = page_address(to); + int clean_src_k_mappings = 0; + + /* + * If SRC page was already mapped in userspace AND it's U-mapping is + * not congruent with K-mapping, sync former to physical page so that + * K-mapping in memcpy below, sees the right data + * + * Note that while @u_vaddr refers to DST page's userspace vaddr, it is + * equally valid for SRC page as well + */ + if (page_mapped(from) && addr_not_cache_congruent(kfrom, u_vaddr)) { + __flush_dcache_page(kfrom, u_vaddr); + clean_src_k_mappings = 1; + } + + copy_page(kto, kfrom); + + /* + * Mark DST page K-mapping as dirty for a later finalization by + * update_mmu_cache(). Although the finalization could have been done + * here as well (given that both vaddr/paddr are available). + * But update_mmu_cache() already has code to do that for other + * non copied user pages (e.g. read faults which wire in pagecache page + * directly). 
+ */ + clear_bit(PG_dc_clean, &to->flags); + + /* + * if SRC was already usermapped and non-congruent to kernel mapping + * sync the kernel mapping back to physical page + */ + if (clean_src_k_mappings) { + __flush_dcache_page(kfrom, kfrom); + set_bit(PG_dc_clean, &from->flags); + } else { + clear_bit(PG_dc_clean, &from->flags); + } +} + +void clear_user_page(void *to, unsigned long u_vaddr, struct page *page) +{ + clear_page(to); + clear_bit(PG_dc_clean, &page->flags); +} + + +/********************************************************************** + * Explicit Cache flush request from user space via syscall + * Needed for JITs which generate code on the fly + */ +SYSCALL_DEFINE3(cacheflush, uint32_t, start, uint32_t, sz, uint32_t, flags) +{ + /* TBD: optimize this */ + flush_cache_all(); + return 0; +} diff --git a/kernel/arch/arc/mm/dma.c b/kernel/arch/arc/mm/dma.c new file mode 100644 index 000000000..12cc6485b --- /dev/null +++ b/kernel/arch/arc/mm/dma.c @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * DMA Coherent API Notes + * + * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is + * implemented by accessintg it using a kernel virtual address, with + * Cache bit off in the TLB entry. + * + * The default DMA address == Phy address which is 0x8000_0000 based. + * A platform/device can make it zero based, by over-riding + * plat_{dma,kernel}_addr_to_{kernel,dma} + */ + +#include <linux/dma-mapping.h> +#include <linux/dma-debug.h> +#include <linux/export.h> +#include <asm/cacheflush.h> + +/* + * Helpers for Coherent DMA API. + */ +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + void *paddr; + + /* This is linear addr (0x8000_0000 based) */ + paddr = alloc_pages_exact(size, gfp); + if (!paddr) + return NULL; + + /* This is bus address, platform dependent */ + *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + + return paddr; +} +EXPORT_SYMBOL(dma_alloc_noncoherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), + size); +} +EXPORT_SYMBOL(dma_free_noncoherent); + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + void *paddr, *kvaddr; + + /* This is linear addr (0x8000_0000 based) */ + paddr = alloc_pages_exact(size, gfp); + if (!paddr) + return NULL; + + /* This is kernel Virtual address (0x7000_0000 based) */ + kvaddr = ioremap_nocache((unsigned long)paddr, size); + if (kvaddr != NULL) + memset(kvaddr, 0, size); + + /* This is bus address, platform dependent */ + *dma_handle = plat_kernel_addr_to_dma(dev, paddr); + + return kvaddr; +} +EXPORT_SYMBOL(dma_alloc_coherent); + +void dma_free_coherent(struct device *dev, size_t size, void *kvaddr, + dma_addr_t dma_handle) +{ + iounmap((void __force __iomem *)kvaddr); + + free_pages_exact((void *)plat_dma_addr_to_kernel(dev, dma_handle), + size); +} +EXPORT_SYMBOL(dma_free_coherent); + +/* + * Helper for streaming DMA... 
+ */ +void __arc_dma_cache_sync(unsigned long paddr, size_t size, + enum dma_data_direction dir) +{ + __inline_dma_cache_sync(paddr, size, dir); +} +EXPORT_SYMBOL(__arc_dma_cache_sync); diff --git a/kernel/arch/arc/mm/extable.c b/kernel/arch/arc/mm/extable.c new file mode 100644 index 000000000..aa652e281 --- /dev/null +++ b/kernel/arch/arc/mm/extable.c @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Borrowed heavily from MIPS + */ + +#include <linux/module.h> +#include <linux/uaccess.h> + +int fixup_exception(struct pt_regs *regs) +{ + const struct exception_table_entry *fixup; + + fixup = search_exception_tables(instruction_pointer(regs)); + if (fixup) { + regs->ret = fixup->fixup; + + return 1; + } + + return 0; +} + +#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE + +long arc_copy_from_user_noinline(void *to, const void __user *from, + unsigned long n) +{ + return __arc_copy_from_user(to, from, n); +} +EXPORT_SYMBOL(arc_copy_from_user_noinline); + +long arc_copy_to_user_noinline(void __user *to, const void *from, + unsigned long n) +{ + return __arc_copy_to_user(to, from, n); +} +EXPORT_SYMBOL(arc_copy_to_user_noinline); + +unsigned long arc_clear_user_noinline(void __user *to, + unsigned long n) +{ + return __arc_clear_user(to, n); +} +EXPORT_SYMBOL(arc_clear_user_noinline); + +long arc_strncpy_from_user_noinline(char *dst, const char __user *src, + long count) +{ + return __arc_strncpy_from_user(dst, src, count); +} +EXPORT_SYMBOL(arc_strncpy_from_user_noinline); + +long arc_strnlen_user_noinline(const char __user *src, long n) +{ + return __arc_strnlen_user(src, n); +} +EXPORT_SYMBOL(arc_strnlen_user_noinline); +#endif diff --git a/kernel/arch/arc/mm/fault.c b/kernel/arch/arc/mm/fault.c new file mode 100644 index 000000000..d948e4e9d --- /dev/null +++ b/kernel/arch/arc/mm/fault.c @@ -0,0 +1,235 @@ +/* Page Fault Handling for ARC (TLB Miss / ProtV) + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/signal.h> +#include <linux/interrupt.h> +#include <linux/sched.h> +#include <linux/errno.h> +#include <linux/ptrace.h> +#include <linux/uaccess.h> +#include <linux/kdebug.h> +#include <linux/perf_event.h> +#include <asm/pgalloc.h> +#include <asm/mmu.h> + +static int handle_vmalloc_fault(unsigned long address) +{ + /* + * Synchronize this task's top level page-table + * with the 'reference' page table. 
+ */ + pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; + pmd_t *pmd, *pmd_k; + + pgd = pgd_offset_fast(current->active_mm, address); + pgd_k = pgd_offset_k(address); + + if (!pgd_present(*pgd_k)) + goto bad_area; + + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + goto bad_area; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); + if (!pmd_present(*pmd_k)) + goto bad_area; + + set_pmd(pmd, *pmd_k); + + /* XXX: create the TLB entry here */ + return 0; + +bad_area: + return 1; +} + +void do_page_fault(unsigned long address, struct pt_regs *regs) +{ + struct vm_area_struct *vma = NULL; + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + siginfo_t info; + int fault, ret; + int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ + unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + + /* + * We fault-in kernel-space virtual memory on-demand. The + * 'reference' page table is init_mm.pgd. + * + * NOTE! We MUST NOT take any locks for this case. We may + * be in an interrupt or a critical region, and should + * only copy the information from the master page table, + * nothing more. + */ + if (address >= VMALLOC_START && address <= VMALLOC_END) { + ret = handle_vmalloc_fault(address); + if (unlikely(ret)) + goto bad_area_nosemaphore; + else + return; + } + + info.si_code = SEGV_MAPERR; + + /* + * If we're in an interrupt or have no user + * context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) + goto no_context; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; +retry: + down_read(&mm->mmap_sem); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + info.si_code = SEGV_ACCERR; + + /* Handle protection violation, execute on heap or stack */ + + if ((regs->ecr_vec == ECR_V_PROTV) && + (regs->ecr_cause == ECR_C_PROTV_INST_FETCH)) + goto bad_area; + + if (write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + flags |= FAULT_FLAG_WRITE; + } else { + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. 
+ */ + fault = handle_mm_fault(mm, vma, address, flags); + + /* If Pagefault was interrupted by SIGKILL, exit page fault "early" */ + if (unlikely(fatal_signal_pending(current))) { + if ((fault & VM_FAULT_ERROR) && !(fault & VM_FAULT_RETRY)) + up_read(&mm->mmap_sem); + if (user_mode(regs)) + return; + } + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); + + if (likely(!(fault & VM_FAULT_ERROR))) { + if (flags & FAULT_FLAG_ALLOW_RETRY) { + /* To avoid updating stats twice for retry case */ + if (fault & VM_FAULT_MAJOR) { + tsk->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); + } else { + tsk->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); + } + + if (fault & VM_FAULT_RETRY) { + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + + /* Fault Handled Gracefully */ + up_read(&mm->mmap_sem); + return; + } + + if (fault & VM_FAULT_OOM) + goto out_of_memory; + else if (fault & VM_FAULT_SIGSEGV) + goto bad_area; + else if (fault & VM_FAULT_SIGBUS) + goto do_sigbus; + + /* no man's land */ + BUG(); + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses just cause a SIGSEGV */ + if (user_mode(regs)) { + tsk->thread.fault_address = address; + info.si_signo = SIGSEGV; + info.si_errno = 0; + /* info.si_code has been set above */ + info.si_addr = (void __user *)address; + force_sig_info(SIGSEGV, &info, tsk); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? + * + * (The kernel has valid exception-points in the source + * when it acesses user-memory. When it fails in one + * of those points, we find it in a table and do a jump + * to some fixup code that loads an appropriate error + * code) + */ + if (fixup_exception(regs)) + return; + + die("Oops", regs, address); + +out_of_memory: + up_read(&mm->mmap_sem); + + if (user_mode(regs)) { + pagefault_out_of_memory(); + return; + } + + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + if (!user_mode(regs)) + goto no_context; + + tsk->thread.fault_address = address; + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, tsk); +} diff --git a/kernel/arch/arc/mm/init.c b/kernel/arch/arc/mm/init.c new file mode 100644 index 000000000..d44eedd8c --- /dev/null +++ b/kernel/arch/arc/mm/init.c @@ -0,0 +1,155 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#ifdef CONFIG_BLK_DEV_INITRD +#include <linux/initrd.h> +#endif +#include <linux/swap.h> +#include <linux/module.h> +#include <asm/page.h> +#include <asm/pgalloc.h> +#include <asm/sections.h> +#include <asm/arcregs.h> + +pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE); +char empty_zero_page[PAGE_SIZE] __aligned(PAGE_SIZE); +EXPORT_SYMBOL(empty_zero_page); + +/* Default tot mem from .config */ +static unsigned long arc_mem_sz = 0x20000000; /* some default */ + +/* User can over-ride above with "mem=nnn[KkMm]" in cmdline */ +static int __init setup_mem_sz(char *str) +{ + arc_mem_sz = memparse(str, NULL) & PAGE_MASK; + + /* early console might not be setup yet - it will show up later */ + pr_info("\"mem=%s\": mem sz set to %ldM\n", str, TO_MB(arc_mem_sz)); + + return 0; +} +early_param("mem", setup_mem_sz); + +void __init early_init_dt_add_memory_arch(u64 base, u64 size) +{ + arc_mem_sz = size & PAGE_MASK; + pr_info("Memory size set via devicetree %ldM\n", TO_MB(arc_mem_sz)); +} + +#ifdef CONFIG_BLK_DEV_INITRD +static int __init early_initrd(char *p) +{ + unsigned long start, size; + char *endp; + + start = memparse(p, &endp); + if (*endp == ',') { + size = memparse(endp + 1, NULL); + + initrd_start = (unsigned long)__va(start); + initrd_end = (unsigned long)__va(start + size); + } + return 0; +} +early_param("initrd", early_initrd); +#endif + +/* + * First memory setup routine called from setup_arch() + * 1. setup swapper's mm @init_mm + * 2. Count the pages we have and setup bootmem allocator + * 3. zone setup + */ +void __init setup_arch_memory(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long end_mem = CONFIG_LINUX_LINK_BASE + arc_mem_sz; + + init_mm.start_code = (unsigned long)_text; + init_mm.end_code = (unsigned long)_etext; + init_mm.end_data = (unsigned long)_edata; + init_mm.brk = (unsigned long)_end; + + /* + * We do it here, so that memory is correctly instantiated + * even if "mem=xxx" cmline over-ride is given and/or + * DT has memory node. Each causes an update to @arc_mem_sz + * and we finally add memory one here + */ + memblock_add(CONFIG_LINUX_LINK_BASE, arc_mem_sz); + + /*------------- externs in mm need setting up ---------------*/ + + /* first page of system - kernel .vector starts here */ + min_low_pfn = ARCH_PFN_OFFSET; + + /* Last usable page of low mem (no HIGHMEM yet for ARC port) */ + max_low_pfn = max_pfn = PFN_DOWN(end_mem); + + max_mapnr = max_low_pfn - min_low_pfn; + + /*------------- reserve kernel image -----------------------*/ + memblock_reserve(CONFIG_LINUX_LINK_BASE, + __pa(_end) - CONFIG_LINUX_LINK_BASE); + +#ifdef CONFIG_BLK_DEV_INITRD + /*------------- reserve initrd image -----------------------*/ + if (initrd_start) + memblock_reserve(__pa(initrd_start), initrd_end - initrd_start); +#endif + + memblock_dump_all(); + + /*-------------- node setup --------------------------------*/ + memset(zones_size, 0, sizeof(zones_size)); + zones_size[ZONE_NORMAL] = max_mapnr; + + /* + * We can't use the helper free_area_init(zones[]) because it uses + * PAGE_OFFSET to compute the @min_low_pfn which would be wrong + * when our kernel doesn't start at PAGE_OFFSET, i.e. 
+ * PAGE_OFFSET != CONFIG_LINUX_LINK_BASE + */ + free_area_init_node(0, /* node-id */ + zones_size, /* num pages per zone */ + min_low_pfn, /* first pfn of node */ + NULL); /* NO holes */ + + high_memory = (void *)end_mem; +} + +/* + * mem_init - initializes memory + * + * Frees up bootmem + * Calculates and displays memory available/used + */ +void __init mem_init(void) +{ + free_all_bootmem(); + mem_init_print_info(NULL); +} + +/* + * free_initmem: Free all the __init memory. + */ +void __init_refok free_initmem(void) +{ + free_initmem_default(-1); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void __init free_initrd_mem(unsigned long start, unsigned long end) +{ + free_reserved_area((void *)start, (void *)end, -1, "initrd"); +} +#endif diff --git a/kernel/arch/arc/mm/ioremap.c b/kernel/arch/arc/mm/ioremap.c new file mode 100644 index 000000000..739e65f35 --- /dev/null +++ b/kernel/arch/arc/mm/ioremap.c @@ -0,0 +1,91 @@ +/* + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/cache.h> + +void __iomem *ioremap(unsigned long paddr, unsigned long size) +{ + unsigned long end; + + /* Don't allow wraparound or zero size */ + end = paddr + size - 1; + if (!size || (end < paddr)) + return NULL; + + /* If the region is h/w uncached, avoid MMU mappings */ + if (paddr >= ARC_UNCACHED_ADDR_SPACE) + return (void __iomem *)paddr; + + return ioremap_prot(paddr, size, PAGE_KERNEL_NO_CACHE); +} +EXPORT_SYMBOL(ioremap); + +/* + * ioremap with access flags + * Cache semantics wise it is same as ioremap - "forced" uncached. + * However unline vanilla ioremap which bypasses ARC MMU for addresses in + * ARC hardware uncached region, this one still goes thru the MMU as caller + * might need finer access control (R/W/X) + */ +void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, + unsigned long flags) +{ + void __iomem *vaddr; + struct vm_struct *area; + unsigned long off, end; + pgprot_t prot = __pgprot(flags); + + /* Don't allow wraparound, zero size */ + end = paddr + size - 1; + if ((!size) || (end < paddr)) + return NULL; + + /* An early platform driver might end up here */ + if (!slab_is_available()) + return NULL; + + /* force uncached */ + prot = pgprot_noncached(prot); + + /* Mappings have to be page-aligned */ + off = paddr & ~PAGE_MASK; + paddr &= PAGE_MASK; + size = PAGE_ALIGN(end + 1) - paddr; + + /* + * Ok, go for it.. 
+ */ + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + area->phys_addr = paddr; + vaddr = (void __iomem *)area->addr; + if (ioremap_page_range((unsigned long)vaddr, + (unsigned long)vaddr + size, paddr, prot)) { + vunmap((void __force *)vaddr); + return NULL; + } + return (void __iomem *)(off + (char __iomem *)vaddr); +} +EXPORT_SYMBOL(ioremap_prot); + + +void iounmap(const void __iomem *addr) +{ + if (addr >= (void __force __iomem *)ARC_UNCACHED_ADDR_SPACE) + return; + + vfree((void *)(PAGE_MASK & (unsigned long __force)addr)); +} +EXPORT_SYMBOL(iounmap); diff --git a/kernel/arch/arc/mm/mmap.c b/kernel/arch/arc/mm/mmap.c new file mode 100644 index 000000000..2e06d56e9 --- /dev/null +++ b/kernel/arch/arc/mm/mmap.c @@ -0,0 +1,78 @@ +/* + * ARC700 mmap + * + * (started from arm version - for VIPT alias handling) + * + * Copyright (C) 2013 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <asm/cacheflush.h> + +#define COLOUR_ALIGN(addr, pgoff) \ + ((((addr) + SHMLBA - 1) & ~(SHMLBA - 1)) + \ + (((pgoff) << PAGE_SHIFT) & (SHMLBA - 1))) + +/* + * Ensure that shared mappings are correctly aligned to + * avoid aliasing issues with VIPT caches. + * We need to ensure that + * a specific page of an object is always mapped at a multiple of + * SHMLBA bytes. + */ +unsigned long +arch_get_unmapped_area(struct file *filp, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int do_align = 0; + int aliasing = cache_is_vipt_aliasing(); + struct vm_unmapped_area_info info; + + /* + * We only need to do colour alignment if D cache aliases. + */ + if (aliasing) + do_align = filp || (flags & MAP_SHARED); + + /* + * We enforce the MAP_FIXED case. + */ + if (flags & MAP_FIXED) { + if (aliasing && flags & MAP_SHARED && + (addr - (pgoff << PAGE_SHIFT)) & (SHMLBA - 1)) + return -EINVAL; + return addr; + } + + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + if (do_align) + addr = COLOUR_ALIGN(addr, pgoff); + else + addr = PAGE_ALIGN(addr); + + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + + info.flags = 0; + info.length = len; + info.low_limit = mm->mmap_base; + info.high_limit = TASK_SIZE; + info.align_mask = do_align ? (PAGE_MASK & (SHMLBA - 1)) : 0; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} diff --git a/kernel/arch/arc/mm/tlb.c b/kernel/arch/arc/mm/tlb.c new file mode 100644 index 000000000..7f47d2a56 --- /dev/null +++ b/kernel/arch/arc/mm/tlb.c @@ -0,0 +1,780 @@ +/* + * TLB Management (flush/create/diagnostics) for ARC700 + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * vineetg: Aug 2011 + * -Reintroduce duplicate PD fixup - some customer chips still have the issue + * + * vineetg: May 2011 + * -No need to flush_cache_page( ) for each call to update_mmu_cache() + * some of the LMBench tests improved amazingly + * = page-fault thrice as fast (75 usec to 28 usec) + * = mmap twice as fast (9.6 msec to 4.6 msec), + * = fork (5.3 msec to 3.7 msec) + * + * vineetg: April 2011 : + * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, + * helps avoid a shift when preparing PD0 from PTE + * + * vineetg: April 2011 : Preparing for MMU V3 + * -MMU v2/v3 BCRs decoded differently + * -Remove TLB_SIZE hardcoding as it's variable now: 256 or 512 + * -tlb_entry_erase( ) can be void + * -local_flush_tlb_range( ): + * = need not "ceil" @end + * = walks MMU only if range spans < 32 entries, as opposed to 256 + * + * Vineetg: Sept 10th 2008 + * -Changes related to MMU v2 (Rel 4.8) + * + * Vineetg: Aug 29th 2008 + * -In TLB Flush operations (Metal Fix MMU) there is a explict command to + * flush Micro-TLBS. If TLB Index Reg is invalid prior to TLBIVUTLB cmd, + * it fails. Thus need to load it with ANY valid value before invoking + * TLBIVUTLB cmd + * + * Vineetg: Aug 21th 2008: + * -Reduced the duration of IRQ lockouts in TLB Flush routines + * -Multiple copies of TLB erase code seperated into a "single" function + * -In TLB Flush routines, interrupt disabling moved UP to retrieve ASID + * in interrupt-safe region. + * + * Vineetg: April 23rd Bug #93131 + * Problem: tlb_flush_kernel_range() doesnt do anything if the range to + * flush is more than the size of TLB itself. + * + * Rahul Trivedi : Codito Technologies 2004 + */ + +#include <linux/module.h> +#include <linux/bug.h> +#include <asm/arcregs.h> +#include <asm/setup.h> +#include <asm/mmu_context.h> +#include <asm/mmu.h> + +/* Need for ARC MMU v2 + * + * ARC700 MMU-v1 had a Joint-TLB for Code and Data and is 2 way set-assoc. + * For a memcpy operation with 3 players (src/dst/code) such that all 3 pages + * map into same set, there would be contention for the 2 ways causing severe + * Thrashing. + * + * Although J-TLB is 2 way set assoc, ARC700 caches J-TLB into uTLBS which has + * much higher associativity. u-D-TLB is 8 ways, u-I-TLB is 4 ways. + * Given this, the thrasing problem should never happen because once the 3 + * J-TLB entries are created (even though 3rd will knock out one of the prev + * two), the u-D-TLB and u-I-TLB will have what is required to accomplish memcpy + * + * Yet we still see the Thrashing because a J-TLB Write cause flush of u-TLBs. + * This is a simple design for keeping them in sync. So what do we do? + * The solution which James came up was pretty neat. It utilised the assoc + * of uTLBs by not invalidating always but only when absolutely necessary. + * + * - Existing TLB commands work as before + * - New command (TLBWriteNI) for TLB write without clearing uTLBs + * - New command (TLBIVUTLB) to invalidate uTLBs. + * + * The uTLBs need only be invalidated when pages are being removed from the + * OS page table. If a 'victim' TLB entry is being overwritten in the main TLB + * as a result of a miss, the removed entry is still allowed to exist in the + * uTLBs as it is still valid and present in the OS page table. This allows the + * full associativity of the uTLBs to hide the limited associativity of the main + * TLB. + * + * During a miss handler, the new "TLBWriteNI" command is used to load + * entries without clearing the uTLBs. 
+ * + * When the OS page table is updated, TLB entries that may be associated with a + * removed page are removed (flushed) from the TLB using TLBWrite. In this + * circumstance, the uTLBs must also be cleared. This is done by using the + * existing TLBWrite command. An explicit IVUTLB is also required for those + * corner cases when TLBWrite was not executed at all because the corresp + * J-TLB entry got evicted/replaced. + */ + + +/* A copy of the ASID from the PID reg is kept in asid_cache */ +DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE; + +/* + * Utility Routine to erase a J-TLB entry + * Caller needs to setup Index Reg (manually or via getIndex) + */ +static inline void __tlb_entry_erase(void) +{ + write_aux_reg(ARC_REG_TLBPD1, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +static inline unsigned int tlb_entry_lkup(unsigned long vaddr_n_asid) +{ + unsigned int idx; + + write_aux_reg(ARC_REG_TLBPD0, vaddr_n_asid); + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBProbe); + idx = read_aux_reg(ARC_REG_TLBINDEX); + + return idx; +} + +static void tlb_entry_erase(unsigned int vaddr_n_asid) +{ + unsigned int idx; + + /* Locate the TLB entry for this vaddr + ASID */ + idx = tlb_entry_lkup(vaddr_n_asid); + + /* No error means entry found, zero it out */ + if (likely(!(idx & TLB_LKUP_ERR))) { + __tlb_entry_erase(); + } else { + /* Duplicate entry error */ + WARN(idx == TLB_DUP_ERR, "Probe returned Dup PD for %x\n", + vaddr_n_asid); + } +} + +/**************************************************************************** + * ARC700 MMU caches recently used J-TLB entries (RAM) as uTLBs (FLOPs) + * + * New IVUTLB cmd in MMU v2 explictly invalidates the uTLB + * + * utlb_invalidate ( ) + * -For v2 MMU calls Flush uTLB Cmd + * -For v1 MMU does nothing (except for Metal Fix v1 MMU) + * This is because in v1 TLBWrite itself invalidate uTLBs + ***************************************************************************/ + +static void utlb_invalidate(void) +{ +#if (CONFIG_ARC_MMU_VER >= 2) + +#if (CONFIG_ARC_MMU_VER == 2) + /* MMU v2 introduced the uTLB Flush command. + * There was however an obscure hardware bug, where uTLB flush would + * fail when a prior probe for J-TLB (both totally unrelated) would + * return lkup err - because the entry didnt exist in MMU. + * The Workround was to set Index reg with some valid value, prior to + * flush. This was fixed in MMU v3 hence not needed any more + */ + unsigned int idx; + + /* make sure INDEX Reg is valid */ + idx = read_aux_reg(ARC_REG_TLBINDEX); + + /* If not write some dummy val */ + if (unlikely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBINDEX, 0xa); +#endif + + write_aux_reg(ARC_REG_TLBCOMMAND, TLBIVUTLB); +#endif + +} + +static void tlb_entry_insert(unsigned int pd0, unsigned int pd1) +{ + unsigned int idx; + + /* + * First verify if entry for this vaddr+ASID already exists + * This also sets up PD0 (vaddr, ASID..) for final commit + */ + idx = tlb_entry_lkup(pd0); + + /* + * If Not already present get a free slot from MMU. + * Otherwise, Probe would have located the entry and set INDEX Reg + * with existing location. This will cause Write CMD to over-write + * existing entry with new PD0 and PD1 + */ + if (likely(idx & TLB_LKUP_ERR)) + write_aux_reg(ARC_REG_TLBCOMMAND, TLBGetIndex); + + /* setup the other half of TLB entry (pfn, rwx..) 
*/ + write_aux_reg(ARC_REG_TLBPD1, pd1); + + /* + * Commit the Entry to MMU + * It doesnt sound safe to use the TLBWriteNI cmd here + * which doesn't flush uTLBs. I'd rather be safe than sorry. + */ + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); +} + +/* + * Un-conditionally (without lookup) erase the entire MMU contents + */ + +noinline void local_flush_tlb_all(void) +{ + unsigned long flags; + unsigned int entry; + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + local_irq_save(flags); + + /* Load PD0 and PD1 with template for a Blank Entry */ + write_aux_reg(ARC_REG_TLBPD1, 0); + write_aux_reg(ARC_REG_TLBPD0, 0); + + for (entry = 0; entry < mmu->num_tlb; entry++) { + /* write this entry to the TLB */ + write_aux_reg(ARC_REG_TLBINDEX, entry); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBWrite); + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Flush the entrie MM for userland. The fastest way is to move to Next ASID + */ +noinline void local_flush_tlb_mm(struct mm_struct *mm) +{ + /* + * Small optimisation courtesy IA64 + * flush_mm called during fork,exit,munmap etc, multiple times as well. + * Only for fork( ) do we need to move parent to a new MMU ctxt, + * all other cases are NOPs, hence this check. + */ + if (atomic_read(&mm->mm_users) == 0) + return; + + /* + * - Move to a new ASID, but only if the mm is still wired in + * (Android Binder ended up calling this for vma->mm != tsk->mm, + * causing h/w - s/w ASID to get out of sync) + * - Also get_new_mmu_context() new implementation allocates a new + * ASID only if it is not allocated already - so unallocate first + */ + destroy_context(mm); + if (current->mm == mm) + get_new_mmu_context(mm); +} + +/* + * Flush a Range of TLB entries for userland. + * @start is inclusive, while @end is exclusive + * Difference between this and Kernel Range Flush is + * -Here the fastest way (if range is too large) is to move to next ASID + * without doing any explicit Shootdown + * -In case of kernel Flush, entry has to be shot down explictly + */ +void local_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + /* If range @start to @end is more than 32 TLB entries deep, + * its better to move to a new ASID rather than searching for + * individual entries and then shooting them down + * + * The calc above is rough, doesn't account for unaligned parts, + * since this is heuristics based anyways + */ + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_mm(vma->vm_mm); + return; + } + + /* + * @start moved to page start: this alone suffices for checking + * loop end condition below, w/o need for aligning @end to end + * e.g. 2000 to 4001 will anyhow loop twice + */ + start &= PAGE_MASK; + + local_irq_save(flags); + + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + while (start < end) { + tlb_entry_erase(start | hw_pid(vma->vm_mm, cpu)); + start += PAGE_SIZE; + } + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* Flush the kernel TLB entries - vmalloc/modules (Global from MMU perspective) + * @start, @end interpreted as kvaddr + * Interestingly, shared TLB entries can also be flushed using just + * @start,@end alone (interpreted as user vaddr), although technically SASID + * is also needed. However our smart TLbProbe lookup takes care of that. 
+ */ +void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + unsigned long flags; + + /* exactly same as above, except for TLB entry not taking ASID */ + + if (unlikely((end - start) >= PAGE_SIZE * 32)) { + local_flush_tlb_all(); + return; + } + + start &= PAGE_MASK; + + local_irq_save(flags); + while (start < end) { + tlb_entry_erase(start); + start += PAGE_SIZE; + } + + utlb_invalidate(); + + local_irq_restore(flags); +} + +/* + * Delete TLB entry in MMU for a given page (??? address) + * NOTE One TLB entry contains translation for single PAGE + */ + +void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long page) +{ + const unsigned int cpu = smp_processor_id(); + unsigned long flags; + + /* Note that it is critical that interrupts are DISABLED between + * checking the ASID and using it flush the TLB entry + */ + local_irq_save(flags); + + if (asid_mm(vma->vm_mm, cpu) != MM_CTXT_NO_ASID) { + tlb_entry_erase((page & PAGE_MASK) | hw_pid(vma->vm_mm, cpu)); + utlb_invalidate(); + } + + local_irq_restore(flags); +} + +#ifdef CONFIG_SMP + +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +void flush_tlb_all(void) +{ + on_each_cpu((smp_call_func_t)local_flush_tlb_all, NULL, 1); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + on_each_cpu_mask(mm_cpumask(mm), (smp_call_func_t)local_flush_tlb_mm, + mm, 1); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = uaddr + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, &ta, 1); +} + +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct tlb_args ta = { + .ta_vma = vma, + .ta_start = start, + .ta_end = end + }; + + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, &ta, 1); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + struct tlb_args ta = { + .ta_start = start, + .ta_end = end + }; + + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); +} +#endif + +/* + * Routine to create a TLB entry + */ +void create_tlb(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) +{ + unsigned long flags; + unsigned int asid_or_sasid, rwx; + unsigned long pd0, pd1; + + /* + * create_tlb() assumes that current->mm == vma->mm, since + * -it ASID for TLB entry is fetched from MMU ASID reg (valid for curr) + * -completes the lazy write to SASID reg (again valid for curr tsk) + * + * Removing the assumption involves + * -Using vma->mm->context{ASID,SASID}, as opposed to MMU reg. + * -Fix the TLB paranoid debug code to not trigger false negatives. + * -More importantly it makes this handler inconsistent with fast-path + * TLB Refill handler which always deals with "current" + * + * Lets see the use cases when current->mm != vma->mm and we land here + * 1. 
execve->copy_strings()->__get_user_pages->handle_mm_fault + * Here VM wants to pre-install a TLB entry for user stack while + * current->mm still points to pre-execve mm (hence the condition). + * However the stack vaddr is soon relocated (randomization) and + * move_page_tables() tries to undo that TLB entry. + * Thus not creating TLB entry is not any worse. + * + * 2. ptrace(POKETEXT) causes a CoW - debugger(current) inserting a + * breakpoint in debugged task. Not creating a TLB now is not + * performance critical. + * + * Both the cases above are not good enough for code churn. + */ + if (current->active_mm != vma->vm_mm) + return; + + local_irq_save(flags); + + tlb_paranoid_check(asid_mm(vma->vm_mm, smp_processor_id()), address); + + address &= PAGE_MASK; + + /* update this PTE credentials */ + pte_val(*ptep) |= (_PAGE_PRESENT | _PAGE_ACCESSED); + + /* Create HW TLB(PD0,PD1) from PTE */ + + /* ASID for this task */ + asid_or_sasid = read_aux_reg(ARC_REG_PID) & 0xff; + + pd0 = address | asid_or_sasid | (pte_val(*ptep) & PTE_BITS_IN_PD0); + + /* + * ARC MMU provides fully orthogonal access bits for K/U mode, + * however Linux only saves 1 set to save PTE real-estate + * Here we convert 3 PTE bits into 6 MMU bits: + * -Kernel only entries have Kr Kw Kx 0 0 0 + * -User entries have mirrored K and U bits + */ + rwx = pte_val(*ptep) & PTE_BITS_RWX; + + if (pte_val(*ptep) & _PAGE_GLOBAL) + rwx <<= 3; /* r w x => Kr Kw Kx 0 0 0 */ + else + rwx |= (rwx << 3); /* r w x => Kr Kw Kx Ur Uw Ux */ + + pd1 = rwx | (pte_val(*ptep) & PTE_BITS_NON_RWX_IN_PD1); + + tlb_entry_insert(pd0, pd1); + + local_irq_restore(flags); +} + +/* + * Called at the end of pagefault, for a userspace mapped page + * -pre-install the corresponding TLB entry into MMU + * -Finalize the delayed D-cache flush of kernel mapping of page due to + * flush_dcache_page(), copy_user_page() + * + * Note that flush (when done) involves both WBACK - so physical page is + * in sync as well as INV - so any non-congruent aliases don't remain + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, + pte_t *ptep) +{ + unsigned long vaddr = vaddr_unaligned & PAGE_MASK; + unsigned long paddr = pte_val(*ptep) & PAGE_MASK; + struct page *page = pfn_to_page(pte_pfn(*ptep)); + + create_tlb(vma, vaddr, ptep); + + if (page == ZERO_PAGE(0)) { + return; + } + + /* + * Exec page : Independent of aliasing/page-color considerations, + * since icache doesn't snoop dcache on ARC, any dirty + * K-mapping of a code page needs to be wback+inv so that + * icache fetch by userspace sees code correctly. + * !EXEC page: If K-mapping is NOT congruent to U-mapping, flush it + * so userspace sees the right data. + * (Avoids the flush for Non-exec + congruent mapping case) + */ + if ((vma->vm_flags & VM_EXEC) || + addr_not_cache_congruent(paddr, vaddr)) { + + int dirty = !test_and_set_bit(PG_dc_clean, &page->flags); + if (dirty) { + /* wback + inv dcache lines */ + __flush_dcache_page(paddr, paddr); + + /* invalidate any existing icache lines */ + if (vma->vm_flags & VM_EXEC) + __inv_icache_page(paddr, vaddr); + } + } +} + +/* Read the Cache Build Confuration Registers, Decode them and save into + * the cpuinfo structure for later use. 
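The 3-bit-to-6-bit permission expansion performed in create_tlb() above can be tried in isolation. In this sketch the RWX_MASK bit layout is an assumption for illustration, not the kernel's PTE_BITS_RWX definition:

/* Standalone illustration of the K/U permission mirroring done in
 * create_tlb() above; bit positions are illustrative, not the real
 * PTE_BITS_RWX layout. */
#include <stdio.h>

#define RWX_MASK	0x7u		/* assumed: r w x in the low three bits */

static unsigned int expand_rwx(unsigned int pte_rwx, int is_global)
{
	unsigned int rwx = pte_rwx & RWX_MASK;

	if (is_global)
		return rwx << 3;	 /* r w x -> Kr Kw Kx  0  0  0 */
	else
		return rwx | (rwx << 3); /* r w x -> Kr Kw Kx Ur Uw Ux */
}

int main(void)
{
	printf("kernel page: %02x\n", expand_rwx(0x5, 1));	/* 0x28 */
	printf("user page  : %02x\n", expand_rwx(0x5, 0));	/* 0x2d */
	return 0;
}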
+ * No Validation is done here, simply read/convert the BCRs + */ +void read_decode_mmu_bcr(void) +{ + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int tmp; + struct bcr_mmu_1_2 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, u_itlb:8, u_dtlb:8; +#else + unsigned int u_dtlb:8, u_itlb:8, sets:4, ways:4, ver:8; +#endif + } *mmu2; + + struct bcr_mmu_3 { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int ver:8, ways:4, sets:4, osm:1, reserv:3, pg_sz:4, + u_itlb:4, u_dtlb:4; +#else + unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, reserv:3, osm:1, sets:4, + ways:4, ver:8; +#endif + } *mmu3; + + tmp = read_aux_reg(ARC_REG_MMU_BCR); + mmu->ver = (tmp >> 24); + + if (mmu->ver <= 2) { + mmu2 = (struct bcr_mmu_1_2 *)&tmp; + mmu->pg_sz = PAGE_SIZE; + mmu->sets = 1 << mmu2->sets; + mmu->ways = 1 << mmu2->ways; + mmu->u_dtlb = mmu2->u_dtlb; + mmu->u_itlb = mmu2->u_itlb; + } else { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz = 512 << mmu3->pg_sz; + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + } + + mmu->num_tlb = mmu->sets * mmu->ways; +} + +char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) +{ + int n = 0; + struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu; + + n += scnprintf(buf + n, len - n, + "MMU [v%x]\t: %dk PAGE, JTLB %d (%dx%d), uDTLB %d, uITLB %d %s\n", + p_mmu->ver, TO_KB(p_mmu->pg_sz), + p_mmu->num_tlb, p_mmu->sets, p_mmu->ways, + p_mmu->u_dtlb, p_mmu->u_itlb, + IS_ENABLED(CONFIG_ARC_MMU_SASID) ? ",SASID" : ""); + + return buf; +} + +void arc_mmu_init(void) +{ + char str[256]; + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + + printk(arc_mmu_mumbojumbo(0, str, sizeof(str))); + + /* For efficiency sake, kernel is compile time built for a MMU ver + * This must match the hardware it is running on. + * Linux built for MMU V2, if run on MMU V1 will break down because V1 + * hardware doesn't understand cmds such as WriteNI, or IVUTLB + * On the other hand, Linux built for V1 if run on MMU V2 will do + * un-needed workarounds to prevent memcpy thrashing. + * Similarly MMU V3 has new features which won't work on older MMU + */ + if (mmu->ver != CONFIG_ARC_MMU_VER) { + panic("MMU ver %d doesn't match kernel built for %d...\n", + mmu->ver, CONFIG_ARC_MMU_VER); + } + + if (mmu->pg_sz != PAGE_SIZE) + panic("MMU pg size != PAGE_SIZE (%luk)\n", TO_KB(PAGE_SIZE)); + + /* Enable the MMU */ + write_aux_reg(ARC_REG_PID, MMU_ENABLE); + + /* In smp we use this reg for interrupt 1 scratch */ +#ifndef CONFIG_SMP + /* swapper_pg_dir is the pgd for the kernel, used by vmalloc */ + write_aux_reg(ARC_REG_SCRATCH_DATA0, swapper_pg_dir); +#endif +} + +/* + * TLB Programmer's Model uses Linear Indexes: 0 to {255, 511} for 128 x {2,4} + * The mapping is Column-first. + * --------------------- ----------- + * |way0|way1|way2|way3| |way0|way1| + * --------------------- ----------- + * [set0] | 0 | 1 | 2 | 3 | | 0 | 1 | + * [set1] | 4 | 5 | 6 | 7 | | 2 | 3 | + * ~ ~ ~ ~ + * [set127] | 508| 509| 510| 511| | 254| 255| + * --------------------- ----------- + * For normal operations we don't(must not) care how above works since + * MMU cmd getIndex(vaddr) abstracts that out. + * However for walking WAYS of a SET, we need to know this + */ +#define SET_WAY_TO_IDX(mmu, set, way) ((set) * mmu->ways + (way)) + +/* Handling of Duplicate PD (TLB entry) in MMU. 
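The column-first set/way numbering described above is simple enough to sanity-check against the table; a minimal sketch, assuming a 128-set, 4-way joint TLB:

/* Quick check of the column-first set/way -> linear index mapping
 * described above (assumes a 128-set, 4-way joint TLB). */
#include <stdio.h>

#define SETS	128
#define WAYS	4

static unsigned int set_way_to_idx(unsigned int set, unsigned int way)
{
	return set * WAYS + way;
}

int main(void)
{
	printf("set 0,   way 3 -> %u\n", set_way_to_idx(0, 3));		/* 3   */
	printf("set 1,   way 0 -> %u\n", set_way_to_idx(1, 0));		/* 4   */
	printf("set 127, way 3 -> %u\n", set_way_to_idx(127, 3));	/* 511 */
	return 0;
}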
+ * -Could be due to buggy customer tapeouts or obscure kernel bugs + * -MMU complaints not at the time of duplicate PD installation, but at the + * time of lookup matching multiple ways. + * -Ideally these should never happen - but if they do - workaround by deleting + * the duplicate one. + * -Knob to be verbose abt it.(TODO: hook them up to debugfs) + */ +volatile int dup_pd_verbose = 1;/* Be slient abt it or complain (default) */ + +void do_tlb_overlap_fault(unsigned long cause, unsigned long address, + struct pt_regs *regs) +{ + int set, way, n; + unsigned long flags, is_valid; + struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + unsigned int pd0[mmu->ways], pd1[mmu->ways]; + + local_irq_save(flags); + + /* re-enable the MMU */ + write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID)); + + /* loop thru all sets of TLB */ + for (set = 0; set < mmu->sets; set++) { + + /* read out all the ways of current set */ + for (way = 0, is_valid = 0; way < mmu->ways; way++) { + write_aux_reg(ARC_REG_TLBINDEX, + SET_WAY_TO_IDX(mmu, set, way)); + write_aux_reg(ARC_REG_TLBCOMMAND, TLBRead); + pd0[way] = read_aux_reg(ARC_REG_TLBPD0); + pd1[way] = read_aux_reg(ARC_REG_TLBPD1); + is_valid |= pd0[way] & _PAGE_PRESENT; + } + + /* If all the WAYS in SET are empty, skip to next SET */ + if (!is_valid) + continue; + + /* Scan the set for duplicate ways: needs a nested loop */ + for (way = 0; way < mmu->ways - 1; way++) { + if (!pd0[way]) + continue; + + for (n = way + 1; n < mmu->ways; n++) { + if ((pd0[way] & PAGE_MASK) == + (pd0[n] & PAGE_MASK)) { + + if (dup_pd_verbose) { + pr_info("Duplicate PD's @" + "[%d:%d]/[%d:%d]\n", + set, way, set, n); + pr_info("TLBPD0[%u]: %08x\n", + way, pd0[way]); + } + + /* + * clear entry @way and not @n. This is + * critical to our optimised loop + */ + pd0[way] = pd1[way] = 0; + write_aux_reg(ARC_REG_TLBINDEX, + SET_WAY_TO_IDX(mmu, set, way)); + __tlb_entry_erase(); + } + } + } + } + + local_irq_restore(flags); +} + +/*********************************************************************** + * Diagnostic Routines + * -Called from Low Level TLB Hanlders if things don;t look good + **********************************************************************/ + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA + +/* + * Low Level ASM TLB handler calls this if it finds that HW and SW ASIDS + * don't match + */ +void print_asid_mismatch(int mm_asid, int mmu_asid, int is_fast_path) +{ + pr_emerg("ASID Mismatch in %s Path Handler: sw-pid=0x%x hw-pid=0x%x\n", + is_fast_path ? "Fast" : "Slow", mm_asid, mmu_asid); + + __asm__ __volatile__("flag 1"); +} + +void tlb_paranoid_check(unsigned int mm_asid, unsigned long addr) +{ + unsigned int mmu_asid; + + mmu_asid = read_aux_reg(ARC_REG_PID) & 0xff; + + /* + * At the time of a TLB miss/installation + * - HW version needs to match SW version + * - SW needs to have a valid ASID + */ + if (addr < 0x70000000 && + ((mm_asid == MM_CTXT_NO_ASID) || + (mmu_asid != (mm_asid & MM_CTXT_ASID_MASK)))) + print_asid_mismatch(mm_asid, mmu_asid, 0); +} +#endif diff --git a/kernel/arch/arc/mm/tlbex.S b/kernel/arch/arc/mm/tlbex.S new file mode 100644 index 000000000..d572f1c2c --- /dev/null +++ b/kernel/arch/arc/mm/tlbex.S @@ -0,0 +1,384 @@ +/* + * TLB Exception Handling for ARC + * + * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. 
(www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Vineetg: April 2011 : + * -MMU v1: moved out legacy code into a seperate file + * -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore, + * helps avoid a shift when preparing PD0 from PTE + * + * Vineetg: July 2009 + * -For MMU V2, we need not do heuristics at the time of commiting a D-TLB + * entry, so that it doesn't knock out it's I-TLB entry + * -Some more fine tuning: + * bmsk instead of add, asl.cc instead of branch, delay slot utilise etc + * + * Vineetg: July 2009 + * -Practically rewrote the I/D TLB Miss handlers + * Now 40 and 135 instructions a peice as compared to 131 and 449 resp. + * Hence Leaner by 1.5 K + * Used Conditional arithmetic to replace excessive branching + * Also used short instructions wherever possible + * + * Vineetg: Aug 13th 2008 + * -Passing ECR (Exception Cause REG) to do_page_fault( ) for printing + * more information in case of a Fatality + * + * Vineetg: March 25th Bug #92690 + * -Added Debug Code to check if sw-ASID == hw-ASID + + * Rahul Trivedi, Amit Bhor: Codito Technologies 2004 + */ + + .cpu A7 + +#include <linux/linkage.h> +#include <asm/entry.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/arcregs.h> +#include <asm/cache.h> +#include <asm/processor.h> +#include <asm/tlb-mmu1.h> + +;----------------------------------------------------------------- +; ARC700 Exception Handling doesn't auto-switch stack and it only provides +; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0" +; +; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a +; "global" is used to free-up FIRST core reg to be able to code the rest of +; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe). +; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3 +; need to be saved as well by extending the "global" to be 4 words. Hence +; ".size ex_saved_reg1, 16" +; [All of this dance is to avoid stack switching for each TLB Miss, since we +; only need to save only a handful of regs, as opposed to complete reg file] +; +; For ARC700 SMP, the "global" obviously can't be used for free up the FIRST +; core reg as it will not be SMP safe. +; Thus scratch AUX reg is used (and no longer used to cache task PGD). +; To save the rest of 3 regs - per cpu, the global is made "per-cpu". +; Epilogue thus has to locate the "per-cpu" storage for regs. +; To avoid cache line bouncing the per-cpu global is aligned/sized per +; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence +; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)" + +; As simple as that.... 
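A rough C model of that per-cpu scratch-slot addressing follows; NR_CPUS and L1_CACHE_SHIFT are picked only for illustration and do not reflect any particular kernel configuration:

/* Sketch of the per-cpu scratch-slot addressing described above.
 * NR_CPUS and L1_CACHE_SHIFT values are assumptions for illustration. */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS		4
#define L1_CACHE_SHIFT	6		/* 64-byte L1 lines assumed */

/* one cache-line-sized slot per cpu, so slots never share a line */
static uint32_t ex_saved_reg1[(NR_CPUS << L1_CACHE_SHIFT) / sizeof(uint32_t)];

static uint32_t *cpu_slot(unsigned int cpu)
{
	/* mirrors: GET_CPU_ID r0; lsl r0, r0, L1_CACHE_SHIFT; add r0, base, r0 */
	return (uint32_t *)((char *)ex_saved_reg1 + (cpu << L1_CACHE_SHIFT));
}

int main(void)
{
	for (unsigned int cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %u slot offset: %ld bytes\n", cpu,
		       (long)((char *)cpu_slot(cpu) - (char *)ex_saved_reg1));
	return 0;
}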
+;-------------------------------------------------------------------------- + +; scratch memory to save [r0-r3] used to code TLB refill Handler +ARCFP_DATA ex_saved_reg1 + .align 1 << L1_CACHE_SHIFT + .type ex_saved_reg1, @object +#ifdef CONFIG_SMP + .size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +ex_saved_reg1: + .zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT) +#else + .size ex_saved_reg1, 16 +ex_saved_reg1: + .zero 16 +#endif + +.macro TLBMISS_FREEUP_REGS +#ifdef CONFIG_SMP + sr r0, [ARC_REG_SCRATCH_DATA0] ; freeup r0 to code with + GET_CPU_ID r0 ; get to per cpu scratch mem, + lsl r0, r0, L1_CACHE_SHIFT ; cache line wide per cpu + add r0, @ex_saved_reg1, r0 +#else + st r0, [@ex_saved_reg1] + mov_s r0, @ex_saved_reg1 +#endif + st_s r1, [r0, 4] + st_s r2, [r0, 8] + st_s r3, [r0, 12] + + ; VERIFY if the ASID in MMU-PID Reg is same as + ; one in Linux data structures + + tlb_paranoid_check_asm +.endm + +.macro TLBMISS_RESTORE_REGS +#ifdef CONFIG_SMP + GET_CPU_ID r0 ; get to per cpu scratch mem + lsl r0, r0, L1_CACHE_SHIFT ; each is cache line wide + add r0, @ex_saved_reg1, r0 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + lr r0, [ARC_REG_SCRATCH_DATA0] +#else + mov_s r0, @ex_saved_reg1 + ld_s r3, [r0,12] + ld_s r2, [r0, 8] + ld_s r1, [r0, 4] + ld_s r0, [r0] +#endif +.endm + +;============================================================================ +; Troubleshooting Stuff +;============================================================================ + +; Linux keeps ASID (Address Space ID) in task->active_mm->context.asid +; When Creating TLB Entries, instead of doing 3 dependent loads from memory, +; we use the MMU PID Reg to get current ASID. +; In bizzare scenrios SW and HW ASID can get out-of-sync which is trouble. +; So we try to detect this in TLB Mis shandler + +.macro tlb_paranoid_check_asm + +#ifdef CONFIG_ARC_DBG_TLB_PARANOIA + + GET_CURR_TASK_ON_CPU r3 + ld r0, [r3, TASK_ACT_MM] + ld r0, [r0, MM_CTXT+MM_CTXT_ASID] + breq r0, 0, 55f ; Error if no ASID allocated + + lr r1, [ARC_REG_PID] + and r1, r1, 0xFF + + and r2, r0, 0xFF ; MMU PID bits only for comparison + breq r1, r2, 5f + +55: + ; Error if H/w and S/w ASID don't match, but NOT if in kernel mode + lr r2, [erstatus] + bbit0 r2, STATUS_U_BIT, 5f + + ; We sure are in troubled waters, Flag the error, but to do so + ; need to switch to kernel mode stack to call error routine + GET_TSK_STACK_BASE r3, sp + + ; Call printk to shoutout aloud + mov r2, 1 + j print_asid_mismatch + +5: ; ASIDs match so proceed normally + nop + +#endif + +.endm + +;============================================================================ +;TLB Miss handling Code +;============================================================================ + +;----------------------------------------------------------------------------- +; This macro does the page-table lookup for the faulting address. 
+; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address +.macro LOAD_FAULT_PTE + + lr r2, [efa] + +#ifndef CONFIG_SMP + lr r1, [ARC_REG_SCRATCH_DATA0] ; current pgd +#else + GET_CURR_TASK_ON_CPU r1 + ld r1, [r1, TASK_ACT_MM] + ld r1, [r1, MM_PGD] +#endif + + lsr r0, r2, PGDIR_SHIFT ; Bits for indexing into PGD + ld.as r1, [r1, r0] ; PGD entry corresp to faulting addr + and.f r1, r1, PAGE_MASK ; Ignoring protection and other flags + ; contains Ptr to Page Table + bz.d do_slow_path_pf ; if no Page Table, do page fault + + ; Get the PTE entry: The idea is + ; (1) x = addr >> PAGE_SHIFT -> masks page-off bits from @fault-addr + ; (2) y = x & (PTRS_PER_PTE - 1) -> to get index + ; (3) z = pgtbl[y] + ; To avoid the multiply by in end, we do the -2, <<2 below + + lsr r0, r2, (PAGE_SHIFT - 2) + and r0, r0, ( (PTRS_PER_PTE - 1) << 2) + ld.aw r0, [r1, r0] ; get PTE and PTE ptr for fault addr +#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT + and.f 0, r0, _PAGE_PRESENT + bz 1f + ld r3, [num_pte_not_present] + add r3, r3, 1 + st r3, [num_pte_not_present] +1: +#endif + +.endm + +;----------------------------------------------------------------- +; Convert Linux PTE entry into TLB entry +; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu +; IN: r0 = PTE, r1 = ptr to PTE + +.macro CONV_PTE_TO_TLB + and r3, r0, PTE_BITS_RWX ; r w x + lsl r2, r3, 3 ; r w x 0 0 0 (GLOBAL, kernel only) + and.f 0, r0, _PAGE_GLOBAL + or.z r2, r2, r3 ; r w x r w x (!GLOBAL, user page) + + and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE + or r3, r3, r2 + + sr r3, [ARC_REG_TLBPD1] ; these go in PD1 + + and r2, r0, PTE_BITS_IN_PD0 ; Extract other PTE flags: (V)alid, (G)lb + + lr r3,[ARC_REG_TLBPD0] ; MMU prepares PD0 with vaddr and asid + + or r3, r3, r2 ; S | vaddr | {sasid|asid} + sr r3,[ARC_REG_TLBPD0] ; rewrite PD0 +.endm + +;----------------------------------------------------------------- +; Commit the TLB entry into MMU + +.macro COMMIT_ENTRY_TO_MMU + + /* Get free TLB slot: Set = computed from vaddr, way = random */ + sr TLBGetIndex, [ARC_REG_TLBCOMMAND] + + /* Commit the Write */ +#if (CONFIG_ARC_MMU_VER >= 2) /* introduced in v2 */ + sr TLBWriteNI, [ARC_REG_TLBCOMMAND] +#else + sr TLBWrite, [ARC_REG_TLBCOMMAND] +#endif +.endm + + +ARCFP_CODE ;Fast Path Code, candidate for ICCM + +;----------------------------------------------------------------------------- +; I-TLB Miss Exception Handler +;----------------------------------------------------------------------------- + +ENTRY(EV_TLBMissI) + + TLBMISS_FREEUP_REGS + +#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT + ld r0, [@numitlb] + add r0, r0, 1 + st r0, [@numitlb] +#endif + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Check if PTE permissions approp for executing code + cmp_s r2, VMALLOC_START + mov_s r2, (_PAGE_PRESENT | _PAGE_EXECUTE) + or.hs r2, r2, _PAGE_GLOBAL + + and r3, r0, r2 ; Mask out NON Flag bits from PTE + xor.f r3, r3, r2 ; check ( ( pte & flags_test ) == flags_test ) + bnz do_slow_path_pf + + ; Let Linux VM know that the page was accessed + or r0, r0, _PAGE_ACCESSED ; set Accessed Bit + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS + rtie + +END(EV_TLBMissI) + +;----------------------------------------------------------------------------- +; D-TLB Miss Exception Handler 
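The multiply-free PTE-offset trick in LOAD_FAULT_PTE above can be verified exhaustively for a small address range; PAGE_SHIFT and PTRS_PER_PTE below are assumed values, not the kernel's actual configuration:

/* Check of the PTE-offset shortcut used in LOAD_FAULT_PTE above:
 * (vaddr >> (PAGE_SHIFT - 2)) & ((PTRS_PER_PTE - 1) << 2)
 * is the byte offset of the PTE, i.e. index * 4, without a multiply.
 * PAGE_SHIFT / PTRS_PER_PTE values are assumptions for illustration. */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT	13		/* 8K pages assumed */
#define PTRS_PER_PTE	256		/* assumed PTEs per page table */

int main(void)
{
	for (unsigned long va = 0; va < (1UL << 26); va += 0x1235) {
		unsigned long idx  = (va >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
		unsigned long offs = (va >> (PAGE_SHIFT - 2)) &
				     ((PTRS_PER_PTE - 1) << 2);
		assert(offs == idx * 4);
	}
	puts("shift trick matches index * sizeof(pte) everywhere");
	return 0;
}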
+;----------------------------------------------------------------------------- + +ENTRY(EV_TLBMissD) + + TLBMISS_FREEUP_REGS + +#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT + ld r0, [@numdtlb] + add r0, r0, 1 + st r0, [@numdtlb] +#endif + + ;---------------------------------------------------------------- + ; Get the PTE corresponding to V-addr accessed + ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA + LOAD_FAULT_PTE + + ;---------------------------------------------------------------- + ; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W) + + cmp_s r2, VMALLOC_START + mov_s r2, _PAGE_PRESENT ; common bit for K/U PTE + or.hs r2, r2, _PAGE_GLOBAL ; kernel PTE only + + ; Linux PTE [RWX] bits are semantically overloaded: + ; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc) + ; -Otherwise they are user-mode permissions, and those are exactly + ; same for kernel mode as well (e.g. copy_(to|from)_user) + + lr r3, [ecr] + btst_s r3, ECR_C_BIT_DTLB_LD_MISS ; Read Access + or.nz r2, r2, _PAGE_READ ; chk for Read flag in PTE + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; Write Access + or.nz r2, r2, _PAGE_WRITE ; chk for Write flag in PTE + ; Above laddering takes care of XCHG access (both R and W) + + ; By now, r2 setup with all the Flags we need to check in PTE + and r3, r0, r2 ; Mask out NON Flag bits from PTE + brne.d r3, r2, do_slow_path_pf ; is ((pte & flags_test) == flags_test) + + ;---------------------------------------------------------------- + ; UPDATE_PTE: Let Linux VM know that page was accessed/dirty + lr r3, [ecr] + or r0, r0, _PAGE_ACCESSED ; Accessed bit always + btst_s r3, ECR_C_BIT_DTLB_ST_MISS ; See if it was a Write Access ? + or.nz r0, r0, _PAGE_MODIFIED ; if Write, set Dirty bit as well + st_s r0, [r1] ; Write back PTE + + CONV_PTE_TO_TLB + +#if (CONFIG_ARC_MMU_VER == 1) + ; MMU with 2 way set assoc J-TLB, needs some help in pathetic case of + ; memcpy where 3 parties contend for 2 ways, ensuing a livelock. + ; But only for old MMU or one with Metal Fix + TLB_WRITE_HEURISTICS +#endif + + COMMIT_ENTRY_TO_MMU + TLBMISS_RESTORE_REGS + rtie + +;-------- Common routine to call Linux Page Fault Handler ----------- +do_slow_path_pf: + + ; Restore the 4-scratch regs saved by fast path miss handler + TLBMISS_RESTORE_REGS + + ; Slow path TLB Miss handled as a regular ARC Exception + ; (stack switching / save the complete reg-file). + EXCEPTION_PROLOGUE + + ; ------- setup args for Linux Page fault Hanlder --------- + mov_s r1, sp + lr r0, [efa] + + ; We don't want exceptions to be disabled while the fault is handled. + ; Now that we have saved the context we return from exception hence + ; exceptions get re-enable + + FAKE_RET_FROM_EXCPN r9 + + bl do_page_fault + b ret_from_exception + +END(EV_TLBMissD) |
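A standalone C model of the permission "laddering" in the D-TLB miss handler above: build the set of flags the access requires, then demand the PTE carries them all. Flag values, the access enum and the helper name are illustrative only, not the kernel's definitions:

/* Standalone model of the D-TLB miss permission check above. */
#include <stdbool.h>
#include <stdio.h>

#define _PAGE_PRESENT	0x01u		/* assumed flag values, for illustration */
#define _PAGE_READ	0x02u
#define _PAGE_WRITE	0x04u
#define _PAGE_GLOBAL	0x08u

enum access { ACC_LOAD, ACC_STORE, ACC_XCHG };

static bool dtlb_access_ok(unsigned int pte, enum access acc, bool kernel_vaddr)
{
	unsigned int need = _PAGE_PRESENT;

	if (kernel_vaddr)			/* vmalloc region: kernel PTEs only */
		need |= _PAGE_GLOBAL;
	if (acc == ACC_LOAD || acc == ACC_XCHG)
		need |= _PAGE_READ;
	if (acc == ACC_STORE || acc == ACC_XCHG)
		need |= _PAGE_WRITE;

	/* matches: and r3, pte, need ; brne r3, need, do_slow_path_pf */
	return (pte & need) == need;
}

int main(void)
{
	unsigned int ro_user = _PAGE_PRESENT | _PAGE_READ;

	printf("read  of RO page: %s\n",
	       dtlb_access_ok(ro_user, ACC_LOAD,  false) ? "fast path" : "slow path");
	printf("write to RO page: %s\n",
	       dtlb_access_ok(ro_user, ACC_STORE, false) ? "fast path" : "slow path");
	return 0;
}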