diff options
author | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 12:17:53 -0700 |
---|---|---|
committer | Yunhong Jiang <yunhong.jiang@intel.com> | 2015-08-04 15:44:42 -0700 |
commit | 9ca8dbcc65cfc63d6f5ef3312a33184e1d726e00 (patch) | |
tree | 1c9cafbcd35f783a87880a10f85d1a060db1a563 /kernel/arch/arm64/mm/mmu.c | |
parent | 98260f3884f4a202f9ca5eabed40b1354c489b29 (diff) |
Add the rt linux 4.1.3-rt3 as base
Import the rt linux 4.1.3-rt3 as OPNFV kvm base.
It's from git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-4.1.y-rt and
the base is:
commit 0917f823c59692d751951bf5ea699a2d1e2f26a2
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat Jul 25 12:13:34 2015 +0200
Prepare v4.1.3-rt3
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
We lose all the git history this way and it's not good. We
should apply another opnfv project repo in future.
Change-Id: I87543d81c9df70d99c5001fbdf646b202c19f423
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
Diffstat (limited to 'kernel/arch/arm64/mm/mmu.c')
-rw-r--r-- | kernel/arch/arm64/mm/mmu.c | 645 |
1 files changed, 645 insertions, 0 deletions
diff --git a/kernel/arch/arm64/mm/mmu.c b/kernel/arch/arm64/mm/mmu.c new file mode 100644 index 000000000..5b8b66442 --- /dev/null +++ b/kernel/arch/arm64/mm/mmu.c @@ -0,0 +1,645 @@ +/* + * Based on arch/arm/mm/mmu.c + * + * Copyright (C) 1995-2005 Russell King + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/mman.h> +#include <linux/nodemask.h> +#include <linux/memblock.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> + +#include <asm/cputype.h> +#include <asm/fixmap.h> +#include <asm/sections.h> +#include <asm/setup.h> +#include <asm/sizes.h> +#include <asm/tlb.h> +#include <asm/memblock.h> +#include <asm/mmu_context.h> + +#include "mm.h" + +u64 idmap_t0sz = TCR_T0SZ(VA_BITS); + +/* + * Empty_zero_page is a special page that is used for zero-initialized data + * and COW. + */ +struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + if (!pfn_valid(pfn)) + return pgprot_noncached(vma_prot); + else if (file->f_flags & O_SYNC) + return pgprot_writecombine(vma_prot); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +static void __init *early_alloc(unsigned long sz) +{ + void *ptr = __va(memblock_alloc(sz, sz)); + BUG_ON(!ptr); + memset(ptr, 0, sz); + return ptr; +} + +/* + * remap a PMD into pages + */ +static void split_pmd(pmd_t *pmd, pte_t *pte) +{ + unsigned long pfn = pmd_pfn(*pmd); + int i = 0; + + do { + /* + * Need to have the least restrictive permissions available + * permissions will be fixed up later + */ + set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); + pfn++; + } while (pte++, i++, i < PTRS_PER_PTE); +} + +static void alloc_init_pte(pmd_t *pmd, unsigned long addr, + unsigned long end, unsigned long pfn, + pgprot_t prot, + void *(*alloc)(unsigned long size)) +{ + pte_t *pte; + + if (pmd_none(*pmd) || pmd_sect(*pmd)) { + pte = alloc(PTRS_PER_PTE * sizeof(pte_t)); + if (pmd_sect(*pmd)) + split_pmd(pmd, pte); + __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE); + flush_tlb_all(); + } + BUG_ON(pmd_bad(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + do { + set_pte(pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +void split_pud(pud_t *old_pud, pmd_t *pmd) +{ + unsigned long addr = pud_pfn(*old_pud) << PAGE_SHIFT; + pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr); + int i = 0; + + do { + set_pmd(pmd, __pmd(addr | prot)); + addr += PMD_SIZE; + } while (pmd++, i++, i < PTRS_PER_PMD); +} + +static void alloc_init_pmd(struct mm_struct *mm, pud_t *pud, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) +{ + pmd_t *pmd; + unsigned long next; + + /* + * Check for initial section mappings in the pgd/pud and remove them. + */ + if (pud_none(*pud) || pud_sect(*pud)) { + pmd = alloc(PTRS_PER_PMD * sizeof(pmd_t)); + if (pud_sect(*pud)) { + /* + * need to have the 1G of mappings continue to be + * present + */ + split_pud(pud, pmd); + } + pud_populate(mm, pud, pmd); + flush_tlb_all(); + } + BUG_ON(pud_bad(*pud)); + + pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* try section mapping first */ + if (((addr | next | phys) & ~SECTION_MASK) == 0) { + pmd_t old_pmd =*pmd; + set_pmd(pmd, __pmd(phys | + pgprot_val(mk_sect_prot(prot)))); + /* + * Check for previous table entries created during + * boot (__create_page_tables) and flush them. + */ + if (!pmd_none(old_pmd)) { + flush_tlb_all(); + if (pmd_table(old_pmd)) { + phys_addr_t table = __pa(pte_offset_map(&old_pmd, 0)); + if (!WARN_ON_ONCE(slab_is_available())) + memblock_free(table, PAGE_SIZE); + } + } + } else { + alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys), + prot, alloc); + } + phys += next - addr; + } while (pmd++, addr = next, addr != end); +} + +static inline bool use_1G_block(unsigned long addr, unsigned long next, + unsigned long phys) +{ + if (PAGE_SHIFT != 12) + return false; + + if (((addr | next | phys) & ~PUD_MASK) != 0) + return false; + + return true; +} + +static void alloc_init_pud(struct mm_struct *mm, pgd_t *pgd, + unsigned long addr, unsigned long end, + phys_addr_t phys, pgprot_t prot, + void *(*alloc)(unsigned long size)) +{ + pud_t *pud; + unsigned long next; + + if (pgd_none(*pgd)) { + pud = alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(mm, pgd, pud); + } + BUG_ON(pgd_bad(*pgd)); + + pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + + /* + * For 4K granule only, attempt to put down a 1GB block + */ + if (use_1G_block(addr, next, phys)) { + pud_t old_pud = *pud; + set_pud(pud, __pud(phys | + pgprot_val(mk_sect_prot(prot)))); + + /* + * If we have an old value for a pud, it will + * be pointing to a pmd table that we no longer + * need (from swapper_pg_dir). + * + * Look up the old pmd table and free it. + */ + if (!pud_none(old_pud)) { + flush_tlb_all(); + if (pud_table(old_pud)) { + phys_addr_t table = __pa(pmd_offset(&old_pud, 0)); + if (!WARN_ON_ONCE(slab_is_available())) + memblock_free(table, PAGE_SIZE); + } + } + } else { + alloc_init_pmd(mm, pud, addr, next, phys, prot, alloc); + } + phys += next - addr; + } while (pud++, addr = next, addr != end); +} + +/* + * Create the page directory entries and any necessary page tables for the + * mapping specified by 'md'. + */ +static void __create_mapping(struct mm_struct *mm, pgd_t *pgd, + phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot, + void *(*alloc)(unsigned long size)) +{ + unsigned long addr, length, end, next; + + addr = virt & PAGE_MASK; + length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); + + end = addr + length; + do { + next = pgd_addr_end(addr, end); + alloc_init_pud(mm, pgd, addr, next, phys, prot, alloc); + phys += next - addr; + } while (pgd++, addr = next, addr != end); +} + +static void *late_alloc(unsigned long size) +{ + void *ptr; + + BUG_ON(size > PAGE_SIZE); + ptr = (void *)__get_free_page(PGALLOC_GFP); + BUG_ON(!ptr); + return ptr; +} + +static void __ref create_mapping(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), phys, virt, + size, prot, early_alloc); +} + +void __init create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, + unsigned long virt, phys_addr_t size, + pgprot_t prot) +{ + __create_mapping(mm, pgd_offset(mm, virt), phys, virt, size, prot, + late_alloc); +} + +static void create_mapping_late(phys_addr_t phys, unsigned long virt, + phys_addr_t size, pgprot_t prot) +{ + if (virt < VMALLOC_START) { + pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n", + &phys, virt); + return; + } + + return __create_mapping(&init_mm, pgd_offset_k(virt & PAGE_MASK), + phys, virt, size, prot, late_alloc); +} + +#ifdef CONFIG_DEBUG_RODATA +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + /* + * Set up the executable regions using the existing section mappings + * for now. This will get more fine grained later once all memory + * is mapped + */ + unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE); + unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); + + if (end < kernel_x_start) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else if (start >= kernel_x_end) { + create_mapping(start, __phys_to_virt(start), + end - start, PAGE_KERNEL); + } else { + if (start < kernel_x_start) + create_mapping(start, __phys_to_virt(start), + kernel_x_start - start, + PAGE_KERNEL); + create_mapping(kernel_x_start, + __phys_to_virt(kernel_x_start), + kernel_x_end - kernel_x_start, + PAGE_KERNEL_EXEC); + if (kernel_x_end < end) + create_mapping(kernel_x_end, + __phys_to_virt(kernel_x_end), + end - kernel_x_end, + PAGE_KERNEL); + } + +} +#else +static void __init __map_memblock(phys_addr_t start, phys_addr_t end) +{ + create_mapping(start, __phys_to_virt(start), end - start, + PAGE_KERNEL_EXEC); +} +#endif + +static void __init map_mem(void) +{ + struct memblock_region *reg; + phys_addr_t limit; + + /* + * Temporarily limit the memblock range. We need to do this as + * create_mapping requires puds, pmds and ptes to be allocated from + * memory addressable from the initial direct kernel mapping. + * + * The initial direct kernel mapping, located at swapper_pg_dir, gives + * us PUD_SIZE (4K pages) or PMD_SIZE (64K pages) memory starting from + * PHYS_OFFSET (which must be aligned to 2MB as per + * Documentation/arm64/booting.txt). + */ + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES)) + limit = PHYS_OFFSET + PMD_SIZE; + else + limit = PHYS_OFFSET + PUD_SIZE; + memblock_set_current_limit(limit); + + /* map all the memory banks */ + for_each_memblock(memory, reg) { + phys_addr_t start = reg->base; + phys_addr_t end = start + reg->size; + + if (start >= end) + break; + +#ifndef CONFIG_ARM64_64K_PAGES + /* + * For the first memory bank align the start address and + * current memblock limit to prevent create_mapping() from + * allocating pte page tables from unmapped memory. + * When 64K pages are enabled, the pte page table for the + * first PGDIR_SIZE is already present in swapper_pg_dir. + */ + if (start < limit) + start = ALIGN(start, PMD_SIZE); + if (end < limit) { + limit = end & PMD_MASK; + memblock_set_current_limit(limit); + } +#endif + __map_memblock(start, end); + } + + /* Limit no longer required. */ + memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE); +} + +void __init fixup_executable(void) +{ +#ifdef CONFIG_DEBUG_RODATA + /* now that we are actually fully mapped, make the start/end more fine grained */ + if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) { + unsigned long aligned_start = round_down(__pa(_stext), + SECTION_SIZE); + + create_mapping(aligned_start, __phys_to_virt(aligned_start), + __pa(_stext) - aligned_start, + PAGE_KERNEL); + } + + if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) { + unsigned long aligned_end = round_up(__pa(__init_end), + SECTION_SIZE); + create_mapping(__pa(__init_end), (unsigned long)__init_end, + aligned_end - __pa(__init_end), + PAGE_KERNEL); + } +#endif +} + +#ifdef CONFIG_DEBUG_RODATA +void mark_rodata_ro(void) +{ + create_mapping_late(__pa(_stext), (unsigned long)_stext, + (unsigned long)_etext - (unsigned long)_stext, + PAGE_KERNEL_EXEC | PTE_RDONLY); + +} +#endif + +void fixup_init(void) +{ + create_mapping_late(__pa(__init_begin), (unsigned long)__init_begin, + (unsigned long)__init_end - (unsigned long)__init_begin, + PAGE_KERNEL); +} + +/* + * paging_init() sets up the page tables, initialises the zone memory + * maps and sets up the zero page. + */ +void __init paging_init(void) +{ + void *zero_page; + + map_mem(); + fixup_executable(); + + /* allocate the zero page. */ + zero_page = early_alloc(PAGE_SIZE); + + bootmem_init(); + + empty_zero_page = virt_to_page(zero_page); + + /* + * TTBR0 is only used for the identity mapping at this stage. Make it + * point to zero page to avoid speculatively fetching new entries. + */ + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + cpu_set_default_tcr_t0sz(); +} + +/* + * Enable the identity mapping to allow the MMU disabling. + */ +void setup_mm_for_reboot(void) +{ + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + cpu_set_idmap_tcr_t0sz(); + cpu_switch_mm(idmap_pg_dir, &init_mm); +} + +/* + * Check whether a kernel address is valid (derived from arch/x86/). + */ +int kern_addr_valid(unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + if ((((long)addr) >> VA_BITS) != -1UL) + return 0; + + pgd = pgd_offset_k(addr); + if (pgd_none(*pgd)) + return 0; + + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) + return 0; + + if (pud_sect(*pud)) + return pfn_valid(pud_pfn(*pud)); + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) + return 0; + + if (pmd_sect(*pmd)) + return pfn_valid(pmd_pfn(*pmd)); + + pte = pte_offset_kernel(pmd, addr); + if (pte_none(*pte)) + return 0; + + return pfn_valid(pte_pfn(*pte)); +} +#ifdef CONFIG_SPARSEMEM_VMEMMAP +#ifdef CONFIG_ARM64_64K_PAGES +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +{ + return vmemmap_populate_basepages(start, end, node); +} +#else /* !CONFIG_ARM64_64K_PAGES */ +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +{ + unsigned long addr = start; + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + do { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + void *p = NULL; + + p = vmemmap_alloc_block_buf(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + set_pmd(pmd, __pmd(__pa(p) | PROT_SECT_NORMAL)); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } while (addr = next, addr != end); + + return 0; +} +#endif /* CONFIG_ARM64_64K_PAGES */ +void vmemmap_free(unsigned long start, unsigned long end) +{ +} +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ + +static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#if CONFIG_PGTABLE_LEVELS > 2 +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; +#endif +#if CONFIG_PGTABLE_LEVELS > 3 +static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif + +static inline pud_t * fixmap_pud(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + + BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); + + return pud_offset(pgd, addr); +} + +static inline pmd_t * fixmap_pmd(unsigned long addr) +{ + pud_t *pud = fixmap_pud(addr); + + BUG_ON(pud_none(*pud) || pud_bad(*pud)); + + return pmd_offset(pud, addr); +} + +static inline pte_t * fixmap_pte(unsigned long addr) +{ + pmd_t *pmd = fixmap_pmd(addr); + + BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd)); + + return pte_offset_kernel(pmd, addr); +} + +void __init early_fixmap_init(void) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + unsigned long addr = FIXADDR_START; + + pgd = pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + /* + * The boot-ioremap range spans multiple pmds, for which + * we are not preparted: + */ + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); + + if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN))) + || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) { + WARN_ON(1); + pr_warn("pmd %p != %p, %p\n", + pmd, fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)), + fixmap_pmd(fix_to_virt(FIX_BTMAP_END))); + pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n", + fix_to_virt(FIX_BTMAP_BEGIN)); + pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n", + fix_to_virt(FIX_BTMAP_END)); + + pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END); + pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); + } +} + +void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + pte = fixmap_pte(addr); + + if (pgprot_val(flags)) { + set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags)); + } else { + pte_clear(&init_mm, addr, pte); + flush_tlb_kernel_range(addr, addr+PAGE_SIZE); + } +} |