author     José Pekkarinen <jose.pekkarinen@nokia.com>  2016-04-11 10:41:07 +0300
committer  José Pekkarinen <jose.pekkarinen@nokia.com>  2016-04-13 08:17:18 +0300
commit     e09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
tree       d10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/arch/s390/mm
parent     f93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. The kernel sources
are taken from kernel.org, and the rt patch from the rt wiki download page.

During the rebase, the following patch collided:

    Force tick interrupt and get rid of softirq magic (I70131fb85)

The conflicting hunks were dropped because their logic was already
present in the source.
Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769
Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/arch/s390/mm')
-rw-r--r--  kernel/arch/s390/mm/extable.c        8
-rw-r--r--  kernel/arch/s390/mm/extmem.c         3
-rw-r--r--  kernel/arch/s390/mm/fault.c          7
-rw-r--r--  kernel/arch/s390/mm/gup.c           10
-rw-r--r--  kernel/arch/s390/mm/hugetlbpage.c   72
-rw-r--r--  kernel/arch/s390/mm/init.c          74
-rw-r--r--  kernel/arch/s390/mm/mem_detect.c     4
-rw-r--r--  kernel/arch/s390/mm/mmap.c          60
-rw-r--r--  kernel/arch/s390/mm/pgtable.c      225
9 files changed, 155 insertions(+), 308 deletions(-)
diff --git a/kernel/arch/s390/mm/extable.c b/kernel/arch/s390/mm/extable.c
index 4d1ee8886..18c8b819b 100644
--- a/kernel/arch/s390/mm/extable.c
+++ b/kernel/arch/s390/mm/extable.c
@@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start,
 	int i;
 
 	/* Normalize entries to being relative to the start of the section */
-	for (p = start, i = 0; p < finish; p++, i += 8)
+	for (p = start, i = 0; p < finish; p++, i += 8) {
 		p->insn += i;
+		p->fixup += i + 4;
+	}
 	sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
 	/* Denormalize all entries */
-	for (p = start, i = 0; p < finish; p++, i += 8)
+	for (p = start, i = 0; p < finish; p++, i += 8) {
 		p->insn -= i;
+		p->fixup -= i + 4;
+	}
 }
 
 #ifdef CONFIG_MODULES
diff --git a/kernel/arch/s390/mm/extmem.c b/kernel/arch/s390/mm/extmem.c
index 23c496957..18fccc303 100644
--- a/kernel/arch/s390/mm/extmem.c
+++ b/kernel/arch/s390/mm/extmem.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/ctype.h>
 #include <linux/ioport.h>
+#include <asm/diag.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/ebcdic.h>
@@ -112,6 +113,7 @@ dcss_set_subcodes(void)
 	ry = DCSS_FINDSEGX;
 
 	strcpy(name, "dummy");
+	diag_stat_inc(DIAG_STAT_X064);
 	asm volatile(
 		"	diag	%0,%1,0x64\n"
 		"0:	ipm	%2\n"
@@ -205,6 +207,7 @@ dcss_diag(int *func, void *parameter,
 	ry = (unsigned long) *func;
 
 	/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+	diag_stat_inc(DIAG_STAT_X064);
 	if (*func > DCSS_SEGEXT)
 		asm volatile(
 			"	diag	%0,%1,0x64\n"
diff --git a/kernel/arch/s390/mm/fault.c b/kernel/arch/s390/mm/fault.c
index 4c8f5d7f9..ec1a30d0d 100644
--- a/kernel/arch/s390/mm/fault.c
+++ b/kernel/arch/s390/mm/fault.c
@@ -30,6 +30,7 @@
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
 #include <asm/asm-offsets.h>
+#include <asm/diag.h>
 #include <asm/pgtable.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
@@ -589,7 +590,7 @@ int pfault_init(void)
 		.reffcode = 0,
 		.refdwlen = 5,
 		.refversn = 2,
-		.refgaddr = __LC_CURRENT_PID,
+		.refgaddr = __LC_LPP,
 		.refselmk = 1ULL << 48,
 		.refcmpmk = 1ULL << 48,
 		.reserved = __PF_RES_FIELD };
@@ -597,6 +598,7 @@ int pfault_init(void)
 
 	if (pfault_disable)
 		return -1;
+	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%1,%0,0x258\n"
 		"0:	j	2f\n"
@@ -618,6 +620,7 @@ void pfault_fini(void)
 
 	if (pfault_disable)
 		return;
+	diag_stat_inc(DIAG_STAT_X258);
 	asm volatile(
 		"	diag	%0,0,0x258\n"
 		"0:\n"
@@ -646,7 +649,7 @@ static void pfault_interrupt(struct ext_code ext_code,
 		return;
 	inc_irq_stat(IRQEXT_PFL);
 	/* Get the token (= pid of the affected task). */
-	pid = sizeof(void *) == 4 ? param32 : param64;
+	pid = param64 & LPP_PFAULT_PID_MASK;
 	rcu_read_lock();
 	tsk = find_task_by_pid_ns(pid, &init_pid_ns);
 	if (tsk)
diff --git a/kernel/arch/s390/mm/gup.c b/kernel/arch/s390/mm/gup.c
index 1eb41bb30..12bbf0e84 100644
--- a/kernel/arch/s390/mm/gup.c
+++ b/kernel/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
 	do {
 		pte = *ptep;
 		barrier();
+		/* Similar to the PMD case, NUMA hinting must take slow path */
+		if (pte_protnone(pte))
+			return 0;
 		if ((pte_val(pte) & mask) != 0)
 			return 0;
 		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
 		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
 			return 0;
 		if (unlikely(pmd_large(pmd))) {
+			/*
+			 * NUMA hinting faults need to be handled in the GUP
+			 * slowpath for accounting purposes and so that they
+			 * can be serialised against THP migration.
+			 */
+			if (pmd_protnone(pmd))
+				return 0;
 			if (!gup_huge_pmd(pmdp, pmd, addr, next,
 					  write, pages, nr))
 				return 0;
diff --git a/kernel/arch/s390/mm/hugetlbpage.c b/kernel/arch/s390/mm/hugetlbpage.c
index e617e74b7..f81096b69 100644
--- a/kernel/arch/s390/mm/hugetlbpage.c
+++ b/kernel/arch/s390/mm/hugetlbpage.c
@@ -40,6 +40,7 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
 		pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+		pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
 	} else
 		pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
 	return pmd;
@@ -78,6 +79,7 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
 		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
+		pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
 	} else
 		pte_val(pte) = _PAGE_INVALID;
 	return pte;
@@ -86,31 +88,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte)
 {
-	pmd_t pmd;
+	pmd_t pmd = __pte_to_pmd(pte);
 
-	pmd = __pte_to_pmd(pte);
-	if (!MACHINE_HAS_HPAGE) {
-		/* Emulated huge ptes loose the dirty and young bit */
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
-		pmd_val(pmd) |= pte_page(pte)[1].index;
-	} else
-		pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+	pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
 	*(pmd_t *) ptep = pmd;
 }
 
 pte_t huge_ptep_get(pte_t *ptep)
 {
-	unsigned long origin;
-	pmd_t pmd;
+	pmd_t pmd = *(pmd_t *) ptep;
 
-	pmd = *(pmd_t *) ptep;
-	if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
-		origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
-		pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
-		pmd_val(pmd) |= *(unsigned long *) origin;
-		/* Emulated huge ptes are young and dirty by definition */
-		pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
-	}
 	return __pmd_to_pte(pmd);
 }
 
@@ -125,45 +112,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
 	return pte;
 }
 
-int arch_prepare_hugepage(struct page *page)
-{
-	unsigned long addr = page_to_phys(page);
-	pte_t pte;
-	pte_t *ptep;
-	int i;
-
-	if (MACHINE_HAS_HPAGE)
-		return 0;
-
-	ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
-	if (!ptep)
-		return -ENOMEM;
-
-	pte_val(pte) = addr;
-	for (i = 0; i < PTRS_PER_PTE; i++) {
-		set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
-		pte_val(pte) += PAGE_SIZE;
-	}
-	page[1].index = (unsigned long) ptep;
-	return 0;
-}
-
-void arch_release_hugepage(struct page *page)
-{
-	pte_t *ptep;
-
-	if (MACHINE_HAS_HPAGE)
-		return;
-
-	ptep = (pte_t *) page[1].index;
-	if (!ptep)
-		return;
-	clear_table((unsigned long *) ptep, _PAGE_INVALID,
-		    PTRS_PER_PTE * sizeof(pte_t));
-	page_table_free(&init_mm, (unsigned long *) ptep);
-	page[1].index = 0;
-}
-
 pte_t *huge_pte_alloc(struct mm_struct *mm,
 			unsigned long addr, unsigned long sz)
 {
@@ -193,17 +141,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	return (pte_t *) pmdp;
 }
 
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
-	return 0;
-}
-
 int pmd_huge(pmd_t pmd)
 {
-	if (!MACHINE_HAS_HPAGE)
-		return 0;
-
-	return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+	return pmd_large(pmd);
 }
 
 int pud_huge(pud_t pud)
diff --git a/kernel/arch/s390/mm/init.c b/kernel/arch/s390/mm/init.c
index 80875c43a..c722400c7 100644
--- a/kernel/arch/s390/mm/init.c
+++ b/kernel/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
 #include <linux/initrd.h>
 #include <linux/export.h>
 #include <linux/gfp.h>
+#include <linux/memblock.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -47,37 +48,13 @@ EXPORT_SYMBOL(zero_page_mask);
 
 static void __init setup_zero_pages(void)
 {
-	struct cpuid cpu_id;
 	unsigned int order;
 	struct page *page;
 	int i;
 
-	get_cpu_id(&cpu_id);
-	switch (cpu_id.machine) {
-	case 0x9672:	/* g5 */
-	case 0x2064:	/* z900 */
-	case 0x2066:	/* z900 */
-	case 0x2084:	/* z990 */
-	case 0x2086:	/* z990 */
-	case 0x2094:	/* z9-109 */
-	case 0x2096:	/* z9-109 */
-		order = 0;
-		break;
-	case 0x2097:	/* z10 */
-	case 0x2098:	/* z10 */
-	case 0x2817:	/* z196 */
-	case 0x2818:	/* z196 */
-		order = 2;
-		break;
-	case 0x2827:	/* zEC12 */
-	case 0x2828:	/* zEC12 */
-		order = 5;
-		break;
-	case 0x2964:	/* z13 */
-	default:
-		order = 7;
-		break;
-	}
+	/* Latest machines require a mapping granularity of 512KB */
+	order = 7;
+
 	/* Limit number of empty zero pages for small memory sizes */
 	while (order > 2 && (totalram_pages >> 10) < (1UL << order))
 		order--;
@@ -138,7 +115,7 @@ void __init mem_init(void)
 	cpumask_set_cpu(0, mm_cpumask(&init_mm));
 	atomic_set(&init_mm.context.attach_count, 1);
 
-	max_mapnr = max_low_pfn;
+	set_max_mapnr(max_low_pfn);
 	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
 	/* Setup guest page hinting */
@@ -168,39 +145,38 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
 {
-	unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+	unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+	unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
 	unsigned long start_pfn = PFN_DOWN(start);
 	unsigned long size_pages = PFN_DOWN(size);
-	struct zone *zone;
-	int rc;
+	unsigned long nr_pages;
+	int rc, zone_enum;
 
 	rc = vmem_add_mapping(start, size);
 	if (rc)
 		return rc;
-	for_each_zone(zone) {
-		if (zone_idx(zone) != ZONE_MOVABLE) {
-			/* Add range within existing zone limits */
-			zone_start_pfn = zone->zone_start_pfn;
-			zone_end_pfn = zone->zone_start_pfn +
-				       zone->spanned_pages;
+
+	while (size_pages > 0) {
+		if (start_pfn < dma_end_pfn) {
+			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+				   dma_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_DMA;
+		} else if (start_pfn < normal_end_pfn) {
+			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+				   normal_end_pfn - start_pfn : size_pages;
+			zone_enum = ZONE_NORMAL;
 		} else {
-			/* Add remaining range to ZONE_MOVABLE */
-			zone_start_pfn = start_pfn;
-			zone_end_pfn = start_pfn + size_pages;
+			nr_pages = size_pages;
+			zone_enum = ZONE_MOVABLE;
 		}
-		if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
-			continue;
-		nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
-			   zone_end_pfn - start_pfn : size_pages;
-		rc = __add_pages(nid, zone, start_pfn, nr_pages);
+		rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+				 start_pfn, size_pages);
 		if (rc)
 			break;
 		start_pfn += nr_pages;
 		size_pages -= nr_pages;
-		if (!size_pages)
-			break;
 	}
 	if (rc)
 		vmem_remove_mapping(start, size);
@@ -213,7 +189,7 @@ unsigned long memory_block_size_bytes(void)
 	 * Make sure the memory block size is always greater
 	 * or equal than the memory increment size.
	 */
-	return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+	return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/kernel/arch/s390/mm/mem_detect.c b/kernel/arch/s390/mm/mem_detect.c
index 0f3604395..e00f0d5d2 100644
--- a/kernel/arch/s390/mm/mem_detect.c
+++ b/kernel/arch/s390/mm/mem_detect.c
@@ -31,8 +31,8 @@ void __init detect_memory_memblock(void)
 	unsigned long addr, size;
 	int type;
 
-	rzm = sclp_get_rzm();
-	rnmax = sclp_get_rnmax();
+	rzm = sclp.rzm;
+	rnmax = sclp.rnmax;
 	memsize = rzm * rnmax;
 	if (!rzm)
 		rzm = 1ULL << 17;
diff --git a/kernel/arch/s390/mm/mmap.c b/kernel/arch/s390/mm/mmap.c
index 6e552af08..ea01477b4 100644
--- a/kernel/arch/s390/mm/mmap.c
+++ b/kernel/arch/s390/mm/mmap.c
@@ -31,9 +31,6 @@
 #include <linux/security.h>
 #include <asm/pgalloc.h>
 
-unsigned long mmap_rnd_mask;
-static unsigned long mmap_align_mask;
-
 static unsigned long stack_maxrandom_size(void)
 {
 	if (!(current->flags & PF_RANDOMIZE))
@@ -62,10 +59,7 @@ static inline int mmap_is_legacy(void)
 
 unsigned long arch_mmap_rnd(void)
 {
-	if (is_32bit_task())
-		return (get_random_int() & 0x7ff) << PAGE_SHIFT;
-	else
-		return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
+	return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
 }
 
 static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -92,7 +86,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	struct vm_unmapped_area_info info;
-	int do_color_align;
 
 	if (len > TASK_SIZE - mmap_min_addr)
 		return -ENOMEM;
@@ -108,15 +101,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
 			return addr;
 	}
 
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = !is_32bit_task();
-
 	info.flags = 0;
 	info.length = len;
 	info.low_limit = mm->mmap_base;
 	info.high_limit = TASK_SIZE;
-	info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+	if (filp || (flags & MAP_SHARED))
+		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+	else
+		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	return vm_unmapped_area(&info);
 }
@@ -130,7 +122,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = addr0;
 	struct vm_unmapped_area_info info;
-	int do_color_align;
 
 	/* requested length too big for entire address space */
 	if (len > TASK_SIZE - mmap_min_addr)
@@ -148,15 +139,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
 			return addr;
 	}
 
-	do_color_align = 0;
-	if (filp || (flags & MAP_SHARED))
-		do_color_align = !is_32bit_task();
-
 	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
 	info.length = len;
 	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
 	info.high_limit = mm->mmap_base;
-	info.align_mask = do_color_align ?
-		(mmap_align_mask << PAGE_SHIFT) : 0;
+	if (filp || (flags & MAP_SHARED))
+		info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+	else
+		info.align_mask = 0;
 	info.align_offset = pgoff << PAGE_SHIFT;
 	addr = vm_unmapped_area(&info);
@@ -254,35 +244,3 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
 		mm->get_unmapped_area = s390_get_unmapped_area_topdown;
 	}
 }
-
-static int __init setup_mmap_rnd(void)
-{
-	struct cpuid cpu_id;
-
-	get_cpu_id(&cpu_id);
-	switch (cpu_id.machine) {
-	case 0x9672:
-	case 0x2064:
-	case 0x2066:
-	case 0x2084:
-	case 0x2086:
-	case 0x2094:
-	case 0x2096:
-	case 0x2097:
-	case 0x2098:
-	case 0x2817:
-	case 0x2818:
-	case 0x2827:
-	case 0x2828:
-		mmap_rnd_mask = 0x7ffUL;
-		mmap_align_mask = 0UL;
-		break;
-	case 0x2964:	/* z13 */
-	default:
-		mmap_rnd_mask = 0x3ff80UL;
-		mmap_align_mask = 0x7fUL;
-		break;
-	}
-	return 0;
-}
-early_initcall(setup_mmap_rnd);
diff --git a/kernel/arch/s390/mm/pgtable.c b/kernel/arch/s390/mm/pgtable.c
index b33f66110..54ef3bc01 100644
--- a/kernel/arch/s390/mm/pgtable.c
+++ b/kernel/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
 #include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
 #include <linux/rcupdate.h>
 #include <linux/slab.h>
 #include <linux/swapops.h>
@@ -28,12 +24,9 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 
-#define ALLOC_ORDER	2
-#define FRAG_MASK	0x03
-
 unsigned long *crst_table_alloc(struct mm_struct *mm)
 {
-	struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	struct page *page = alloc_pages(GFP_KERNEL, 2);
 
 	if (!page)
 		return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
 {
-	free_pages((unsigned long) table, ALLOC_ORDER);
+	free_pages((unsigned long) table, 2);
 }
 
 static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
 	INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
 	spin_lock_init(&gmap->guest_table_lock);
 	gmap->mm = mm;
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		goto out_free;
 	page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
 
 	/* Free all segment & region tables.
	 */
 	list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	gmap_radix_tree_free(&gmap->guest_to_host);
 	gmap_radix_tree_free(&gmap->host_to_guest);
 	down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	unsigned long *new;
 
 	/* since we dont free the gmap table until gmap_free we can unlock */
-	page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+	page = alloc_pages(GFP_KERNEL, 2);
 	if (!page)
 		return -ENOMEM;
 	new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	}
 	spin_unlock(&gmap->mm->page_table_lock);
 	if (page)
-		__free_pages(page, ALLOC_ORDER);
+		__free_pages(page, 2);
 	return 0;
 }
 
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
 }
 EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	struct page *page;
-	unsigned long *table;
-
-	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-	if (!page)
-		return NULL;
-	if (!pgtable_page_ctor(page)) {
-		__free_page(page);
-		return NULL;
-	}
-	atomic_set(&page->_mapcount, 0);
-	table = (unsigned long *) page_to_phys(page);
-	clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
-	clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
-	return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-	struct page *page;
-
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	pgtable_page_dtor(page);
-	atomic_set(&page->_mapcount, -1);
-	__free_page(page);
-}
-
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq)
 {
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
 
 #else /* CONFIG_PGSTE */
 
-static inline int page_table_with_pgste(struct page *page)
-{
-	return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
-	return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
 static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
 			unsigned long vmaddr)
 {
@@ -994,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
  */
 unsigned long *page_table_alloc(struct mm_struct *mm)
 {
-	unsigned long *uninitialized_var(table);
-	struct page *uninitialized_var(page);
+	unsigned long *table;
+	struct page *page;
 	unsigned int mask, bit;
 
-	if (mm_alloc_pgste(mm))
-		return page_table_alloc_pgste(mm);
-	/* Allocate fragments of a 4K page as 1K/2K page table */
-	spin_lock_bh(&mm->context.list_lock);
-	mask = FRAG_MASK;
-	if (!list_empty(&mm->context.pgtable_list)) {
-		page = list_first_entry(&mm->context.pgtable_list,
-					struct page, lru);
-		table = (unsigned long *) page_to_phys(page);
-		mask = atomic_read(&page->_mapcount);
-		mask = mask | (mask >> 4);
-	}
-	if ((mask & FRAG_MASK) == FRAG_MASK) {
-		spin_unlock_bh(&mm->context.list_lock);
-		page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
-		if (!page)
-			return NULL;
-		if (!pgtable_page_ctor(page)) {
-			__free_page(page);
-			return NULL;
+	/* Try to get a fragment of a 4K page as a 2K page table */
+	if (!mm_alloc_pgste(mm)) {
+		table = NULL;
+		spin_lock_bh(&mm->context.list_lock);
+		if (!list_empty(&mm->context.pgtable_list)) {
+			page = list_first_entry(&mm->context.pgtable_list,
+						struct page, lru);
+			mask = atomic_read(&page->_mapcount);
+			mask = (mask | (mask >> 4)) & 3;
+			if (mask != 3) {
+				table = (unsigned long *) page_to_phys(page);
+				bit = mask & 1;		/* =1 -> second 2K */
+				if (bit)
+					table += PTRS_PER_PTE;
+				atomic_xor_bits(&page->_mapcount, 1U << bit);
+				list_del(&page->lru);
+			}
 		}
+		spin_unlock_bh(&mm->context.list_lock);
+		if (table)
+			return table;
+	}
+	/* Allocate a fresh page */
+	page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!page)
+		return NULL;
+	if (!pgtable_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	/* Initialize page table */
+	table = (unsigned long *) page_to_phys(page);
+	if (mm_alloc_pgste(mm)) {
+		/* Return 4K page table with PGSTEs */
+		atomic_set(&page->_mapcount, 3);
+		clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+		clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+	} else {
+		/* Return the first 2K fragment of the page */
 		atomic_set(&page->_mapcount, 1);
-		table = (unsigned long *) page_to_phys(page);
 		clear_table(table, _PAGE_INVALID, PAGE_SIZE);
 		spin_lock_bh(&mm->context.list_lock);
 		list_add(&page->lru, &mm->context.pgtable_list);
-	} else {
-		for (bit = 1; mask & bit; bit <<= 1)
-			table += PTRS_PER_PTE;
-		mask = atomic_xor_bits(&page->_mapcount, bit);
-		if ((mask & FRAG_MASK) == FRAG_MASK)
-			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
 	}
-	spin_unlock_bh(&mm->context.list_lock);
 	return table;
 }
 
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	unsigned int bit, mask;
 
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page))
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
-	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit);
-	if (mask & FRAG_MASK)
-		list_add(&page->lru, &mm->context.pgtable_list);
-	spin_unlock_bh(&mm->context.list_lock);
-	if (mask == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
+	if (!mm_alloc_pgste(mm)) {
+		/* Free 2K page table fragment of a 4K page */
+		bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+		spin_lock_bh(&mm->context.list_lock);
+		mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+		if (mask & 3)
+			list_add(&page->lru, &mm->context.pgtable_list);
+		else
+			list_del(&page->lru);
+		spin_unlock_bh(&mm->context.list_lock);
+		if (mask != 0)
+			return;
 	}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
-	struct page *page;
-
-	if (bit == FRAG_MASK)
-		return page_table_free_pgste(table);
-	/* Free 1K/2K page table fragment of a 4K page */
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
-		pgtable_page_dtor(page);
-		atomic_set(&page->_mapcount, -1);
-		__free_page(page);
-	}
+	pgtable_page_dtor(page);
+	atomic_set(&page->_mapcount, -1);
+	__free_page(page);
 }
 
 void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
 
 	mm = tlb->mm;
 	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	if (page_table_with_pgste(page)) {
+	if (mm_alloc_pgste(mm)) {
 		gmap_unlink(mm, table, vmaddr);
-		table = (unsigned long *) (__pa(table) | FRAG_MASK);
+		table = (unsigned long *) (__pa(table) | 3);
 		tlb_remove_table(tlb, table);
 		return;
 	}
-	bit = 1 << ((__pa(table) & ~PAGE_MASK) /
-		    (PTRS_PER_PTE*sizeof(pte_t)));
+	bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
 	spin_lock_bh(&mm->context.list_lock);
-	if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
-		list_del(&page->lru);
-	mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
-	if (mask & FRAG_MASK)
+	mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+	if (mask & 3)
 		list_add_tail(&page->lru, &mm->context.pgtable_list);
+	else
+		list_del(&page->lru);
 	spin_unlock_bh(&mm->context.list_lock);
-	table = (unsigned long *) (__pa(table) | (bit << 4));
+	table = (unsigned long *) (__pa(table) | (1U << bit));
 	tlb_remove_table(tlb, table);
 }
 
 static void __tlb_remove_table(void *_table)
 {
-	const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
-	void *table = (void *)((unsigned long) _table & ~mask);
-	unsigned type = (unsigned long) _table & mask;
-
-	if (type)
-		__page_table_free_rcu(table, type);
-	else
-		free_pages((unsigned long) table, ALLOC_ORDER);
+	unsigned int mask = (unsigned long) _table & 3;
+	void *table = (void *)((unsigned long) _table ^ mask);
+	struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+	switch (mask) {
+	case 0:		/* pmd or pud */
+		free_pages((unsigned long) table, 2);
+		break;
+	case 1:		/* lower 2K of a 4K page table */
+	case 2:		/* higher 2K of a 4K page table */
+		if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+			break;
+		/* fallthrough */
+	case 3:		/* 4K page table with pgstes */
+		pgtable_page_dtor(page);
+		atomic_set(&page->_mapcount, -1);
+		__free_page(page);
+		break;
+	}
 }
 
 static void tlb_remove_table_smp_sync(void *arg)
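
The extable.c hunk above is subtle: s390 exception table entries hold 32-bit offsets relative to the address of the field itself, so sorting the 8-byte entries (which moves them) is only safe after converting both fields to section-relative offsets, and the pre-patch code adjusted only insn, apparently leaving fixup to be corrupted by the sort. A minimal user-space sketch of the same trick, with invented names (demo_extable_sort, field_target) and qsort standing in for the kernel's sort():

```c
/*
 * User-space sketch (invented names, not kernel code) of the
 * self-relative exception table handling fixed in extable.c above.
 * Each field stores "target minus address of the field", so entries
 * are normalized to section-relative offsets before sorting and
 * denormalized afterwards; fixup sits 4 bytes into the 8-byte entry,
 * hence the "i + 4".
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	int32_t insn;	/* insn target minus address of this field  */
	int32_t fixup;	/* fixup target minus address of this field */
};

/* Absolute address a self-relative field currently points at. */
static uintptr_t field_target(const int32_t *field)
{
	return (uintptr_t)field + *field;
}

static int cmp_by_insn(const void *a, const void *b)
{
	/* Valid only on normalized entries: both offsets are then
	 * relative to the same base (the section start). */
	const struct entry *x = a, *y = b;

	return (x->insn > y->insn) - (x->insn < y->insn);
}

static void demo_extable_sort(struct entry *start, struct entry *finish)
{
	struct entry *p;
	int i;

	/* Normalize, sort, denormalize; the pre-patch kernel code
	 * adjusted only insn here, so sorting corrupted fixup. */
	for (p = start, i = 0; p < finish; p++, i += 8) {
		p->insn += i;
		p->fixup += i + 4;
	}
	qsort(start, finish - start, sizeof(*start), cmp_by_insn);
	for (p = start, i = 0; p < finish; p++, i += 8) {
		p->insn -= i;
		p->fixup -= i + 4;
	}
}

int main(void)
{
	static char code[64];		/* stand-in "text section" */
	static struct entry tab[2];

	/* Entry 0 -> code+32/code+40, entry 1 -> code+8/code+16. */
	tab[0].insn  = (int32_t)((uintptr_t)&code[32] - (uintptr_t)&tab[0].insn);
	tab[0].fixup = (int32_t)((uintptr_t)&code[40] - (uintptr_t)&tab[0].fixup);
	tab[1].insn  = (int32_t)((uintptr_t)&code[8]  - (uintptr_t)&tab[1].insn);
	tab[1].fixup = (int32_t)((uintptr_t)&code[16] - (uintptr_t)&tab[1].fixup);

	demo_extable_sort(tab, tab + 2);

	/* Entries are reordered, but every field still points where it
	 * pointed before the sort. */
	printf("entry 0: insn=%p fixup=%p\n",
	       (void *)field_target(&tab[0].insn),
	       (void *)field_target(&tab[0].fixup));
	printf("entry 1: insn=%p fixup=%p\n",
	       (void *)field_target(&tab[1].insn),
	       (void *)field_target(&tab[1].fixup));
	return 0;
}
```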
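
The new arch_add_memory() in init.c walks the hotplugged PFN range and assigns each piece to ZONE_DMA, ZONE_NORMAL, or ZONE_MOVABLE by comparing against the zone end boundaries. A self-contained sketch of that splitting loop, in user-space C with made-up names and boundaries (the kernel calls __add_pages() where the printf is):

```c
/*
 * Sketch (invented names, not kernel code) of the zone-splitting loop
 * in the new arch_add_memory() above: peel off the part of the range
 * below the DMA boundary, then the part below the end of normal
 * memory, and hand the rest to ZONE_MOVABLE.
 */
#include <stdio.h>

enum zone { ZONE_DMA, ZONE_NORMAL, ZONE_MOVABLE };

static const char *zone_name[] = { "DMA", "NORMAL", "MOVABLE" };

static void split_range(unsigned long start_pfn, unsigned long size_pages,
			unsigned long dma_end_pfn, unsigned long normal_end_pfn)
{
	unsigned long nr_pages;
	enum zone zone;

	while (size_pages > 0) {
		if (start_pfn < dma_end_pfn) {
			nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
				   dma_end_pfn - start_pfn : size_pages;
			zone = ZONE_DMA;
		} else if (start_pfn < normal_end_pfn) {
			nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
				   normal_end_pfn - start_pfn : size_pages;
			zone = ZONE_NORMAL;
		} else {
			nr_pages = size_pages;
			zone = ZONE_MOVABLE;
		}
		/* The kernel calls __add_pages() for this piece here. */
		printf("add %5lu pages at pfn %5lu to ZONE_%s\n",
		       nr_pages, start_pfn, zone_name[zone]);
		start_pfn += nr_pages;
		size_pages -= nr_pages;
	}
}

int main(void)
{
	/* A range straddling all three zones (made-up boundaries). */
	split_range(100, 1000, 512, 768);
	return 0;
}
```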
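
The pgtable.c rewrite drops FRAG_MASK and the 1K fragments: a 4K page now holds at most two 2K page tables, tracked by bits 0-1 of page->_mapcount (fragment in use) and bits 4-5 (fragment pending RCU free), flipped with atomic_xor_bits(). A toy user-space model of just the two in-use bits, ignoring the per-mm list and locking (frag_alloc and frag_free are invented names):

```c
/*
 * Toy model (invented names, not kernel code) of the 2K fragment
 * accounting in the pgtable.c rewrite above, for a single 4K page.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint mapcount;	/* stands in for page->_mapcount */

/* Same contract as the kernel helper: XOR bits in, return new value. */
static unsigned int atomic_xor_bits(atomic_uint *v, unsigned int bits)
{
	return atomic_fetch_xor(v, bits) ^ bits;
}

/* Claim one 2K half of the 4K page: returns 0 or 1, or -1 if full. */
static int frag_alloc(void)
{
	unsigned int mask = atomic_load(&mapcount);
	int bit;

	mask = (mask | (mask >> 4)) & 3;	/* in use or pending free */
	if (mask == 3)
		return -1;		/* both halves taken */
	bit = mask & 1;			/* 0 -> first half, 1 -> second */
	atomic_xor_bits(&mapcount, 1U << bit);
	return bit;
}

/* Release a half; report when the whole page could be freed. */
static void frag_free(int bit)
{
	unsigned int mask = atomic_xor_bits(&mapcount, 1U << bit);

	if ((mask & 3) == 0)
		printf("both halves free: release the page\n");
}

int main(void)
{
	int a = frag_alloc();	/* takes half 0 */
	int b = frag_alloc();	/* takes half 1 */

	printf("allocated halves %d and %d\n", a, b);
	printf("third alloc -> %d (page full)\n", frag_alloc());
	frag_free(a);
	frag_free(b);		/* prints the release message */
	return 0;
}
```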