summary refs log tree commit diff stats
path: root/kernel/arch/s390/mm
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/arch/s390/mm')
-rw-r--r-- kernel/arch/s390/mm/extable.c 8
-rw-r--r-- kernel/arch/s390/mm/extmem.c 3
-rw-r--r-- kernel/arch/s390/mm/fault.c 7
-rw-r--r-- kernel/arch/s390/mm/gup.c 10
-rw-r--r-- kernel/arch/s390/mm/hugetlbpage.c 72
-rw-r--r-- kernel/arch/s390/mm/init.c 74
-rw-r--r-- kernel/arch/s390/mm/mem_detect.c 4
-rw-r--r-- kernel/arch/s390/mm/mmap.c 60
-rw-r--r-- kernel/arch/s390/mm/pgtable.c 225
9 files changed, 155 insertions, 308 deletions
diff --git a/kernel/arch/s390/mm/extable.c b/kernel/arch/s390/mm/extable.c
index 4d1ee8886..18c8b819b 100644
--- a/kernel/arch/s390/mm/extable.c
+++ b/kernel/arch/s390/mm/extable.c
@@ -52,12 +52,16 @@ void sort_extable(struct exception_table_entry *start,
int i;
/* Normalize entries to being relative to the start of the section */
- for (p = start, i = 0; p < finish; p++, i += 8)
+ for (p = start, i = 0; p < finish; p++, i += 8) {
p->insn += i;
+ p->fixup += i + 4;
+ }
sort(start, finish - start, sizeof(*start), cmp_ex, NULL);
/* Denormalize all entries */
- for (p = start, i = 0; p < finish; p++, i += 8)
+ for (p = start, i = 0; p < finish; p++, i += 8) {
p->insn -= i;
+ p->fixup -= i + 4;
+ }
}
#ifdef CONFIG_MODULES
diff --git a/kernel/arch/s390/mm/extmem.c b/kernel/arch/s390/mm/extmem.c
index 23c496957..18fccc303 100644
--- a/kernel/arch/s390/mm/extmem.c
+++ b/kernel/arch/s390/mm/extmem.c
@@ -18,6 +18,7 @@
#include <linux/bootmem.h>
#include <linux/ctype.h>
#include <linux/ioport.h>
+#include <asm/diag.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/ebcdic.h>
@@ -112,6 +113,7 @@ dcss_set_subcodes(void)
ry = DCSS_FINDSEGX;
strcpy(name, "dummy");
+ diag_stat_inc(DIAG_STAT_X064);
asm volatile(
" diag %0,%1,0x64\n"
"0: ipm %2\n"
@@ -205,6 +207,7 @@ dcss_diag(int *func, void *parameter,
ry = (unsigned long) *func;
/* 64-bit Diag x'64' new subcode, keep in 64-bit addressing mode */
+ diag_stat_inc(DIAG_STAT_X064);
if (*func > DCSS_SEGEXT)
asm volatile(
" diag %0,%1,0x64\n"
diff --git a/kernel/arch/s390/mm/fault.c b/kernel/arch/s390/mm/fault.c
index 4c8f5d7f9..ec1a30d0d 100644
--- a/kernel/arch/s390/mm/fault.c
+++ b/kernel/arch/s390/mm/fault.c
@@ -30,6 +30,7 @@
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
#include <asm/asm-offsets.h>
+#include <asm/diag.h>
#include <asm/pgtable.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
@@ -589,7 +590,7 @@ int pfault_init(void)
.reffcode = 0,
.refdwlen = 5,
.refversn = 2,
- .refgaddr = __LC_CURRENT_PID,
+ .refgaddr = __LC_LPP,
.refselmk = 1ULL << 48,
.refcmpmk = 1ULL << 48,
.reserved = __PF_RES_FIELD };
@@ -597,6 +598,7 @@ int pfault_init(void)
if (pfault_disable)
return -1;
+ diag_stat_inc(DIAG_STAT_X258);
asm volatile(
" diag %1,%0,0x258\n"
"0: j 2f\n"
@@ -618,6 +620,7 @@ void pfault_fini(void)
if (pfault_disable)
return;
+ diag_stat_inc(DIAG_STAT_X258);
asm volatile(
" diag %0,0,0x258\n"
"0:\n"
@@ -646,7 +649,7 @@ static void pfault_interrupt(struct ext_code ext_code,
return;
inc_irq_stat(IRQEXT_PFL);
/* Get the token (= pid of the affected task). */
- pid = sizeof(void *) == 4 ? param32 : param64;
+ pid = param64 & LPP_PFAULT_PID_MASK;
rcu_read_lock();
tsk = find_task_by_pid_ns(pid, &init_pid_ns);
if (tsk)
diff --git a/kernel/arch/s390/mm/gup.c b/kernel/arch/s390/mm/gup.c
index 1eb41bb30..12bbf0e84 100644
--- a/kernel/arch/s390/mm/gup.c
+++ b/kernel/arch/s390/mm/gup.c
@@ -30,6 +30,9 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
do {
pte = *ptep;
barrier();
+ /* Similar to the PMD case, NUMA hinting must take slow path */
+ if (pte_protnone(pte))
+ return 0;
if ((pte_val(pte) & mask) != 0)
return 0;
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
@@ -125,6 +128,13 @@ static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
if (pmd_none(pmd) || pmd_trans_splitting(pmd))
return 0;
if (unlikely(pmd_large(pmd))) {
+ /*
+ * NUMA hinting faults need to be handled in the GUP
+ * slowpath for accounting purposes and so that they
+ * can be serialised against THP migration.
+ */
+ if (pmd_protnone(pmd))
+ return 0;
if (!gup_huge_pmd(pmdp, pmd, addr, next,
write, pages, nr))
return 0;
diff --git a/kernel/arch/s390/mm/hugetlbpage.c b/kernel/arch/s390/mm/hugetlbpage.c
index e617e74b7..f81096b69 100644
--- a/kernel/arch/s390/mm/hugetlbpage.c
+++ b/kernel/arch/s390/mm/hugetlbpage.c
@@ -40,6 +40,7 @@ static inline pmd_t __pte_to_pmd(pte_t pte)
pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_SOFT_DIRTY) << 13;
} else
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
return pmd;
@@ -78,6 +79,7 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) >> 10;
pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) >> 10;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_SOFT_DIRTY) >> 13;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
@@ -86,31 +88,16 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
- pmd_t pmd;
+ pmd_t pmd = __pte_to_pmd(pte);
- pmd = __pte_to_pmd(pte);
- if (!MACHINE_HAS_HPAGE) {
- /* Emulated huge ptes loose the dirty and young bit */
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) |= pte_page(pte)[1].index;
- } else
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
*(pmd_t *) ptep = pmd;
}
pte_t huge_ptep_get(pte_t *ptep)
{
- unsigned long origin;
- pmd_t pmd;
+ pmd_t pmd = *(pmd_t *) ptep;
- pmd = *(pmd_t *) ptep;
- if (!MACHINE_HAS_HPAGE && pmd_present(pmd)) {
- origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
- pmd_val(pmd) |= *(unsigned long *) origin;
- /* Emulated huge ptes are young and dirty by definition */
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
- }
return __pmd_to_pte(pmd);
}
@@ -125,45 +112,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
return pte;
}
-int arch_prepare_hugepage(struct page *page)
-{
- unsigned long addr = page_to_phys(page);
- pte_t pte;
- pte_t *ptep;
- int i;
-
- if (MACHINE_HAS_HPAGE)
- return 0;
-
- ptep = (pte_t *) pte_alloc_one(&init_mm, addr);
- if (!ptep)
- return -ENOMEM;
-
- pte_val(pte) = addr;
- for (i = 0; i < PTRS_PER_PTE; i++) {
- set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
- pte_val(pte) += PAGE_SIZE;
- }
- page[1].index = (unsigned long) ptep;
- return 0;
-}
-
-void arch_release_hugepage(struct page *page)
-{
- pte_t *ptep;
-
- if (MACHINE_HAS_HPAGE)
- return;
-
- ptep = (pte_t *) page[1].index;
- if (!ptep)
- return;
- clear_table((unsigned long *) ptep, _PAGE_INVALID,
- PTRS_PER_PTE * sizeof(pte_t));
- page_table_free(&init_mm, (unsigned long *) ptep);
- page[1].index = 0;
-}
-
pte_t *huge_pte_alloc(struct mm_struct *mm,
unsigned long addr, unsigned long sz)
{
@@ -193,17 +141,9 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return (pte_t *) pmdp;
}
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
-{
- return 0;
-}
-
int pmd_huge(pmd_t pmd)
{
- if (!MACHINE_HAS_HPAGE)
- return 0;
-
- return !!(pmd_val(pmd) & _SEGMENT_ENTRY_LARGE);
+ return pmd_large(pmd);
}
int pud_huge(pud_t pud)
diff --git a/kernel/arch/s390/mm/init.c b/kernel/arch/s390/mm/init.c
index 80875c43a..c722400c7 100644
--- a/kernel/arch/s390/mm/init.c
+++ b/kernel/arch/s390/mm/init.c
@@ -27,6 +27,7 @@
#include <linux/initrd.h>
#include <linux/export.h>
#include <linux/gfp.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -47,37 +48,13 @@ EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
- struct cpuid cpu_id;
unsigned int order;
struct page *page;
int i;
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672: /* g5 */
- case 0x2064: /* z900 */
- case 0x2066: /* z900 */
- case 0x2084: /* z990 */
- case 0x2086: /* z990 */
- case 0x2094: /* z9-109 */
- case 0x2096: /* z9-109 */
- order = 0;
- break;
- case 0x2097: /* z10 */
- case 0x2098: /* z10 */
- case 0x2817: /* z196 */
- case 0x2818: /* z196 */
- order = 2;
- break;
- case 0x2827: /* zEC12 */
- case 0x2828: /* zEC12 */
- order = 5;
- break;
- case 0x2964: /* z13 */
- default:
- order = 7;
- break;
- }
+ /* Latest machines require a mapping granularity of 512KB */
+ order = 7;
+
/* Limit number of empty zero pages for small memory sizes */
while (order > 2 && (totalram_pages >> 10) < (1UL << order))
order--;
@@ -138,7 +115,7 @@ void __init mem_init(void)
cpumask_set_cpu(0, mm_cpumask(&init_mm));
atomic_set(&init_mm.context.attach_count, 1);
- max_mapnr = max_low_pfn;
+ set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
/* Setup guest page hinting */
@@ -168,39 +145,38 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
#endif
#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_add_memory(int nid, u64 start, u64 size)
+int arch_add_memory(int nid, u64 start, u64 size, bool for_device)
{
- unsigned long zone_start_pfn, zone_end_pfn, nr_pages;
+ unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM());
+ unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS);
unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size);
- struct zone *zone;
- int rc;
+ unsigned long nr_pages;
+ int rc, zone_enum;
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
- for_each_zone(zone) {
- if (zone_idx(zone) != ZONE_MOVABLE) {
- /* Add range within existing zone limits */
- zone_start_pfn = zone->zone_start_pfn;
- zone_end_pfn = zone->zone_start_pfn +
- zone->spanned_pages;
+
+ while (size_pages > 0) {
+ if (start_pfn < dma_end_pfn) {
+ nr_pages = (start_pfn + size_pages > dma_end_pfn) ?
+ dma_end_pfn - start_pfn : size_pages;
+ zone_enum = ZONE_DMA;
+ } else if (start_pfn < normal_end_pfn) {
+ nr_pages = (start_pfn + size_pages > normal_end_pfn) ?
+ normal_end_pfn - start_pfn : size_pages;
+ zone_enum = ZONE_NORMAL;
} else {
- /* Add remaining range to ZONE_MOVABLE */
- zone_start_pfn = start_pfn;
- zone_end_pfn = start_pfn + size_pages;
+ nr_pages = size_pages;
+ zone_enum = ZONE_MOVABLE;
}
- if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn)
- continue;
- nr_pages = (start_pfn + size_pages > zone_end_pfn) ?
- zone_end_pfn - start_pfn : size_pages;
- rc = __add_pages(nid, zone, start_pfn, nr_pages);
+ rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum,
+ start_pfn, nr_pages);
if (rc)
break;
start_pfn += nr_pages;
size_pages -= nr_pages;
- if (!size_pages)
- break;
}
if (rc)
vmem_remove_mapping(start, size);
@@ -213,7 +189,7 @@ unsigned long memory_block_size_bytes(void)
* Make sure the memory block size is always greater
* or equal than the memory increment size.
*/
- return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp_get_rzm());
+ return max_t(unsigned long, MIN_MEMORY_BLOCK_SIZE, sclp.rzm);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
diff --git a/kernel/arch/s390/mm/mem_detect.c b/kernel/arch/s390/mm/mem_detect.c
index 0f3604395..e00f0d5d2 100644
--- a/kernel/arch/s390/mm/mem_detect.c
+++ b/kernel/arch/s390/mm/mem_detect.c
@@ -31,8 +31,8 @@ void __init detect_memory_memblock(void)
unsigned long addr, size;
int type;
- rzm = sclp_get_rzm();
- rnmax = sclp_get_rnmax();
+ rzm = sclp.rzm;
+ rnmax = sclp.rnmax;
memsize = rzm * rnmax;
if (!rzm)
rzm = 1ULL << 17;
diff --git a/kernel/arch/s390/mm/mmap.c b/kernel/arch/s390/mm/mmap.c
index 6e552af08..ea01477b4 100644
--- a/kernel/arch/s390/mm/mmap.c
+++ b/kernel/arch/s390/mm/mmap.c
@@ -31,9 +31,6 @@
#include <linux/security.h>
#include <asm/pgalloc.h>
-unsigned long mmap_rnd_mask;
-static unsigned long mmap_align_mask;
-
static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
@@ -62,10 +59,7 @@ static inline int mmap_is_legacy(void)
unsigned long arch_mmap_rnd(void)
{
- if (is_32bit_task())
- return (get_random_int() & 0x7ff) << PAGE_SHIFT;
- else
- return (get_random_int() & mmap_rnd_mask) << PAGE_SHIFT;
+ return (get_random_int() & MMAP_RND_MASK) << PAGE_SHIFT;
}
static unsigned long mmap_base_legacy(unsigned long rnd)
@@ -92,7 +86,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_unmapped_area_info info;
- int do_color_align;
if (len > TASK_SIZE - mmap_min_addr)
return -ENOMEM;
@@ -108,15 +101,14 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
@@ -130,7 +122,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
- int do_color_align;
/* requested length too big for entire address space */
if (len > TASK_SIZE - mmap_min_addr)
@@ -148,15 +139,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
}
- do_color_align = 0;
- if (filp || (flags & MAP_SHARED))
- do_color_align = !is_32bit_task();
-
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
info.high_limit = mm->mmap_base;
- info.align_mask = do_color_align ? (mmap_align_mask << PAGE_SHIFT) : 0;
+ if (filp || (flags & MAP_SHARED))
+ info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
+ else
+ info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
@@ -254,35 +244,3 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
}
}
-
-static int __init setup_mmap_rnd(void)
-{
- struct cpuid cpu_id;
-
- get_cpu_id(&cpu_id);
- switch (cpu_id.machine) {
- case 0x9672:
- case 0x2064:
- case 0x2066:
- case 0x2084:
- case 0x2086:
- case 0x2094:
- case 0x2096:
- case 0x2097:
- case 0x2098:
- case 0x2817:
- case 0x2818:
- case 0x2827:
- case 0x2828:
- mmap_rnd_mask = 0x7ffUL;
- mmap_align_mask = 0UL;
- break;
- case 0x2964: /* z13 */
- default:
- mmap_rnd_mask = 0x3ff80UL;
- mmap_align_mask = 0x7fUL;
- break;
- }
- return 0;
-}
-early_initcall(setup_mmap_rnd);
diff --git a/kernel/arch/s390/mm/pgtable.c b/kernel/arch/s390/mm/pgtable.c
index b33f66110..54ef3bc01 100644
--- a/kernel/arch/s390/mm/pgtable.c
+++ b/kernel/arch/s390/mm/pgtable.c
@@ -10,11 +10,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
#include <linux/spinlock.h>
-#include <linux/module.h>
-#include <linux/quicklist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/swapops.h>
@@ -28,12 +24,9 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#define ALLOC_ORDER 2
-#define FRAG_MASK 0x03
-
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
- struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ struct page *page = alloc_pages(GFP_KERNEL, 2);
if (!page)
return NULL;
@@ -42,7 +35,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
- free_pages((unsigned long) table, ALLOC_ORDER);
+ free_pages((unsigned long) table, 2);
}
static void __crst_table_upgrade(void *arg)
@@ -176,7 +169,7 @@ struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
spin_lock_init(&gmap->guest_table_lock);
gmap->mm = mm;
- page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ page = alloc_pages(GFP_KERNEL, 2);
if (!page)
goto out_free;
page->index = 0;
@@ -247,7 +240,7 @@ void gmap_free(struct gmap *gmap)
/* Free all segment & region tables. */
list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, ALLOC_ORDER);
+ __free_pages(page, 2);
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
down_write(&gmap->mm->mmap_sem);
@@ -287,7 +280,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
+ page = alloc_pages(GFP_KERNEL, 2);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
@@ -302,7 +295,7 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
}
spin_unlock(&gmap->mm->page_table_lock);
if (page)
- __free_pages(page, ALLOC_ORDER);
+ __free_pages(page, 2);
return 0;
}
@@ -795,40 +788,6 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
}
EXPORT_SYMBOL_GPL(gmap_do_ipte_notify);
-static inline int page_table_with_pgste(struct page *page)
-{
- return atomic_read(&page->_mapcount) == 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
- struct page *page;
- unsigned long *table;
-
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- atomic_set(&page->_mapcount, 0);
- table = (unsigned long *) page_to_phys(page);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
- return table;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
- struct page *page;
-
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
-}
-
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned long key, bool nq)
{
@@ -957,20 +916,6 @@ __initcall(page_table_register_sysctl);
#else /* CONFIG_PGSTE */
-static inline int page_table_with_pgste(struct page *page)
-{
- return 0;
-}
-
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
-{
- return NULL;
-}
-
-static inline void page_table_free_pgste(unsigned long *table)
-{
-}
-
static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
unsigned long vmaddr)
{
@@ -994,44 +939,55 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
*/
unsigned long *page_table_alloc(struct mm_struct *mm)
{
- unsigned long *uninitialized_var(table);
- struct page *uninitialized_var(page);
+ unsigned long *table;
+ struct page *page;
unsigned int mask, bit;
- if (mm_alloc_pgste(mm))
- return page_table_alloc_pgste(mm);
- /* Allocate fragments of a 4K page as 1K/2K page table */
- spin_lock_bh(&mm->context.list_lock);
- mask = FRAG_MASK;
- if (!list_empty(&mm->context.pgtable_list)) {
- page = list_first_entry(&mm->context.pgtable_list,
- struct page, lru);
- table = (unsigned long *) page_to_phys(page);
- mask = atomic_read(&page->_mapcount);
- mask = mask | (mask >> 4);
- }
- if ((mask & FRAG_MASK) == FRAG_MASK) {
- spin_unlock_bh(&mm->context.list_lock);
- page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
- if (!page)
- return NULL;
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
+ /* Try to get a fragment of a 4K page as a 2K page table */
+ if (!mm_alloc_pgste(mm)) {
+ table = NULL;
+ spin_lock_bh(&mm->context.list_lock);
+ if (!list_empty(&mm->context.pgtable_list)) {
+ page = list_first_entry(&mm->context.pgtable_list,
+ struct page, lru);
+ mask = atomic_read(&page->_mapcount);
+ mask = (mask | (mask >> 4)) & 3;
+ if (mask != 3) {
+ table = (unsigned long *) page_to_phys(page);
+ bit = mask & 1; /* =1 -> second 2K */
+ if (bit)
+ table += PTRS_PER_PTE;
+ atomic_xor_bits(&page->_mapcount, 1U << bit);
+ list_del(&page->lru);
+ }
}
+ spin_unlock_bh(&mm->context.list_lock);
+ if (table)
+ return table;
+ }
+ /* Allocate a fresh page */
+ page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
+ if (!page)
+ return NULL;
+ if (!pgtable_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ /* Initialize page table */
+ table = (unsigned long *) page_to_phys(page);
+ if (mm_alloc_pgste(mm)) {
+ /* Return 4K page table with PGSTEs */
+ atomic_set(&page->_mapcount, 3);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
+ } else {
+ /* Return the first 2K fragment of the page */
atomic_set(&page->_mapcount, 1);
- table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE);
spin_lock_bh(&mm->context.list_lock);
list_add(&page->lru, &mm->context.pgtable_list);
- } else {
- for (bit = 1; mask & bit; bit <<= 1)
- table += PTRS_PER_PTE;
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if ((mask & FRAG_MASK) == FRAG_MASK)
- list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
}
- spin_unlock_bh(&mm->context.list_lock);
return table;
}
@@ -1041,37 +997,23 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
unsigned int bit, mask;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- return page_table_free_pgste(table);
- /* Free 1K/2K page table fragment of a 4K page */
- bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
- spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit);
- if (mask & FRAG_MASK)
- list_add(&page->lru, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.list_lock);
- if (mask == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
+ if (!mm_alloc_pgste(mm)) {
+ /* Free 2K page table fragment of a 4K page */
+ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ spin_lock_bh(&mm->context.list_lock);
+ mask = atomic_xor_bits(&page->_mapcount, 1U << bit);
+ if (mask & 3)
+ list_add(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
+ spin_unlock_bh(&mm->context.list_lock);
+ if (mask != 0)
+ return;
}
-}
-
-static void __page_table_free_rcu(void *table, unsigned bit)
-{
- struct page *page;
- if (bit == FRAG_MASK)
- return page_table_free_pgste(table);
- /* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
- pgtable_page_dtor(page);
- atomic_set(&page->_mapcount, -1);
- __free_page(page);
- }
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
}
void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
@@ -1083,34 +1025,45 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
mm = tlb->mm;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_alloc_pgste(mm)) {
gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | FRAG_MASK);
+ table = (unsigned long *) (__pa(table) | 3);
tlb_remove_table(tlb, table);
return;
}
- bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+ bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.list_lock);
- if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
- list_del(&page->lru);
- mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
- if (mask & FRAG_MASK)
+ mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit);
+ if (mask & 3)
list_add_tail(&page->lru, &mm->context.pgtable_list);
+ else
+ list_del(&page->lru);
spin_unlock_bh(&mm->context.list_lock);
- table = (unsigned long *) (__pa(table) | (bit << 4));
+ table = (unsigned long *) (__pa(table) | (1U << bit));
tlb_remove_table(tlb, table);
}
static void __tlb_remove_table(void *_table)
{
- const unsigned long mask = (FRAG_MASK << 4) | FRAG_MASK;
- void *table = (void *)((unsigned long) _table & ~mask);
- unsigned type = (unsigned long) _table & mask;
-
- if (type)
- __page_table_free_rcu(table, type);
- else
- free_pages((unsigned long) table, ALLOC_ORDER);
+ unsigned int mask = (unsigned long) _table & 3;
+ void *table = (void *)((unsigned long) _table ^ mask);
+ struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+
+ switch (mask) {
+ case 0: /* pmd or pud */
+ free_pages((unsigned long) table, 2);
+ break;
+ case 1: /* lower 2K of a 4K page table */
+ case 2: /* higher 2K of a 4K page table */
+ if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0)
+ break;
+ /* fallthrough */
+ case 3: /* 4K page table with pgstes */
+ pgtable_page_dtor(page);
+ atomic_set(&page->_mapcount, -1);
+ __free_page(page);
+ break;
+ }
}
static void tlb_remove_table_smp_sync(void *arg)