Diffstat (limited to 'kernel/mm')

-rw-r--r--   kernel/mm/hugetlb.c        | 19
-rw-r--r--   kernel/mm/memory-failure.c | 15
-rw-r--r--   kernel/mm/memory.c         | 20
-rw-r--r--   kernel/mm/vmscan.c         | 14

4 files changed, 37 insertions, 31 deletions
diff --git a/kernel/mm/hugetlb.c b/kernel/mm/hugetlb.c
index 271e44327..8c4c1f9f9 100644
--- a/kernel/mm/hugetlb.c
+++ b/kernel/mm/hugetlb.c
@@ -40,6 +40,11 @@ int hugepages_treat_as_movable;
 int hugetlb_max_hstate __read_mostly;
 unsigned int default_hstate_idx;
 struct hstate hstates[HUGE_MAX_HSTATE];
+/*
+ * Minimum page order among possible hugepage sizes, set to a proper value
+ * at boot time.
+ */
+static unsigned int minimum_order __read_mostly = UINT_MAX;
 
 __initdata LIST_HEAD(huge_boot_pages);
 
@@ -1188,19 +1193,13 @@ static void dissolve_free_huge_page(struct page *page)
  */
 void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 {
-	unsigned int order = 8 * sizeof(void *);
 	unsigned long pfn;
-	struct hstate *h;
 
 	if (!hugepages_supported())
 		return;
 
-	/* Set scan step to minimum hugepage size */
-	for_each_hstate(h)
-		if (order > huge_page_order(h))
-			order = huge_page_order(h);
-	VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order));
-	for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
+	VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << minimum_order));
+	for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order)
 		dissolve_free_huge_page(pfn_to_page(pfn));
 }
 
@@ -1627,10 +1626,14 @@ static void __init hugetlb_init_hstates(void)
 	struct hstate *h;
 
 	for_each_hstate(h) {
+		if (minimum_order > huge_page_order(h))
+			minimum_order = huge_page_order(h);
+
 		/* oversize hugepages were init'ed in early boot */
 		if (!hstate_is_gigantic(h))
 			hugetlb_hstate_alloc_pages(h);
 	}
+	VM_BUG_ON(minimum_order == UINT_MAX);
 }
 
 static char * __init memfmt(char *buf, unsigned long n)
diff --git a/kernel/mm/memory-failure.c b/kernel/mm/memory-failure.c
index 501820c81..9f48145c8 100644
--- a/kernel/mm/memory-failure.c
+++ b/kernel/mm/memory-failure.c
@@ -1558,6 +1558,8 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
 		 */
 		ret = __get_any_page(page, pfn, 0);
 		if (!PageLRU(page)) {
+			/* Drop page reference which is from __get_any_page() */
+			put_page(page);
 			pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
 				pfn, page->flags);
 			return -EIO;
@@ -1587,13 +1589,12 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	unlock_page(hpage);
 
 	ret = isolate_huge_page(hpage, &pagelist);
-	if (ret) {
-		/*
-		 * get_any_page() and isolate_huge_page() takes a refcount each,
-		 * so need to drop one here.
-		 */
-		put_page(hpage);
-	} else {
+	/*
+	 * get_any_page() and isolate_huge_page() takes a refcount each,
+	 * so need to drop one here.
+	 */
+	put_page(hpage);
+	if (!ret) {
 		pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
 		return -EBUSY;
 	}
diff --git a/kernel/mm/memory.c b/kernel/mm/memory.c
index 17734c3c1..3fc6efd10 100644
--- a/kernel/mm/memory.c
+++ b/kernel/mm/memory.c
@@ -2669,6 +2669,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	pte_unmap(page_table);
 
+	/* File mapping without ->vm_ops ? */
+	if (vma->vm_flags & VM_SHARED)
+		return VM_FAULT_SIGBUS;
+
 	/* Check if we need to add a guard page to the stack */
 	if (check_stack_guard_page(vma, address) < 0)
 		return VM_FAULT_SIGSEGV;
@@ -3097,6 +3101,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			- vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
 
 	pte_unmap(page_table);
+	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
+	if (!vma->vm_ops->fault)
+		return VM_FAULT_SIGBUS;
 	if (!(flags & FAULT_FLAG_WRITE))
 		return do_read_fault(mm, vma, address, pmd, pgoff, flags,
 				orig_pte);
@@ -3242,13 +3249,12 @@ static int handle_pte_fault(struct mm_struct *mm,
 	barrier();
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
-			if (vma->vm_ops) {
-				if (likely(vma->vm_ops->fault))
-					return do_fault(mm, vma, address, pte,
-							pmd, flags, entry);
-			}
-			return do_anonymous_page(mm, vma, address,
-						 pte, pmd, flags);
+			if (vma->vm_ops)
+				return do_fault(mm, vma, address, pte, pmd,
+						flags, entry);
+
+			return do_anonymous_page(mm, vma, address, pte, pmd,
+					flags);
 		}
 		return do_swap_page(mm, vma, address,
 					pte, pmd, flags, entry);
diff --git a/kernel/mm/vmscan.c b/kernel/mm/vmscan.c
index 5e8eadd71..0d024fc8a 100644
--- a/kernel/mm/vmscan.c
+++ b/kernel/mm/vmscan.c
@@ -937,21 +937,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 *
 		 * 2) Global reclaim encounters a page, memcg encounters a
 		 *    page that is not marked for immediate reclaim or
-		 *    the caller does not have __GFP_IO. In this case mark
+		 *    the caller does not have __GFP_FS (or __GFP_IO if it's
+		 *    simply going to swap, not to fs). In this case mark
 		 *    the page for immediate reclaim and continue scanning.
 		 *
-		 *    __GFP_IO is checked because a loop driver thread might
+		 *    Require may_enter_fs because we would wait on fs, which
+		 *    may not have submitted IO yet. And the loop driver might
 		 *    enter reclaim, and deadlock if it waits on a page for
 		 *    which it is needed to do the write (loop masks off
 		 *    __GFP_IO|__GFP_FS for this reason); but more thought
 		 *    would probably show more reasons.
 		 *
-		 *    Don't require __GFP_FS, since we're not going into the
-		 *    FS, just waiting on its writeback completion. Worryingly,
-		 *    ext4 gfs2 and xfs allocate pages with
-		 *    grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing
-		 *    may_enter_fs here is liable to OOM on them.
-		 *
 		 * 3) memcg encounters a page that is not already marked
 		 *    PageReclaim. memcg does not have any dirty pages
 		 *    throttling so we could easily OOM just because too many
@@ -968,7 +964,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 			/* Case 2 above */
 			} else if (global_reclaim(sc) ||
-			    !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
+			    !PageReclaim(page) || !may_enter_fs) {
 				/*
 				 * This is slightly racy - end_page_writeback()
 				 * might have just cleared PageReclaim, then
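
Note on the hugetlb.c change above: dissolve_free_huge_pages() now walks the PFN range in steps of 1 << minimum_order, and hugetlb_init_hstates() computes that order once at boot instead of rederiving it from the hstates on every call. Below is a standalone sketch of the same reduction and scan step; it is not kernel code, and the hstate orders (x86_64-style 2 MiB and 1 GiB hugepages on a 4 KiB base page) and the PFN range are made-up example values.

/*
 * Standalone sketch of the minimum_order scan step; example values only.
 */
#include <stdio.h>

int main(void)
{
	unsigned int orders[] = { 18, 9 };	/* example hstate orders */
	unsigned int minimum_order = ~0u;	/* mirrors the UINT_MAX init */
	unsigned long start_pfn = 0x100000, end_pfn = 0x100000 + 4096;
	unsigned long pfn, calls = 0;

	/* same reduction hugetlb_init_hstates() now performs once at boot */
	for (unsigned int i = 0; i < sizeof(orders) / sizeof(orders[0]); i++)
		if (minimum_order > orders[i])
			minimum_order = orders[i];

	/* dissolve_free_huge_pages() walk: one call per minimum-order step */
	for (pfn = start_pfn; pfn < end_pfn; pfn += 1UL << minimum_order)
		calls++;

	printf("minimum_order=%u, step=%lu pfns, %lu dissolve calls\n",
	       minimum_order, 1UL << minimum_order, calls);
	return 0;
}

With these example values the minimum order is 9, so the range is walked 512 PFNs at a time and each step feeds one dissolve_free_huge_page() call.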
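
Note on the memory.c change above: handle_pte_fault() now sends every VMA that has ->vm_ops through do_fault(), and the SIGBUS decisions move into the callees: do_fault() rejects a missing ->fault handler, and do_anonymous_page() rejects VM_SHARED mappings that have no ->vm_ops. Below is a condensed sketch of that decision flow using hypothetical stand-in types (demo_vma, demo_vm_ops), not the kernel structures.

#include <stdio.h>

/* Hypothetical stand-ins for vm_area_struct / vm_operations_struct. */
struct demo_vm_ops { int has_fault; };		/* is ->fault provided? */
struct demo_vma {
	const struct demo_vm_ops *vm_ops;
	int vm_shared;				/* VM_SHARED set? */
};

/* Decision flow for a pte_none fault after the memory.c hunks above. */
static const char *pte_none_fault(const struct demo_vma *vma)
{
	if (vma->vm_ops) {
		/* do_fault(): file/driver mapping without a ->fault handler */
		if (!vma->vm_ops->has_fault)
			return "VM_FAULT_SIGBUS";
		return "do_read_fault/do_cow_fault/do_shared_fault";
	}
	/* do_anonymous_page(): shared mapping with no ->vm_ops is invalid */
	if (vma->vm_shared)
		return "VM_FAULT_SIGBUS";
	return "new anonymous page";
}

int main(void)
{
	struct demo_vm_ops no_fault = { 0 }, with_fault = { 1 };
	struct demo_vma cases[] = {
		{ &with_fault, 0 },	/* normal file mapping */
		{ &no_fault,   0 },	/* mapping whose driver lacks ->fault */
		{ NULL,        0 },	/* plain anonymous mapping */
		{ NULL,        1 },	/* shared mapping with no ->vm_ops */
	};

	for (unsigned int i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
		printf("case %u -> %s\n", i, pte_none_fault(&cases[i]));
	return 0;
}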