summaryrefslogtreecommitdiffstats
path: root/kernel/mm/huge_memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/mm/huge_memory.c')
-rw-r--r--kernel/mm/huge_memory.c291
1 files changed, 183 insertions, 108 deletions
diff --git a/kernel/mm/huge_memory.c b/kernel/mm/huge_memory.c
index 078832cf3..62fe06bb7 100644
--- a/kernel/mm/huge_memory.c
+++ b/kernel/mm/huge_memory.c
@@ -16,6 +16,7 @@
#include <linux/swap.h>
#include <linux/shrinker.h>
#include <linux/mm_inline.h>
+#include <linux/dax.h>
#include <linux/kthread.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
@@ -23,6 +24,8 @@
#include <linux/pagemap.h>
#include <linux/migrate.h>
#include <linux/hashtable.h>
+#include <linux/userfaultfd_k.h>
+#include <linux/page_idle.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
@@ -104,7 +107,7 @@ static struct khugepaged_scan khugepaged_scan = {
};
-static int set_recommended_min_free_kbytes(void)
+static void set_recommended_min_free_kbytes(void)
{
struct zone *zone;
int nr_zones = 0;
@@ -113,7 +116,7 @@ static int set_recommended_min_free_kbytes(void)
for_each_populated_zone(zone)
nr_zones++;
- /* Make sure at least 2 hugepages are free for MIGRATE_RESERVE */
+ /* Ensure 2 pageblocks are free to assist fragmentation avoidance */
recommended_min = pageblock_nr_pages * nr_zones * 2;
/*
@@ -139,7 +142,6 @@ static int set_recommended_min_free_kbytes(void)
min_free_kbytes = recommended_min;
}
setup_per_zone_wmarks();
- return 0;
}
static int start_stop_khugepaged(void)
@@ -149,7 +151,7 @@ static int start_stop_khugepaged(void)
if (!khugepaged_thread)
khugepaged_thread = kthread_run(khugepaged, NULL,
"khugepaged");
- if (unlikely(IS_ERR(khugepaged_thread))) {
+ if (IS_ERR(khugepaged_thread)) {
pr_err("khugepaged: kthread_run(khugepaged) failed\n");
err = PTR_ERR(khugepaged_thread);
khugepaged_thread = NULL;
@@ -171,12 +173,7 @@ fail:
static atomic_t huge_zero_refcount;
struct page *huge_zero_page __read_mostly;
-static inline bool is_huge_zero_pmd(pmd_t pmd)
-{
- return is_huge_zero_page(pmd_page(pmd));
-}
-
-static struct page *get_huge_zero_page(void)
+struct page *get_huge_zero_page(void)
{
struct page *zero_page;
retry:
@@ -716,21 +713,27 @@ static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
struct vm_area_struct *vma,
- unsigned long haddr, pmd_t *pmd,
- struct page *page, gfp_t gfp)
+ unsigned long address, pmd_t *pmd,
+ struct page *page, gfp_t gfp,
+ unsigned int flags)
{
struct mem_cgroup *memcg;
pgtable_t pgtable;
spinlock_t *ptl;
+ unsigned long haddr = address & HPAGE_PMD_MASK;
VM_BUG_ON_PAGE(!PageCompound(page), page);
- if (mem_cgroup_try_charge(page, mm, gfp, &memcg))
- return VM_FAULT_OOM;
+ if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) {
+ put_page(page);
+ count_vm_event(THP_FAULT_FALLBACK);
+ return VM_FAULT_FALLBACK;
+ }
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable)) {
mem_cgroup_cancel_charge(page, memcg);
+ put_page(page);
return VM_FAULT_OOM;
}
@@ -750,6 +753,21 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
pte_free(mm, pgtable);
} else {
pmd_t entry;
+
+ /* Deliver the page fault to userland */
+ if (userfaultfd_missing(vma)) {
+ int ret;
+
+ spin_unlock(ptl);
+ mem_cgroup_cancel_charge(page, memcg);
+ put_page(page);
+ pte_free(mm, pgtable);
+ ret = handle_userfault(vma, address, flags,
+ VM_UFFD_MISSING);
+ VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+ return ret;
+ }
+
entry = mk_huge_pmd(page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
page_add_new_anon_rmap(page, vma, haddr);
@@ -760,6 +778,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
atomic_long_inc(&mm->nr_ptes);
spin_unlock(ptl);
+ count_vm_event(THP_FAULT_ALLOC);
}
return 0;
@@ -767,7 +786,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
{
- return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_WAIT)) | extra_gfp;
+ return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp;
}
/* Caller must hold page table lock. */
@@ -806,6 +825,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
pgtable_t pgtable;
struct page *zero_page;
bool set;
+ int ret;
pgtable = pte_alloc_one(mm, haddr);
if (unlikely(!pgtable))
return VM_FAULT_OOM;
@@ -816,14 +836,28 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_FALLBACK;
}
ptl = pmd_lock(mm, pmd);
- set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
- zero_page);
- spin_unlock(ptl);
+ ret = 0;
+ set = false;
+ if (pmd_none(*pmd)) {
+ if (userfaultfd_missing(vma)) {
+ spin_unlock(ptl);
+ ret = handle_userfault(vma, address, flags,
+ VM_UFFD_MISSING);
+ VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+ } else {
+ set_huge_zero_page(pgtable, mm, vma,
+ haddr, pmd,
+ zero_page);
+ spin_unlock(ptl);
+ set = true;
+ }
+ } else
+ spin_unlock(ptl);
if (!set) {
pte_free(mm, pgtable);
put_huge_zero_page();
}
- return 0;
+ return ret;
}
gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
@@ -831,14 +865,51 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) {
- put_page(page);
- count_vm_event(THP_FAULT_FALLBACK);
- return VM_FAULT_FALLBACK;
+ return __do_huge_pmd_anonymous_page(mm, vma, address, pmd, page, gfp,
+ flags);
+}
+
+static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned long pfn, pgprot_t prot, bool write)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pmd_t entry;
+ spinlock_t *ptl;
+
+ ptl = pmd_lock(mm, pmd);
+ if (pmd_none(*pmd)) {
+ entry = pmd_mkhuge(pfn_pmd(pfn, prot));
+ if (write) {
+ entry = pmd_mkyoung(pmd_mkdirty(entry));
+ entry = maybe_pmd_mkwrite(entry, vma);
+ }
+ set_pmd_at(mm, addr, pmd, entry);
+ update_mmu_cache_pmd(vma, addr, pmd);
}
+ spin_unlock(ptl);
+}
- count_vm_event(THP_FAULT_ALLOC);
- return 0;
+int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
+ pmd_t *pmd, unsigned long pfn, bool write)
+{
+ pgprot_t pgprot = vma->vm_page_prot;
+ /*
+ * If we had pmd_special, we could avoid all these restrictions,
+ * but we need to be consistent with PTEs and architectures that
+ * can't support a 'special' bit.
+ */
+ BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+ BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
+ (VM_PFNMAP|VM_MIXEDMAP));
+ BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
+ BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn));
+
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return VM_FAULT_SIGBUS;
+ if (track_pfn_insert(vma, &pgprot, pfn))
+ return VM_FAULT_SIGBUS;
+ insert_pfn_pmd(vma, addr, pmd, pfn, pgprot, write);
+ return VM_FAULT_NOPAGE;
}
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -873,16 +944,14 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
*/
if (is_huge_zero_pmd(pmd)) {
struct page *zero_page;
- bool set;
/*
* get_huge_zero_page() will never allocate a new page here,
* since we already have a zero page to copy. It just takes a
* reference.
*/
zero_page = get_huge_zero_page();
- set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
+ set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
zero_page);
- BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */
ret = 0;
goto out_unlock;
}
@@ -1031,7 +1100,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
goto out_free_pages;
VM_BUG_ON_PAGE(!PageHead(page), page);
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -1174,7 +1243,7 @@ alloc:
pmd_t entry;
entry = mk_huge_pmd(new_page, vma->vm_page_prot);
entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_huge_clear_flush_notify(vma, haddr, pmd);
page_add_new_anon_rmap(new_page, vma, haddr);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
@@ -1238,7 +1307,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
pmd, _pmd, 1))
update_mmu_cache_pmd(vma, addr, pmd);
}
- if ((flags & FOLL_POPULATE) && (vma->vm_flags & VM_LOCKED)) {
+ if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
if (page->mapping && trylock_page(page)) {
lru_add_drain();
if (page->mapping)
@@ -1387,41 +1456,41 @@ out:
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr)
{
+ pmd_t orig_pmd;
spinlock_t *ptl;
- int ret = 0;
- if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- struct page *page;
- pgtable_t pgtable;
- pmd_t orig_pmd;
- /*
- * For architectures like ppc64 we look at deposited pgtable
- * when calling pmdp_get_and_clear. So do the
- * pgtable_trans_huge_withdraw after finishing pmdp related
- * operations.
- */
- orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd,
- tlb->fullmm);
- tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
- pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd);
- if (is_huge_zero_pmd(orig_pmd)) {
- atomic_long_dec(&tlb->mm->nr_ptes);
- spin_unlock(ptl);
+ if (__pmd_trans_huge_lock(pmd, vma, &ptl) != 1)
+ return 0;
+ /*
+ * For architectures like ppc64 we look at deposited pgtable
+ * when calling pmdp_huge_get_and_clear. So do the
+ * pgtable_trans_huge_withdraw after finishing pmdp related
+ * operations.
+ */
+ orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
+ tlb->fullmm);
+ tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+ if (vma_is_dax(vma)) {
+ spin_unlock(ptl);
+ if (is_huge_zero_pmd(orig_pmd))
put_huge_zero_page();
- } else {
- page = pmd_page(orig_pmd);
- page_remove_rmap(page);
- VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
- add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
- VM_BUG_ON_PAGE(!PageHead(page), page);
- atomic_long_dec(&tlb->mm->nr_ptes);
- spin_unlock(ptl);
- tlb_remove_page(tlb, page);
- }
- pte_free(tlb->mm, pgtable);
- ret = 1;
+ } else if (is_huge_zero_pmd(orig_pmd)) {
+ pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
+ atomic_long_dec(&tlb->mm->nr_ptes);
+ spin_unlock(ptl);
+ put_huge_zero_page();
+ } else {
+ struct page *page = pmd_page(orig_pmd);
+ page_remove_rmap(page);
+ VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
+ add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+ VM_BUG_ON_PAGE(!PageHead(page), page);
+ pte_free(tlb->mm, pgtable_trans_huge_withdraw(tlb->mm, pmd));
+ atomic_long_dec(&tlb->mm->nr_ptes);
+ spin_unlock(ptl);
+ tlb_remove_page(tlb, page);
}
- return ret;
+ return 1;
}
int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
@@ -1459,7 +1528,7 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
new_ptl = pmd_lockptr(mm, new_pmd);
if (new_ptl != old_ptl)
spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
- pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+ pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
VM_BUG_ON(!pmd_none(*new_pmd));
if (pmd_move_must_withdraw(new_ptl, old_ptl)) {
@@ -1505,7 +1574,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
}
if (!prot_numa || !pmd_protnone(*pmd)) {
- entry = pmdp_get_and_clear_notify(mm, addr, pmd);
+ entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
entry = pmd_modify(entry, newprot);
if (preserve_write)
entry = pmd_mkwrite(entry);
@@ -1676,12 +1745,7 @@ static void __split_huge_page_refcount(struct page *page,
/* after clearing PageTail the gup refcount can be released */
smp_mb__after_atomic();
- /*
- * retain hwpoison flag of the poisoned tail page:
- * fix for the unsuitable process killed on Guest Machine(KVM)
- * by the memory-failure.
- */
- page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
+ page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
page_tail->flags |= (page->flags &
((1L << PG_referenced) |
(1L << PG_swapbacked) |
@@ -1691,8 +1755,12 @@ static void __split_huge_page_refcount(struct page *page,
(1L << PG_unevictable)));
page_tail->flags |= (1L << PG_dirty);
- /* clear PageTail before overwriting first_page */
- smp_wmb();
+ clear_compound_head(page_tail);
+
+ if (page_is_young(page))
+ set_page_young(page_tail);
+ if (page_is_idle(page))
+ set_page_idle(page_tail);
/*
* __split_huge_page_splitting() already set the
@@ -1811,7 +1879,7 @@ static int __split_huge_page_map(struct page *page,
* here). But it is generally safer to never allow
* small and huge TLB entries for the same virtual
* address to be loaded simultaneously. So instead of
- * doing "pmd_populate(); flush_tlb_range();" we first
+ * doing "pmd_populate(); flush_pmd_tlb_range();" we first
* mark the current pmd notpresent (atomically because
* here the pmd_trans_huge and pmd_trans_splitting
* must remain set at all times on the pmd until the
@@ -1941,7 +2009,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
/*
* Be somewhat over-protective like KSM for now!
*/
- if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP))
+ if (*vm_flags & VM_NO_THP)
return -EINVAL;
*vm_flags &= ~VM_NOHUGEPAGE;
*vm_flags |= VM_HUGEPAGE;
@@ -1957,7 +2025,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
/*
* Be somewhat over-protective like KSM for now!
*/
- if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP))
+ if (*vm_flags & VM_NO_THP)
return -EINVAL;
*vm_flags &= ~VM_HUGEPAGE;
*vm_flags |= VM_NOHUGEPAGE;
@@ -2137,8 +2205,10 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
_pte++, address += PAGE_SIZE) {
pte_t pteval = *_pte;
- if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- if (++none_or_zero <= khugepaged_max_ptes_none)
+ if (pte_none(pteval) || (pte_present(pteval) &&
+ is_zero_pfn(pte_pfn(pteval)))) {
+ if (!userfaultfd_armed(vma) &&
+ ++none_or_zero <= khugepaged_max_ptes_none)
continue;
else
goto out;
@@ -2198,7 +2268,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
VM_BUG_ON_PAGE(PageLRU(page), page);
/* If there is no mapped pte young don't collapse the page */
- if (pte_young(pteval) || PageReferenced(page) ||
+ if (pte_young(pteval) ||
+ page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
referenced = true;
}
@@ -2262,8 +2333,12 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
static void khugepaged_alloc_sleep(void)
{
- wait_event_freezable_timeout(khugepaged_wait, false,
- msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
+ DEFINE_WAIT(wait);
+
+ add_wait_queue(&khugepaged_wait, &wait);
+ freezable_schedule_timeout_interruptible(
+ msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
+ remove_wait_queue(&khugepaged_wait, &wait);
}
static int khugepaged_node_load[MAX_NUMNODES];
@@ -2337,8 +2412,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
static struct page *
khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
- struct vm_area_struct *vma, unsigned long address,
- int node)
+ unsigned long address, int node)
{
VM_BUG_ON_PAGE(*hpage, *hpage);
@@ -2350,7 +2424,7 @@ khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
*/
up_read(&mm->mmap_sem);
- *hpage = alloc_pages_exact_node(node, gfp, HPAGE_PMD_ORDER);
+ *hpage = __alloc_pages_node(node, gfp, HPAGE_PMD_ORDER);
if (unlikely(!*hpage)) {
count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
*hpage = ERR_PTR(-ENOMEM);
@@ -2405,8 +2479,7 @@ static bool khugepaged_prealloc_page(struct page **hpage, bool *wait)
static struct page *
khugepaged_alloc_page(struct page **hpage, gfp_t gfp, struct mm_struct *mm,
- struct vm_area_struct *vma, unsigned long address,
- int node)
+ unsigned long address, int node)
{
up_read(&mm->mmap_sem);
VM_BUG_ON(!*hpage);
@@ -2454,7 +2527,7 @@ static void collapse_huge_page(struct mm_struct *mm,
__GFP_THISNODE;
/* release the mmap_sem read lock. */
- new_page = khugepaged_alloc_page(hpage, gfp, mm, vma, address, node);
+ new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node);
if (!new_page)
return;
@@ -2499,7 +2572,7 @@ static void collapse_huge_page(struct mm_struct *mm,
* huge and small TLB entries for the same virtual address
* to avoid the risk of CPU bugs in that area.
*/
- _pmd = pmdp_clear_flush(vma, address, pmd);
+ _pmd = pmdp_collapse_flush(vma, address, pmd);
spin_unlock(pmd_ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -2591,7 +2664,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
_pte++, _address += PAGE_SIZE) {
pte_t pteval = *_pte;
if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) {
- if (++none_or_zero <= khugepaged_max_ptes_none)
+ if (!userfaultfd_armed(vma) &&
+ ++none_or_zero <= khugepaged_max_ptes_none)
continue;
else
goto out_unmap;
@@ -2624,7 +2698,8 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
*/
if (page_count(page) != 1 + !!PageSwapCache(page))
goto out_unmap;
- if (pte_young(pteval) || PageReferenced(page) ||
+ if (pte_young(pteval) ||
+ page_is_young(page) || PageReferenced(page) ||
mmu_notifier_test_young(vma->vm_mm, address))
referenced = true;
}
@@ -2799,7 +2874,7 @@ static void khugepaged_do_scan(void)
cond_resched();
- if (unlikely(kthread_should_stop() || freezing(current)))
+ if (unlikely(kthread_should_stop() || try_to_freeze()))
break;
spin_lock(&khugepaged_mm_lock);
@@ -2820,8 +2895,6 @@ static void khugepaged_do_scan(void)
static void khugepaged_wait_work(void)
{
- try_to_freeze();
-
if (khugepaged_has_work()) {
if (!khugepaged_scan_sleep_millisecs)
return;
@@ -2865,7 +2938,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
pmd_t _pmd;
int i;
- pmdp_clear_flush_notify(vma, haddr, pmd);
+ pmdp_huge_clear_flush_notify(vma, haddr, pmd);
/* leave pmd empty until pte is filled */
pgtable = pgtable_trans_huge_withdraw(mm, pmd);
@@ -2889,7 +2962,7 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmd)
{
spinlock_t *ptl;
- struct page *page;
+ struct page *page = NULL;
struct mm_struct *mm = vma->vm_mm;
unsigned long haddr = address & HPAGE_PMD_MASK;
unsigned long mmun_start; /* For mmu_notifiers */
@@ -2902,25 +2975,27 @@ void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
again:
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
ptl = pmd_lock(mm, pmd);
- if (unlikely(!pmd_trans_huge(*pmd))) {
- spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- return;
- }
- if (is_huge_zero_pmd(*pmd)) {
+ if (unlikely(!pmd_trans_huge(*pmd)))
+ goto unlock;
+ if (vma_is_dax(vma)) {
+ pmd_t _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
+ if (is_huge_zero_pmd(_pmd))
+ put_huge_zero_page();
+ } else if (is_huge_zero_pmd(*pmd)) {
__split_huge_zero_page_pmd(vma, haddr, pmd);
- spin_unlock(ptl);
- mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- return;
+ } else {
+ page = pmd_page(*pmd);
+ VM_BUG_ON_PAGE(!page_count(page), page);
+ get_page(page);
}
- page = pmd_page(*pmd);
- VM_BUG_ON_PAGE(!page_count(page), page);
- get_page(page);
+ unlock:
spin_unlock(ptl);
mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
- split_huge_page(page);
+ if (!page)
+ return;
+ split_huge_page(page);
put_page(page);
/*
@@ -2969,7 +3044,7 @@ static void split_huge_page_address(struct mm_struct *mm,
split_huge_page_pmd_mm(mm, address, pmd);
}
-void __vma_adjust_trans_huge(struct vm_area_struct *vma,
+void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
long adjust_next)