summary | refs | log | tree | commit | diff | stats
path: root/kernel/mm/migrate.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/mm/migrate.c')
-rw-r--r-- kernel/mm/migrate.c 289
1 file changed, 155 insertions, 134 deletions
diff --git a/kernel/mm/migrate.c b/kernel/mm/migrate.c
index f53838fe3..6d17e0ab4 100644
--- a/kernel/mm/migrate.c
+++ b/kernel/mm/migrate.c
@@ -1,5 +1,5 @@
/*
- * Memory Migration functionality - linux/mm/migration.c
+ * Memory Migration functionality - linux/mm/migrate.c
*
* Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
*
@@ -30,13 +30,14 @@
#include <linux/mempolicy.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
-#include <linux/memcontrol.h>
+#include <linux/backing-dev.h>
#include <linux/syscalls.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <asm/tlbflush.h>
@@ -170,6 +171,9 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
else
page_add_file_rmap(new);
+ if (vma->vm_flags & VM_LOCKED)
+ mlock_vma_page(new);
+
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, ptep);
unlock:
@@ -310,6 +314,8 @@ int migrate_page_move_mapping(struct address_space *mapping,
struct buffer_head *head, enum migrate_mode mode,
int extra_count)
{
+ struct zone *oldzone, *newzone;
+ int dirty;
int expected_count = 1 + extra_count;
void **pslot;
@@ -317,9 +323,20 @@ int migrate_page_move_mapping(struct address_space *mapping,
/* Anonymous page without mapping */
if (page_count(page) != expected_count)
return -EAGAIN;
+
+ /* No turning back from here */
+ set_page_memcg(newpage, page_memcg(page));
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
+ if (PageSwapBacked(page))
+ SetPageSwapBacked(newpage);
+
return MIGRATEPAGE_SUCCESS;
}
+ oldzone = page_zone(page);
+ newzone = page_zone(newpage);
+
spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
@@ -352,14 +369,28 @@ int migrate_page_move_mapping(struct address_space *mapping,
}
/*
- * Now we know that no one else is looking at the page.
+ * Now we know that no one else is looking at the page:
+ * no turning back from here.
*/
+ set_page_memcg(newpage, page_memcg(page));
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
+ if (PageSwapBacked(page))
+ SetPageSwapBacked(newpage);
+
get_page(newpage); /* add cache reference */
if (PageSwapCache(page)) {
SetPageSwapCache(newpage);
set_page_private(newpage, page_private(page));
}
+ /* Move dirty while page refs frozen and newpage not yet exposed */
+ dirty = PageDirty(page);
+ if (dirty) {
+ ClearPageDirty(page);
+ SetPageDirty(newpage);
+ }
+
radix_tree_replace_slot(pslot, newpage);
/*
@@ -369,6 +400,9 @@ int migrate_page_move_mapping(struct address_space *mapping,
*/
page_unfreeze_refs(page, expected_count - 1);
+ spin_unlock(&mapping->tree_lock);
+ /* Leave irq disabled to prevent preemption while updating stats */
+
/*
* If moved to a different zone then also account
* the page for that zone. Other VM counters will be
@@ -379,13 +413,19 @@ int migrate_page_move_mapping(struct address_space *mapping,
* via NR_FILE_PAGES and NR_ANON_PAGES if they
* are mapped to swap space.
*/
- __dec_zone_page_state(page, NR_FILE_PAGES);
- __inc_zone_page_state(newpage, NR_FILE_PAGES);
- if (!PageSwapCache(page) && PageSwapBacked(page)) {
- __dec_zone_page_state(page, NR_SHMEM);
- __inc_zone_page_state(newpage, NR_SHMEM);
+ if (newzone != oldzone) {
+ __dec_zone_state(oldzone, NR_FILE_PAGES);
+ __inc_zone_state(newzone, NR_FILE_PAGES);
+ if (PageSwapBacked(page) && !PageSwapCache(page)) {
+ __dec_zone_state(oldzone, NR_SHMEM);
+ __inc_zone_state(newzone, NR_SHMEM);
+ }
+ if (dirty && mapping_cap_account_dirty(mapping)) {
+ __dec_zone_state(oldzone, NR_FILE_DIRTY);
+ __inc_zone_state(newzone, NR_FILE_DIRTY);
+ }
}
- spin_unlock_irq(&mapping->tree_lock);
+ local_irq_enable();
return MIGRATEPAGE_SUCCESS;
}
@@ -400,12 +440,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
int expected_count;
void **pslot;
- if (!mapping) {
- if (page_count(page) != 1)
- return -EAGAIN;
- return MIGRATEPAGE_SUCCESS;
- }
-
spin_lock_irq(&mapping->tree_lock);
pslot = radix_tree_lookup_slot(&mapping->page_tree,
@@ -423,6 +457,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
return -EAGAIN;
}
+ set_page_memcg(newpage, page_memcg(page));
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
get_page(newpage);
radix_tree_replace_slot(pslot, newpage);
@@ -509,20 +546,14 @@ void migrate_page_copy(struct page *newpage, struct page *page)
if (PageMappedToDisk(page))
SetPageMappedToDisk(newpage);
- if (PageDirty(page)) {
- clear_page_dirty_for_io(page);
- /*
- * Want to mark the page and the radix tree as dirty, and
- * redo the accounting that clear_page_dirty_for_io undid,
- * but we can't use set_page_dirty because that function
- * is actually a signal that all of the page has become dirty.
- * Whereas only part of our page may be dirty.
- */
- if (PageSwapBacked(page))
- SetPageDirty(newpage);
- else
- __set_page_dirty_nobuffers(newpage);
- }
+ /* Move dirty on pages not done by migrate_page_move_mapping() */
+ if (PageDirty(page))
+ SetPageDirty(newpage);
+
+ if (page_is_young(page))
+ set_page_young(newpage);
+ if (page_is_idle(page))
+ set_page_idle(newpage);
/*
* Copy NUMA information to the new page, to prevent over-eager
@@ -531,7 +562,6 @@ void migrate_page_copy(struct page *newpage, struct page *page)
cpupid = page_cpupid_xchg_last(page, -1);
page_cpupid_xchg_last(newpage, cpupid);
- mlock_migrate_page(newpage, page);
ksm_migrate_page(newpage, page);
/*
* Please do not reorder this without considering how mm/ksm.c's
@@ -715,24 +745,13 @@ static int fallback_migrate_page(struct address_space *mapping,
* MIGRATEPAGE_SUCCESS - success
*/
static int move_to_new_page(struct page *newpage, struct page *page,
- int page_was_mapped, enum migrate_mode mode)
+ enum migrate_mode mode)
{
struct address_space *mapping;
int rc;
- /*
- * Block others from accessing the page when we get around to
- * establishing additional references. We are the only one
- * holding a reference to the new page at this point.
- */
- if (!trylock_page(newpage))
- BUG();
-
- /* Prepare mapping for the new page.*/
- newpage->index = page->index;
- newpage->mapping = page->mapping;
- if (PageSwapBacked(page))
- SetPageSwapBacked(newpage);
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
mapping = page_mapping(page);
if (!mapping)
@@ -744,22 +763,19 @@ static int move_to_new_page(struct page *newpage, struct page *page,
* space which also has its own migratepage callback. This
* is the most common path for page migration.
*/
- rc = mapping->a_ops->migratepage(mapping,
- newpage, page, mode);
+ rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
else
rc = fallback_migrate_page(mapping, newpage, page, mode);
- if (rc != MIGRATEPAGE_SUCCESS) {
- newpage->mapping = NULL;
- } else {
- mem_cgroup_migrate(page, newpage, false);
- if (page_was_mapped)
- remove_migration_ptes(page, newpage);
- page->mapping = NULL;
+ /*
+ * When successful, old pagecache page->mapping must be cleared before
+ * page is freed; but stats require that PageAnon be left as PageAnon.
+ */
+ if (rc == MIGRATEPAGE_SUCCESS) {
+ set_page_memcg(page, NULL);
+ if (!PageAnon(page))
+ page->mapping = NULL;
}
-
- unlock_page(newpage);
-
return rc;
}
@@ -808,6 +824,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
goto out_unlock;
wait_on_page_writeback(page);
}
+
/*
* By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
* we cannot notice that anon_vma is freed while we migrates a page.
@@ -815,34 +832,26 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* of migration. File cache pages are no problem because of page_lock()
* File Caches may use write_page() or lock_page() in migration, then,
* just care Anon page here.
+ *
+ * Only page_get_anon_vma() understands the subtleties of
+ * getting a hold on an anon_vma from outside one of its mms.
+ * But if we cannot get anon_vma, then we won't need it anyway,
+ * because that implies that the anon page is no longer mapped
+ * (and cannot be remapped so long as we hold the page lock).
*/
- if (PageAnon(page) && !PageKsm(page)) {
- /*
- * Only page_lock_anon_vma_read() understands the subtleties of
- * getting a hold on an anon_vma from outside one of its mms.
- */
+ if (PageAnon(page) && !PageKsm(page))
anon_vma = page_get_anon_vma(page);
- if (anon_vma) {
- /*
- * Anon page
- */
- } else if (PageSwapCache(page)) {
- /*
- * We cannot be sure that the anon_vma of an unmapped
- * swapcache page is safe to use because we don't
- * know in advance if the VMA that this page belonged
- * to still exists. If the VMA and others sharing the
- * data have been freed, then the anon_vma could
- * already be invalid.
- *
- * To avoid this possibility, swapcache pages get
- * migrated but are not remapped when migration
- * completes
- */
- } else {
- goto out_unlock;
- }
- }
+
+ /*
+ * Block others from accessing the new page when we get around to
+ * establishing additional references. We are usually the only one
+ * holding a reference to newpage at this point. We used to have a BUG
+ * here if trylock_page(newpage) fails, but would like to allow for
+ * cases where there might be a race with the previous use of newpage.
+ * This is much like races on refcount of oldpage: just don't BUG().
+ */
+ if (unlikely(!trylock_page(newpage)))
+ goto out_unlock;
if (unlikely(isolated_balloon_page(page))) {
/*
@@ -853,7 +862,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
* the page migration right away (proteced by page lock).
*/
rc = balloon_page_migrate(newpage, page, mode);
- goto out_unlock;
+ goto out_unlock_both;
}
/*
@@ -872,30 +881,30 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
VM_BUG_ON_PAGE(PageAnon(page), page);
if (page_has_private(page)) {
try_to_free_buffers(page);
- goto out_unlock;
+ goto out_unlock_both;
}
- goto skip_unmap;
- }
-
- /* Establish migration ptes or remove ptes */
- if (page_mapped(page)) {
+ } else if (page_mapped(page)) {
+ /* Establish migration ptes */
+ VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma,
+ page);
try_to_unmap(page,
TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
page_was_mapped = 1;
}
-skip_unmap:
if (!page_mapped(page))
- rc = move_to_new_page(newpage, page, page_was_mapped, mode);
+ rc = move_to_new_page(newpage, page, mode);
- if (rc && page_was_mapped)
- remove_migration_ptes(page, page);
+ if (page_was_mapped)
+ remove_migration_ptes(page,
+ rc == MIGRATEPAGE_SUCCESS ? newpage : page);
+out_unlock_both:
+ unlock_page(newpage);
+out_unlock:
/* Drop an anon_vma reference if we took one */
if (anon_vma)
put_anon_vma(anon_vma);
-
-out_unlock:
unlock_page(page);
out:
return rc;
@@ -918,12 +927,14 @@ out:
static ICE_noinline int unmap_and_move(new_page_t get_new_page,
free_page_t put_new_page,
unsigned long private, struct page *page,
- int force, enum migrate_mode mode)
+ int force, enum migrate_mode mode,
+ enum migrate_reason reason)
{
- int rc = 0;
+ int rc = MIGRATEPAGE_SUCCESS;
int *result = NULL;
- struct page *newpage = get_new_page(page, private, &result);
+ struct page *newpage;
+ newpage = get_new_page(page, private, &result);
if (!newpage)
return -ENOMEM;
@@ -937,6 +948,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
goto out;
rc = __unmap_and_move(page, newpage, force, mode);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ put_new_page = NULL;
out:
if (rc != -EAGAIN) {
@@ -949,7 +962,13 @@ out:
list_del(&page->lru);
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
- putback_lru_page(page);
+ /* Soft-offlined page shouldn't go through lru cache list */
+ if (reason == MR_MEMORY_FAILURE) {
+ put_page(page);
+ if (!test_set_page_hwpoison(page))
+ num_poisoned_pages_inc();
+ } else
+ putback_lru_page(page);
}
/*
@@ -957,10 +976,9 @@ out:
* it. Otherwise, putback_lru_page() will drop the reference grabbed
* during isolation.
*/
- if (rc != MIGRATEPAGE_SUCCESS && put_new_page) {
- ClearPageSwapBacked(newpage);
+ if (put_new_page)
put_new_page(newpage, private);
- } else if (unlikely(__is_movable_balloon_page(newpage))) {
+ else if (unlikely(__is_movable_balloon_page(newpage))) {
/* drop our reference, page already in the balloon */
put_page(newpage);
} else
@@ -998,7 +1016,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
struct page *hpage, int force,
enum migrate_mode mode)
{
- int rc = 0;
+ int rc = -EAGAIN;
int *result = NULL;
int page_was_mapped = 0;
struct page *new_hpage;
@@ -1020,8 +1038,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (!new_hpage)
return -ENOMEM;
- rc = -EAGAIN;
-
if (!trylock_page(hpage)) {
if (!force || mode != MIGRATE_SYNC)
goto out;
@@ -1031,6 +1047,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
if (PageAnon(hpage))
anon_vma = page_get_anon_vma(hpage);
+ if (unlikely(!trylock_page(new_hpage)))
+ goto put_anon;
+
if (page_mapped(hpage)) {
try_to_unmap(hpage,
TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
@@ -1038,16 +1057,22 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
}
if (!page_mapped(hpage))
- rc = move_to_new_page(new_hpage, hpage, page_was_mapped, mode);
+ rc = move_to_new_page(new_hpage, hpage, mode);
- if (rc != MIGRATEPAGE_SUCCESS && page_was_mapped)
- remove_migration_ptes(hpage, hpage);
+ if (page_was_mapped)
+ remove_migration_ptes(hpage,
+ rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage);
+ unlock_page(new_hpage);
+
+put_anon:
if (anon_vma)
put_anon_vma(anon_vma);
- if (rc == MIGRATEPAGE_SUCCESS)
+ if (rc == MIGRATEPAGE_SUCCESS) {
hugetlb_cgroup_migrate(hpage, new_hpage);
+ put_new_page = NULL;
+ }
unlock_page(hpage);
out:
@@ -1059,10 +1084,10 @@ out:
* it. Otherwise, put_page() will drop the reference grabbed during
* isolation.
*/
- if (rc != MIGRATEPAGE_SUCCESS && put_new_page)
+ if (put_new_page)
put_new_page(new_hpage, private);
else
- put_page(new_hpage);
+ putback_active_hugepage(new_hpage);
if (result) {
if (rc)
@@ -1089,7 +1114,7 @@ out:
*
* The function returns after 10 attempts or if no pages are movable any more
* because the list has become empty or no retryable pages exist any more.
- * The caller should call putback_lru_pages() to return pages to the LRU
+ * The caller should call putback_movable_pages() to return pages to the LRU
* or free list only if ret != 0.
*
* Returns the number of pages that were not migrated, or an error code.
@@ -1122,7 +1147,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
pass > 2, mode);
else
rc = unmap_and_move(get_new_page, put_new_page,
- private, page, pass > 2, mode);
+ private, page, pass > 2, mode,
+ reason);
switch(rc) {
case -ENOMEM:
@@ -1145,7 +1171,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
}
}
}
- rc = nr_failed + retry;
+ nr_failed += retry;
+ rc = nr_failed;
out:
if (nr_succeeded)
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
@@ -1187,7 +1214,7 @@ static struct page *new_page_node(struct page *p, unsigned long private,
return alloc_huge_page_node(page_hstate(compound_head(p)),
pm->node);
else
- return alloc_pages_exact_node(pm->node,
+ return __alloc_pages_node(pm->node,
GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0);
}
@@ -1219,7 +1246,9 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma))
goto set_status;
- page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT);
+ /* FOLL_DUMP to ignore special (like zero) pages */
+ page = follow_page(vma, pp->addr,
+ FOLL_GET | FOLL_SPLIT | FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
@@ -1229,10 +1258,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
if (!page)
goto set_status;
- /* Use PageReserved to check for zero page */
- if (PageReserved(page))
- goto put_and_set;
-
pp->page = page;
err = page_to_nid(page);
@@ -1389,18 +1414,14 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
if (!vma || addr < vma->vm_start)
goto set_status;
- page = follow_page(vma, addr, 0);
+ /* FOLL_DUMP to ignore special (like zero) pages */
+ page = follow_page(vma, addr, FOLL_DUMP);
err = PTR_ERR(page);
if (IS_ERR(page))
goto set_status;
- err = -ENOENT;
- /* Use PageReserved to check for zero page */
- if (!page || PageReserved(page))
- goto set_status;
-
- err = page_to_nid(page);
+ err = page ? page_to_nid(page) : -ENOENT;
set_status:
*status = err;
@@ -1553,11 +1574,11 @@ static struct page *alloc_misplaced_dst_page(struct page *page,
int nid = (int) data;
struct page *newpage;
- newpage = alloc_pages_exact_node(nid,
+ newpage = __alloc_pages_node(nid,
(GFP_HIGHUSER_MOVABLE |
__GFP_THISNODE | __GFP_NOMEMALLOC |
__GFP_NORETRY | __GFP_NOWARN) &
- ~GFP_IOFS, 0);
+ ~__GFP_RECLAIM, 0);
return newpage;
}
@@ -1731,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
goto out_dropref;
new_page = alloc_pages_node(node,
- (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT,
+ (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
HPAGE_PMD_ORDER);
if (!new_page)
goto out_fail;
@@ -1768,7 +1789,6 @@ fail_putback:
SetPageActive(page);
if (TestClearPageUnevictable(new_page))
SetPageUnevictable(page);
- mlock_migrate_page(page, new_page);
unlock_page(new_page);
put_page(new_page); /* Free it */
@@ -1796,7 +1816,7 @@ fail_putback:
*/
flush_cache_range(vma, mmun_start, mmun_end);
page_add_anon_rmap(new_page, vma, mmun_start);
- pmdp_clear_flush_notify(vma, mmun_start, pmd);
+ pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
set_pmd_at(mm, mmun_start, pmd, entry);
flush_tlb_range(vma, mmun_start, mmun_end);
update_mmu_cache_pmd(vma, address, &entry);
@@ -1810,8 +1830,9 @@ fail_putback:
goto fail_putback;
}
- mem_cgroup_migrate(page, new_page, false);
-
+ mlock_migrate_page(new_page, page);
+ set_page_memcg(new_page, page_memcg(page));
+ set_page_memcg(page, NULL);
page_remove_rmap(page);
spin_unlock(ptl);