diff options
Diffstat (limited to 'kernel/mm/migrate.c')
-rw-r--r-- | kernel/mm/migrate.c | 289 |
1 files changed, 155 insertions, 134 deletions
diff --git a/kernel/mm/migrate.c b/kernel/mm/migrate.c index f53838fe3..6d17e0ab4 100644 --- a/kernel/mm/migrate.c +++ b/kernel/mm/migrate.c @@ -1,5 +1,5 @@ /* - * Memory Migration functionality - linux/mm/migration.c + * Memory Migration functionality - linux/mm/migrate.c * * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter * @@ -30,13 +30,14 @@ #include <linux/mempolicy.h> #include <linux/vmalloc.h> #include <linux/security.h> -#include <linux/memcontrol.h> +#include <linux/backing-dev.h> #include <linux/syscalls.h> #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> #include <linux/gfp.h> #include <linux/balloon_compaction.h> #include <linux/mmu_notifier.h> +#include <linux/page_idle.h> #include <asm/tlbflush.h> @@ -170,6 +171,9 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, else page_add_file_rmap(new); + if (vma->vm_flags & VM_LOCKED) + mlock_vma_page(new); + /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, addr, ptep); unlock: @@ -310,6 +314,8 @@ int migrate_page_move_mapping(struct address_space *mapping, struct buffer_head *head, enum migrate_mode mode, int extra_count) { + struct zone *oldzone, *newzone; + int dirty; int expected_count = 1 + extra_count; void **pslot; @@ -317,9 +323,20 @@ int migrate_page_move_mapping(struct address_space *mapping, /* Anonymous page without mapping */ if (page_count(page) != expected_count) return -EAGAIN; + + /* No turning back from here */ + set_page_memcg(newpage, page_memcg(page)); + newpage->index = page->index; + newpage->mapping = page->mapping; + if (PageSwapBacked(page)) + SetPageSwapBacked(newpage); + return MIGRATEPAGE_SUCCESS; } + oldzone = page_zone(page); + newzone = page_zone(newpage); + spin_lock_irq(&mapping->tree_lock); pslot = radix_tree_lookup_slot(&mapping->page_tree, @@ -352,14 +369,28 @@ int migrate_page_move_mapping(struct address_space *mapping, } /* - * Now we know that no one else is looking at the page. + * Now we know that no one else is looking at the page: + * no turning back from here. */ + set_page_memcg(newpage, page_memcg(page)); + newpage->index = page->index; + newpage->mapping = page->mapping; + if (PageSwapBacked(page)) + SetPageSwapBacked(newpage); + get_page(newpage); /* add cache reference */ if (PageSwapCache(page)) { SetPageSwapCache(newpage); set_page_private(newpage, page_private(page)); } + /* Move dirty while page refs frozen and newpage not yet exposed */ + dirty = PageDirty(page); + if (dirty) { + ClearPageDirty(page); + SetPageDirty(newpage); + } + radix_tree_replace_slot(pslot, newpage); /* @@ -369,6 +400,9 @@ int migrate_page_move_mapping(struct address_space *mapping, */ page_unfreeze_refs(page, expected_count - 1); + spin_unlock(&mapping->tree_lock); + /* Leave irq disabled to prevent preemption while updating stats */ + /* * If moved to a different zone then also account * the page for that zone. Other VM counters will be @@ -379,13 +413,19 @@ int migrate_page_move_mapping(struct address_space *mapping, * via NR_FILE_PAGES and NR_ANON_PAGES if they * are mapped to swap space. */ - __dec_zone_page_state(page, NR_FILE_PAGES); - __inc_zone_page_state(newpage, NR_FILE_PAGES); - if (!PageSwapCache(page) && PageSwapBacked(page)) { - __dec_zone_page_state(page, NR_SHMEM); - __inc_zone_page_state(newpage, NR_SHMEM); + if (newzone != oldzone) { + __dec_zone_state(oldzone, NR_FILE_PAGES); + __inc_zone_state(newzone, NR_FILE_PAGES); + if (PageSwapBacked(page) && !PageSwapCache(page)) { + __dec_zone_state(oldzone, NR_SHMEM); + __inc_zone_state(newzone, NR_SHMEM); + } + if (dirty && mapping_cap_account_dirty(mapping)) { + __dec_zone_state(oldzone, NR_FILE_DIRTY); + __inc_zone_state(newzone, NR_FILE_DIRTY); + } } - spin_unlock_irq(&mapping->tree_lock); + local_irq_enable(); return MIGRATEPAGE_SUCCESS; } @@ -400,12 +440,6 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, int expected_count; void **pslot; - if (!mapping) { - if (page_count(page) != 1) - return -EAGAIN; - return MIGRATEPAGE_SUCCESS; - } - spin_lock_irq(&mapping->tree_lock); pslot = radix_tree_lookup_slot(&mapping->page_tree, @@ -423,6 +457,9 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, return -EAGAIN; } + set_page_memcg(newpage, page_memcg(page)); + newpage->index = page->index; + newpage->mapping = page->mapping; get_page(newpage); radix_tree_replace_slot(pslot, newpage); @@ -509,20 +546,14 @@ void migrate_page_copy(struct page *newpage, struct page *page) if (PageMappedToDisk(page)) SetPageMappedToDisk(newpage); - if (PageDirty(page)) { - clear_page_dirty_for_io(page); - /* - * Want to mark the page and the radix tree as dirty, and - * redo the accounting that clear_page_dirty_for_io undid, - * but we can't use set_page_dirty because that function - * is actually a signal that all of the page has become dirty. - * Whereas only part of our page may be dirty. - */ - if (PageSwapBacked(page)) - SetPageDirty(newpage); - else - __set_page_dirty_nobuffers(newpage); - } + /* Move dirty on pages not done by migrate_page_move_mapping() */ + if (PageDirty(page)) + SetPageDirty(newpage); + + if (page_is_young(page)) + set_page_young(newpage); + if (page_is_idle(page)) + set_page_idle(newpage); /* * Copy NUMA information to the new page, to prevent over-eager @@ -531,7 +562,6 @@ void migrate_page_copy(struct page *newpage, struct page *page) cpupid = page_cpupid_xchg_last(page, -1); page_cpupid_xchg_last(newpage, cpupid); - mlock_migrate_page(newpage, page); ksm_migrate_page(newpage, page); /* * Please do not reorder this without considering how mm/ksm.c's @@ -715,24 +745,13 @@ static int fallback_migrate_page(struct address_space *mapping, * MIGRATEPAGE_SUCCESS - success */ static int move_to_new_page(struct page *newpage, struct page *page, - int page_was_mapped, enum migrate_mode mode) + enum migrate_mode mode) { struct address_space *mapping; int rc; - /* - * Block others from accessing the page when we get around to - * establishing additional references. We are the only one - * holding a reference to the new page at this point. - */ - if (!trylock_page(newpage)) - BUG(); - - /* Prepare mapping for the new page.*/ - newpage->index = page->index; - newpage->mapping = page->mapping; - if (PageSwapBacked(page)) - SetPageSwapBacked(newpage); + VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); mapping = page_mapping(page); if (!mapping) @@ -744,22 +763,19 @@ static int move_to_new_page(struct page *newpage, struct page *page, * space which also has its own migratepage callback. This * is the most common path for page migration. */ - rc = mapping->a_ops->migratepage(mapping, - newpage, page, mode); + rc = mapping->a_ops->migratepage(mapping, newpage, page, mode); else rc = fallback_migrate_page(mapping, newpage, page, mode); - if (rc != MIGRATEPAGE_SUCCESS) { - newpage->mapping = NULL; - } else { - mem_cgroup_migrate(page, newpage, false); - if (page_was_mapped) - remove_migration_ptes(page, newpage); - page->mapping = NULL; + /* + * When successful, old pagecache page->mapping must be cleared before + * page is freed; but stats require that PageAnon be left as PageAnon. + */ + if (rc == MIGRATEPAGE_SUCCESS) { + set_page_memcg(page, NULL); + if (!PageAnon(page)) + page->mapping = NULL; } - - unlock_page(newpage); - return rc; } @@ -808,6 +824,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, goto out_unlock; wait_on_page_writeback(page); } + /* * By try_to_unmap(), page->mapcount goes down to 0 here. In this case, * we cannot notice that anon_vma is freed while we migrates a page. @@ -815,34 +832,26 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * of migration. File cache pages are no problem because of page_lock() * File Caches may use write_page() or lock_page() in migration, then, * just care Anon page here. + * + * Only page_get_anon_vma() understands the subtleties of + * getting a hold on an anon_vma from outside one of its mms. + * But if we cannot get anon_vma, then we won't need it anyway, + * because that implies that the anon page is no longer mapped + * (and cannot be remapped so long as we hold the page lock). */ - if (PageAnon(page) && !PageKsm(page)) { - /* - * Only page_lock_anon_vma_read() understands the subtleties of - * getting a hold on an anon_vma from outside one of its mms. - */ + if (PageAnon(page) && !PageKsm(page)) anon_vma = page_get_anon_vma(page); - if (anon_vma) { - /* - * Anon page - */ - } else if (PageSwapCache(page)) { - /* - * We cannot be sure that the anon_vma of an unmapped - * swapcache page is safe to use because we don't - * know in advance if the VMA that this page belonged - * to still exists. If the VMA and others sharing the - * data have been freed, then the anon_vma could - * already be invalid. - * - * To avoid this possibility, swapcache pages get - * migrated but are not remapped when migration - * completes - */ - } else { - goto out_unlock; - } - } + + /* + * Block others from accessing the new page when we get around to + * establishing additional references. We are usually the only one + * holding a reference to newpage at this point. We used to have a BUG + * here if trylock_page(newpage) fails, but would like to allow for + * cases where there might be a race with the previous use of newpage. + * This is much like races on refcount of oldpage: just don't BUG(). + */ + if (unlikely(!trylock_page(newpage))) + goto out_unlock; if (unlikely(isolated_balloon_page(page))) { /* @@ -853,7 +862,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage, * the page migration right away (proteced by page lock). */ rc = balloon_page_migrate(newpage, page, mode); - goto out_unlock; + goto out_unlock_both; } /* @@ -872,30 +881,30 @@ static int __unmap_and_move(struct page *page, struct page *newpage, VM_BUG_ON_PAGE(PageAnon(page), page); if (page_has_private(page)) { try_to_free_buffers(page); - goto out_unlock; + goto out_unlock_both; } - goto skip_unmap; - } - - /* Establish migration ptes or remove ptes */ - if (page_mapped(page)) { + } else if (page_mapped(page)) { + /* Establish migration ptes */ + VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !anon_vma, + page); try_to_unmap(page, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); page_was_mapped = 1; } -skip_unmap: if (!page_mapped(page)) - rc = move_to_new_page(newpage, page, page_was_mapped, mode); + rc = move_to_new_page(newpage, page, mode); - if (rc && page_was_mapped) - remove_migration_ptes(page, page); + if (page_was_mapped) + remove_migration_ptes(page, + rc == MIGRATEPAGE_SUCCESS ? newpage : page); +out_unlock_both: + unlock_page(newpage); +out_unlock: /* Drop an anon_vma reference if we took one */ if (anon_vma) put_anon_vma(anon_vma); - -out_unlock: unlock_page(page); out: return rc; @@ -918,12 +927,14 @@ out: static ICE_noinline int unmap_and_move(new_page_t get_new_page, free_page_t put_new_page, unsigned long private, struct page *page, - int force, enum migrate_mode mode) + int force, enum migrate_mode mode, + enum migrate_reason reason) { - int rc = 0; + int rc = MIGRATEPAGE_SUCCESS; int *result = NULL; - struct page *newpage = get_new_page(page, private, &result); + struct page *newpage; + newpage = get_new_page(page, private, &result); if (!newpage) return -ENOMEM; @@ -937,6 +948,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page, goto out; rc = __unmap_and_move(page, newpage, force, mode); + if (rc == MIGRATEPAGE_SUCCESS) + put_new_page = NULL; out: if (rc != -EAGAIN) { @@ -949,7 +962,13 @@ out: list_del(&page->lru); dec_zone_page_state(page, NR_ISOLATED_ANON + page_is_file_cache(page)); - putback_lru_page(page); + /* Soft-offlined page shouldn't go through lru cache list */ + if (reason == MR_MEMORY_FAILURE) { + put_page(page); + if (!test_set_page_hwpoison(page)) + num_poisoned_pages_inc(); + } else + putback_lru_page(page); } /* @@ -957,10 +976,9 @@ out: * it. Otherwise, putback_lru_page() will drop the reference grabbed * during isolation. */ - if (rc != MIGRATEPAGE_SUCCESS && put_new_page) { - ClearPageSwapBacked(newpage); + if (put_new_page) put_new_page(newpage, private); - } else if (unlikely(__is_movable_balloon_page(newpage))) { + else if (unlikely(__is_movable_balloon_page(newpage))) { /* drop our reference, page already in the balloon */ put_page(newpage); } else @@ -998,7 +1016,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, struct page *hpage, int force, enum migrate_mode mode) { - int rc = 0; + int rc = -EAGAIN; int *result = NULL; int page_was_mapped = 0; struct page *new_hpage; @@ -1020,8 +1038,6 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (!new_hpage) return -ENOMEM; - rc = -EAGAIN; - if (!trylock_page(hpage)) { if (!force || mode != MIGRATE_SYNC) goto out; @@ -1031,6 +1047,9 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, if (PageAnon(hpage)) anon_vma = page_get_anon_vma(hpage); + if (unlikely(!trylock_page(new_hpage))) + goto put_anon; + if (page_mapped(hpage)) { try_to_unmap(hpage, TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); @@ -1038,16 +1057,22 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, } if (!page_mapped(hpage)) - rc = move_to_new_page(new_hpage, hpage, page_was_mapped, mode); + rc = move_to_new_page(new_hpage, hpage, mode); - if (rc != MIGRATEPAGE_SUCCESS && page_was_mapped) - remove_migration_ptes(hpage, hpage); + if (page_was_mapped) + remove_migration_ptes(hpage, + rc == MIGRATEPAGE_SUCCESS ? new_hpage : hpage); + unlock_page(new_hpage); + +put_anon: if (anon_vma) put_anon_vma(anon_vma); - if (rc == MIGRATEPAGE_SUCCESS) + if (rc == MIGRATEPAGE_SUCCESS) { hugetlb_cgroup_migrate(hpage, new_hpage); + put_new_page = NULL; + } unlock_page(hpage); out: @@ -1059,10 +1084,10 @@ out: * it. Otherwise, put_page() will drop the reference grabbed during * isolation. */ - if (rc != MIGRATEPAGE_SUCCESS && put_new_page) + if (put_new_page) put_new_page(new_hpage, private); else - put_page(new_hpage); + putback_active_hugepage(new_hpage); if (result) { if (rc) @@ -1089,7 +1114,7 @@ out: * * The function returns after 10 attempts or if no pages are movable any more * because the list has become empty or no retryable pages exist any more. - * The caller should call putback_lru_pages() to return pages to the LRU + * The caller should call putback_movable_pages() to return pages to the LRU * or free list only if ret != 0. * * Returns the number of pages that were not migrated, or an error code. @@ -1122,7 +1147,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, pass > 2, mode); else rc = unmap_and_move(get_new_page, put_new_page, - private, page, pass > 2, mode); + private, page, pass > 2, mode, + reason); switch(rc) { case -ENOMEM: @@ -1145,7 +1171,8 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, } } } - rc = nr_failed + retry; + nr_failed += retry; + rc = nr_failed; out: if (nr_succeeded) count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded); @@ -1187,7 +1214,7 @@ static struct page *new_page_node(struct page *p, unsigned long private, return alloc_huge_page_node(page_hstate(compound_head(p)), pm->node); else - return alloc_pages_exact_node(pm->node, + return __alloc_pages_node(pm->node, GFP_HIGHUSER_MOVABLE | __GFP_THISNODE, 0); } @@ -1219,7 +1246,9 @@ static int do_move_page_to_node_array(struct mm_struct *mm, if (!vma || pp->addr < vma->vm_start || !vma_migratable(vma)) goto set_status; - page = follow_page(vma, pp->addr, FOLL_GET|FOLL_SPLIT); + /* FOLL_DUMP to ignore special (like zero) pages */ + page = follow_page(vma, pp->addr, + FOLL_GET | FOLL_SPLIT | FOLL_DUMP); err = PTR_ERR(page); if (IS_ERR(page)) @@ -1229,10 +1258,6 @@ static int do_move_page_to_node_array(struct mm_struct *mm, if (!page) goto set_status; - /* Use PageReserved to check for zero page */ - if (PageReserved(page)) - goto put_and_set; - pp->page = page; err = page_to_nid(page); @@ -1389,18 +1414,14 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, if (!vma || addr < vma->vm_start) goto set_status; - page = follow_page(vma, addr, 0); + /* FOLL_DUMP to ignore special (like zero) pages */ + page = follow_page(vma, addr, FOLL_DUMP); err = PTR_ERR(page); if (IS_ERR(page)) goto set_status; - err = -ENOENT; - /* Use PageReserved to check for zero page */ - if (!page || PageReserved(page)) - goto set_status; - - err = page_to_nid(page); + err = page ? page_to_nid(page) : -ENOENT; set_status: *status = err; @@ -1553,11 +1574,11 @@ static struct page *alloc_misplaced_dst_page(struct page *page, int nid = (int) data; struct page *newpage; - newpage = alloc_pages_exact_node(nid, + newpage = __alloc_pages_node(nid, (GFP_HIGHUSER_MOVABLE | __GFP_THISNODE | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & - ~GFP_IOFS, 0); + ~__GFP_RECLAIM, 0); return newpage; } @@ -1731,7 +1752,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm, goto out_dropref; new_page = alloc_pages_node(node, - (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_WAIT, + (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM, HPAGE_PMD_ORDER); if (!new_page) goto out_fail; @@ -1768,7 +1789,6 @@ fail_putback: SetPageActive(page); if (TestClearPageUnevictable(new_page)) SetPageUnevictable(page); - mlock_migrate_page(page, new_page); unlock_page(new_page); put_page(new_page); /* Free it */ @@ -1796,7 +1816,7 @@ fail_putback: */ flush_cache_range(vma, mmun_start, mmun_end); page_add_anon_rmap(new_page, vma, mmun_start); - pmdp_clear_flush_notify(vma, mmun_start, pmd); + pmdp_huge_clear_flush_notify(vma, mmun_start, pmd); set_pmd_at(mm, mmun_start, pmd, entry); flush_tlb_range(vma, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); @@ -1810,8 +1830,9 @@ fail_putback: goto fail_putback; } - mem_cgroup_migrate(page, new_page, false); - + mlock_migrate_page(new_page, page); + set_page_memcg(new_page, page_memcg(page)); + set_page_memcg(page, NULL); page_remove_rmap(page); spin_unlock(ptl); |