diff options
Diffstat (limited to 'kernel/mm/filemap.c')
-rw-r--r-- | kernel/mm/filemap.c | 149 |
1 files changed, 108 insertions, 41 deletions
diff --git a/kernel/mm/filemap.c b/kernel/mm/filemap.c index 01cf28476..44301361c 100644 --- a/kernel/mm/filemap.c +++ b/kernel/mm/filemap.c @@ -100,6 +100,7 @@ * ->tree_lock (page_remove_rmap->set_page_dirty) * bdi.wb->list_lock (page_remove_rmap->set_page_dirty) * ->inode->i_lock (page_remove_rmap->set_page_dirty) + * ->memcg->move_lock (page_remove_rmap->mem_cgroup_begin_page_stat) * bdi.wb->list_lock (zap_pte_range->set_page_dirty) * ->inode->i_lock (zap_pte_range->set_page_dirty) * ->private_lock (zap_pte_range->__set_page_dirty_buffers) @@ -176,9 +177,11 @@ static void page_cache_tree_delete(struct address_space *mapping, /* * Delete a page from the page cache and free it. Caller has to make * sure the page is locked and that nobody else uses it - or that usage - * is safe. The caller must hold the mapping's tree_lock. + * is safe. The caller must hold the mapping's tree_lock and + * mem_cgroup_begin_page_stat(). */ -void __delete_from_page_cache(struct page *page, void *shadow) +void __delete_from_page_cache(struct page *page, void *shadow, + struct mem_cgroup *memcg) { struct address_space *mapping = page->mapping; @@ -198,7 +201,9 @@ void __delete_from_page_cache(struct page *page, void *shadow) page->mapping = NULL; /* Leave page->index set: truncation lookup relies upon it */ - __dec_zone_page_state(page, NR_FILE_PAGES); + /* hugetlb pages do not participate in page cache accounting. */ + if (!PageHuge(page)) + __dec_zone_page_state(page, NR_FILE_PAGES); if (PageSwapBacked(page)) __dec_zone_page_state(page, NR_SHMEM); BUG_ON(page_mapped(page)); @@ -212,7 +217,8 @@ void __delete_from_page_cache(struct page *page, void *shadow) * anyway will be cleared before returning page into buddy allocator. */ if (WARN_ON_ONCE(PageDirty(page))) - account_page_cleaned(page, mapping); + account_page_cleaned(page, mapping, memcg, + inode_to_wb(mapping->host)); } /** @@ -226,14 +232,20 @@ void __delete_from_page_cache(struct page *page, void *shadow) void delete_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; + struct mem_cgroup *memcg; + unsigned long flags; + void (*freepage)(struct page *); BUG_ON(!PageLocked(page)); freepage = mapping->a_ops->freepage; - spin_lock_irq(&mapping->tree_lock); - __delete_from_page_cache(page, NULL); - spin_unlock_irq(&mapping->tree_lock); + + memcg = mem_cgroup_begin_page_stat(page); + spin_lock_irqsave(&mapping->tree_lock, flags); + __delete_from_page_cache(page, NULL, memcg); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + mem_cgroup_end_page_stat(memcg); if (freepage) freepage(page); @@ -283,7 +295,9 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, if (!mapping_cap_writeback_dirty(mapping)) return 0; + wbc_attach_fdatawrite_inode(&wbc, mapping->host); ret = do_writepages(mapping, &wbc); + wbc_detach_inode(&wbc); return ret; } @@ -319,23 +333,14 @@ int filemap_flush(struct address_space *mapping) } EXPORT_SYMBOL(filemap_flush); -/** - * filemap_fdatawait_range - wait for writeback to complete - * @mapping: address space structure to wait for - * @start_byte: offset in bytes where the range starts - * @end_byte: offset in bytes where the range ends (inclusive) - * - * Walk the list of under-writeback pages of the given address space - * in the given range and wait for all of them. - */ -int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, - loff_t end_byte) +static int __filemap_fdatawait_range(struct address_space *mapping, + loff_t start_byte, loff_t end_byte) { pgoff_t index = start_byte >> PAGE_CACHE_SHIFT; pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; struct pagevec pvec; int nr_pages; - int ret2, ret = 0; + int ret = 0; if (end_byte < start_byte) goto out; @@ -362,6 +367,29 @@ int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, cond_resched(); } out: + return ret; +} + +/** + * filemap_fdatawait_range - wait for writeback to complete + * @mapping: address space structure to wait for + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Walk the list of under-writeback pages of the given address space + * in the given range and wait for all of them. Check error status of + * the address space and return it. + * + * Since the error status of the address space is cleared by this function, + * callers are responsible for checking the return value and handling and/or + * reporting the error. + */ +int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, + loff_t end_byte) +{ + int ret, ret2; + + ret = __filemap_fdatawait_range(mapping, start_byte, end_byte); ret2 = filemap_check_errors(mapping); if (!ret) ret = ret2; @@ -371,11 +399,38 @@ out: EXPORT_SYMBOL(filemap_fdatawait_range); /** + * filemap_fdatawait_keep_errors - wait for writeback without clearing errors + * @mapping: address space structure to wait for + * + * Walk the list of under-writeback pages of the given address space + * and wait for all of them. Unlike filemap_fdatawait(), this function + * does not clear error status of the address space. + * + * Use this function if callers don't handle errors themselves. Expected + * call sites are system-wide / filesystem-wide data flushers: e.g. sync(2), + * fsfreeze(8) + */ +void filemap_fdatawait_keep_errors(struct address_space *mapping) +{ + loff_t i_size = i_size_read(mapping->host); + + if (i_size == 0) + return; + + __filemap_fdatawait_range(mapping, 0, i_size - 1); +} + +/** * filemap_fdatawait - wait for all under-writeback pages to complete * @mapping: address space structure to wait for * * Walk the list of under-writeback pages of the given address space - * and wait for all of them. + * and wait for all of them. Check error status of the address space + * and return it. + * + * Since the error status of the address space is cleared by this function, + * callers are responsible for checking the return value and handling and/or + * reporting the error. */ int filemap_fdatawait(struct address_space *mapping) { @@ -472,6 +527,8 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) if (!error) { struct address_space *mapping = old->mapping; void (*freepage)(struct page *); + struct mem_cgroup *memcg; + unsigned long flags; pgoff_t offset = old->index; freepage = mapping->a_ops->freepage; @@ -480,16 +537,23 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) new->mapping = mapping; new->index = offset; - spin_lock_irq(&mapping->tree_lock); - __delete_from_page_cache(old, NULL); + memcg = mem_cgroup_begin_page_stat(old); + spin_lock_irqsave(&mapping->tree_lock, flags); + __delete_from_page_cache(old, NULL, memcg); error = radix_tree_insert(&mapping->page_tree, offset, new); BUG_ON(error); mapping->nrpages++; - __inc_zone_page_state(new, NR_FILE_PAGES); + + /* + * hugetlb pages do not participate in page cache accounting. + */ + if (!PageHuge(new)) + __inc_zone_page_state(new, NR_FILE_PAGES); if (PageSwapBacked(new)) __inc_zone_page_state(new, NR_SHMEM); - spin_unlock_irq(&mapping->tree_lock); - mem_cgroup_migrate(old, new, true); + spin_unlock_irqrestore(&mapping->tree_lock, flags); + mem_cgroup_end_page_stat(memcg); + mem_cgroup_replace_page(old, new); radix_tree_preload_end(); if (freepage) freepage(old); @@ -580,7 +644,10 @@ static int __add_to_page_cache_locked(struct page *page, radix_tree_preload_end(); if (unlikely(error)) goto err_insert; - __inc_zone_page_state(page, NR_FILE_PAGES); + + /* hugetlb pages do not participate in page cache accounting. */ + if (!huge) + __inc_zone_page_state(page, NR_FILE_PAGES); spin_unlock_irq(&mapping->tree_lock); if (!huge) mem_cgroup_commit_charge(page, memcg, false); @@ -653,7 +720,7 @@ struct page *__page_cache_alloc(gfp_t gfp) do { cpuset_mems_cookie = read_mems_allowed_begin(); n = cpuset_mem_spread_node(); - page = alloc_pages_exact_node(n, gfp, 0); + page = __alloc_pages_node(n, gfp, 0); } while (!page && read_mems_allowed_retry(cpuset_mems_cookie)); return page; @@ -1659,8 +1726,8 @@ no_cached_page: error = -ENOMEM; goto out; } - error = add_to_page_cache_lru(page, mapping, - index, GFP_KERNEL); + error = add_to_page_cache_lru(page, mapping, index, + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (error) { page_cache_release(page); if (error == -EEXIST) { @@ -1761,7 +1828,8 @@ static int page_cache_read(struct file *file, pgoff_t offset) if (!page) return -ENOMEM; - ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); + ret = add_to_page_cache_lru(page, mapping, offset, + mapping_gfp_constraint(mapping, GFP_KERNEL)); if (ret == 0) ret = mapping->a_ops->readpage(file, page); else if (ret == -EEXIST) @@ -1785,7 +1853,6 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, struct file *file, pgoff_t offset) { - unsigned long ra_pages; struct address_space *mapping = file->f_mapping; /* If we don't want any read-ahead, don't bother */ @@ -1814,10 +1881,9 @@ static void do_sync_mmap_readahead(struct vm_area_struct *vma, /* * mmap read-around */ - ra_pages = max_sane_readahead(ra->ra_pages); - ra->start = max_t(long, 0, offset - ra_pages / 2); - ra->size = ra_pages; - ra->async_size = ra_pages / 4; + ra->start = max_t(long, 0, offset - ra->ra_pages / 2); + ra->size = ra->ra_pages; + ra->async_size = ra->ra_pages / 4; ra_submit(ra, mapping, file); } @@ -2466,6 +2532,11 @@ again: break; } + if (fatal_signal_pending(current)) { + status = -EINTR; + break; + } + status = a_ops->write_begin(file, mapping, pos, bytes, flags, &page, &fsdata); if (unlikely(status < 0)) @@ -2503,10 +2574,6 @@ again: written += copied; balance_dirty_pages_ratelimited(mapping); - if (fatal_signal_pending(current)) { - status = -EINTR; - break; - } } while (iov_iter_count(i)); return written ? written : status; @@ -2541,7 +2608,7 @@ ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); - err = file_remove_suid(file); + err = file_remove_privs(file); if (err) goto out; @@ -2651,7 +2718,7 @@ EXPORT_SYMBOL(generic_file_write_iter); * page is known to the local caching routines. * * The @gfp_mask argument specifies whether I/O may be performed to release - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). + * this page (__GFP_IO), and whether the call may block (__GFP_RECLAIM & __GFP_FS). * */ int try_to_release_page(struct page *page, gfp_t gfp_mask) |