summary | refs | log | tree | commit | diff | stats
path: root/kernel/include/linux/mm.h
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/include/linux/mm.h')
-rw-r--r-- kernel/include/linux/mm.h 269
1 files changed, 176 insertions, 93 deletions
diff --git a/kernel/include/linux/mm.h b/kernel/include/linux/mm.h
index b2085582d..00bad7793 100644
--- a/kernel/include/linux/mm.h
+++ b/kernel/include/linux/mm.h
@@ -20,6 +20,7 @@
#include <linux/shrinker.h>
#include <linux/resource.h>
#include <linux/page_ext.h>
+#include <linux/err.h>
struct mempolicy;
struct anon_vma;
@@ -27,6 +28,7 @@ struct anon_vma_chain;
struct file_ra_state;
struct user_struct;
struct writeback_control;
+struct bdi_writeback;
#ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
@@ -123,8 +125,10 @@ extern unsigned int kobjsize(const void *objp);
#define VM_MAYSHARE 0x00000080
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
+#define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */
#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
+#define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */
#define VM_LOCKED 0x00002000
#define VM_IO 0x00004000 /* Memory mapped I/O or similar */
@@ -135,6 +139,7 @@ extern unsigned int kobjsize(const void *objp);
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
+#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
@@ -198,6 +203,9 @@ extern unsigned int kobjsize(const void *objp);
/* This mask defines which mm->def_flags a process can inherit from its parent */
#define VM_INIT_DEF_MASK VM_NOHUGEPAGE
+/* This mask is used to clear all the VMA flags used by mlock */
+#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT))
+
/*
* mapping from the currently active vm_flags protection bits (the
* low four bits) to a page protection mask..
@@ -244,7 +252,10 @@ struct vm_fault {
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
+ int (*mremap)(struct vm_area_struct * area);
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
+ pmd_t *, unsigned int flags);
void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf);
/* notification that a previously read-only page is about to become
@@ -303,18 +314,6 @@ struct inode;
#define page_private(page) ((page)->private)
#define set_page_private(page, v) ((page)->private = (v))
-/* It's valid only if the page is free path or free_list */
-static inline void set_freepage_migratetype(struct page *page, int migratetype)
-{
- page->index = migratetype;
-}
-
-/* It's valid only if the page is free path or free_list */
-static inline int get_freepage_migratetype(struct page *page)
-{
- return page->index;
-}
-
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
@@ -355,20 +354,15 @@ static inline int get_page_unless_zero(struct page *page)
return atomic_inc_not_zero(&page->_count);
}
-/*
- * Try to drop a ref unless the page has a refcount of one, return false if
- * that is the case.
- * This is to make sure that the refcount won't become zero after this drop.
- * This can be called when MMU is off so it must not access
- * any of the virtual mappings.
- */
-static inline int put_page_unless_one(struct page *page)
-{
- return atomic_add_unless(&page->_count, -1, 1);
-}
-
extern int page_is_ram(unsigned long pfn);
-extern int region_is_ram(resource_size_t phys_addr, unsigned long size);
+
+enum {
+ REGION_INTERSECTS,
+ REGION_DISJOINT,
+ REGION_MIXED,
+};
+
+int region_intersects(resource_size_t offset, size_t size, const char *type);
/* Support for virtually mapped pages */
struct page *vmalloc_to_page(const void *addr);
@@ -436,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page,
#endif
}
-static inline struct page *compound_head_by_tail(struct page *tail)
-{
- struct page *head = tail->first_page;
-
- /*
- * page->first_page may be a dangling pointer to an old
- * compound page, so recheck that it is still a tail
- * page before returning.
- */
- smp_rmb();
- if (likely(PageTail(tail)))
- return head;
- return tail;
-}
-
-/*
- * Since either compound page could be dismantled asynchronously in THP
- * or we access asynchronously arbitrary positioned struct page, there
- * would be tail flag race. To handle this race, we should call
- * smp_rmb() before checking tail flag. compound_head_by_tail() did it.
- */
-static inline struct page *compound_head(struct page *page)
-{
- if (unlikely(PageTail(page)))
- return compound_head_by_tail(page);
- return page;
-}
-
-/*
- * If we access compound page synchronously such as access to
- * allocated page, there is no need to handle tail flag race, so we can
- * check tail flag directly without any synchronization primitive.
- */
-static inline struct page *compound_head_fast(struct page *page)
-{
- if (unlikely(PageTail(page)))
- return page->first_page;
- return page;
-}
-
/*
* The atomic page->_mapcount, starts from -1: so that transitions
* both from it and to it can be tracked, using atomic_inc_and_test
@@ -499,7 +453,7 @@ static inline int page_count(struct page *page)
static inline bool __compound_tail_refcounted(struct page *page)
{
- return !PageSlab(page) && !PageHeadHuge(page);
+ return PageAnon(page) && !PageSlab(page) && !PageHeadHuge(page);
}
/*
@@ -524,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page)
VM_BUG_ON_PAGE(!PageTail(page), page);
VM_BUG_ON_PAGE(page_mapcount(page) < 0, page);
VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page);
- if (compound_tail_refcounted(page->first_page))
+ if (compound_tail_refcounted(compound_head(page)))
atomic_inc(&page->_mapcount);
}
@@ -547,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x)
{
struct page *page = virt_to_page(x);
- /*
- * We don't need to worry about synchronization of tail flag
- * when we call virt_to_head_page() since it is only called for
- * already allocated page and this page won't be freed until
- * this virt_to_head_page() is finished. So use _fast variant.
- */
- return compound_head_fast(page);
+ return compound_head(page);
}
/*
@@ -574,28 +522,42 @@ int split_free_page(struct page *page);
/*
* Compound pages have a destructor function. Provide a
* prototype for that function and accessor functions.
- * These are _only_ valid on the head of a PG_compound page.
+ * These are _only_ valid on the head of a compound page.
*/
+typedef void compound_page_dtor(struct page *);
+
+/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */
+enum compound_dtor_id {
+ NULL_COMPOUND_DTOR,
+ COMPOUND_PAGE_DTOR,
+#ifdef CONFIG_HUGETLB_PAGE
+ HUGETLB_PAGE_DTOR,
+#endif
+ NR_COMPOUND_DTORS,
+};
+extern compound_page_dtor * const compound_page_dtors[];
static inline void set_compound_page_dtor(struct page *page,
- compound_page_dtor *dtor)
+ enum compound_dtor_id compound_dtor)
{
- page[1].compound_dtor = dtor;
+ VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page);
+ page[1].compound_dtor = compound_dtor;
}
static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
{
- return page[1].compound_dtor;
+ VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
+ return compound_page_dtors[page[1].compound_dtor];
}
-static inline int compound_order(struct page *page)
+static inline unsigned int compound_order(struct page *page)
{
if (!PageHead(page))
return 0;
return page[1].compound_order;
}
-static inline void set_compound_order(struct page *page, unsigned long order)
+static inline void set_compound_order(struct page *page, unsigned int order)
{
page[1].compound_order = order;
}
@@ -915,6 +877,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
#endif
}
+#ifdef CONFIG_MEMCG
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+ return page->mem_cgroup;
+}
+
+static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
+{
+ page->mem_cgroup = memcg;
+}
+#else
+static inline struct mem_cgroup *page_memcg(struct page *page)
+{
+ return NULL;
+}
+
+static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg)
+{
+}
+#endif
+
/*
* Some inline functions in vmstat.h depend on page_zone()
*/
@@ -1225,6 +1208,49 @@ long get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
int write, int force, struct page **pages);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
+
+/* Container for pinned pfns / pages */
+struct frame_vector {
+ unsigned int nr_allocated; /* Number of frames we have space for */
+ unsigned int nr_frames; /* Number of frames stored in ptrs array */
+ bool got_ref; /* Did we pin pages by getting page ref? */
+ bool is_pfns; /* Does array contain pfns (true) or pages (false)? */
+ void *ptrs[0]; /* Array of pinned pfns / pages. Use
+ * frame_vector_pages() or frame_vector_pfns()
+ * for access */
+};
+
+struct frame_vector *frame_vector_create(unsigned int nr_frames);
+void frame_vector_destroy(struct frame_vector *vec);
+int get_vaddr_frames(unsigned long start, unsigned int nr_pfns,
+ bool write, bool force, struct frame_vector *vec);
+void put_vaddr_frames(struct frame_vector *vec);
+int frame_vector_to_pages(struct frame_vector *vec);
+void frame_vector_to_pfns(struct frame_vector *vec);
+
+static inline unsigned int frame_vector_count(struct frame_vector *vec)
+{
+ return vec->nr_frames;
+}
+
+static inline struct page **frame_vector_pages(struct frame_vector *vec)
+{
+ if (vec->is_pfns) {
+ int err = frame_vector_to_pages(vec);
+
+ if (err)
+ return ERR_PTR(err);
+ }
+ return (struct page **)(vec->ptrs);
+}
+
+static inline unsigned long *frame_vector_pfns(struct frame_vector *vec)
+{
+ if (!vec->is_pfns)
+ frame_vector_to_pfns(vec);
+ return (unsigned long *)(vec->ptrs);
+}
+
struct kvec;
int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
struct page **pages);
@@ -1239,10 +1265,13 @@ int __set_page_dirty_nobuffers(struct page *page);
int __set_page_dirty_no_writeback(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
struct page *page);
-void account_page_dirtied(struct page *page, struct address_space *mapping);
-void account_page_cleaned(struct page *page, struct address_space *mapping);
+void account_page_dirtied(struct page *page, struct address_space *mapping,
+ struct mem_cgroup *memcg);
+void account_page_cleaned(struct page *page, struct address_space *mapping,
+ struct mem_cgroup *memcg, struct bdi_writeback *wb);
int set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
+void cancel_dirty_page(struct page *page);
int clear_page_dirty_for_io(struct page *page);
int get_cmdline(struct task_struct *task, char *buffer, int buflen);
@@ -1253,6 +1282,11 @@ static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
}
+static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+{
+ return !vma->vm_ops;
+}
+
static inline int stack_guard_page_start(struct vm_area_struct *vma,
unsigned long addr)
{
@@ -1506,8 +1540,7 @@ static inline bool ptlock_init(struct page *page)
* with 0. Make sure nobody took it in use in between.
*
* It can happen if an arch tries to use slab for page table allocation:
- * slab code uses page->slab_cache and page->first_page (for tail
- * pages), which share storage with page->ptl.
+ * slab code uses page->slab_cache, which shares storage with page->ptl.
*/
VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page);
if (!ptlock_alloc(page))
@@ -1544,8 +1577,10 @@ static inline void pgtable_init(void)
static inline bool pgtable_page_ctor(struct page *page)
{
+ if (!ptlock_init(page))
+ return false;
inc_zone_page_state(page, NR_PAGETABLE);
- return ptlock_init(page);
+ return true;
}
static inline void pgtable_page_dtor(struct page *page)
@@ -1659,6 +1694,8 @@ extern void free_highmem_page(struct page *page);
extern void adjust_managed_page_count(struct page *page, long count);
extern void mem_init_print_info(const char *str);
+extern void reserve_bootmem_region(unsigned long start, unsigned long end);
+
/* Free the reserved page into the buddy system, so it gets managed. */
static inline void __free_reserved_page(struct page *page)
{
@@ -1748,7 +1785,8 @@ extern void sparse_memory_present_with_active_regions(int nid);
#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
!defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
-static inline int __early_pfn_to_nid(unsigned long pfn)
+static inline int __early_pfn_to_nid(unsigned long pfn,
+ struct mminit_pfnnid_cache *state)
{
return 0;
}
@@ -1756,7 +1794,8 @@ static inline int __early_pfn_to_nid(unsigned long pfn)
/* please see mm/page_alloc.c */
extern int __meminit early_pfn_to_nid(unsigned long pfn);
/* there is a per-arch backend function. */
-extern int __meminit __early_pfn_to_nid(unsigned long pfn);
+extern int __meminit __early_pfn_to_nid(unsigned long pfn,
+ struct mminit_pfnnid_cache *state);
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
@@ -1771,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val);
extern void si_meminfo_node(struct sysinfo *val, int nid);
extern __printf(3, 4)
-void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);
+void warn_alloc_failed(gfp_t gfp_mask, unsigned int order,
+ const char *fmt, ...);
extern void setup_per_cpu_pageset(void);
@@ -1825,7 +1865,7 @@ extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
extern struct vm_area_struct *vma_merge(struct mm_struct *,
struct vm_area_struct *prev, unsigned long addr, unsigned long end,
unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
- struct mempolicy *);
+ struct mempolicy *, struct vm_userfaultfd_ctx);
extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
extern int split_vma(struct mm_struct *,
struct vm_area_struct *, unsigned long addr, int new_below);
@@ -1872,11 +1912,19 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
extern unsigned long mmap_region(struct file *file, unsigned long addr,
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+extern unsigned long do_mmap(struct file *file, unsigned long addr,
unsigned long len, unsigned long prot, unsigned long flags,
- unsigned long pgoff, unsigned long *populate);
+ vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate);
extern int do_munmap(struct mm_struct *, unsigned long, size_t);
+static inline unsigned long
+do_mmap_pgoff(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot, unsigned long flags,
+ unsigned long pgoff, unsigned long *populate)
+{
+ return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate);
+}
+
#ifdef CONFIG_MMU
extern int __mm_populate(unsigned long addr, unsigned long len,
int ignore_errors);
@@ -1962,8 +2010,6 @@ void page_cache_async_readahead(struct address_space *mapping,
pgoff_t offset,
unsigned long size);
-unsigned long max_sane_readahead(unsigned long nr);
-
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
@@ -2063,6 +2109,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma,
#define FOLL_NUMA 0x200 /* force NUMA hinting page fault */
#define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */
#define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */
+#define FOLL_MLOCK 0x1000 /* lock present pages */
typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
void *data);
@@ -2174,12 +2221,48 @@ enum mf_flags {
extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
extern int unpoison_memory(unsigned long pfn);
+extern int get_hwpoison_page(struct page *page);
+extern void put_hwpoison_page(struct page *page);
extern int sysctl_memory_failure_early_kill;
extern int sysctl_memory_failure_recovery;
extern void shake_page(struct page *p, int access);
extern atomic_long_t num_poisoned_pages;
extern int soft_offline_page(struct page *page, int flags);
+
+/*
+ * Error handlers for various types of pages.
+ */
+enum mf_result {
+ MF_IGNORED, /* Error: cannot be handled */
+ MF_FAILED, /* Error: handling failed */
+ MF_DELAYED, /* Will be handled later */
+ MF_RECOVERED, /* Successfully recovered */
+};
+
+enum mf_action_page_type {
+ MF_MSG_KERNEL,
+ MF_MSG_KERNEL_HIGH_ORDER,
+ MF_MSG_SLAB,
+ MF_MSG_DIFFERENT_COMPOUND,
+ MF_MSG_POISONED_HUGE,
+ MF_MSG_HUGE,
+ MF_MSG_FREE_HUGE,
+ MF_MSG_UNMAP_FAILED,
+ MF_MSG_DIRTY_SWAPCACHE,
+ MF_MSG_CLEAN_SWAPCACHE,
+ MF_MSG_DIRTY_MLOCKED_LRU,
+ MF_MSG_CLEAN_MLOCKED_LRU,
+ MF_MSG_DIRTY_UNEVICTABLE_LRU,
+ MF_MSG_CLEAN_UNEVICTABLE_LRU,
+ MF_MSG_DIRTY_LRU,
+ MF_MSG_CLEAN_LRU,
+ MF_MSG_TRUNCATED_LRU,
+ MF_MSG_BUDDY,
+ MF_MSG_BUDDY_2ND,
+ MF_MSG_UNKNOWN,
+};
+
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLBFS)
extern void clear_huge_page(struct page *page,
unsigned long addr,