summaryrefslogtreecommitdiffstats
path: root/kernel/mm/hugetlb.c
diff options
context:
space:
mode:
authorJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-11 10:41:07 +0300
committerJosé Pekkarinen <jose.pekkarinen@nokia.com>2016-04-13 08:17:18 +0300
commite09b41010ba33a20a87472ee821fa407a5b8da36 (patch)
treed10dc367189862e7ca5c592f033dc3726e1df4e3 /kernel/mm/hugetlb.c
parentf93b97fd65072de626c074dbe099a1fff05ce060 (diff)
These changes are the raw update to linux-4.4.6-rt14. Kernel sources
are taken from kernel.org, and rt patch from the rt wiki download page. During the rebasing, the following patch collided: Force tick interrupt and get rid of softirq magic(I70131fb85). Collisions have been removed because its logic was found on the source already. Change-Id: I7f57a4081d9deaa0d9ccfc41a6c8daccdee3b769 Signed-off-by: José Pekkarinen <jose.pekkarinen@nokia.com>
Diffstat (limited to 'kernel/mm/hugetlb.c')
-rw-r--r--kernel/mm/hugetlb.c781
1 files changed, 631 insertions, 150 deletions
diff --git a/kernel/mm/hugetlb.c b/kernel/mm/hugetlb.c
index 8c4c1f9f9..ef6963b57 100644
--- a/kernel/mm/hugetlb.c
+++ b/kernel/mm/hugetlb.c
@@ -64,7 +64,7 @@ DEFINE_SPINLOCK(hugetlb_lock);
* prevent spurious OOMs when the hugepage pool is fully utilized.
*/
static int num_fault_mutexes;
-static struct mutex *htlb_fault_mutex_table ____cacheline_aligned_in_smp;
+struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
@@ -217,8 +217,20 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
* Region tracking -- allows tracking of reservations and instantiated pages
* across the pages in a mapping.
*
- * The region data structures are embedded into a resv_map and
- * protected by a resv_map's lock
+ * The region data structures are embedded into a resv_map and protected
+ * by a resv_map's lock. The set of regions within the resv_map represent
+ * reservations for huge pages, or huge pages that have already been
+ * instantiated within the map. The from and to elements are huge page
+ * indicies into the associated mapping. from indicates the starting index
+ * of the region. to represents the first index past the end of the region.
+ *
+ * For example, a file region structure with from == 0 and to == 4 represents
+ * four huge pages in a mapping. It is important to note that the to element
+ * represents the first element past the end of the region. This is used in
+ * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
+ *
+ * Interval notation of the form [from, to) will be used to indicate that
+ * the endpoint from is inclusive and to is exclusive.
*/
struct file_region {
struct list_head link;
@@ -226,10 +238,25 @@ struct file_region {
long to;
};
+/*
+ * Add the huge page range represented by [f, t) to the reserve
+ * map. In the normal case, existing regions will be expanded
+ * to accommodate the specified range. Sufficient regions should
+ * exist for expansion due to the previous call to region_chg
+ * with the same range. However, it is possible that region_del
+ * could have been called after region_chg and modifed the map
+ * in such a way that no region exists to be expanded. In this
+ * case, pull a region descriptor from the cache associated with
+ * the map and use that for the new range.
+ *
+ * Return the number of new huge pages added to the map. This
+ * number is greater than or equal to zero.
+ */
static long region_add(struct resv_map *resv, long f, long t)
{
struct list_head *head = &resv->regions;
struct file_region *rg, *nrg, *trg;
+ long add = 0;
spin_lock(&resv->lock);
/* Locate the region we are either in or before. */
@@ -237,6 +264,28 @@ static long region_add(struct resv_map *resv, long f, long t)
if (f <= rg->to)
break;
+ /*
+ * If no region exists which can be expanded to include the
+ * specified range, the list must have been modified by an
+ * interleving call to region_del(). Pull a region descriptor
+ * from the cache and use it for this range.
+ */
+ if (&rg->link == head || t < rg->from) {
+ VM_BUG_ON(resv->region_cache_count <= 0);
+
+ resv->region_cache_count--;
+ nrg = list_first_entry(&resv->region_cache, struct file_region,
+ link);
+ list_del(&nrg->link);
+
+ nrg->from = f;
+ nrg->to = t;
+ list_add(&nrg->link, rg->link.prev);
+
+ add += t - f;
+ goto out_locked;
+ }
+
/* Round our left edge to the current segment if it encloses us. */
if (f > rg->from)
f = rg->from;
@@ -255,16 +304,50 @@ static long region_add(struct resv_map *resv, long f, long t)
if (rg->to > t)
t = rg->to;
if (rg != nrg) {
+ /* Decrement return value by the deleted range.
+ * Another range will span this area so that by
+ * end of routine add will be >= zero
+ */
+ add -= (rg->to - rg->from);
list_del(&rg->link);
kfree(rg);
}
}
+
+ add += (nrg->from - f); /* Added to beginning of region */
nrg->from = f;
+ add += t - nrg->to; /* Added to end of region */
nrg->to = t;
+
+out_locked:
+ resv->adds_in_progress--;
spin_unlock(&resv->lock);
- return 0;
+ VM_BUG_ON(add < 0);
+ return add;
}
+/*
+ * Examine the existing reserve map and determine how many
+ * huge pages in the specified range [f, t) are NOT currently
+ * represented. This routine is called before a subsequent
+ * call to region_add that will actually modify the reserve
+ * map to add the specified range [f, t). region_chg does
+ * not change the number of huge pages represented by the
+ * map. However, if the existing regions in the map can not
+ * be expanded to represent the new range, a new file_region
+ * structure is added to the map as a placeholder. This is
+ * so that the subsequent region_add call will have all the
+ * regions it needs and will not fail.
+ *
+ * Upon entry, region_chg will also examine the cache of region descriptors
+ * associated with the map. If there are not enough descriptors cached, one
+ * will be allocated for the in progress add operation.
+ *
+ * Returns the number of huge pages that need to be added to the existing
+ * reservation map for the range [f, t). This number is greater or equal to
+ * zero. -ENOMEM is returned if a new file_region structure or cache entry
+ * is needed and can not be allocated.
+ */
static long region_chg(struct resv_map *resv, long f, long t)
{
struct list_head *head = &resv->regions;
@@ -273,6 +356,33 @@ static long region_chg(struct resv_map *resv, long f, long t)
retry:
spin_lock(&resv->lock);
+retry_locked:
+ resv->adds_in_progress++;
+
+ /*
+ * Check for sufficient descriptors in the cache to accommodate
+ * the number of in progress add operations.
+ */
+ if (resv->adds_in_progress > resv->region_cache_count) {
+ struct file_region *trg;
+
+ VM_BUG_ON(resv->adds_in_progress - resv->region_cache_count > 1);
+ /* Must drop lock to allocate a new descriptor. */
+ resv->adds_in_progress--;
+ spin_unlock(&resv->lock);
+
+ trg = kmalloc(sizeof(*trg), GFP_KERNEL);
+ if (!trg) {
+ kfree(nrg);
+ return -ENOMEM;
+ }
+
+ spin_lock(&resv->lock);
+ list_add(&trg->link, &resv->region_cache);
+ resv->region_cache_count++;
+ goto retry_locked;
+ }
+
/* Locate the region we are before or in. */
list_for_each_entry(rg, head, link)
if (f <= rg->to)
@@ -283,6 +393,7 @@ retry:
* size such that we can guarantee to record the reservation. */
if (&rg->link == head || t < rg->from) {
if (!nrg) {
+ resv->adds_in_progress--;
spin_unlock(&resv->lock);
nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
if (!nrg)
@@ -331,41 +442,146 @@ out_nrg:
return chg;
}
-static long region_truncate(struct resv_map *resv, long end)
+/*
+ * Abort the in progress add operation. The adds_in_progress field
+ * of the resv_map keeps track of the operations in progress between
+ * calls to region_chg and region_add. Operations are sometimes
+ * aborted after the call to region_chg. In such cases, region_abort
+ * is called to decrement the adds_in_progress counter.
+ *
+ * NOTE: The range arguments [f, t) are not needed or used in this
+ * routine. They are kept to make reading the calling code easier as
+ * arguments will match the associated region_chg call.
+ */
+static void region_abort(struct resv_map *resv, long f, long t)
+{
+ spin_lock(&resv->lock);
+ VM_BUG_ON(!resv->region_cache_count);
+ resv->adds_in_progress--;
+ spin_unlock(&resv->lock);
+}
+
+/*
+ * Delete the specified range [f, t) from the reserve map. If the
+ * t parameter is LONG_MAX, this indicates that ALL regions after f
+ * should be deleted. Locate the regions which intersect [f, t)
+ * and either trim, delete or split the existing regions.
+ *
+ * Returns the number of huge pages deleted from the reserve map.
+ * In the normal case, the return value is zero or more. In the
+ * case where a region must be split, a new region descriptor must
+ * be allocated. If the allocation fails, -ENOMEM will be returned.
+ * NOTE: If the parameter t == LONG_MAX, then we will never split
+ * a region and possibly return -ENOMEM. Callers specifying
+ * t == LONG_MAX do not need to check for -ENOMEM error.
+ */
+static long region_del(struct resv_map *resv, long f, long t)
{
struct list_head *head = &resv->regions;
struct file_region *rg, *trg;
- long chg = 0;
+ struct file_region *nrg = NULL;
+ long del = 0;
+retry:
spin_lock(&resv->lock);
- /* Locate the region we are either in or before. */
- list_for_each_entry(rg, head, link)
- if (end <= rg->to)
+ list_for_each_entry_safe(rg, trg, head, link) {
+ /*
+ * Skip regions before the range to be deleted. file_region
+ * ranges are normally of the form [from, to). However, there
+ * may be a "placeholder" entry in the map which is of the form
+ * (from, to) with from == to. Check for placeholder entries
+ * at the beginning of the range to be deleted.
+ */
+ if (rg->to <= f && (rg->to != rg->from || rg->to != f))
+ continue;
+
+ if (rg->from >= t)
break;
- if (&rg->link == head)
- goto out;
- /* If we are in the middle of a region then adjust it. */
- if (end > rg->from) {
- chg = rg->to - end;
- rg->to = end;
- rg = list_entry(rg->link.next, typeof(*rg), link);
- }
+ if (f > rg->from && t < rg->to) { /* Must split region */
+ /*
+ * Check for an entry in the cache before dropping
+ * lock and attempting allocation.
+ */
+ if (!nrg &&
+ resv->region_cache_count > resv->adds_in_progress) {
+ nrg = list_first_entry(&resv->region_cache,
+ struct file_region,
+ link);
+ list_del(&nrg->link);
+ resv->region_cache_count--;
+ }
- /* Drop any remaining regions. */
- list_for_each_entry_safe(rg, trg, rg->link.prev, link) {
- if (&rg->link == head)
+ if (!nrg) {
+ spin_unlock(&resv->lock);
+ nrg = kmalloc(sizeof(*nrg), GFP_KERNEL);
+ if (!nrg)
+ return -ENOMEM;
+ goto retry;
+ }
+
+ del += t - f;
+
+ /* New entry for end of split region */
+ nrg->from = t;
+ nrg->to = rg->to;
+ INIT_LIST_HEAD(&nrg->link);
+
+ /* Original entry is trimmed */
+ rg->to = f;
+
+ list_add(&nrg->link, &rg->link);
+ nrg = NULL;
break;
- chg += rg->to - rg->from;
- list_del(&rg->link);
- kfree(rg);
+ }
+
+ if (f <= rg->from && t >= rg->to) { /* Remove entire region */
+ del += rg->to - rg->from;
+ list_del(&rg->link);
+ kfree(rg);
+ continue;
+ }
+
+ if (f <= rg->from) { /* Trim beginning of region */
+ del += t - rg->from;
+ rg->from = t;
+ } else { /* Trim end of region */
+ del += rg->to - f;
+ rg->to = f;
+ }
}
-out:
spin_unlock(&resv->lock);
- return chg;
+ kfree(nrg);
+ return del;
}
+/*
+ * A rare out of memory error was encountered which prevented removal of
+ * the reserve map region for a page. The huge page itself was free'ed
+ * and removed from the page cache. This routine will adjust the subpool
+ * usage count, and the global reserve count if needed. By incrementing
+ * these counts, the reserve map entry which could not be deleted will
+ * appear as a "reserved" entry instead of simply dangling with incorrect
+ * counts.
+ */
+void hugetlb_fix_reserve_counts(struct inode *inode, bool restore_reserve)
+{
+ struct hugepage_subpool *spool = subpool_inode(inode);
+ long rsv_adjust;
+
+ rsv_adjust = hugepage_subpool_get_pages(spool, 1);
+ if (restore_reserve && rsv_adjust) {
+ struct hstate *h = hstate_inode(inode);
+
+ hugetlb_acct_memory(h, 1);
+ }
+}
+
+/*
+ * Count and return the number of huge pages in the reserve map
+ * that intersect with the range [f, t).
+ */
static long region_count(struct resv_map *resv, long f, long t)
{
struct list_head *head = &resv->regions;
@@ -482,22 +698,44 @@ static void set_vma_private_data(struct vm_area_struct *vma,
struct resv_map *resv_map_alloc(void)
{
struct resv_map *resv_map = kmalloc(sizeof(*resv_map), GFP_KERNEL);
- if (!resv_map)
+ struct file_region *rg = kmalloc(sizeof(*rg), GFP_KERNEL);
+
+ if (!resv_map || !rg) {
+ kfree(resv_map);
+ kfree(rg);
return NULL;
+ }
kref_init(&resv_map->refs);
spin_lock_init(&resv_map->lock);
INIT_LIST_HEAD(&resv_map->regions);
+ resv_map->adds_in_progress = 0;
+
+ INIT_LIST_HEAD(&resv_map->region_cache);
+ list_add(&rg->link, &resv_map->region_cache);
+ resv_map->region_cache_count = 1;
+
return resv_map;
}
void resv_map_release(struct kref *ref)
{
struct resv_map *resv_map = container_of(ref, struct resv_map, refs);
+ struct list_head *head = &resv_map->region_cache;
+ struct file_region *rg, *trg;
/* Clear out any active regions before we release the map. */
- region_truncate(resv_map, 0);
+ region_del(resv_map, 0, LONG_MAX);
+
+ /* ... and any entries left in the cache */
+ list_for_each_entry_safe(rg, trg, head, link) {
+ list_del(&rg->link);
+ kfree(rg);
+ }
+
+ VM_BUG_ON(resv_map->adds_in_progress);
+
kfree(resv_map);
}
@@ -554,7 +792,7 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
}
/* Returns true if the VMA has associated reserve pages */
-static int vma_has_reserves(struct vm_area_struct *vma, long chg)
+static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
{
if (vma->vm_flags & VM_NORESERVE) {
/*
@@ -567,23 +805,34 @@ static int vma_has_reserves(struct vm_area_struct *vma, long chg)
* properly, so add work-around here.
*/
if (vma->vm_flags & VM_MAYSHARE && chg == 0)
- return 1;
+ return true;
else
- return 0;
+ return false;
}
/* Shared mappings always use reserves */
- if (vma->vm_flags & VM_MAYSHARE)
- return 1;
+ if (vma->vm_flags & VM_MAYSHARE) {
+ /*
+ * We know VM_NORESERVE is not set. Therefore, there SHOULD
+ * be a region map for all pages. The only situation where
+ * there is no region map is if a hole was punched via
+ * fallocate. In this case, there really are no reverves to
+ * use. This situation is indicated if chg != 0.
+ */
+ if (chg)
+ return false;
+ else
+ return true;
+ }
/*
* Only the process that called mmap() has reserves for
* private mappings.
*/
if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
- return 1;
+ return true;
- return 0;
+ return false;
}
static void enqueue_huge_page(struct hstate *h, struct page *page)
@@ -755,23 +1004,22 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
static void destroy_compound_gigantic_page(struct page *page,
- unsigned long order)
+ unsigned int order)
{
int i;
int nr_pages = 1 << order;
struct page *p = page + 1;
for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
- __ClearPageTail(p);
+ clear_compound_head(p);
set_page_refcounted(p);
- p->first_page = NULL;
}
set_compound_order(page, 0);
__ClearPageHead(page);
}
-static void free_gigantic_page(struct page *page, unsigned order)
+static void free_gigantic_page(struct page *page, unsigned int order)
{
free_contig_range(page_to_pfn(page), 1 << order);
}
@@ -815,7 +1063,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
return zone_spans_pfn(zone, last_pfn);
}
-static struct page *alloc_gigantic_page(int nid, unsigned order)
+static struct page *alloc_gigantic_page(int nid, unsigned int order)
{
unsigned long nr_pages = 1 << order;
unsigned long ret, pfn, flags;
@@ -851,7 +1099,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order)
}
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
-static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+static void prep_compound_gigantic_page(struct page *page, unsigned int order);
static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
{
@@ -884,9 +1132,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
static inline bool gigantic_page_supported(void) { return true; }
#else
static inline bool gigantic_page_supported(void) { return false; }
-static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void free_gigantic_page(struct page *page, unsigned int order) { }
static inline void destroy_compound_gigantic_page(struct page *page,
- unsigned long order) { }
+ unsigned int order) { }
static inline int alloc_fresh_gigantic_page(struct hstate *h,
nodemask_t *nodes_allowed) { return 0; }
#endif
@@ -907,13 +1155,12 @@ static void update_and_free_page(struct hstate *h, struct page *page)
1 << PG_writeback);
}
VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
- set_compound_page_dtor(page, NULL);
+ set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
set_page_refcounted(page);
if (hstate_is_gigantic(h)) {
destroy_compound_gigantic_page(page, huge_page_order(h));
free_gigantic_page(page, huge_page_order(h));
} else {
- arch_release_hugepage(page);
__free_pages(page, huge_page_order(h));
}
}
@@ -1004,7 +1251,7 @@ void free_huge_page(struct page *page)
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
INIT_LIST_HEAD(&page->lru);
- set_compound_page_dtor(page, free_huge_page);
+ set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
spin_lock(&hugetlb_lock);
set_hugetlb_cgroup(page, NULL);
h->nr_huge_pages++;
@@ -1013,7 +1260,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
put_page(page); /* free it into the hugepage allocator */
}
-static void prep_compound_gigantic_page(struct page *page, unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned int order)
{
int i;
int nr_pages = 1 << order;
@@ -1038,10 +1285,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
*/
__ClearPageReserved(p);
set_page_count(p, 0);
- p->first_page = page;
- /* Make sure p->first_page is always valid for PageTail() */
- smp_wmb();
- __SetPageTail(p);
+ set_compound_head(p, page);
}
}
@@ -1056,7 +1300,7 @@ int PageHuge(struct page *page)
return 0;
page = compound_head(page);
- return get_compound_page_dtor(page) == free_huge_page;
+ return page[1].compound_dtor == HUGETLB_PAGE_DTOR;
}
EXPORT_SYMBOL_GPL(PageHuge);
@@ -1093,15 +1337,11 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
- page = alloc_pages_exact_node(nid,
+ page = __alloc_pages_node(nid,
htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
if (page) {
- if (arch_prepare_hugepage(page)) {
- __free_pages(page, huge_page_order(h));
- return NULL;
- }
prep_new_huge_page(h, page, nid);
}
@@ -1203,7 +1443,82 @@ void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
dissolve_free_huge_page(pfn_to_page(pfn));
}
-static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
+/*
+ * There are 3 ways this can get called:
+ * 1. With vma+addr: we use the VMA's memory policy
+ * 2. With !vma, but nid=NUMA_NO_NODE: We try to allocate a huge
+ * page from any node, and let the buddy allocator itself figure
+ * it out.
+ * 3. With !vma, but nid!=NUMA_NO_NODE. We allocate a huge page
+ * strictly from 'nid'
+ */
+static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr, int nid)
+{
+ int order = huge_page_order(h);
+ gfp_t gfp = htlb_alloc_mask(h)|__GFP_COMP|__GFP_REPEAT|__GFP_NOWARN;
+ unsigned int cpuset_mems_cookie;
+
+ /*
+ * We need a VMA to get a memory policy. If we do not
+ * have one, we use the 'nid' argument.
+ *
+ * The mempolicy stuff below has some non-inlined bits
+ * and calls ->vm_ops. That makes it hard to optimize at
+ * compile-time, even when NUMA is off and it does
+ * nothing. This helps the compiler optimize it out.
+ */
+ if (!IS_ENABLED(CONFIG_NUMA) || !vma) {
+ /*
+ * If a specific node is requested, make sure to
+ * get memory from there, but only when a node
+ * is explicitly specified.
+ */
+ if (nid != NUMA_NO_NODE)
+ gfp |= __GFP_THISNODE;
+ /*
+ * Make sure to call something that can handle
+ * nid=NUMA_NO_NODE
+ */
+ return alloc_pages_node(nid, gfp, order);
+ }
+
+ /*
+ * OK, so we have a VMA. Fetch the mempolicy and try to
+ * allocate a huge page with it. We will only reach this
+ * when CONFIG_NUMA=y.
+ */
+ do {
+ struct page *page;
+ struct mempolicy *mpol;
+ struct zonelist *zl;
+ nodemask_t *nodemask;
+
+ cpuset_mems_cookie = read_mems_allowed_begin();
+ zl = huge_zonelist(vma, addr, gfp, &mpol, &nodemask);
+ mpol_cond_put(mpol);
+ page = __alloc_pages_nodemask(gfp, order, zl, nodemask);
+ if (page)
+ return page;
+ } while (read_mems_allowed_retry(cpuset_mems_cookie));
+
+ return NULL;
+}
+
+/*
+ * There are two ways to allocate a huge page:
+ * 1. When you have a VMA and an address (like a fault)
+ * 2. When you have no VMA (like when setting /proc/.../nr_hugepages)
+ *
+ * 'vma' and 'addr' are only for (1). 'nid' is always NUMA_NO_NODE in
+ * this case which signifies that the allocation should be done with
+ * respect for the VMA's memory policy.
+ *
+ * For (2), we ignore 'vma' and 'addr' and use 'nid' exclusively. This
+ * implies that memory policies will not be taken in to account.
+ */
+static struct page *__alloc_buddy_huge_page(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr, int nid)
{
struct page *page;
unsigned int r_nid;
@@ -1212,6 +1527,15 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
return NULL;
/*
+ * Make sure that anyone specifying 'nid' is not also specifying a VMA.
+ * This makes sure the caller is picking _one_ of the modes with which
+ * we can call this function, not both.
+ */
+ if (vma || (addr != -1)) {
+ VM_WARN_ON_ONCE(addr == -1);
+ VM_WARN_ON_ONCE(nid != NUMA_NO_NODE);
+ }
+ /*
* Assume we will successfully allocate the surplus page to
* prevent racing processes from causing the surplus to exceed
* overcommit
@@ -1244,25 +1568,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
}
spin_unlock(&hugetlb_lock);
- if (nid == NUMA_NO_NODE)
- page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
- __GFP_REPEAT|__GFP_NOWARN,
- huge_page_order(h));
- else
- page = alloc_pages_exact_node(nid,
- htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
- __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
-
- if (page && arch_prepare_hugepage(page)) {
- __free_pages(page, huge_page_order(h));
- page = NULL;
- }
+ page = __hugetlb_alloc_buddy_huge_page(h, vma, addr, nid);
spin_lock(&hugetlb_lock);
if (page) {
INIT_LIST_HEAD(&page->lru);
r_nid = page_to_nid(page);
- set_compound_page_dtor(page, free_huge_page);
+ set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
set_hugetlb_cgroup(page, NULL);
/*
* We incremented the global counters already
@@ -1281,6 +1593,29 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
}
/*
+ * Allocate a huge page from 'nid'. Note, 'nid' may be
+ * NUMA_NO_NODE, which means that it may be allocated
+ * anywhere.
+ */
+static
+struct page *__alloc_buddy_huge_page_no_mpol(struct hstate *h, int nid)
+{
+ unsigned long addr = -1;
+
+ return __alloc_buddy_huge_page(h, NULL, addr, nid);
+}
+
+/*
+ * Use the VMA's mpolicy to allocate a huge page from the buddy.
+ */
+static
+struct page *__alloc_buddy_huge_page_with_mpol(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ return __alloc_buddy_huge_page(h, vma, addr, NUMA_NO_NODE);
+}
+
+/*
* This allocation function is useful in the context where vma is irrelevant.
* E.g. soft-offlining uses this function because it only cares physical
* address of error page.
@@ -1295,7 +1630,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid)
spin_unlock(&hugetlb_lock);
if (!page)
- page = alloc_buddy_huge_page(h, nid);
+ page = __alloc_buddy_huge_page_no_mpol(h, nid);
return page;
}
@@ -1325,7 +1660,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
retry:
spin_unlock(&hugetlb_lock);
for (i = 0; i < needed; i++) {
- page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+ page = __alloc_buddy_huge_page_no_mpol(h, NUMA_NO_NODE);
if (!page) {
alloc_ok = false;
break;
@@ -1421,87 +1756,150 @@ static void return_unused_surplus_pages(struct hstate *h,
}
}
+
/*
- * Determine if the huge page at addr within the vma has an associated
- * reservation. Where it does not we will need to logically increase
- * reservation and actually increase subpool usage before an allocation
- * can occur. Where any new reservation would be required the
- * reservation change is prepared, but not committed. Once the page
- * has been allocated from the subpool and instantiated the change should
- * be committed via vma_commit_reservation. No action is required on
- * failure.
+ * vma_needs_reservation, vma_commit_reservation and vma_end_reservation
+ * are used by the huge page allocation routines to manage reservations.
+ *
+ * vma_needs_reservation is called to determine if the huge page at addr
+ * within the vma has an associated reservation. If a reservation is
+ * needed, the value 1 is returned. The caller is then responsible for
+ * managing the global reservation and subpool usage counts. After
+ * the huge page has been allocated, vma_commit_reservation is called
+ * to add the page to the reservation map. If the page allocation fails,
+ * the reservation must be ended instead of committed. vma_end_reservation
+ * is called in such cases.
+ *
+ * In the normal case, vma_commit_reservation returns the same value
+ * as the preceding vma_needs_reservation call. The only time this
+ * is not the case is if a reserve map was changed between calls. It
+ * is the responsibility of the caller to notice the difference and
+ * take appropriate action.
*/
-static long vma_needs_reservation(struct hstate *h,
- struct vm_area_struct *vma, unsigned long addr)
+enum vma_resv_mode {
+ VMA_NEEDS_RESV,
+ VMA_COMMIT_RESV,
+ VMA_END_RESV,
+};
+static long __vma_reservation_common(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr,
+ enum vma_resv_mode mode)
{
struct resv_map *resv;
pgoff_t idx;
- long chg;
+ long ret;
resv = vma_resv_map(vma);
if (!resv)
return 1;
idx = vma_hugecache_offset(h, vma, addr);
- chg = region_chg(resv, idx, idx + 1);
+ switch (mode) {
+ case VMA_NEEDS_RESV:
+ ret = region_chg(resv, idx, idx + 1);
+ break;
+ case VMA_COMMIT_RESV:
+ ret = region_add(resv, idx, idx + 1);
+ break;
+ case VMA_END_RESV:
+ region_abort(resv, idx, idx + 1);
+ ret = 0;
+ break;
+ default:
+ BUG();
+ }
if (vma->vm_flags & VM_MAYSHARE)
- return chg;
+ return ret;
else
- return chg < 0 ? chg : 0;
+ return ret < 0 ? ret : 0;
}
-static void vma_commit_reservation(struct hstate *h,
+
+static long vma_needs_reservation(struct hstate *h,
struct vm_area_struct *vma, unsigned long addr)
{
- struct resv_map *resv;
- pgoff_t idx;
+ return __vma_reservation_common(h, vma, addr, VMA_NEEDS_RESV);
+}
- resv = vma_resv_map(vma);
- if (!resv)
- return;
+static long vma_commit_reservation(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ return __vma_reservation_common(h, vma, addr, VMA_COMMIT_RESV);
+}
- idx = vma_hugecache_offset(h, vma, addr);
- region_add(resv, idx, idx + 1);
+static void vma_end_reservation(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ (void)__vma_reservation_common(h, vma, addr, VMA_END_RESV);
}
-static struct page *alloc_huge_page(struct vm_area_struct *vma,
+struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve)
{
struct hugepage_subpool *spool = subpool_vma(vma);
struct hstate *h = hstate_vma(vma);
struct page *page;
- long chg;
+ long map_chg, map_commit;
+ long gbl_chg;
int ret, idx;
struct hugetlb_cgroup *h_cg;
idx = hstate_index(h);
/*
- * Processes that did not create the mapping will have no
- * reserves and will not have accounted against subpool
- * limit. Check that the subpool limit can be made before
- * satisfying the allocation MAP_NORESERVE mappings may also
- * need pages and subpool limit allocated allocated if no reserve
- * mapping overlaps.
+ * Examine the region/reserve map to determine if the process
+ * has a reservation for the page to be allocated. A return
+ * code of zero indicates a reservation exists (no change).
*/
- chg = vma_needs_reservation(h, vma, addr);
- if (chg < 0)
+ map_chg = gbl_chg = vma_needs_reservation(h, vma, addr);
+ if (map_chg < 0)
return ERR_PTR(-ENOMEM);
- if (chg || avoid_reserve)
- if (hugepage_subpool_get_pages(spool, 1) < 0)
+
+ /*
+ * Processes that did not create the mapping will have no
+ * reserves as indicated by the region/reserve map. Check
+ * that the allocation will not exceed the subpool limit.
+ * Allocations for MAP_NORESERVE mappings also need to be
+ * checked against any subpool limit.
+ */
+ if (map_chg || avoid_reserve) {
+ gbl_chg = hugepage_subpool_get_pages(spool, 1);
+ if (gbl_chg < 0) {
+ vma_end_reservation(h, vma, addr);
return ERR_PTR(-ENOSPC);
+ }
+
+ /*
+ * Even though there was no reservation in the region/reserve
+ * map, there could be reservations associated with the
+ * subpool that can be used. This would be indicated if the
+ * return value of hugepage_subpool_get_pages() is zero.
+ * However, if avoid_reserve is specified we still avoid even
+ * the subpool reservations.
+ */
+ if (avoid_reserve)
+ gbl_chg = 1;
+ }
ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
if (ret)
goto out_subpool_put;
spin_lock(&hugetlb_lock);
- page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
+ /*
+ * glb_chg is passed to indicate whether or not a page must be taken
+ * from the global free pool (global change). gbl_chg == 0 indicates
+ * a reservation exists for the allocation.
+ */
+ page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
if (!page) {
spin_unlock(&hugetlb_lock);
- page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
+ page = __alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
-
+ if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
+ SetPagePrivate(page);
+ h->resv_huge_pages--;
+ }
spin_lock(&hugetlb_lock);
list_move(&page->lru, &h->hugepage_activelist);
/* Fall through */
@@ -1511,14 +1909,30 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
set_page_private(page, (unsigned long)spool);
- vma_commit_reservation(h, vma, addr);
+ map_commit = vma_commit_reservation(h, vma, addr);
+ if (unlikely(map_chg > map_commit)) {
+ /*
+ * The page was added to the reservation map between
+ * vma_needs_reservation and vma_commit_reservation.
+ * This indicates a race with hugetlb_reserve_pages.
+ * Adjust for the subpool count incremented above AND
+ * in hugetlb_reserve_pages for the same page. Also,
+ * the reservation count added in hugetlb_reserve_pages
+ * no longer applies.
+ */
+ long rsv_adjust;
+
+ rsv_adjust = hugepage_subpool_put_pages(spool, 1);
+ hugetlb_acct_memory(h, -rsv_adjust);
+ }
return page;
out_uncharge_cgroup:
hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg);
out_subpool_put:
- if (chg || avoid_reserve)
+ if (map_chg || avoid_reserve)
hugepage_subpool_put_pages(spool, 1);
+ vma_end_reservation(h, vma, addr);
return ERR_PTR(-ENOSPC);
}
@@ -1567,7 +1981,8 @@ found:
return 1;
}
-static void __init prep_compound_huge_page(struct page *page, int order)
+static void __init prep_compound_huge_page(struct page *page,
+ unsigned int order)
{
if (unlikely(order > (MAX_ORDER - 1)))
prep_compound_gigantic_page(page, order);
@@ -1736,7 +2151,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
* First take pages out of surplus state. Then make up the
* remaining difference by allocating fresh huge pages.
*
- * We might race with alloc_buddy_huge_page() here and be unable
+ * We might race with __alloc_buddy_huge_page() here and be unable
* to convert a surplus huge page to a normal huge page. That is
* not critical, though, it just means the overall size of the
* pool might be one hugepage larger than it needs to be, but
@@ -1778,7 +2193,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
* By placing pages into the surplus state independent of the
* overcommit value, we are allowing the surplus pool size to
* exceed overcommit. There are few sane options here. Since
- * alloc_buddy_huge_page() is checking the global counter,
+ * __alloc_buddy_huge_page() is checking the global counter,
* though, we'll note that we're not allowed to exceed surplus
* and won't grow the pool anywhere else. Not until one of the
* sysctls are changed, or the surplus pages go out of use.
@@ -2071,7 +2486,7 @@ struct node_hstate {
struct kobject *hugepages_kobj;
struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
};
-struct node_hstate node_hstates[MAX_NUMNODES];
+static struct node_hstate node_hstates[MAX_NUMNODES];
/*
* A subset of global hstate attributes for node devices
@@ -2234,7 +2649,7 @@ static void __exit hugetlb_exit(void)
}
kobject_put(hugepages_kobj);
- kfree(htlb_fault_mutex_table);
+ kfree(hugetlb_fault_mutex_table);
}
module_exit(hugetlb_exit);
@@ -2267,18 +2682,18 @@ static int __init hugetlb_init(void)
#else
num_fault_mutexes = 1;
#endif
- htlb_fault_mutex_table =
+ hugetlb_fault_mutex_table =
kmalloc(sizeof(struct mutex) * num_fault_mutexes, GFP_KERNEL);
- BUG_ON(!htlb_fault_mutex_table);
+ BUG_ON(!hugetlb_fault_mutex_table);
for (i = 0; i < num_fault_mutexes; i++)
- mutex_init(&htlb_fault_mutex_table[i]);
+ mutex_init(&hugetlb_fault_mutex_table[i]);
return 0;
}
module_init(hugetlb_init);
/* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
+void __init hugetlb_add_hstate(unsigned int order)
{
struct hstate *h;
unsigned long i;
@@ -2485,6 +2900,12 @@ void hugetlb_show_meminfo(void)
1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
}
+void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm)
+{
+ seq_printf(m, "HugetlbPages:\t%8lu kB\n",
+ atomic_long_read(&mm->hugetlb_usage) << (PAGE_SHIFT - 10));
+}
+
/* Return the number pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
@@ -2720,6 +3141,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
get_page(ptepage);
page_dup_rmap(ptepage);
set_huge_pte_at(dst, addr, dst_pte, entry);
+ hugetlb_count_add(pages_per_huge_page(h), dst);
}
spin_unlock(src_ptl);
spin_unlock(dst_ptl);
@@ -2800,6 +3222,7 @@ again:
if (huge_pte_dirty(pte))
set_page_dirty(page);
+ hugetlb_count_sub(pages_per_huge_page(h), mm);
page_remove_rmap(page);
force_flush = !__tlb_remove_page(tlb, page);
if (force_flush) {
@@ -2897,6 +3320,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
continue;
/*
+ * Shared VMAs have their own reserves and do not affect
+ * MAP_PRIVATE accounting but it is possible that a shared
+ * VMA is using the same page so check and skip such VMAs.
+ */
+ if (iter_vma->vm_flags & VM_MAYSHARE)
+ continue;
+
+ /*
* Unmap the page from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
* areas. This is because a future no-page fault on this VMA
@@ -3070,6 +3501,23 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
return page != NULL;
}
+int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
+ pgoff_t idx)
+{
+ struct inode *inode = mapping->host;
+ struct hstate *h = hstate_inode(inode);
+ int err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+
+ if (err)
+ return err;
+ ClearPagePrivate(page);
+
+ spin_lock(&inode->i_lock);
+ inode->i_blocks += blocks_per_huge_page(h);
+ spin_unlock(&inode->i_lock);
+ return 0;
+}
+
static int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct address_space *mapping, pgoff_t idx,
unsigned long address, pte_t *ptep, unsigned int flags)
@@ -3117,21 +3565,13 @@ retry:
set_page_huge_active(page);
if (vma->vm_flags & VM_MAYSHARE) {
- int err;
- struct inode *inode = mapping->host;
-
- err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);
+ int err = huge_add_to_page_cache(page, mapping, idx);
if (err) {
put_page(page);
if (err == -EEXIST)
goto retry;
goto out;
}
- ClearPagePrivate(page);
-
- spin_lock(&inode->i_lock);
- inode->i_blocks += blocks_per_huge_page(h);
- spin_unlock(&inode->i_lock);
} else {
lock_page(page);
if (unlikely(anon_vma_prepare(vma))) {
@@ -3159,11 +3599,14 @@ retry:
* any allocations necessary to record that reservation occur outside
* the spinlock.
*/
- if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
+ if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
if (vma_needs_reservation(h, vma, address) < 0) {
ret = VM_FAULT_OOM;
goto backout_unlocked;
}
+ /* Just decrements count, does not deallocate */
+ vma_end_reservation(h, vma, address);
+ }
ptl = huge_pte_lockptr(h, mm, ptep);
spin_lock(ptl);
@@ -3184,6 +3627,7 @@ retry:
&& (vma->vm_flags & VM_SHARED)));
set_huge_pte_at(mm, address, ptep, new_pte);
+ hugetlb_count_add(pages_per_huge_page(h), mm);
if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
/* Optimization, do the COW without a second fault */
ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page, ptl);
@@ -3203,7 +3647,7 @@ backout_unlocked:
}
#ifdef CONFIG_SMP
-static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
struct vm_area_struct *vma,
struct address_space *mapping,
pgoff_t idx, unsigned long address)
@@ -3228,7 +3672,7 @@ static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
* For uniprocesor systems we always use a single mutex, so just
* return 0 and avoid the hashing overhead.
*/
-static u32 fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
+u32 hugetlb_fault_mutex_hash(struct hstate *h, struct mm_struct *mm,
struct vm_area_struct *vma,
struct address_space *mapping,
pgoff_t idx, unsigned long address)
@@ -3262,12 +3706,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
} else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
return VM_FAULT_HWPOISON_LARGE |
VM_FAULT_SET_HINDEX(hstate_index(h));
+ } else {
+ ptep = huge_pte_alloc(mm, address, huge_page_size(h));
+ if (!ptep)
+ return VM_FAULT_OOM;
}
- ptep = huge_pte_alloc(mm, address, huge_page_size(h));
- if (!ptep)
- return VM_FAULT_OOM;
-
mapping = vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, vma, address);
@@ -3276,8 +3720,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* get spurious allocation failures if two CPUs race to instantiate
* the same page in the page cache.
*/
- hash = fault_mutex_hash(h, mm, vma, mapping, idx, address);
- mutex_lock(&htlb_fault_mutex_table[hash]);
+ hash = hugetlb_fault_mutex_hash(h, mm, vma, mapping, idx, address);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
entry = huge_ptep_get(ptep);
if (huge_pte_none(entry)) {
@@ -3310,6 +3754,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
ret = VM_FAULT_OOM;
goto out_mutex;
}
+ /* Just decrements count, does not deallocate */
+ vma_end_reservation(h, vma, address);
if (!(vma->vm_flags & VM_MAYSHARE))
pagecache_page = hugetlbfs_pagecache_page(h,
@@ -3360,7 +3806,7 @@ out_ptl:
put_page(pagecache_page);
}
out_mutex:
- mutex_unlock(&htlb_fault_mutex_table[hash]);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
/*
* Generally it's safe to hold refcount during waiting page lock. But
* here we just wait to defer the next page fault to avoid busy loop and
@@ -3629,16 +4075,35 @@ int hugetlb_reserve_pages(struct inode *inode,
* consumed reservations are stored in the map. Hence, nothing
* else has to be done for private mappings here
*/
- if (!vma || vma->vm_flags & VM_MAYSHARE)
- region_add(resv_map, from, to);
+ if (!vma || vma->vm_flags & VM_MAYSHARE) {
+ long add = region_add(resv_map, from, to);
+
+ if (unlikely(chg > add)) {
+ /*
+ * pages in this range were added to the reserve
+ * map between region_chg and region_add. This
+ * indicates a race with alloc_huge_page. Adjust
+ * the subpool and reserve counts modified above
+ * based on the difference.
+ */
+ long rsv_adjust;
+
+ rsv_adjust = hugepage_subpool_put_pages(spool,
+ chg - add);
+ hugetlb_acct_memory(h, -rsv_adjust);
+ }
+ }
return 0;
out_err:
+ if (!vma || vma->vm_flags & VM_MAYSHARE)
+ region_abort(resv_map, from, to);
if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
kref_put(&resv_map->refs, resv_map_release);
return ret;
}
-void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
+long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
+ long freed)
{
struct hstate *h = hstate_inode(inode);
struct resv_map *resv_map = inode_resv_map(inode);
@@ -3646,8 +4111,17 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
struct hugepage_subpool *spool = subpool_inode(inode);
long gbl_reserve;
- if (resv_map)
- chg = region_truncate(resv_map, offset);
+ if (resv_map) {
+ chg = region_del(resv_map, start, end);
+ /*
+ * region_del() can fail in the rare case where a region
+ * must be split and another region descriptor can not be
+ * allocated. If end == LONG_MAX, it will not fail.
+ */
+ if (chg < 0)
+ return chg;
+ }
+
spin_lock(&inode->i_lock);
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
spin_unlock(&inode->i_lock);
@@ -3658,6 +4132,8 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
*/
gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
hugetlb_acct_memory(h, -gbl_reserve);
+
+ return 0;
}
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
@@ -3671,8 +4147,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
unsigned long s_end = sbase + PUD_SIZE;
/* Allow segments to share if only one is marked locked */
- unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
- unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
+ unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK;
+ unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK;
/*
* match the virtual addresses, permission and the alignment of the
@@ -3686,7 +4162,7 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
return saddr;
}
-static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
+static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
unsigned long base = addr & PUD_MASK;
unsigned long end = base + PUD_SIZE;
@@ -3696,8 +4172,8 @@ static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
*/
if (vma->vm_flags & VM_MAYSHARE &&
vma->vm_start <= base && end <= vma->vm_end)
- return 1;
- return 0;
+ return true;
+ return false;
}
/*
@@ -3792,6 +4268,11 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
return NULL;
}
+
+int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+{
+ return 0;
+}
#define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */