diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e52c878..a301c2d 100644
@@ -19,6 +19,7 @@
 #include <linux/memblock.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/sched/mm.h>
 #include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
@@ -133,7 +134,7 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
 /*
  * Subpool accounting for allocating and reserving pages.
  * Return -ENOMEM if there are not enough resources to satisfy the
- * the request.  Otherwise, return the number of pages by which the
+ * request.  Otherwise, return the number of pages by which the
  * global pools must be adjusted (upward).  The returned value may
  * only be different than the passed value (delta) in the case where
  * a subpool minimum size must be maintained.
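As a rough illustration of the minimum-size case this comment describes, the adjustment can be sketched as follows. This is a minimal, hypothetical model (names like `rsv` are invented here), not the kernel's actual subpool implementation:

```c
/*
 * Minimal sketch of the minimum-size adjustment described above.
 * 'rsv' stands in for pages the subpool holds back to satisfy its
 * minimum size; names are hypothetical.
 */
static long subpool_adjustment(long delta, long *rsv)
{
	long ret = delta;

	if (*rsv) {
		if (delta > *rsv) {
			/* Part of the request is covered by the reserve. */
			ret = delta - *rsv;
			*rsv = 0;
		} else {
			/* Fully covered: no global pool adjustment needed. */
			ret = 0;
			*rsv -= delta;
		}
	}
	return ret;	/* pages by which the global pools must grow */
}
```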
@@ -1040,10 +1041,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 {
        struct page *page;
+       bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA);
+
+       list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+               if (nocma && is_migrate_cma_page(page))
+                       continue;
 
-       list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
                if (!PageHWPoison(page))
                        break;
+       }
+
        /*
         * if a non-isolated free hugepage is not found on the list,
         * the allocation fails.
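PF_MEMALLOC_NOCMA is set through the scoped memalloc_nocma_save()/memalloc_nocma_restore() helpers from <linux/sched/mm.h> (hence the new include above). A hedged sketch of the caller-side pattern, assuming a 5.9-era kernel where these helpers exist:

```c
/* Sketch: temporarily forbid handing out CMA-backed huge pages. */
unsigned int flags = memalloc_nocma_save();

/*
 * Allocations in this scope see PF_MEMALLOC_NOCMA, so
 * dequeue_huge_page_node_exact() skips is_migrate_cma_page() pages.
 */
page = alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);

memalloc_nocma_restore(flags);
```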
@@ -1093,15 +1100,6 @@ retry_cpuset:
        return NULL;
 }
 
-/* Movability of hugepages depends on migration support. */
-static inline gfp_t htlb_alloc_mask(struct hstate *h)
-{
-       if (hugepage_movable_supported(h))
-               return GFP_HIGHUSER_MOVABLE;
-       else
-               return GFP_HIGHUSER;
-}
-
 static struct page *dequeue_huge_page_vma(struct hstate *h,
                                struct vm_area_struct *vma,
                                unsigned long address, int avoid_reserve,
@@ -1944,7 +1942,7 @@ out_unlock:
        return page;
 }
 
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
                                     int nid, nodemask_t *nmask)
 {
        struct page *page;
@@ -1985,32 +1983,10 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
        return page;
 }
 
-/* page migration callback function */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
-{
-       gfp_t gfp_mask = htlb_alloc_mask(h);
-       struct page *page = NULL;
-
-       if (nid != NUMA_NO_NODE)
-               gfp_mask |= __GFP_THISNODE;
-
-       spin_lock(&hugetlb_lock);
-       if (h->free_huge_pages - h->resv_huge_pages > 0)
-               page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
-       spin_unlock(&hugetlb_lock);
-
-       if (!page)
-               page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
-
-       return page;
-}
-
 /* page migration callback function */
 struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
-               nodemask_t *nmask)
+               nodemask_t *nmask, gfp_t gfp_mask)
 {
-       gfp_t gfp_mask = htlb_alloc_mask(h);
-
        spin_lock(&hugetlb_lock);
        if (h->free_huge_pages - h->resv_huge_pages > 0) {
                struct page *page;
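With alloc_huge_page_node() removed, a node-specific allocation can be expressed through alloc_huge_page_nodemask() by passing an explicit gfp_mask. A sketch of the equivalent call, mirroring the removed function's body (the wrapper name is hypothetical):

```c
/*
 * Sketch: the old alloc_huge_page_node(h, nid) behaviour, expressed
 * through the unified migration callback.
 */
static struct page *alloc_on_node(struct hstate *h, int nid)
{
	gfp_t gfp_mask = htlb_alloc_mask(h);

	if (nid != NUMA_NO_NODE)
		gfp_mask |= __GFP_THISNODE;

	return alloc_huge_page_nodemask(h, nid, NULL, gfp_mask);
}
```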
@@ -2038,7 +2014,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
 
        gfp_mask = htlb_alloc_mask(h);
        node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-       page = alloc_huge_page_nodemask(h, node, nodemask);
+       page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask);
        mpol_cond_put(mpol);
 
        return page;
@@ -2167,7 +2143,7 @@ static void return_unused_surplus_pages(struct hstate *h,
         * evenly across all nodes with memory. Iterate across these nodes
         * until we can no longer free unreserved surplus pages. This occurs
         * when the nodes with surplus pages have no free pages.
-        * free_pool_huge_page() will balance the the freed pages across the
+        * free_pool_huge_page() will balance the freed pages across the
         * on-line nodes with memory and will handle the hstate accounting.
         *
         * Note that we decrement resv_huge_pages as we free the pages.  If
@@ -3458,13 +3434,21 @@ static int __init default_hugepagesz_setup(char *s)
 }
 __setup("default_hugepagesz=", default_hugepagesz_setup);
 
-static unsigned int cpuset_mems_nr(unsigned int *array)
+static unsigned int allowed_mems_nr(struct hstate *h)
 {
        int node;
        unsigned int nr = 0;
+       nodemask_t *mpol_allowed;
+       unsigned int *array = h->free_huge_pages_node;
+       gfp_t gfp_mask = htlb_alloc_mask(h);
 
-       for_each_node_mask(node, cpuset_current_mems_allowed)
-               nr += array[node];
+       mpol_allowed = policy_nodemask_current(gfp_mask);
+
+       for_each_node_mask(node, cpuset_current_mems_allowed) {
+               if (!mpol_allowed || node_isset(node, *mpol_allowed))
+                       nr += array[node];
+       }
 
        return nr;
 }
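policy_nodemask_current() returns the nodemask of the current task's memory policy, or NULL when the policy does not restrict nodes (e.g. the default policy). For reference, such a helper can be built from existing mempolicy primitives roughly as follows; this is a sketch, not necessarily the exact upstream definition:

```c
static inline nodemask_t *policy_nodemask_current(gfp_t gfp)
{
	struct mempolicy *mpol = get_task_policy(current);

	return policy_nodemask(gfp, mpol);
}
```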
@@ -3643,12 +3627,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
         * we fall back to check against current free page availability as
         * a best attempt and hopefully to minimize the impact of changing
         * semantics that cpuset has.
+        *
+        * Apart from cpuset, the memory policy mechanism also determines
+        * from which nodes the kernel will allocate memory in a NUMA
+        * system, so the memory policy of the current task should be
+        * taken into account here as well, for the reasons described
+        * above.
         */
        if (delta > 0) {
                if (gather_surplus_pages(h, delta) < 0)
                        goto out;
 
-               if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+               if (delta > allowed_mems_nr(h)) {
                        return_unused_surplus_pages(h, delta);
                        goto out;
                }
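The practical effect: a task whose mempolicy binds it to nodes without free huge pages now fails the reservation at mmap() time with ENOMEM rather than faulting later. A hedged userspace illustration (the node number is hypothetical; link with -lnuma for set_mempolicy()):

```c
#include <numaif.h>
#include <sys/mman.h>
#include <stdio.h>

int main(void)
{
	/* Bind all allocations to node 1 (hypothetical). */
	unsigned long nodemask = 1UL << 1;

	set_mempolicy(MPOL_BIND, &nodemask, 8 * sizeof(nodemask));

	/*
	 * If node 1 has no free huge pages, the reservation made by this
	 * mmap() now fails with ENOMEM, instead of succeeding and then
	 * raising SIGBUS on first touch.
	 */
	void *p = mmap(NULL, 2UL << 20, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	if (p == MAP_FAILED)
		perror("mmap");
	return 0;
}
```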
@@ -3953,7 +3943,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                        continue;
 
                ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                        spin_unlock(ptl);
                        /*
                         * We just unmapped a page of PMDs by clearing a PUD.
@@ -4540,10 +4530,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                        return VM_FAULT_HWPOISON_LARGE |
                                VM_FAULT_SET_HINDEX(hstate_index(h));
-       } else {
-               ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
-               if (!ptep)
-                       return VM_FAULT_OOM;
        }
 
        /*
@@ -5020,7 +5006,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                if (!ptep)
                        continue;
                ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                        pages++;
                        spin_unlock(ptl);
                        shared_pmd = true;
@@ -5401,12 +5387,14 @@ out:
  * returns: 1 successfully unmapped a shared pte page
  *         0 the underlying pte page is not shared, or it is the last user
  */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+                                       unsigned long *addr, pte_t *ptep)
 {
        pgd_t *pgd = pgd_offset(mm, *addr);
        p4d_t *p4d = p4d_offset(pgd, *addr);
        pud_t *pud = pud_offset(p4d, *addr);
 
+       i_mmap_assert_write_locked(vma->vm_file->f_mapping);
        BUG_ON(page_count(virt_to_page(ptep)) == 0);
        if (page_count(virt_to_page(ptep)) == 1)
                return 0;
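The new i_mmap_assert_write_locked() documents that callers must hold the mapping's i_mmap_rwsem in write mode before unsharing, since unsharing tears down page tables that other processes may still share. The expected caller pattern, sketched under that assumption with the variables (vma, h, mm, ptep, address) taken from the surrounding context:

```c
/* Sketch: the locking a huge_pmd_unshare() caller is expected to hold. */
i_mmap_lock_write(vma->vm_file->f_mapping);
ptl = huge_pte_lock(h, mm, ptep);

if (huge_pmd_unshare(mm, vma, &address, ptep)) {
	/* A page of PMDs was unmapped; flush the range accordingly. */
}

spin_unlock(ptl);
i_mmap_unlock_write(vma->vm_file->f_mapping);
```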
@@ -5424,7 +5412,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
        return NULL;
 }
 
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+                               unsigned long *addr, pte_t *ptep)
 {
        return 0;
 }
@@ -5694,12 +5683,14 @@ void __init hugetlb_cma_reserve(int order)
        reserved = 0;
        for_each_node_state(nid, N_ONLINE) {
                int res;
+               char name[20];
 
                size = min(per_node, hugetlb_cma_size - reserved);
                size = round_up(size, PAGE_SIZE << order);
 
+       snprintf(name, sizeof(name), "hugetlb%d", nid);
                res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order,
-                                                0, false, "hugetlb",
+                                                0, false, name,
                                                 &hugetlb_cma[nid], nid);
                if (res) {
                        pr_warn("hugetlb_cma: reservation failed: err %d, node %d",