net: sunrpc: delete repeated words

[linux-2.6-microblaze.git] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index e52c878..67fc638 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -19,6 +19,7 @@
  #include <linux/memblock.h>
  #include <linux/sysfs.h>
  #include <linux/slab.h>
+#include <linux/sched/mm.h>
  #include <linux/mmdebug.h>
  #include <linux/sched/signal.h>
  #include <linux/rmap.h>
@@ -133,7 +134,7 @@ void hugepage_put_subpool(struct hugepage_subpool *spool)
  /*
   * Subpool accounting for allocating and reserving pages.
   * Return -ENOMEM if there are not enough resources to satisfy the
- * the request.  Otherwise, return the number of pages by which the
+ * request.  Otherwise, return the number of pages by which the
   * global pools must be adjusted (upward).  The returned value may
   * only be different than the passed value (delta) in the case where
   * a subpool minimum size must be maintained.
@@ -1040,10 +1041,16 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
  static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
  {
         struct page *page;
+       bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA);
+
+       list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
+               if (nocma && is_migrate_cma_page(page))
+                       continue;
  
-       list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
                 if (!PageHWPoison(page))
                         break;
+       }
+
         /*
          * if 'non-isolated free hugepage' not found on the list,
          * the allocation fails.
@@ -1093,15 +1100,6 @@ retry_cpuset:
         return NULL;
  }
  
-/* Movability of hugepages depends on migration support. */
-static inline gfp_t htlb_alloc_mask(struct hstate *h)
-{
-       if (hugepage_movable_supported(h))
-               return GFP_HIGHUSER_MOVABLE;
-       else
-               return GFP_HIGHUSER;
-}
-
  static struct page *dequeue_huge_page_vma(struct hstate *h,
                                 struct vm_area_struct *vma,
                                 unsigned long address, int avoid_reserve,
@@ -1252,21 +1250,32 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
                 int nid, nodemask_t *nodemask)
  {
         unsigned long nr_pages = 1UL << huge_page_order(h);
+       if (nid == NUMA_NO_NODE)
+               nid = numa_mem_id();
  
  #ifdef CONFIG_CMA
         {
                 struct page *page;
                 int node;
  
-               for_each_node_mask(node, *nodemask) {
-                       if (!hugetlb_cma[node])
-                               continue;
-
-                       page = cma_alloc(hugetlb_cma[node], nr_pages,
-                                        huge_page_order(h), true);
+               if (hugetlb_cma[nid]) {
+                       page = cma_alloc(hugetlb_cma[nid], nr_pages,
+                                       huge_page_order(h), true);
                         if (page)
                                 return page;
                 }
+
+               if (!(gfp_mask & __GFP_THISNODE)) {
+                       for_each_node_mask(node, *nodemask) {
+                               if (node == nid || !hugetlb_cma[node])
+                                       continue;
+
+                               page = cma_alloc(hugetlb_cma[node], nr_pages,
+                                               huge_page_order(h), true);
+                               if (page)
+                                       return page;
+                       }
+               }
         }
  #endif
  
@@ -1944,7 +1953,7 @@ out_unlock:
         return page;
  }
  
-struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
+static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
                                      int nid, nodemask_t *nmask)
  {
         struct page *page;
@@ -1985,32 +1994,10 @@ struct page *alloc_buddy_huge_page_with_mpol(struct hstate *h,
         return page;
  }
  
-/* page migration callback function */
-struct page *alloc_huge_page_node(struct hstate *h, int nid)
-{
-       gfp_t gfp_mask = htlb_alloc_mask(h);
-       struct page *page = NULL;
-
-       if (nid != NUMA_NO_NODE)
-               gfp_mask |= __GFP_THISNODE;
-
-       spin_lock(&hugetlb_lock);
-       if (h->free_huge_pages - h->resv_huge_pages > 0)
-               page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL);
-       spin_unlock(&hugetlb_lock);
-
-       if (!page)
-               page = alloc_migrate_huge_page(h, gfp_mask, nid, NULL);
-
-       return page;
-}
-
  /* page migration callback function */
  struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
-               nodemask_t *nmask)
+               nodemask_t *nmask, gfp_t gfp_mask)
  {
-       gfp_t gfp_mask = htlb_alloc_mask(h);
-
         spin_lock(&hugetlb_lock);
         if (h->free_huge_pages - h->resv_huge_pages > 0) {
                 struct page *page;
@@ -2038,7 +2025,7 @@ struct page *alloc_huge_page_vma(struct hstate *h, struct vm_area_struct *vma,
  
         gfp_mask = htlb_alloc_mask(h);
         node = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
-       page = alloc_huge_page_nodemask(h, node, nodemask);
+       page = alloc_huge_page_nodemask(h, node, nodemask, gfp_mask);
         mpol_cond_put(mpol);
  
         return page;
@@ -2167,7 +2154,7 @@ static void return_unused_surplus_pages(struct hstate *h,
          * evenly across all nodes with memory. Iterate across these nodes
          * until we can no longer free unreserved surplus pages. This occurs
          * when the nodes with surplus pages have no free pages.
-        * free_pool_huge_page() will balance the the freed pages across the
+        * free_pool_huge_page() will balance the freed pages across the
          * on-line nodes with memory and will handle the hstate accounting.
          *
          * Note that we decrement resv_huge_pages as we free the pages.  If
@@ -3458,18 +3445,42 @@ static int __init default_hugepagesz_setup(char *s)
  }
  __setup("default_hugepagesz=", default_hugepagesz_setup);
  
-static unsigned int cpuset_mems_nr(unsigned int *array)
+static unsigned int allowed_mems_nr(struct hstate *h)
  {
         int node;
         unsigned int nr = 0;
+       nodemask_t *mpol_allowed;
+       unsigned int *array = h->free_huge_pages_node;
+       gfp_t gfp_mask = htlb_alloc_mask(h);
  
-       for_each_node_mask(node, cpuset_current_mems_allowed)
-               nr += array[node];
+       mpol_allowed = policy_nodemask_current(gfp_mask);
+
+       for_each_node_mask(node, cpuset_current_mems_allowed) {
+               if (!mpol_allowed ||
+                   (mpol_allowed && node_isset(node, *mpol_allowed)))
+                       nr += array[node];
+       }
  
         return nr;
  }
  
  #ifdef CONFIG_SYSCTL
+static int proc_hugetlb_doulongvec_minmax(struct ctl_table *table, int write,
+                                         void *buffer, size_t *length,
+                                         loff_t *ppos, unsigned long *out)
+{
+       struct ctl_table dup_table;
+
+       /*
+        * To avoid races with __do_proc_doulongvec_minmax(), duplicate
+        * @table and modify only the duplicate.
+        */
+       dup_table = *table;
+       dup_table.data = out;
+
+       return proc_doulongvec_minmax(&dup_table, write, buffer, length, ppos);
+}
+
  static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                          struct ctl_table *table, int write,
                          void *buffer, size_t *length, loff_t *ppos)
@@ -3481,9 +3492,8 @@ static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
         if (!hugepages_supported())
                 return -EOPNOTSUPP;
  
-       table->data = &tmp;
-       table->maxlen = sizeof(unsigned long);
-       ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+       ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
+                                            &tmp);
         if (ret)
                 goto out;
  
@@ -3526,9 +3536,8 @@ int hugetlb_overcommit_handler(struct ctl_table *table, int write,
         if (write && hstate_is_gigantic(h))
                 return -EINVAL;
  
-       table->data = &tmp;
-       table->maxlen = sizeof(unsigned long);
-       ret = proc_doulongvec_minmax(table, write, buffer, length, ppos);
+       ret = proc_hugetlb_doulongvec_minmax(table, write, buffer, length, ppos,
+                                            &tmp);
         if (ret)
                 goto out;
  
@@ -3643,12 +3652,18 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
          * we fall back to check against current free page availability as
          * a best attempt and hopefully to minimize the impact of changing
          * semantics that cpuset has.
+        *
+        * Apart from cpuset, the memory policy mechanism also determines
+        * from which node the kernel will allocate memory in a NUMA
+        * system. So, as with cpuset, we should also consider the memory
+        * policy of the current task when checking availability, subject
+        * to the same caveats as described above for cpuset.
          */
         if (delta > 0) {
                 if (gather_surplus_pages(h, delta) < 0)
                         goto out;
  
-               if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
+               if (delta > allowed_mems_nr(h)) {
                         return_unused_surplus_pages(h, delta);
                         goto out;
                 }
@@ -3953,7 +3968,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
                         continue;
  
                 ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                         spin_unlock(ptl);
                         /*
                          * We just unmapped a page of PMDs by clearing a PUD.
@@ -4540,10 +4555,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                         return VM_FAULT_HWPOISON_LARGE |
                                 VM_FAULT_SET_HINDEX(hstate_index(h));
-       } else {
-               ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
-               if (!ptep)
-                       return VM_FAULT_OOM;
         }
  
         /*
@@ -5020,7 +5031,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                 if (!ptep)
                         continue;
                 ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
                         pages++;
                         spin_unlock(ptl);
                         shared_pmd = true;
@@ -5401,12 +5412,14 @@ out:
   * returns: 1 successfully unmapped a shared pte page
   *         0 the underlying pte page is not shared, or it is the last user
   */
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+                                       unsigned long *addr, pte_t *ptep)
  {
         pgd_t *pgd = pgd_offset(mm, *addr);
         p4d_t *p4d = p4d_offset(pgd, *addr);
         pud_t *pud = pud_offset(p4d, *addr);
  
+       i_mmap_assert_write_locked(vma->vm_file->f_mapping);
         BUG_ON(page_count(virt_to_page(ptep)) == 0);
         if (page_count(virt_to_page(ptep)) == 1)
                 return 0;
@@ -5424,7 +5437,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
         return NULL;
  }
  
-int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
+int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
+                               unsigned long *addr, pte_t *ptep)
  {
         return 0;
  }
@@ -5694,12 +5708,14 @@ void __init hugetlb_cma_reserve(int order)
         reserved = 0;
         for_each_node_state(nid, N_ONLINE) {
                 int res;
+               char name[20];
  
                 size = min(per_node, hugetlb_cma_size - reserved);
                 size = round_up(size, PAGE_SIZE << order);
  
+               snprintf(name, 20, "hugetlb%d", nid);
                 res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order,
-                                                0, false, "hugetlb",
+                                                0, false, name,
                                                  &hugetlb_cma[nid], nid);
                 if (res) {
                         pr_warn("hugetlb_cma: reservation failed: err %d, node %d",