Merge remote-tracking branches 'regulator/topic/act8865', 'regulator/topic/arizona...

[linux-2.6-microblaze.git] / mm / mempolicy.c
diff --git a/mm/mempolicy.c b/mm/mempolicy.c

index 0e0961b..4721046 100644 (file)
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -471,24 +471,34 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
  static void migrate_page_add(struct page *page, struct list_head *pagelist,
                                 unsigned long flags);
  
+struct queue_pages {
+       struct list_head *pagelist;     /* list that matching pages are moved to */
+       unsigned long flags;            /* MPOL_MF_* flags steering the walk */
+       nodemask_t *nmask;              /* nodes each page's node id is tested against */
+       struct vm_area_struct *prev;    /* last vma seen by queue_pages_test_walk() */
+};
+
  /*
   * Scan through pages checking if pages follow certain conditions,
   * and move them to the pagelist if they do.
   */
-static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-               unsigned long addr, unsigned long end,
-               const nodemask_t *nodes, unsigned long flags,
-               void *private)
+static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr,
+                       unsigned long end, struct mm_walk *walk)
  {
-       pte_t *orig_pte;
+       struct vm_area_struct *vma = walk->vma;
+       struct page *page;
+       struct queue_pages *qp = walk->private;
+       unsigned long flags = qp->flags;
+       int nid;
         pte_t *pte;
         spinlock_t *ptl;
  
-       orig_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-       do {
-               struct page *page;
-               int nid;
+       split_huge_page_pmd(vma, addr, pmd);
+       if (pmd_trans_unstable(pmd))
+               return 0;
  
+       pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+       for (; addr != end; pte++, addr += PAGE_SIZE) {
                 if (!pte_present(*pte))
                         continue;
                 page = vm_normal_page(vma, addr, *pte);
@@ -501,114 +511,46 @@ static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                 if (PageReserved(page))
                         continue;
                 nid = page_to_nid(page);
-               if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+               if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
                         continue;
  
                 if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
-                       migrate_page_add(page, private, flags);
-               else
-                       break;
-       } while (pte++, addr += PAGE_SIZE, addr != end);
-       pte_unmap_unlock(orig_pte, ptl);
-       return addr != end;
+                       migrate_page_add(page, qp->pagelist, flags);
+       }
+       pte_unmap_unlock(pte - 1, ptl);
+       cond_resched();
+       return 0;
  }
  
-static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
-               pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
-                                   void *private)
+static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
+                              unsigned long addr, unsigned long end,
+                              struct mm_walk *walk)
  {
  #ifdef CONFIG_HUGETLB_PAGE
+       struct queue_pages *qp = walk->private;
+       unsigned long flags = qp->flags;
         int nid;
         struct page *page;
         spinlock_t *ptl;
         pte_t entry;
  
-       ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, (pte_t *)pmd);
-       entry = huge_ptep_get((pte_t *)pmd);
+       ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+       entry = huge_ptep_get(pte);
         if (!pte_present(entry))
                 goto unlock;
         page = pte_page(entry);
         nid = page_to_nid(page);
-       if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+       if (node_isset(nid, *qp->nmask) == !!(flags & MPOL_MF_INVERT))
                 goto unlock;
         /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
         if (flags & (MPOL_MF_MOVE_ALL) ||
             (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
-               isolate_huge_page(page, private);
+               isolate_huge_page(page, qp->pagelist);
  unlock:
         spin_unlock(ptl);
  #else
         BUG();
  #endif
-}
-
-static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
-               unsigned long addr, unsigned long end,
-               const nodemask_t *nodes, unsigned long flags,
-               void *private)
-{
-       pmd_t *pmd;
-       unsigned long next;
-
-       pmd = pmd_offset(pud, addr);
-       do {
-               next = pmd_addr_end(addr, end);
-               if (!pmd_present(*pmd))
-                       continue;
-               if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
-                       queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
-                                               flags, private);
-                       continue;
-               }
-               split_huge_page_pmd(vma, addr, pmd);
-               if (pmd_none_or_trans_huge_or_clear_bad(pmd))
-                       continue;
-               if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
-                                   flags, private))
-                       return -EIO;
-       } while (pmd++, addr = next, addr != end);
-       return 0;
-}
-
-static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
-               unsigned long addr, unsigned long end,
-               const nodemask_t *nodes, unsigned long flags,
-               void *private)
-{
-       pud_t *pud;
-       unsigned long next;
-
-       pud = pud_offset(pgd, addr);
-       do {
-               next = pud_addr_end(addr, end);
-               if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
-                       continue;
-               if (pud_none_or_clear_bad(pud))
-                       continue;
-               if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
-                                   flags, private))
-                       return -EIO;
-       } while (pud++, addr = next, addr != end);
-       return 0;
-}
-
-static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
-               unsigned long addr, unsigned long end,
-               const nodemask_t *nodes, unsigned long flags,
-               void *private)
-{
-       pgd_t *pgd;
-       unsigned long next;
-
-       pgd = pgd_offset(vma->vm_mm, addr);
-       do {
-               next = pgd_addr_end(addr, end);
-               if (pgd_none_or_clear_bad(pgd))
-                       continue;
-               if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
-                                   flags, private))
-                       return -EIO;
-       } while (pgd++, addr = next, addr != end);
         return 0;
  }
  
@@ -627,7 +569,7 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
  {
         int nr_updated;
  
-       nr_updated = change_protection(vma, addr, end, vma->vm_page_prot, 0, 1);
+       nr_updated = change_protection(vma, addr, end, PAGE_NONE, 0, 1);
         if (nr_updated)
                 count_vm_numa_events(NUMA_PTE_UPDATES, nr_updated);
  
@@ -641,6 +583,49 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
  }
  #endif /* CONFIG_NUMA_BALANCING */
  
+/*
+ * test_walk hook: 0 = queue this vma's pages, 1 = skip it, -EFAULT = hole.
+ */
+static int queue_pages_test_walk(unsigned long start, unsigned long end,
+                               struct mm_walk *walk)
+{
+       struct vm_area_struct *vma = walk->vma;
+       struct queue_pages *qp = walk->private;
+       unsigned long endvma = vma->vm_end;
+       unsigned long flags = qp->flags;
+
+       if (vma->vm_flags & VM_PFNMAP)
+               return 1;       /* NOTE(review): leaves qp->prev untouched */
+
+       if (endvma > end)
+               endvma = end;
+       if (vma->vm_start > start)
+               start = vma->vm_start;
+
+       if (!(flags & MPOL_MF_DISCONTIG_OK)) {
+               if (!vma->vm_next && vma->vm_end < end)
+                       return -EFAULT;
+               if (qp->prev && qp->prev->vm_end < vma->vm_start)
+                       return -EFAULT;
+       }
+
+       qp->prev = vma;
+
+       if (flags & MPOL_MF_LAZY) {
+               /* Similar to task_numa_work, skip inaccessible VMAs */
+               if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
+                       change_prot_numa(vma, start, endvma);
+               return 1;
+       }
+
+       if ((flags & MPOL_MF_STRICT) ||
+           ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
+            vma_migratable(vma)))
+               /* queue pages from current vma */
+               return 0;
+       return 1;
+}
+
  /*
   * Walk through page tables and collect pages to be migrated.
   *
@@ -650,50 +635,24 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
   */
  static int
  queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
-               const nodemask_t *nodes, unsigned long flags, void *private)
-{
-       int err = 0;
-       struct vm_area_struct *vma, *prev;
-
-       vma = find_vma(mm, start);
-       if (!vma)
-               return -EFAULT;
-       prev = NULL;
-       for (; vma && vma->vm_start < end; vma = vma->vm_next) {
-               unsigned long endvma = vma->vm_end;
-
-               if (endvma > end)
-                       endvma = end;
-               if (vma->vm_start > start)
-                       start = vma->vm_start;
-
-               if (!(flags & MPOL_MF_DISCONTIG_OK)) {
-                       if (!vma->vm_next && vma->vm_end < end)
-                               return -EFAULT;
-                       if (prev && prev->vm_end < vma->vm_start)
-                               return -EFAULT;
-               }
-
-               if (flags & MPOL_MF_LAZY) {
-                       /* Similar to task_numa_work, skip inaccessible VMAs */
-                       if (vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))
-                               change_prot_numa(vma, start, endvma);
-                       goto next;
-               }
-
-               if ((flags & MPOL_MF_STRICT) ||
-                    ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
-                     vma_migratable(vma))) {
-
-                       err = queue_pages_pgd_range(vma, start, endvma, nodes,
-                                               flags, private);
-                       if (err)
-                               break;
-               }
-next:
-               prev = vma;
-       }
-       return err;
+               nodemask_t *nodes, unsigned long flags,
+               struct list_head *pagelist)
+{
+       struct queue_pages qp = {
+               .pagelist = pagelist,
+               .flags = flags,
+               .nmask = nodes,
+               .prev = NULL,
+       };
+       struct mm_walk queue_pages_walk = {
+               .hugetlb_entry = queue_pages_hugetlb,
+               .pmd_entry = queue_pages_pte_range,
+               .test_walk = queue_pages_test_walk,
+               .mm = mm,
+               .private = &qp,
+       };
+
+       return walk_page_range(start, end, &queue_pages_walk);
  }
  
  /*
@@ -1988,43 +1947,63 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
   *     @order:Order of the GFP allocation.
   *     @vma:  Pointer to VMA or NULL if not available.
   *     @addr: Virtual Address of the allocation. Must be inside the VMA.
+ *     @node: Which node to prefer for allocation (modulo policy).
+ *     @hugepage: for hugepages try only the preferred node if possible
   *
   *     This function allocates a page from the kernel page pool and applies
   *     a NUMA policy associated with the VMA or the current process.
   *     When VMA is not NULL caller must hold down_read on the mmap_sem of the
   *     mm_struct of the VMA to prevent it from going away. Should be used for
- *     all allocations for pages that will be mapped into
- *     user space. Returns NULL when no page can be allocated.
- *
- *     Should be called with the mm_sem of the vma hold.
+ *     all allocations for pages that will be mapped into user space. Returns
+ *     NULL when no page can be allocated.
   */
  struct page *
  alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-               unsigned long addr, int node)
+               unsigned long addr, int node, bool hugepage)
  {
         struct mempolicy *pol;
         struct page *page;
         unsigned int cpuset_mems_cookie;
+       struct zonelist *zl;
+       nodemask_t *nmask;
  
  retry_cpuset:
         pol = get_vma_policy(vma, addr);
         cpuset_mems_cookie = read_mems_allowed_begin();
  
-       if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
+       if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
+                                       pol->mode != MPOL_INTERLEAVE)) {
+               /*
+                * For hugepage allocation and non-interleave policy which
+                * allows the current node, we only try to allocate from the
+                * current node and don't fall back to other nodes, as the
+                * cost of remote accesses would likely offset THP benefits.
+                *
+                * If the policy is interleave, or does not allow the current
+                * node in its nodemask, we allocate the standard way.
+                */
+               nmask = policy_nodemask(gfp, pol);
+               if (!nmask || node_isset(node, *nmask)) {
+                       mpol_cond_put(pol);
+                       page = alloc_pages_exact_node(node, gfp, order);
+                       goto out;
+               }
+       }
+
+       if (pol->mode == MPOL_INTERLEAVE) {
                 unsigned nid;
  
                 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
                 mpol_cond_put(pol);
                 page = alloc_page_interleave(gfp, order, nid);
-               if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-                       goto retry_cpuset;
-
-               return page;
+               goto out;
         }
-       page = __alloc_pages_nodemask(gfp, order,
-                                     policy_zonelist(gfp, pol, node),
-                                     policy_nodemask(gfp, pol));
+
+       nmask = policy_nodemask(gfp, pol);
+       zl = policy_zonelist(gfp, pol, node);
         mpol_cond_put(pol);
+       page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+out:
         if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
                 goto retry_cpuset;
         return page;
@@ -2838,8 +2817,7 @@ void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol)
                         p += snprintf(p, buffer + maxlen - p, "relative");
         }
  
-       if (!nodes_empty(nodes)) {
-               p += snprintf(p, buffer + maxlen - p, ":");
-               p += nodelist_scnprintf(p, buffer + maxlen - p, nodes);
-       }
+       if (!nodes_empty(nodes))
+               p += scnprintf(p, buffer + maxlen - p, ":%*pbl",
+                              nodemask_pr_args(&nodes));
  }