diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aa39534..f044962 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -66,12 +66,6 @@ static bool hugetlb_cma_page(struct page *page, unsigned int order)
 #endif
 static unsigned long hugetlb_cma_size __initdata;
 
-/*
- * Minimum page order among possible hugepage sizes, set to a proper value
- * at boot time.
- */
-static unsigned int minimum_order __read_mostly = UINT_MAX;
-
 __initdata LIST_HEAD(huge_boot_pages);
 
 /* for command line parsing */
@@ -1135,7 +1129,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 
        lockdep_assert_held(&hugetlb_lock);
        list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
-               if (pin && !is_pinnable_page(page))
+               if (pin && !is_longterm_pinnable_page(page))
                        continue;
 
                if (PageHWPoison(page))
@@ -2152,11 +2146,17 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long pfn;
        struct page *page;
        int rc = 0;
+       unsigned int order;
+       struct hstate *h;
 
        if (!hugepages_supported())
                return rc;
 
-       for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
+       order = huge_page_order(&default_hstate);
+       for_each_hstate(h)
+               order = min(order, huge_page_order(h));
+
+       for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) {
                page = pfn_to_page(pfn);
                rc = dissolve_free_huge_page(page);
                if (rc)
@@ -2766,8 +2766,7 @@ retry:
                 * Fail with -EBUSY if not possible.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               if (!isolate_huge_page(old_page, list))
-                       ret = -EBUSY;
+               ret = isolate_hugetlb(old_page, list);
                spin_lock_irq(&hugetlb_lock);
                goto free_new;
        } else if (!HPageFreed(old_page)) {
@@ -2843,7 +2842,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
        if (hstate_is_gigantic(h))
                return -ENOMEM;
 
-       if (page_count(head) && isolate_huge_page(head, list))
+       if (page_count(head) && !isolate_hugetlb(head, list))
                ret = 0;
        else if (!page_count(head))
                ret = alloc_and_dissolve_huge_page(h, head, list);
@@ -3149,9 +3148,6 @@ static void __init hugetlb_init_hstates(void)
        struct hstate *h, *h2;
 
        for_each_hstate(h) {
-               if (minimum_order > huge_page_order(h))
-                       minimum_order = huge_page_order(h);
-
                /* oversize hugepages were init'ed in early boot */
                if (!hstate_is_gigantic(h))
                        hugetlb_hstate_alloc_pages(h);
@@ -3176,7 +3172,6 @@ static void __init hugetlb_init_hstates(void)
                                h->demote_order = h2->order;
                }
        }
-       VM_BUG_ON(minimum_order == UINT_MAX);
 }
 
 static void __init report_hugepages(void)
@@ -4482,22 +4477,20 @@ int hugetlb_report_node_meminfo(char *buf, int len, int nid)
                             nid, h->surplus_huge_pages_node[nid]);
 }
 
-void hugetlb_show_meminfo(void)
+void hugetlb_show_meminfo_node(int nid)
 {
        struct hstate *h;
-       int nid;
 
        if (!hugepages_supported())
                return;
 
-       for_each_node_state(nid, N_MEMORY)
-               for_each_hstate(h)
-                       pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
-                               nid,
-                               h->nr_huge_pages_node[nid],
-                               h->free_huge_pages_node[nid],
-                               h->surplus_huge_pages_node[nid],
-                               huge_page_size(h) / SZ_1K);
+       for_each_hstate(h)
+               printk("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
+                       nid,
+                       h->nr_huge_pages_node[nid],
+                       h->free_huge_pages_node[nid],
+                       h->surplus_huge_pages_node[nid],
+                       huge_page_size(h) / SZ_1K);
 }
 
 void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm)
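The old hugetlb_show_meminfo() walked every N_MEMORY node itself; with hugetlb_show_meminfo_node() the caller supplies the node, so the iteration (and any node filtering) moves to the call site. A minimal caller-side sketch, not part of this patch; the wrapper name and the simple loop are assumptions about how a show_mem()-style path might use the new helper:

	/* Hypothetical wrapper: drive the per-node iteration at the call site. */
	static void show_hugetlb_meminfo_all_nodes(void)
	{
		int nid;

		for_each_node_state(nid, N_MEMORY)
			hugetlb_show_meminfo_node(nid);	/* replaces hugetlb_show_meminfo() */
	}
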
@@ -4732,6 +4725,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
        unsigned long npages = pages_per_huge_page(h);
        struct address_space *mapping = src_vma->vm_file->f_mapping;
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        int ret = 0;
 
        if (cow) {
@@ -4751,11 +4745,14 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                i_mmap_lock_read(mapping);
        }
 
+       last_addr_mask = hugetlb_mask_last_page(h);
        for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
                spinlock_t *src_ptl, *dst_ptl;
                src_pte = huge_pte_offset(src, addr, sz);
-               if (!src_pte)
+               if (!src_pte) {
+                       addr |= last_addr_mask;
                        continue;
+               }
                dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
                if (!dst_pte) {
                        ret = -ENOMEM;
@@ -4772,8 +4769,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                 * after taking the lock below.
                 */
                dst_entry = huge_ptep_get(dst_pte);
-               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
+               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) {
+                       addr |= last_addr_mask;
                        continue;
+               }
 
                dst_ptl = huge_pte_lock(h, dst, dst_pte);
                src_ptl = huge_pte_lockptr(h, src, src_pte);
@@ -4808,12 +4807,11 @@ again:
                                entry = swp_entry_to_pte(swp_entry);
                                if (userfaultfd_wp(src_vma) && uffd_wp)
                                        entry = huge_pte_mkuffd_wp(entry);
-                               set_huge_swap_pte_at(src, addr, src_pte,
-                                                    entry, sz);
+                               set_huge_pte_at(src, addr, src_pte, entry);
                        }
                        if (!userfaultfd_wp(dst_vma) && uffd_wp)
                                entry = huge_pte_clear_uffd_wp(entry);
-                       set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
+                       set_huge_pte_at(dst, addr, dst_pte, entry);
                } else if (unlikely(is_pte_marker(entry))) {
                        /*
                         * We copy the pte marker only if the dst vma has
@@ -4880,7 +4878,7 @@ again:
                                 * table protection not changing it to point
                                 * to a new page.
                                 *
-                                * See Documentation/vm/mmu_notifier.rst
+                                * See Documentation/mm/mmu_notifier.rst
                                 */
                                huge_ptep_set_wrprotect(src, addr, src_pte);
                                entry = huge_pte_wrprotect(entry);
@@ -4939,7 +4937,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
        unsigned long sz = huge_page_size(h);
        struct mm_struct *mm = vma->vm_mm;
        unsigned long old_end = old_addr + len;
-       unsigned long old_addr_copy;
+       unsigned long last_addr_mask;
        pte_t *src_pte, *dst_pte;
        struct mmu_notifier_range range;
        bool shared_pmd = false;
@@ -4954,23 +4952,23 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
        flush_cache_range(vma, range.start, range.end);
 
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        /* Prevent race with file truncation */
        i_mmap_lock_write(mapping);
        for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
                src_pte = huge_pte_offset(mm, old_addr, sz);
-               if (!src_pte)
+               if (!src_pte) {
+                       old_addr |= last_addr_mask;
+                       new_addr |= last_addr_mask;
                        continue;
+               }
                if (huge_pte_none(huge_ptep_get(src_pte)))
                        continue;
 
-               /* old_addr arg to huge_pmd_unshare() is a pointer and so the
-                * arg may be modified. Pass a copy instead to preserve the
-                * value in old_addr.
-                */
-               old_addr_copy = old_addr;
-
-               if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) {
+               if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
                        shared_pmd = true;
+                       old_addr |= last_addr_mask;
+                       new_addr |= last_addr_mask;
                        continue;
                }
 
@@ -5004,6 +5002,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        bool force_flush = false;
 
        WARN_ON(!is_vm_hugetlb_page(vma));
@@ -5024,17 +5023,21 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                                end);
        adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        address = start;
        for (; address < end; address += sz) {
                ptep = huge_pte_offset(mm, address, sz);
-               if (!ptep)
+               if (!ptep) {
+                       address |= last_addr_mask;
                        continue;
+               }
 
                ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, address, ptep)) {
                        spin_unlock(ptl);
                        tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
                        force_flush = true;
+                       address |= last_addr_mask;
                        continue;
                }
 
@@ -5714,7 +5717,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 */
                entry = huge_ptep_get(ptep);
                if (unlikely(is_hugetlb_entry_migration(entry))) {
-                       migration_entry_wait_huge(vma, mm, ptep);
+                       migration_entry_wait_huge(vma, ptep);
                        return 0;
                } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                        return VM_FAULT_HWPOISON_LARGE |
@@ -6052,8 +6055,6 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 
        set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
-       (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
-                                       dst_vma->vm_flags & VM_WRITE);
        hugetlb_count_add(pages_per_huge_page(h), dst_mm);
 
        /* No need to invalidate - it was non-present before */
@@ -6305,6 +6306,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        unsigned long pages = 0, psize = huge_page_size(h);
        bool shared_pmd = false;
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
        bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
@@ -6321,14 +6323,17 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        flush_cache_range(vma, range.start, range.end);
 
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        i_mmap_lock_write(vma->vm_file->f_mapping);
        for (; address < end; address += psize) {
                spinlock_t *ptl;
                ptep = huge_pte_offset(mm, address, psize);
-               if (!ptep)
+               if (!ptep) {
+                       address |= last_addr_mask;
                        continue;
+               }
                ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, address, ptep)) {
                        /*
                         * When uffd-wp is enabled on the vma, unshare
                         * shouldn't happen at all.  Warn about it if it
@@ -6338,6 +6343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                        pages++;
                        spin_unlock(ptl);
                        shared_pmd = true;
+                       address |= last_addr_mask;
                        continue;
                }
                pte = huge_ptep_get(ptep);
@@ -6363,8 +6369,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                                        newpte = pte_swp_mkuffd_wp(newpte);
                                else if (uffd_wp_resolve)
                                        newpte = pte_swp_clear_uffd_wp(newpte);
-                               set_huge_swap_pte_at(mm, address, ptep,
-                                                    newpte, psize);
+                               set_huge_pte_at(mm, address, ptep, newpte);
                                pages++;
                        }
                        spin_unlock(ptl);
@@ -6415,7 +6420,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
         * No need to call mmu_notifier_invalidate_range() we are downgrading
         * page table protection not changing it to point to a new page.
         *
-        * See Documentation/vm/mmu_notifier.rst
+        * See Documentation/mm/mmu_notifier.rst
         */
        i_mmap_unlock_write(vma->vm_file->f_mapping);
        mmu_notifier_invalidate_range_end(&range);
@@ -6761,11 +6766,11 @@ out:
  *         0 the underlying pte page is not shared, or it is the last user
  */
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-                                       unsigned long *addr, pte_t *ptep)
+                                       unsigned long addr, pte_t *ptep)
 {
-       pgd_t *pgd = pgd_offset(mm, *addr);
-       p4d_t *p4d = p4d_offset(pgd, *addr);
-       pud_t *pud = pud_offset(p4d, *addr);
+       pgd_t *pgd = pgd_offset(mm, addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
 
        i_mmap_assert_write_locked(vma->vm_file->f_mapping);
        BUG_ON(page_count(virt_to_page(ptep)) == 0);
@@ -6775,14 +6780,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
        pud_clear(pud);
        put_page(virt_to_page(ptep));
        mm_dec_nr_pmds(mm);
-       /*
-        * This update of passed address optimizes loops sequentially
-        * processing addresses in increments of huge page size (PMD_SIZE
-        * in this case).  By clearing the pud, a PUD_SIZE area is unmapped.
-        * Update address to the 'last page' in the cleared area so that
-        * calling loop can move to first page past this area.
-        */
-       *addr |= PUD_SIZE - PMD_SIZE;
        return 1;
 }
 
@@ -6794,7 +6791,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-                               unsigned long *addr, pte_t *ptep)
+                               unsigned long addr, pte_t *ptep)
 {
        return 0;
 }
@@ -6877,6 +6874,37 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
        return (pte_t *)pmd;
 }
 
+/*
+ * Return a mask that can be used to update an address to the last huge
+ * page in a page table page mapping size.  Used to skip non-present
+ * page table entries when linearly scanning address ranges.  Architectures
+ * with unique huge page to page table relationships can define their own
+ * version of this routine.
+ */
+unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+       unsigned long hp_size = huge_page_size(h);
+
+       if (hp_size == PUD_SIZE)
+               return P4D_SIZE - PUD_SIZE;
+       else if (hp_size == PMD_SIZE)
+               return PUD_SIZE - PMD_SIZE;
+       else
+               return 0UL;
+}
+
+#else
+
+/* See description above.  Architectures can provide their own version. */
+__weak unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+       if (huge_page_size(h) == PMD_SIZE)
+               return PUD_SIZE - PMD_SIZE;
+#endif
+       return 0UL;
+}
+
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
 
 /*
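The value returned by hugetlb_mask_last_page() is consumed with a bitwise OR: when huge_pte_offset() finds no page-table page (or huge_pmd_unshare() tears one down), the scanning loops changed above do addr |= last_addr_mask, so the loop's addr += sz step lands on the first address past the PUD- or P4D-sized region that page-table page would have covered. A minimal sketch of that skip idiom, mirroring the converted loops; the function name and loop body are illustrative only, not taken from the patch:

	/* Illustrative scanner using the skip idiom introduced in this patch. */
	static void scan_huge_range(struct mm_struct *mm, struct hstate *h,
				    unsigned long start, unsigned long end)
	{
		unsigned long sz = huge_page_size(h);
		unsigned long last_addr_mask = hugetlb_mask_last_page(h);
		unsigned long addr;
		pte_t *ptep;

		for (addr = start; addr < end; addr += sz) {
			ptep = huge_pte_offset(mm, addr, sz);
			if (!ptep) {
				/*
				 * No page-table page maps this range: jump to its
				 * last huge page so the loop increment skips the
				 * rest of the covered area in one step.
				 */
				addr |= last_addr_mask;
				continue;
			}
			/* ... process the entry at *ptep ... */
		}
	}
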
@@ -6940,7 +6968,7 @@ retry:
        } else {
                if (is_hugetlb_entry_migration(pte)) {
                        spin_unlock(ptl);
-                       __migration_entry_wait(mm, (pte_t *)pmd, ptl);
+                       __migration_entry_wait_huge((pte_t *)pmd, ptl);
                        goto retry;
                }
                /*
@@ -6972,15 +7000,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
        return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
 }
 
-bool isolate_huge_page(struct page *page, struct list_head *list)
+int isolate_hugetlb(struct page *page, struct list_head *list)
 {
-       bool ret = true;
+       int ret = 0;
 
        spin_lock_irq(&hugetlb_lock);
        if (!PageHeadHuge(page) ||
            !HPageMigratable(page) ||
            !get_page_unless_zero(page)) {
-               ret = false;
+               ret = -EBUSY;
                goto unlock;
        }
        ClearHPageMigratable(page);
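isolate_huge_page() reported success as a bool; isolate_hugetlb() switches to the usual kernel 0 / -EBUSY convention, which is why the call sites earlier in this patch flip their tests (ret = isolate_hugetlb(old_page, list), and page_count(head) && !isolate_hugetlb(head, list)). A minimal sketch of adapting a caller, not taken from the patch; the wrapper is hypothetical:

	/* Hypothetical caller: the old bool test becomes a plain errno return. */
	static int try_isolate(struct page *page, struct list_head *list)
	{
		/* Before: if (!isolate_huge_page(page, list)) return -EBUSY; */
		return isolate_hugetlb(page, list);	/* 0 on success, -EBUSY otherwise */
	}
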
@@ -7100,21 +7128,18 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
        mmu_notifier_invalidate_range_start(&range);
        i_mmap_lock_write(vma->vm_file->f_mapping);
        for (address = start; address < end; address += PUD_SIZE) {
-               unsigned long tmp = address;
-
                ptep = huge_pte_offset(mm, address, sz);
                if (!ptep)
                        continue;
                ptl = huge_pte_lock(h, mm, ptep);
-               /* We don't want 'address' to be changed */
-               huge_pmd_unshare(mm, vma, &tmp, ptep);
+               huge_pmd_unshare(mm, vma, address, ptep);
                spin_unlock(ptl);
        }
        flush_hugetlb_tlb_range(vma, start, end);
        i_mmap_unlock_write(vma->vm_file->f_mapping);
        /*
         * No need to call mmu_notifier_invalidate_range(), see
-        * Documentation/vm/mmu_notifier.rst.
+        * Documentation/mm/mmu_notifier.rst.
         */
        mmu_notifier_invalidate_range_end(&range);
 }