diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index aa39534..f044962 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -66,12 +66,6 @@ static bool hugetlb_cma_page(struct page *page, unsigned int order)
 #endif
 static unsigned long hugetlb_cma_size __initdata;
 
-/*
- * Minimum page order among possible hugepage sizes, set to a proper value
- * at boot time.
- */
-static unsigned int minimum_order __read_mostly = UINT_MAX;
-
 __initdata LIST_HEAD(huge_boot_pages);
 
 /* for command line parsing */
@@ -1135,7 +1129,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 
        lockdep_assert_held(&hugetlb_lock);
        list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
-               if (pin && !is_pinnable_page(page))
+               if (pin && !is_longterm_pinnable_page(page))
                        continue;
 
                if (PageHWPoison(page))
@@ -2152,11 +2146,17 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
        unsigned long pfn;
        struct page *page;
        int rc = 0;
+       unsigned int order;
+       struct hstate *h;
 
        if (!hugepages_supported())
                return rc;
 
-       for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << minimum_order) {
+       order = huge_page_order(&default_hstate);
+       for_each_hstate(h)
+               order = min(order, huge_page_order(h));
+
+       for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order) {
                page = pfn_to_page(pfn);
                rc = dissolve_free_huge_page(page);
                if (rc)
@@ -2766,8 +2766,7 @@ retry:
                 * Fail with -EBUSY if not possible.
                 */
                spin_unlock_irq(&hugetlb_lock);
-               if (!isolate_huge_page(old_page, list))
-                       ret = -EBUSY;
+               ret = isolate_hugetlb(old_page, list);
                spin_lock_irq(&hugetlb_lock);
                goto free_new;
        } else if (!HPageFreed(old_page)) {
@@ -2843,7 +2842,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
        if (hstate_is_gigantic(h))
                return -ENOMEM;
 
-       if (page_count(head) && isolate_huge_page(head, list))
+       if (page_count(head) && !isolate_hugetlb(head, list))
                ret = 0;
        else if (!page_count(head))
                ret = alloc_and_dissolve_huge_page(h, head, list);
@@ -3149,9 +3148,6 @@ static void __init hugetlb_init_hstates(void)
        struct hstate *h, *h2;
 
        for_each_hstate(h) {
-               if (minimum_order > huge_page_order(h))
-                       minimum_order = huge_page_order(h);
-
                /* oversize hugepages were init'ed in early boot */
                if (!hstate_is_gigantic(h))
                        hugetlb_hstate_alloc_pages(h);
@@ -3176,7 +3172,6 @@ static void __init hugetlb_init_hstates(void)
                                h->demote_order = h2->order;
                }
        }
-       VM_BUG_ON(minimum_order == UINT_MAX);
 }
 
 static void __init report_hugepages(void)
@@ -4482,22 +4477,20 @@ int hugetlb_report_node_meminfo(char *buf, int len, int nid)
                             nid, h->surplus_huge_pages_node[nid]);
 }
 
-void hugetlb_show_meminfo(void)
+void hugetlb_show_meminfo_node(int nid)
 {
        struct hstate *h;
-       int nid;
 
        if (!hugepages_supported())
                return;
 
-       for_each_node_state(nid, N_MEMORY)
-               for_each_hstate(h)
-                       pr_info("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
-                               nid,
-                               h->nr_huge_pages_node[nid],
-                               h->free_huge_pages_node[nid],
-                               h->surplus_huge_pages_node[nid],
-                               huge_page_size(h) / SZ_1K);
+       for_each_hstate(h)
+               printk("Node %d hugepages_total=%u hugepages_free=%u hugepages_surp=%u hugepages_size=%lukB\n",
+                       nid,
+                       h->nr_huge_pages_node[nid],
+                       h->free_huge_pages_node[nid],
+                       h->surplus_huge_pages_node[nid],
+                       huge_page_size(h) / SZ_1K);
 }
 
 void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm)
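The old hugetlb_show_meminfo() walked every N_MEMORY node itself; with hugetlb_show_meminfo_node() the caller supplies the node, so the iteration (and any node filtering) moves to the call site. A minimal caller-side sketch, not part of this patch; the wrapper name and the simple loop are assumptions about how a show_mem()-style path might use the new helper:

	/* Hypothetical wrapper: drive the per-node iteration at the call site. */
	static void show_hugetlb_meminfo_all_nodes(void)
	{
		int nid;

		for_each_node_state(nid, N_MEMORY)
			hugetlb_show_meminfo_node(nid);	/* replaces hugetlb_show_meminfo() */
	}
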
@@ -4732,6 +4725,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
        unsigned long npages = pages_per_huge_page(h);
        struct address_space *mapping = src_vma->vm_file->f_mapping;
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        int ret = 0;
 
        if (cow) {
@@ -4751,11 +4745,14 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                i_mmap_lock_read(mapping);
        }
 
+       last_addr_mask = hugetlb_mask_last_page(h);
        for (addr = src_vma->vm_start; addr < src_vma->vm_end; addr += sz) {
                spinlock_t *src_ptl, *dst_ptl;
                src_pte = huge_pte_offset(src, addr, sz);
-               if (!src_pte)
+               if (!src_pte) {
+                       addr |= last_addr_mask;
                        continue;
+               }
                dst_pte = huge_pte_alloc(dst, dst_vma, addr, sz);
                if (!dst_pte) {
                        ret = -ENOMEM;
@@ -4772,8 +4769,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                 * after taking the lock below.
                 */
                dst_entry = huge_ptep_get(dst_pte);
-               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry))
+               if ((dst_pte == src_pte) || !huge_pte_none(dst_entry)) {
+                       addr |= last_addr_mask;
                        continue;
+               }
 
                dst_ptl = huge_pte_lock(h, dst, dst_pte);
                src_ptl = huge_pte_lockptr(h, src, src_pte);
@@ -4808,12 +4807,11 @@ again:
                                entry = swp_entry_to_pte(swp_entry);
                                if (userfaultfd_wp(src_vma) && uffd_wp)
                                        entry = huge_pte_mkuffd_wp(entry);
-                               set_huge_swap_pte_at(src, addr, src_pte,
-                                                    entry, sz);
+                               set_huge_pte_at(src, addr, src_pte, entry);
                        }
                        if (!userfaultfd_wp(dst_vma) && uffd_wp)
                                entry = huge_pte_clear_uffd_wp(entry);
-                       set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
+                       set_huge_pte_at(dst, addr, dst_pte, entry);
                } else if (unlikely(is_pte_marker(entry))) {
                        /*
                         * We copy the pte marker only if the dst vma has
@@ -4880,7 +4878,7 @@ again:
                                 * table protection not changing it to point
                                 * to a new page.
                                 *
-                                * See Documentation/vm/mmu_notifier.rst
+                                * See Documentation/mm/mmu_notifier.rst
                                 */
                                huge_ptep_set_wrprotect(src, addr, src_pte);
                                entry = huge_pte_wrprotect(entry);
@@ -4939,7 +4937,7 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
        unsigned long sz = huge_page_size(h);
        struct mm_struct *mm = vma->vm_mm;
        unsigned long old_end = old_addr + len;
-       unsigned long old_addr_copy;
+       unsigned long last_addr_mask;
        pte_t *src_pte, *dst_pte;
        struct mmu_notifier_range range;
        bool shared_pmd = false;
@@ -4954,23 +4952,23 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
        flush_cache_range(vma, range.start, range.end);
 
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        /* Prevent race with file truncation */
        i_mmap_lock_write(mapping);
        for (; old_addr < old_end; old_addr += sz, new_addr += sz) {
                src_pte = huge_pte_offset(mm, old_addr, sz);
-               if (!src_pte)
+               if (!src_pte) {
+                       old_addr |= last_addr_mask;
+                       new_addr |= last_addr_mask;
                        continue;
+               }
                if (huge_pte_none(huge_ptep_get(src_pte)))
                        continue;
 
-               /* old_addr arg to huge_pmd_unshare() is a pointer and so the
-                * arg may be modified. Pass a copy instead to preserve the
-                * value in old_addr.
-                */
-               old_addr_copy = old_addr;
-
-               if (huge_pmd_unshare(mm, vma, &old_addr_copy, src_pte)) {
+               if (huge_pmd_unshare(mm, vma, old_addr, src_pte)) {
                        shared_pmd = true;
+                       old_addr |= last_addr_mask;
+                       new_addr |= last_addr_mask;
                        continue;
                }
 
@@ -5004,6 +5002,7 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        bool force_flush = false;
 
        WARN_ON(!is_vm_hugetlb_page(vma));
@@ -5024,17 +5023,21 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
                                end);
        adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        address = start;
        for (; address < end; address += sz) {
                ptep = huge_pte_offset(mm, address, sz);
-               if (!ptep)
+               if (!ptep) {
+                       address |= last_addr_mask;
                        continue;
+               }
 
                ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, address, ptep)) {
                        spin_unlock(ptl);
                        tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE);
                        force_flush = true;
+                       address |= last_addr_mask;
                        continue;
                }
 
@@ -5714,7 +5717,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                 */
                entry = huge_ptep_get(ptep);
                if (unlikely(is_hugetlb_entry_migration(entry))) {
-                       migration_entry_wait_huge(vma, mm, ptep);
+                       migration_entry_wait_huge(vma, ptep);
                        return 0;
                } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                        return VM_FAULT_HWPOISON_LARGE |
@@ -6052,8 +6055,6 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 
        set_huge_pte_at(dst_mm, dst_addr, dst_pte, _dst_pte);
 
-       (void)huge_ptep_set_access_flags(dst_vma, dst_addr, dst_pte, _dst_pte,
-                                       dst_vma->vm_flags & VM_WRITE);
        hugetlb_count_add(pages_per_huge_page(h), dst_mm);
 
        /* No need to invalidate - it was non-present before */
@@ -6305,6 +6306,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        unsigned long pages = 0, psize = huge_page_size(h);
        bool shared_pmd = false;
        struct mmu_notifier_range range;
+       unsigned long last_addr_mask;
        bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
        bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
 
@@ -6321,14 +6323,17 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        flush_cache_range(vma, range.start, range.end);
 
        mmu_notifier_invalidate_range_start(&range);
+       last_addr_mask = hugetlb_mask_last_page(h);
        i_mmap_lock_write(vma->vm_file->f_mapping);
        for (; address < end; address += psize) {
                spinlock_t *ptl;
                ptep = huge_pte_offset(mm, address, psize);
-               if (!ptep)
+               if (!ptep) {
+                       address |= last_addr_mask;
                        continue;
+               }
                ptl = huge_pte_lock(h, mm, ptep);
-               if (huge_pmd_unshare(mm, vma, &address, ptep)) {
+               if (huge_pmd_unshare(mm, vma, address, ptep)) {
                        /*
                         * When uffd-wp is enabled on the vma, unshare
                         * shouldn't happen at all.  Warn about it if it
@@ -6338,6 +6343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                        pages++;
                        spin_unlock(ptl);
                        shared_pmd = true;
+                       address |= last_addr_mask;
                        continue;
                }
                pte = huge_ptep_get(ptep);
@@ -6363,8 +6369,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
                                        newpte = pte_swp_mkuffd_wp(newpte);
                                else if (uffd_wp_resolve)
                                        newpte = pte_swp_clear_uffd_wp(newpte);
-                               set_huge_swap_pte_at(mm, address, ptep,
-                                                    newpte, psize);
+                               set_huge_pte_at(mm, address, ptep, newpte);
                                pages++;
                        }
                        spin_unlock(ptl);
@@ -6415,7 +6420,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
         * No need to call mmu_notifier_invalidate_range() we are downgrading
         * page table protection not changing it to point to a new page.
         *
-        * See Documentation/vm/mmu_notifier.rst
+        * See Documentation/mm/mmu_notifier.rst
         */
        i_mmap_unlock_write(vma->vm_file->f_mapping);
        mmu_notifier_invalidate_range_end(&range);
@@ -6761,11 +6766,11 @@ out:
  *         0 the underlying pte page is not shared, or it is the last user
  */
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-                                       unsigned long *addr, pte_t *ptep)
+                                       unsigned long addr, pte_t *ptep)
 {
-       pgd_t *pgd = pgd_offset(mm, *addr);
-       p4d_t *p4d = p4d_offset(pgd, *addr);
-       pud_t *pud = pud_offset(p4d, *addr);
+       pgd_t *pgd = pgd_offset(mm, addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
+       pud_t *pud = pud_offset(p4d, addr);
 
        i_mmap_assert_write_locked(vma->vm_file->f_mapping);
        BUG_ON(page_count(virt_to_page(ptep)) == 0);
@@ -6775,14 +6780,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
        pud_clear(pud);
        put_page(virt_to_page(ptep));
        mm_dec_nr_pmds(mm);
-       /*
-        * This update of passed address optimizes loops sequentially
-        * processing addresses in increments of huge page size (PMD_SIZE
-        * in this case).  By clearing the pud, a PUD_SIZE area is unmapped.
-        * Update address to the 'last page' in the cleared area so that
-        * calling loop can move to first page past this area.
-        */
-       *addr |= PUD_SIZE - PMD_SIZE;
        return 1;
 }
 
@@ -6794,7 +6791,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
-                               unsigned long *addr, pte_t *ptep)
+                               unsigned long addr, pte_t *ptep)
 {
        return 0;
 }
@@ -6877,6 +6874,37 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
        return (pte_t *)pmd;
 }
 
+/*
+ * Return a mask that can be used to update an address to the last huge
+ * page in a page table page mapping size.  Used to skip non-present
+ * page table entries when linearly scanning address ranges.  Architectures
+ * with unique huge page to page table relationships can define their own
+ * version of this routine.
+ */
+unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+       unsigned long hp_size = huge_page_size(h);
+
+       if (hp_size == PUD_SIZE)
+               return P4D_SIZE - PUD_SIZE;
+       else if (hp_size == PMD_SIZE)
+               return PUD_SIZE - PMD_SIZE;
+       else
+               return 0UL;
+}
+
+#else
+
+/* See description above.  Architectures can provide their own version. */
+__weak unsigned long hugetlb_mask_last_page(struct hstate *h)
+{
+#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
+       if (huge_page_size(h) == PMD_SIZE)
+               return PUD_SIZE - PMD_SIZE;
+#endif
+       return 0UL;
+}
+
 #endif /* CONFIG_ARCH_WANT_GENERAL_HUGETLB */
 
 /*
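The value returned by hugetlb_mask_last_page() is consumed with a bitwise OR: when huge_pte_offset() finds no page-table page (or huge_pmd_unshare() tears one down), the scanning loops changed above do addr |= last_addr_mask, so the loop's addr += sz step lands on the first address past the PUD- or P4D-sized region that page-table page would have covered. A minimal sketch of that skip idiom, mirroring the converted loops; the function name and loop body are illustrative only, not taken from the patch:

	/* Illustrative scanner using the skip idiom introduced in this patch. */
	static void scan_huge_range(struct mm_struct *mm, struct hstate *h,
				    unsigned long start, unsigned long end)
	{
		unsigned long sz = huge_page_size(h);
		unsigned long last_addr_mask = hugetlb_mask_last_page(h);
		unsigned long addr;
		pte_t *ptep;

		for (addr = start; addr < end; addr += sz) {
			ptep = huge_pte_offset(mm, addr, sz);
			if (!ptep) {
				/*
				 * No page-table page maps this range: jump to its
				 * last huge page so the loop increment skips the
				 * rest of the covered area in one step.
				 */
				addr |= last_addr_mask;
				continue;
			}
			/* ... process the entry at *ptep ... */
		}
	}
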
@@ -6940,7 +6968,7 @@ retry:
        } else {
                if (is_hugetlb_entry_migration(pte)) {
                        spin_unlock(ptl);
-                       __migration_entry_wait(mm, (pte_t *)pmd, ptl);
+                       __migration_entry_wait_huge((pte_t *)pmd, ptl);
                        goto retry;
                }
                /*
@@ -6972,15 +7000,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla
        return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
 }
 
-bool isolate_huge_page(struct page *page, struct list_head *list)
+int isolate_hugetlb(struct page *page, struct list_head *list)
 {
-       bool ret = true;
+       int ret = 0;
 
        spin_lock_irq(&hugetlb_lock);
        if (!PageHeadHuge(page) ||
            !HPageMigratable(page) ||
            !get_page_unless_zero(page)) {
-               ret = false;
+               ret = -EBUSY;
                goto unlock;
        }
        ClearHPageMigratable(page);
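isolate_huge_page() reported success as a bool; isolate_hugetlb() switches to the usual kernel 0 / -EBUSY convention, which is why the call sites earlier in this patch flip their tests (ret = isolate_hugetlb(old_page, list), and page_count(head) && !isolate_hugetlb(head, list)). A minimal sketch of adapting a caller, not taken from the patch; the wrapper is hypothetical:

	/* Hypothetical caller: the old bool test becomes a plain errno return. */
	static int try_isolate(struct page *page, struct list_head *list)
	{
		/* Before: if (!isolate_huge_page(page, list)) return -EBUSY; */
		return isolate_hugetlb(page, list);	/* 0 on success, -EBUSY otherwise */
	}
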
@@ -7100,21 +7128,18 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
        mmu_notifier_invalidate_range_start(&range);
        i_mmap_lock_write(vma->vm_file->f_mapping);
        for (address = start; address < end; address += PUD_SIZE) {
-               unsigned long tmp = address;
-
                ptep = huge_pte_offset(mm, address, sz);
                if (!ptep)
                        continue;
                ptl = huge_pte_lock(h, mm, ptep);
-               /* We don't want 'address' to be changed */
-               huge_pmd_unshare(mm, vma, &tmp, ptep);
+               huge_pmd_unshare(mm, vma, address, ptep);
                spin_unlock(ptl);
        }
        flush_hugetlb_tlb_range(vma, start, end);
        i_mmap_unlock_write(vma->vm_file->f_mapping);
        /*
         * No need to call mmu_notifier_invalidate_range(), see
-        * Documentation/vm/mmu_notifier.rst.
+        * Documentation/mm/mmu_notifier.rst.
         */
        mmu_notifier_invalidate_range_end(&range);
 }