diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7ff29cc..9474dbc 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1074,6 +1074,24 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
        src_page = pmd_page(pmd);
        VM_BUG_ON_PAGE(!PageHead(src_page), src_page);
+
+       /*
+        * If this page is potentially pinned, split it and retry the fault
+        * with a smaller page size.  Normally this should not happen because
+        * userspace should use MADV_DONTFORK on pinned regions.  This is a
+        * best-effort attempt to keep the pinned pages from being replaced by
+        * another random page during the upcoming copy-on-write.
+        */
+       if (unlikely(is_cow_mapping(vma->vm_flags) &&
+                    atomic_read(&src_mm->has_pinned) &&
+                    page_maybe_dma_pinned(src_page))) {
+               pte_free(dst_mm, pgtable);
+               spin_unlock(src_ptl);
+               spin_unlock(dst_ptl);
+               __split_huge_pmd(vma, src_pmd, addr, false, NULL);
+               return -EAGAIN;
+       }
+
        get_page(src_page);
        page_dup_rmap(src_page, true);
        add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
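
For context on the new guard (and the matching check added to copy_huge_pud() further down): it is a single predicate over symbols that already appear in this hunk. A minimal sketch, written as a hypothetical helper that is not part of the patch, of the question fork() asks before it splits and retries:

    /*
     * Hypothetical helper, for illustration only: true when a huge page
     * backing a private (CoW) mapping may be pinned for DMA.  In that case
     * copy_huge_pmd()/copy_huge_pud() split the huge entry and return
     * -EAGAIN so the copy is retried at a smaller page size, where a
     * maybe-pinned page can be copied up front instead of being shared
     * write-protected with the child.
     */
    static inline bool fork_may_need_early_split(struct vm_area_struct *vma,
                                                 struct mm_struct *src_mm,
                                                 struct page *src_page)
    {
            return is_cow_mapping(vma->vm_flags) &&
                   atomic_read(&src_mm->has_pinned) &&
                   page_maybe_dma_pinned(src_page);
    }
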
@@ -1177,6 +1195,16 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                /* No huge zero pud yet */
        }
 
+       /* Please refer to comments in copy_huge_pmd() */
+       if (unlikely(is_cow_mapping(vma->vm_flags) &&
+                    atomic_read(&src_mm->has_pinned) &&
+                    page_maybe_dma_pinned(pud_page(pud)))) {
+               spin_unlock(src_ptl);
+               spin_unlock(dst_ptl);
+               __split_huge_pud(vma, src_pud, addr);
+               return -EAGAIN;
+       }
+
        pudp_set_wrprotect(src_mm, addr, src_pud);
        pud = pud_mkold(pud_wrprotect(pud));
        set_pud_at(dst_mm, addr, dst_pud, pud);
@@ -2022,7 +2050,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                put_page(page);
                add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR);
                return;
-       } else if (is_huge_zero_pmd(*pmd)) {
+       } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) {
                /*
                 * FIXME: Do we want to invalidate secondary mmu by calling
                 * mmu_notifier_invalidate_range() see comments below inside
@@ -2116,30 +2144,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                pte = pte_offset_map(&_pmd, addr);
                BUG_ON(!pte_none(*pte));
                set_pte_at(mm, addr, pte, entry);
-               atomic_inc(&page[i]._mapcount);
-               pte_unmap(pte);
-       }
-
-       /*
-        * Set PG_double_map before dropping compound_mapcount to avoid
-        * false-negative page_mapped().
-        */
-       if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) {
-               for (i = 0; i < HPAGE_PMD_NR; i++)
+               if (!pmd_migration)
                        atomic_inc(&page[i]._mapcount);
+               pte_unmap(pte);
        }
 
-       lock_page_memcg(page);
-       if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
-               /* Last compound_mapcount is gone. */
-               __dec_lruvec_page_state(page, NR_ANON_THPS);
-               if (TestClearPageDoubleMap(page)) {
-                       /* No need in mapcount reference anymore */
+       if (!pmd_migration) {
+               /*
+                * Set PG_double_map before dropping compound_mapcount to avoid
+                * false-negative page_mapped().
+                */
+               if (compound_mapcount(page) > 1 &&
+                   !TestSetPageDoubleMap(page)) {
                        for (i = 0; i < HPAGE_PMD_NR; i++)
-                               atomic_dec(&page[i]._mapcount);
+                               atomic_inc(&page[i]._mapcount);
                }
+
+               lock_page_memcg(page);
+               if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
+                       /* Last compound_mapcount is gone. */
+                       __dec_lruvec_page_state(page, NR_ANON_THPS);
+                       if (TestClearPageDoubleMap(page)) {
+                               /* No need for the mapcount reference anymore */
+                               for (i = 0; i < HPAGE_PMD_NR; i++)
+                                       atomic_dec(&page[i]._mapcount);
+                       }
+               }
+               unlock_page_memcg(page);
        }
-       unlock_page_memcg(page);
 
        smp_wmb(); /* make pte visible before pmd */
        pmd_populate(mm, pmd, pgtable);
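
The restructuring above keys all of the rmap and memcg accounting off pmd_migration: when the PMD being split is a migration entry, the PTEs written in the loop are themselves per-subpage migration entries rather than present mappings, so there is no _mapcount to take and no lock_page_memcg()-protected compound_mapcount/PG_double_map bookkeeping to do. A simplified sketch (hypothetical helper name; the real logic is inline in __split_huge_pmd_locked(), and soft-dirty/uffd-wp bits are omitted) of the kind of PTE each subpage slot receives in the migration case:

    /*
     * Illustration only: the non-present PTE that stands in for one subpage
     * while a PMD migration entry is split.  Being a migration (swap-type)
     * entry it maps nothing, which is why the accounting above is skipped
     * when pmd_migration is true.
     */
    static pte_t migration_subpage_pte(struct page *subpage, bool write)
    {
            swp_entry_t swp_entry = make_migration_entry(subpage, write);

            return swp_entry_to_pte(swp_entry);
    }
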
@@ -2274,13 +2306,13 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
 
        /*
         * If we're also updating the vma->vm_next->vm_start, if the new
-        * vm_next->vm_start isn't page aligned and it could previously
+        * vm_next->vm_start isn't hpage aligned and it could previously
         * contain a hugepage: check if we need to split a huge pmd.
         */
        if (adjust_next > 0) {
                struct vm_area_struct *next = vma->vm_next;
                unsigned long nstart = next->vm_start;
-               nstart += adjust_next << PAGE_SHIFT;
+               nstart += adjust_next;
                if (nstart & ~HPAGE_PMD_MASK &&
                    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
                    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
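
Two things change in the hunk above: the comment now says "hpage aligned", since the granularity that matters here is HPAGE_PMD_SIZE rather than PAGE_SIZE, and the "<< PAGE_SHIFT" is dropped because adjust_next is now carried as a byte offset (per the matching mm/mmap.c change, not shown here), so it can be added to vm_start directly. The split condition itself is unchanged; as a standalone predicate (hypothetical helper, for illustration only) it reads:

    /*
     * Illustration only: a huge PMD may need splitting when the new boundary
     * address is not huge-page aligned, yet the aligned huge page containing
     * it still lies entirely within [vm_start, vm_end) and so could hold a THP.
     */
    static bool boundary_may_cross_thp(unsigned long addr,
                                       unsigned long vm_start,
                                       unsigned long vm_end)
    {
            return (addr & ~HPAGE_PMD_MASK) &&
                   (addr & HPAGE_PMD_MASK) >= vm_start &&
                   (addr & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vm_end;
    }
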
@@ -2303,13 +2335,13 @@ static void unmap_page(struct page *page)
        VM_BUG_ON_PAGE(!unmap_success, page);
 }
 
-static void remap_page(struct page *page)
+static void remap_page(struct page *page, unsigned int nr)
 {
        int i;
        if (PageTransHuge(page)) {
                remove_migration_ptes(page, page, true);
        } else {
-               for (i = 0; i < HPAGE_PMD_NR; i++)
+               for (i = 0; i < nr; i++)
                        remove_migration_ptes(page + i, page + i, true);
        }
 }
@@ -2338,6 +2370,9 @@ static void __split_huge_page_tail(struct page *head, int tail,
                         (1L << PG_workingset) |
                         (1L << PG_locked) |
                         (1L << PG_unevictable) |
+#ifdef CONFIG_64BIT
+                        (1L << PG_arch_2) |
+#endif
                         (1L << PG_dirty)));
 
        /* ->mapping in first tail page is compound_mapcount */
@@ -2384,6 +2419,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        struct lruvec *lruvec;
        struct address_space *swap_cache = NULL;
        unsigned long offset = 0;
+       unsigned int nr = thp_nr_pages(head);
        int i;
 
        lruvec = mem_cgroup_page_lruvec(head, pgdat);
@@ -2399,7 +2435,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
                xa_lock(&swap_cache->i_pages);
        }
 
-       for (i = HPAGE_PMD_NR - 1; i >= 1; i--) {
+       for (i = nr - 1; i >= 1; i--) {
                __split_huge_page_tail(head, i, lruvec, list);
                /* Some pages can be beyond i_size: drop them from page cache */
                if (head[i].index >= end) {
@@ -2419,7 +2455,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
        ClearPageCompound(head);
 
-       split_page_owner(head, HPAGE_PMD_ORDER);
+       split_page_owner(head, nr);
 
        /* See comment in __split_huge_page_tail() */
        if (PageAnon(head)) {
@@ -2438,9 +2474,15 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
        spin_unlock_irqrestore(&pgdat->lru_lock, flags);
 
-       remap_page(head);
+       remap_page(head, nr);
+
+       if (PageSwapCache(head)) {
+               swp_entry_t entry = { .val = page_private(head) };
+
+               split_swap_cluster(entry);
+       }
 
-       for (i = 0; i < HPAGE_PMD_NR; i++) {
+       for (i = 0; i < nr; i++) {
                struct page *subpage = head + i;
                if (subpage == page)
                        continue;
@@ -2459,7 +2501,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
 
 int total_mapcount(struct page *page)
 {
-       int i, compound, ret;
+       int i, compound, nr, ret;
 
        VM_BUG_ON_PAGE(PageTail(page), page);
 
@@ -2467,16 +2509,17 @@ int total_mapcount(struct page *page)
                return atomic_read(&page->_mapcount) + 1;
 
        compound = compound_mapcount(page);
+       nr = compound_nr(page);
        if (PageHuge(page))
                return compound;
        ret = compound;
-       for (i = 0; i < HPAGE_PMD_NR; i++)
+       for (i = 0; i < nr; i++)
                ret += atomic_read(&page[i]._mapcount) + 1;
        /* File pages have compound_mapcount included in _mapcount */
        if (!PageAnon(page))
-               return ret - compound * HPAGE_PMD_NR;
+               return ret - compound * nr;
        if (PageDoubleMap(page))
-               ret -= HPAGE_PMD_NR;
+               ret -= nr;
        return ret;
 }
 
@@ -2521,14 +2564,14 @@ int page_trans_huge_mapcount(struct page *page, int *total_mapcount)
        page = compound_head(page);
 
        _total_mapcount = ret = 0;
-       for (i = 0; i < HPAGE_PMD_NR; i++) {
+       for (i = 0; i < thp_nr_pages(page); i++) {
                mapcount = atomic_read(&page[i]._mapcount) + 1;
                ret = max(ret, mapcount);
                _total_mapcount += mapcount;
        }
        if (PageDoubleMap(page)) {
                ret -= 1;
-               _total_mapcount -= HPAGE_PMD_NR;
+               _total_mapcount -= thp_nr_pages(page);
        }
        mapcount = compound_mapcount(page);
        ret += mapcount;
@@ -2545,9 +2588,9 @@ bool can_split_huge_page(struct page *page, int *pextra_pins)
 
        /* Additional pins from page cache */
        if (PageAnon(page))
-               extra_pins = PageSwapCache(page) ? HPAGE_PMD_NR : 0;
+               extra_pins = PageSwapCache(page) ? thp_nr_pages(page) : 0;
        else
-               extra_pins = HPAGE_PMD_NR;
+               extra_pins = thp_nr_pages(page);
        if (pextra_pins)
                *pextra_pins = extra_pins;
        return total_mapcount(page) == page_count(page) - extra_pins - 1;
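
The extra_pins calculation above moves from the fixed HPAGE_PMD_NR to thp_nr_pages() so the same test covers compound pages that are not PMD-sized. A worked example of the final equality, with the figures assumed for illustration:

    /*
     * Worked example (assumed setup): an anonymous THP of nr subpages that is
     * in the swap cache and mapped once by a single PMD has
     *
     *   total_mapcount(page) == 1        one compound mapping
     *   extra_pins           == nr       one swap-cache reference per subpage
     *   caller's reference   == 1        the "- 1" in the check
     *
     * so it is deemed splittable exactly when page_count(page) == nr + 2.
     * Any additional reference, e.g. a GUP pin, breaks the equality and the
     * split is refused with -EBUSY.
     */
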
@@ -2674,12 +2717,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                }
 
                __split_huge_page(page, list, end, flags);
-               if (PageSwapCache(head)) {
-                       swp_entry_t entry = { .val = page_private(head) };
-
-                       ret = split_swap_cluster(entry);
-               } else
-                       ret = 0;
+               ret = 0;
        } else {
                if (IS_ENABLED(CONFIG_DEBUG_VM) && mapcount) {
                        pr_alert("total_mapcount: %u, page_count(): %u\n",
@@ -2693,7 +2731,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 fail:          if (mapping)
                        xa_unlock(&mapping->i_pages);
                spin_unlock_irqrestore(&pgdata->lru_lock, flags);
-               remap_page(head);
+               remap_page(head, thp_nr_pages(head));
                ret = -EBUSY;
        }