Merge tag 'nand/for-5.19' into mtd/next
index f0af462..f598a03 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -29,107 +29,71 @@ struct follow_page_context {
        unsigned int page_mask;
 };
 
-static void hpage_pincount_add(struct page *page, int refs)
-{
-       VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
-       VM_BUG_ON_PAGE(page != compound_head(page), page);
-
-       atomic_add(refs, compound_pincount_ptr(page));
-}
-
-static void hpage_pincount_sub(struct page *page, int refs)
-{
-       VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
-       VM_BUG_ON_PAGE(page != compound_head(page), page);
-
-       atomic_sub(refs, compound_pincount_ptr(page));
-}
-
-/* Equivalent to calling put_page() @refs times. */
-static void put_page_refs(struct page *page, int refs)
-{
-#ifdef CONFIG_DEBUG_VM
-       if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
-               return;
-#endif
-
-       /*
-        * Calling put_page() for each ref is unnecessarily slow. Only the last
-        * ref needs a put_page().
-        */
-       if (refs > 1)
-               page_ref_sub(page, refs - 1);
-       put_page(page);
-}
-
 /*
- * Return the compound head page with ref appropriately incremented,
+ * Return the folio with ref appropriately incremented,
  * or NULL if that failed.
  */
-static inline struct page *try_get_compound_head(struct page *page, int refs)
+static inline struct folio *try_get_folio(struct page *page, int refs)
 {
-       struct page *head = compound_head(page);
+       struct folio *folio;
 
-       if (WARN_ON_ONCE(page_ref_count(head) < 0))
+retry:
+       folio = page_folio(page);
+       if (WARN_ON_ONCE(folio_ref_count(folio) < 0))
                return NULL;
-       if (unlikely(!page_cache_add_speculative(head, refs)))
+       if (unlikely(!folio_ref_try_add_rcu(folio, refs)))
                return NULL;
 
        /*
-        * At this point we have a stable reference to the head page; but it
-        * could be that between the compound_head() lookup and the refcount
-        * increment, the compound page was split, in which case we'd end up
-        * holding a reference on a page that has nothing to do with the page
+        * At this point we have a stable reference to the folio; but it
+        * could be that between calling page_folio() and the refcount
+        * increment, the folio was split, in which case we'd end up
+        * holding a reference on a folio that has nothing to do with the page
         * we were given anymore.
-        * So now that the head page is stable, recheck that the pages still
-        * belong together.
+        * So now that the folio is stable, recheck that the page still
+        * belongs to this folio.
         */
-       if (unlikely(compound_head(page) != head)) {
-               put_page_refs(head, refs);
-               return NULL;
+       if (unlikely(page_folio(page) != folio)) {
+               folio_put_refs(folio, refs);
+               goto retry;
        }
 
-       return head;
+       return folio;
 }
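The grab-then-recheck shape of try_get_folio() is the heart of the conversion: take the references speculatively on whatever folio the page currently belongs to, then re-read the association and, if the folio was split meanwhile, drop the references and retry instead of failing (the old try_get_compound_head() returned NULL at that point). A stand-alone C model of just that retry pattern, using made-up names (slot, owner) rather than real kernel structures, and ignoring that folio_ref_try_add_rcu() can also fail outright:

#include <stdatomic.h>
#include <stdio.h>

/* Toy stand-ins for the page -> folio association. */
struct owner { atomic_int refcount; };
struct slot  { _Atomic(struct owner *) owner; };

/* Speculatively take @refs references on whatever owner @s points at,
 * then recheck that the association did not change underneath us.
 * Mirrors the try_get_folio() retry loop in shape only. */
static struct owner *try_get_owner(struct slot *s, int refs)
{
	for (;;) {
		struct owner *o = atomic_load(&s->owner);

		if (atomic_load(&o->refcount) < 0)
			return NULL;
		atomic_fetch_add(&o->refcount, refs);
		/* Re-read: if the slot was re-assigned (folio split),
		 * drop the speculative references and retry. */
		if (atomic_load(&s->owner) == o)
			return o;
		atomic_fetch_sub(&o->refcount, refs);
	}
}

int main(void)
{
	struct owner o = { .refcount = 1 };
	struct slot s = { .owner = &o };

	if (try_get_owner(&s, 2))
		printf("refcount now %d\n", atomic_load(&o.refcount));
	return 0;
}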
 
 /**
- * try_grab_compound_head() - attempt to elevate a page's refcount, by a
- * flags-dependent amount.
- *
- * Even though the name includes "compound_head", this function is still
- * appropriate for callers that have a non-compound @page to get.
- *
+ * try_grab_folio() - Attempt to get or pin a folio.
  * @page:  pointer to page to be grabbed
- * @refs:  the value to (effectively) add to the page's refcount
+ * @refs:  the value to (effectively) add to the folio's refcount
  * @flags: gup flags: these are the FOLL_* flag values.
  *
  * "grab" names in this file mean, "look at flags to decide whether to use
- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.
  *
  * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
  * same time. (That's true throughout the get_user_pages*() and
  * pin_user_pages*() APIs.) Cases:
  *
- *    FOLL_GET: page's refcount will be incremented by @refs.
+ *    FOLL_GET: folio's refcount will be incremented by @refs.
  *
- *    FOLL_PIN on compound pages that are > two pages long: page's refcount will
- *    be incremented by @refs, and page[2].hpage_pinned_refcount will be
- *    incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ *    FOLL_PIN on large folios: folio's refcount will be incremented by
+ *    @refs, and its compound_pincount will be incremented by @refs.
  *
- *    FOLL_PIN on normal pages, or compound pages that are two pages long:
- *    page's refcount will be incremented by @refs * GUP_PIN_COUNTING_BIAS.
+ *    FOLL_PIN on single-page folios: folio's refcount will be incremented by
+ *    @refs * GUP_PIN_COUNTING_BIAS.
  *
- * Return: head page (with refcount appropriately incremented) for success, or
- * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's
- * considered failure, and furthermore, a likely bug in the caller, so a warning
- * is also emitted.
+ * Return: The folio containing @page (with refcount appropriately
+ * incremented) for success, or NULL upon failure. If neither FOLL_GET
+ * nor FOLL_PIN was set, that's considered failure, and furthermore,
+ * a likely bug in the caller, so a warning is also emitted.
  */
-struct page *try_grab_compound_head(struct page *page,
-                                   int refs, unsigned int flags)
+struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
 {
        if (flags & FOLL_GET)
-               return try_get_compound_head(page, refs);
+               return try_get_folio(page, refs);
        else if (flags & FOLL_PIN) {
+               struct folio *folio;
+
                /*
                 * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
                 * right zone, so fail and let the caller fall back to the slow
@@ -143,63 +107,57 @@ struct page *try_grab_compound_head(struct page *page,
                 * CAUTION: Don't use compound_head() on the page before this
                 * point, the result won't be stable.
                 */
-               page = try_get_compound_head(page, refs);
-               if (!page)
+               folio = try_get_folio(page, refs);
+               if (!folio)
                        return NULL;
 
                /*
-                * When pinning a compound page of order > 1 (which is what
-                * hpage_pincount_available() checks for), use an exact count to
-                * track it, via hpage_pincount_add/_sub().
+                * When pinning a large folio, use an exact count to track it.
                 *
-                * However, be sure to *also* increment the normal page refcount
-                * field at least once, so that the page really is pinned.
-                * That's why the refcount from the earlier
-                * try_get_compound_head() is left intact.
+                * However, be sure to *also* increment the normal folio
+                * refcount field at least once, so that the folio really
+                * is pinned.  That's why the refcount from the earlier
+                * try_get_folio() is left intact.
                 */
-               if (hpage_pincount_available(page))
-                       hpage_pincount_add(page, refs);
+               if (folio_test_large(folio))
+                       atomic_add(refs, folio_pincount_ptr(folio));
                else
-                       page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));
-
-               mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
-                                   refs);
+                       folio_ref_add(folio,
+                                       refs * (GUP_PIN_COUNTING_BIAS - 1));
+               node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);
 
-               return page;
+               return folio;
        }
 
        WARN_ON_ONCE(1);
        return NULL;
 }
 
-static void put_compound_head(struct page *page, int refs, unsigned int flags)
+static void gup_put_folio(struct folio *folio, int refs, unsigned int flags)
 {
        if (flags & FOLL_PIN) {
-               mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED,
-                                   refs);
-
-               if (hpage_pincount_available(page))
-                       hpage_pincount_sub(page, refs);
+               node_stat_mod_folio(folio, NR_FOLL_PIN_RELEASED, refs);
+               if (folio_test_large(folio))
+                       atomic_sub(refs, folio_pincount_ptr(folio));
                else
                        refs *= GUP_PIN_COUNTING_BIAS;
        }
 
-       put_page_refs(page, refs);
+       folio_put_refs(folio, refs);
 }
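The two accounting schemes in try_grab_folio() and gup_put_folio() mirror each other: a single-page folio takes GUP_PIN_COUNTING_BIAS refcount ticks per pin, while a large folio takes one ordinary reference per pin plus an exact count in compound_pincount. A small stand-alone model of that arithmetic (the 1024 bias is the kernel's real GUP_PIN_COUNTING_BIAS value; the rest is illustrative):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define GUP_PIN_COUNTING_BIAS 1024	/* same value as the kernel's */

struct toy_folio {
	bool large;
	long refcount;	/* folio_ref_count() stand-in */
	long pincount;	/* compound_pincount stand-in, large folios only */
};

static void pin(struct toy_folio *f, int refs)
{
	if (f->large) {
		f->refcount += refs;		/* keep it really pinned */
		f->pincount += refs;		/* exact pin tracking */
	} else {
		f->refcount += refs * GUP_PIN_COUNTING_BIAS;
	}
}

static void unpin(struct toy_folio *f, int refs)
{
	if (f->large) {
		f->pincount -= refs;
		f->refcount -= refs;
	} else {
		f->refcount -= refs * GUP_PIN_COUNTING_BIAS;
	}
}

/* Rough equivalent of folio_maybe_dma_pinned(): exact for large folios,
 * a heuristic (refcount >= bias) for single-page ones. */
static bool maybe_pinned(const struct toy_folio *f)
{
	return f->large ? f->pincount > 0
			: f->refcount >= GUP_PIN_COUNTING_BIAS;
}

int main(void)
{
	struct toy_folio small = { .large = false, .refcount = 1 };
	struct toy_folio big   = { .large = true,  .refcount = 1 };

	pin(&small, 1);
	pin(&big, 3);
	assert(maybe_pinned(&small) && maybe_pinned(&big));
	unpin(&small, 1);
	unpin(&big, 3);
	assert(!maybe_pinned(&small) && !maybe_pinned(&big));
	printf("pin accounting model OK\n");
	return 0;
}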
 
 /**
  * try_grab_page() - elevate a page's refcount by a flag-dependent amount
+ * @page:    pointer to page to be grabbed
+ * @flags:   gup flags: these are the FOLL_* flag values.
  *
  * This might not do anything at all, depending on the flags argument.
  *
  * "grab" names in this file mean, "look at flags to decide whether to use
  * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.
  *
- * @page:    pointer to page to be grabbed
- * @flags:   gup flags: these are the FOLL_* flag values.
- *
  * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
- * time. Cases: please see the try_grab_compound_head() documentation, with
+ * time. Cases: please see the try_grab_folio() documentation, with
  * "refs=1".
  *
  * Return: true for success, or if no action was required (if neither FOLL_PIN
@@ -208,10 +166,31 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags)
  */
 bool __must_check try_grab_page(struct page *page, unsigned int flags)
 {
-       if (!(flags & (FOLL_GET | FOLL_PIN)))
-               return true;
+       struct folio *folio = page_folio(page);
 
-       return try_grab_compound_head(page, 1, flags);
+       WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));
+       if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))
+               return false;
+
+       if (flags & FOLL_GET)
+               folio_ref_inc(folio);
+       else if (flags & FOLL_PIN) {
+               /*
+                * Similar to try_grab_folio(): be sure to *also*
+                * increment the normal page refcount field at least once,
+                * so that the page really is pinned.
+                */
+               if (folio_test_large(folio)) {
+                       folio_ref_add(folio, 1);
+                       atomic_add(1, folio_pincount_ptr(folio));
+               } else {
+                       folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);
+               }
+
+               node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);
+       }
+
+       return true;
 }
 
 /**
@@ -225,62 +204,40 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags)
  */
 void unpin_user_page(struct page *page)
 {
-       put_compound_head(compound_head(page), 1, FOLL_PIN);
+       gup_put_folio(page_folio(page), 1, FOLL_PIN);
 }
 EXPORT_SYMBOL(unpin_user_page);
 
-static inline void compound_range_next(unsigned long i, unsigned long npages,
-                                      struct page **list, struct page **head,
-                                      unsigned int *ntails)
+static inline struct folio *gup_folio_range_next(struct page *start,
+               unsigned long npages, unsigned long i, unsigned int *ntails)
 {
-       struct page *next, *page;
+       struct page *next = nth_page(start, i);
+       struct folio *folio = page_folio(next);
        unsigned int nr = 1;
 
-       if (i >= npages)
-               return;
-
-       next = *list + i;
-       page = compound_head(next);
-       if (PageCompound(page) && compound_order(page) >= 1)
-               nr = min_t(unsigned int,
-                          page + compound_nr(page) - next, npages - i);
+       if (folio_test_large(folio))
+               nr = min_t(unsigned int, npages - i,
+                          folio_nr_pages(folio) - folio_page_idx(folio, next));
 
-       *head = page;
        *ntails = nr;
+       return folio;
 }
 
-#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
-       for (__i = 0, \
-            compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
-            __i < __npages; __i += __ntails, \
-            compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
-
-static inline void compound_next(unsigned long i, unsigned long npages,
-                                struct page **list, struct page **head,
-                                unsigned int *ntails)
+static inline struct folio *gup_folio_next(struct page **list,
+               unsigned long npages, unsigned long i, unsigned int *ntails)
 {
-       struct page *page;
+       struct folio *folio = page_folio(list[i]);
        unsigned int nr;
 
-       if (i >= npages)
-               return;
-
-       page = compound_head(list[i]);
        for (nr = i + 1; nr < npages; nr++) {
-               if (compound_head(list[nr]) != page)
+               if (page_folio(list[nr]) != folio)
                        break;
        }
 
-       *head = page;
        *ntails = nr - i;
+       return folio;
 }
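gup_folio_range_next() and gup_folio_next() above, together with the plain for loops in their callers, replace the for_each_compound_range()/for_each_compound_head() macros (the latter is deleted just below): find how many consecutive entries share a folio, return that folio and the run length, and let the caller advance by the run length. The grouping idea in isolation, with a toy owner_of() standing in for page_folio():

#include <stdio.h>

/* Pretend these ids are what page_folio() would return. */
static int owner_of(const int *pages, unsigned long i)
{
	return pages[i];
}

/* Return the owner of pages[i] and, via *ntails, how many consecutive
 * entries starting at i share that owner -- the shape of gup_folio_next(). */
static int group_next(const int *pages, unsigned long npages,
		      unsigned long i, unsigned int *ntails)
{
	int owner = owner_of(pages, i);
	unsigned long nr;

	for (nr = i + 1; nr < npages; nr++)
		if (owner_of(pages, nr) != owner)
			break;

	*ntails = nr - i;
	return owner;
}

int main(void)
{
	int pages[] = { 7, 7, 7, 3, 3, 9 };	/* three "folios" */
	unsigned long npages = 6, i;
	unsigned int nr;

	for (i = 0; i < npages; i += nr) {
		int owner = group_next(pages, npages, i, &nr);
		printf("folio %d: %u page(s)\n", owner, nr);
	}
	return 0;
}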
 
-#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
-       for (__i = 0, \
-            compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
-            __i < __npages; __i += __ntails, \
-            compound_next(__i, __npages, __list, &(__head), &(__ntails)))
-
 /**
  * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
  * @pages:  array of pages to be maybe marked dirty, and definitely released.
@@ -306,16 +263,17 @@ static inline void compound_next(unsigned long i, unsigned long npages,
 void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
                                 bool make_dirty)
 {
-       unsigned long index;
-       struct page *head;
-       unsigned int ntails;
+       unsigned long i;
+       struct folio *folio;
+       unsigned int nr;
 
        if (!make_dirty) {
                unpin_user_pages(pages, npages);
                return;
        }
 
-       for_each_compound_head(index, pages, npages, head, ntails) {
+       for (i = 0; i < npages; i += nr) {
+               folio = gup_folio_next(pages, npages, i, &nr);
                /*
                 * Checking PageDirty at this point may race with
                 * clear_page_dirty_for_io(), but that's OK. Two key
@@ -336,9 +294,12 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
                 * written back, so it gets written back again in the
                 * next writeback cycle. This is harmless.
                 */
-               if (!PageDirty(head))
-                       set_page_dirty_lock(head);
-               put_compound_head(head, ntails, FOLL_PIN);
+               if (!folio_test_dirty(folio)) {
+                       folio_lock(folio);
+                       folio_mark_dirty(folio);
+                       folio_unlock(folio);
+               }
+               gup_put_folio(folio, nr, FOLL_PIN);
        }
 }
 EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
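For context, the typical caller of unpin_user_pages_dirty_lock() is a driver that pinned a user buffer for a device write and now releases the pins, dirtying the pages the device touched. A minimal sketch of that pattern, assuming a driver context; demo_dma_to_user() and the commented-out DMA step are placeholders, not real APIs:

#include <linux/mm.h>
#include <linux/slab.h>

/* Sketch only: pin a user buffer for a device write, then release the
 * pins and mark the pages dirty.  A real driver would also cope with a
 * short pin (pinned < nr_pages) and with the DMA mapping itself. */
static int demo_dma_to_user(unsigned long uaddr, int nr_pages)
{
	struct page **pages;
	int pinned;

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
	if (pinned <= 0) {
		kfree(pages);
		return pinned ? pinned : -EFAULT;
	}

	/* ... do_device_dma(pages, pinned) would go here ... */

	/* make_dirty == true: the device wrote into the pages. */
	unpin_user_pages_dirty_lock(pages, pinned, true);
	kfree(pages);
	return 0;
}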
@@ -367,14 +328,18 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
                                      bool make_dirty)
 {
-       unsigned long index;
-       struct page *head;
-       unsigned int ntails;
+       unsigned long i;
+       struct folio *folio;
+       unsigned int nr;
 
-       for_each_compound_range(index, &page, npages, head, ntails) {
-               if (make_dirty && !PageDirty(head))
-                       set_page_dirty_lock(head);
-               put_compound_head(head, ntails, FOLL_PIN);
+       for (i = 0; i < npages; i += nr) {
+               folio = gup_folio_range_next(page, npages, i, &nr);
+               if (make_dirty && !folio_test_dirty(folio)) {
+                       folio_lock(folio);
+                       folio_mark_dirty(folio);
+                       folio_unlock(folio);
+               }
+               gup_put_folio(folio, nr, FOLL_PIN);
        }
 }
 EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
@@ -390,9 +355,9 @@ EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
  */
 void unpin_user_pages(struct page **pages, unsigned long npages)
 {
-       unsigned long index;
-       struct page *head;
-       unsigned int ntails;
+       unsigned long i;
+       struct folio *folio;
+       unsigned int nr;
 
        /*
         * If this WARN_ON() fires, then the system *might* be leaking pages (by
@@ -402,8 +367,10 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
        if (WARN_ON(IS_ERR_VALUE(npages)))
                return;
 
-       for_each_compound_head(index, pages, npages, head, ntails)
-               put_compound_head(head, ntails, FOLL_PIN);
+       for (i = 0; i < npages; i += nr) {
+               folio = gup_folio_next(pages, npages, i, &nr);
+               gup_put_folio(folio, nr, FOLL_PIN);
+       }
 }
 EXPORT_SYMBOL(unpin_user_pages);
 
@@ -439,10 +406,6 @@ static struct page *no_page_table(struct vm_area_struct *vma,
 static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
                pte_t *pte, unsigned int flags)
 {
-       /* No page to get reference */
-       if (flags & FOLL_GET)
-               return -EFAULT;
-
        if (flags & FOLL_TOUCH) {
                pte_t entry = *pte;
 
@@ -572,32 +535,6 @@ retry:
                 */
                mark_page_accessed(page);
        }
-       if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
-               /* Do not mlock pte-mapped THP */
-               if (PageTransCompound(page))
-                       goto out;
-
-               /*
-                * The preliminary mapping check is mainly to avoid the
-                * pointless overhead of lock_page on the ZERO_PAGE
-                * which might bounce very badly if there is contention.
-                *
-                * If the page is already locked, we don't need to
-                * handle it now - vmscan will handle it later if and
-                * when it attempts to reclaim the page.
-                */
-               if (page->mapping && trylock_page(page)) {
-                       lru_add_drain();  /* push cached pages to LRU */
-                       /*
-                        * Because we lock page here, and migration is
-                        * blocked by the pte's page reference, and we
-                        * know the page is still mapped, we don't even
-                        * need to check for file-cache page truncation.
-                        */
-                       mlock_vma_page(page);
-                       unlock_page(page);
-               }
-       }
 out:
        pte_unmap_unlock(ptep, ptl);
        return page;
@@ -920,9 +857,6 @@ static int faultin_page(struct vm_area_struct *vma,
        unsigned int fault_flags = 0;
        vm_fault_t ret;
 
-       /* mlock all present pages, but do not fault in new pages */
-       if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
-               return -ENOENT;
        if (*flags & FOLL_NOFAULT)
                return -EFAULT;
        if (*flags & FOLL_WRITE)
@@ -1173,15 +1107,20 @@ retry:
                        case -ENOMEM:
                        case -EHWPOISON:
                                goto out;
-                       case -ENOENT:
-                               goto next_page;
                        }
                        BUG();
                } else if (PTR_ERR(page) == -EEXIST) {
                        /*
                         * Proper page table entry exists, but no corresponding
-                        * struct page.
+                        * struct page. If the caller expects **pages to be
+                        * filled in, bail out now, because that can't be done
+                        * for this page.
                         */
+                       if (pages) {
+                               ret = PTR_ERR(page);
+                               goto out;
+                       }
+
                        goto next_page;
                } else if (IS_ERR(page)) {
                        ret = PTR_ERR(page);
@@ -1465,6 +1404,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
        struct mm_struct *mm = vma->vm_mm;
        unsigned long nr_pages = (end - start) / PAGE_SIZE;
        int gup_flags;
+       long ret;
 
        VM_BUG_ON(!PAGE_ALIGNED(start));
        VM_BUG_ON(!PAGE_ALIGNED(end));
@@ -1472,9 +1412,14 @@ long populate_vma_page_range(struct vm_area_struct *vma,
        VM_BUG_ON_VMA(end   > vma->vm_end, vma);
        mmap_assert_locked(mm);
 
-       gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
+       /*
+        * Rightly or wrongly, the VM_LOCKONFAULT case has never used
+        * faultin_page() to break COW, so it has no work to do here.
+        */
        if (vma->vm_flags & VM_LOCKONFAULT)
-               gup_flags &= ~FOLL_POPULATE;
+               return nr_pages;
+
+       gup_flags = FOLL_TOUCH;
        /*
         * We want to touch writable mappings with a write fault in order
         * to break COW, except for shared mappings because these don't COW
@@ -1494,8 +1439,10 @@ long populate_vma_page_range(struct vm_area_struct *vma,
         * We made sure addr is within a VMA, so the following will
         * not result in a stack expansion that recurses back here.
         */
-       return __get_user_pages(mm, start, nr_pages, gup_flags,
+       ret = __get_user_pages(mm, start, nr_pages, gup_flags,
                                NULL, NULL, locked);
+       lru_add_drain();
+       return ret;
 }
 
 /*
@@ -1527,6 +1474,7 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
        struct mm_struct *mm = vma->vm_mm;
        unsigned long nr_pages = (end - start) / PAGE_SIZE;
        int gup_flags;
+       long ret;
 
        VM_BUG_ON(!PAGE_ALIGNED(start));
        VM_BUG_ON(!PAGE_ALIGNED(end));
@@ -1541,10 +1489,9 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
         *             in the page table.
         * FOLL_HWPOISON: Return -EHWPOISON instead of -EFAULT when we hit
         *                a poisoned page.
-        * FOLL_POPULATE: Always populate memory with VM_LOCKONFAULT.
         * !FOLL_FORCE: Require proper access permissions.
         */
-       gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK | FOLL_HWPOISON;
+       gup_flags = FOLL_TOUCH | FOLL_HWPOISON;
        if (write)
                gup_flags |= FOLL_WRITE;
 
@@ -1555,8 +1502,10 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
        if (check_vma_flags(vma, gup_flags))
                return -EINVAL;
 
-       return __get_user_pages(mm, start, nr_pages, gup_flags,
+       ret = __get_user_pages(mm, start, nr_pages, gup_flags,
                                NULL, NULL, locked);
+       lru_add_drain();
+       return ret;
 }
 
 /*
@@ -1704,11 +1653,11 @@ EXPORT_SYMBOL(fault_in_writeable);
  * @uaddr: start of address range
  * @size: length of address range
  *
- * Faults in an address range using get_user_pages, i.e., without triggering
- * hardware page faults.  This is primarily useful when we already know that
- * some or all of the pages in the address range aren't in memory.
+ * Faults in an address range for writing.  This is primarily useful when we
+ * already know that some or all of the pages in the address range aren't in
+ * memory.
  *
- * Other than fault_in_writeable(), this function is non-destructive.
+ * Unlike fault_in_writeable(), this function is non-destructive.
  *
  * Note that we don't pin or otherwise hold the pages referenced that we fault
  * in.  There's no guarantee that they'll stay in memory for any duration of
@@ -1719,46 +1668,27 @@ EXPORT_SYMBOL(fault_in_writeable);
  */
 size_t fault_in_safe_writeable(const char __user *uaddr, size_t size)
 {
-       unsigned long start = (unsigned long)untagged_addr(uaddr);
-       unsigned long end, nstart, nend;
+       unsigned long start = (unsigned long)uaddr, end;
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma = NULL;
-       int locked = 0;
+       bool unlocked = false;
 
-       nstart = start & PAGE_MASK;
+       if (unlikely(size == 0))
+               return 0;
        end = PAGE_ALIGN(start + size);
-       if (end < nstart)
+       if (end < start)
                end = 0;
-       for (; nstart != end; nstart = nend) {
-               unsigned long nr_pages;
-               long ret;
 
-               if (!locked) {
-                       locked = 1;
-                       mmap_read_lock(mm);
-                       vma = find_vma(mm, nstart);
-               } else if (nstart >= vma->vm_end)
-                       vma = vma->vm_next;
-               if (!vma || vma->vm_start >= end)
-                       break;
-               nend = end ? min(end, vma->vm_end) : vma->vm_end;
-               if (vma->vm_flags & (VM_IO | VM_PFNMAP))
-                       continue;
-               if (nstart < vma->vm_start)
-                       nstart = vma->vm_start;
-               nr_pages = (nend - nstart) / PAGE_SIZE;
-               ret = __get_user_pages_locked(mm, nstart, nr_pages,
-                                             NULL, NULL, &locked,
-                                             FOLL_TOUCH | FOLL_WRITE);
-               if (ret <= 0)
+       mmap_read_lock(mm);
+       do {
+               if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked))
                        break;
-               nend = nstart + ret * PAGE_SIZE;
-       }
-       if (locked)
-               mmap_read_unlock(mm);
-       if (nstart == end)
-               return 0;
-       return size - min_t(size_t, nstart - start, size);
+               start = (start + PAGE_SIZE) & PAGE_MASK;
+       } while (start != end);
+       mmap_read_unlock(mm);
+
+       if (size > (unsigned long)uaddr - start)
+               return size - ((unsigned long)uaddr - start);
+       return 0;
 }
 EXPORT_SYMBOL(fault_in_safe_writeable);
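The usual consumer of fault_in_safe_writeable() is a retry loop: a caller that cannot take page faults at that point attempts a nofault copy, and only when the copy fails does it fault the destination in and try again. A condensed sketch of that shape; the surrounding locking, iov_iter plumbing, and progress checks that real callers do are elided, and demo_copy_out() is a made-up name (copy_to_user_nofault() and fault_in_safe_writeable() are the real helpers):

#include <linux/uaccess.h>
#include <linux/pagemap.h>

/* Sketch: copy @len bytes out to user space without sleeping in the
 * copy itself, faulting the destination in (non-destructively, unlike
 * fault_in_writeable()) and retrying when the nofault copy fails. */
static int demo_copy_out(char __user *dst, const void *src, size_t len)
{
	while (copy_to_user_nofault(dst, src, len)) {
		/* Returns the number of bytes NOT faulted in. */
		if (fault_in_safe_writeable(dst, len) == len)
			return -EFAULT;		/* no progress possible */
	}
	return 0;
}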
 
@@ -1843,72 +1773,80 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages,
                                            struct page **pages,
                                            unsigned int gup_flags)
 {
-       unsigned long i;
-       unsigned long isolation_error_count = 0;
-       bool drain_allow = true;
+       unsigned long isolation_error_count = 0, i;
+       struct folio *prev_folio = NULL;
        LIST_HEAD(movable_page_list);
-       long ret = 0;
-       struct page *prev_head = NULL;
-       struct page *head;
-       struct migration_target_control mtc = {
-               .nid = NUMA_NO_NODE,
-               .gfp_mask = GFP_USER | __GFP_NOWARN,
-       };
+       bool drain_allow = true;
+       int ret = 0;
 
        for (i = 0; i < nr_pages; i++) {
-               head = compound_head(pages[i]);
-               if (head == prev_head)
+               struct folio *folio = page_folio(pages[i]);
+
+               if (folio == prev_folio)
                        continue;
-               prev_head = head;
+               prev_folio = folio;
+
+               if (folio_is_pinnable(folio))
+                       continue;
+
                /*
-                * If we get a movable page, since we are going to be pinning
-                * these entries, try to move them out if possible.
+                * Try to move out any movable page before pinning the range.
                 */
-               if (!is_pinnable_page(head)) {
-                       if (PageHuge(head)) {
-                               if (!isolate_huge_page(head, &movable_page_list))
-                                       isolation_error_count++;
-                       } else {
-                               if (!PageLRU(head) && drain_allow) {
-                                       lru_add_drain_all();
-                                       drain_allow = false;
-                               }
+               if (folio_test_hugetlb(folio)) {
+                       if (!isolate_huge_page(&folio->page,
+                                               &movable_page_list))
+                               isolation_error_count++;
+                       continue;
+               }
 
-                               if (isolate_lru_page(head)) {
-                                       isolation_error_count++;
-                                       continue;
-                               }
-                               list_add_tail(&head->lru, &movable_page_list);
-                               mod_node_page_state(page_pgdat(head),
-                                                   NR_ISOLATED_ANON +
-                                                   page_is_file_lru(head),
-                                                   thp_nr_pages(head));
-                       }
+               if (!folio_test_lru(folio) && drain_allow) {
+                       lru_add_drain_all();
+                       drain_allow = false;
                }
+
+               if (folio_isolate_lru(folio)) {
+                       isolation_error_count++;
+                       continue;
+               }
+               list_add_tail(&folio->lru, &movable_page_list);
+               node_stat_mod_folio(folio,
+                                   NR_ISOLATED_ANON + folio_is_file_lru(folio),
+                                   folio_nr_pages(folio));
        }
 
+       if (!list_empty(&movable_page_list) || isolation_error_count)
+               goto unpin_pages;
+
        /*
         * If list is empty, and no isolation errors, means that all pages are
         * in the correct zone.
         */
-       if (list_empty(&movable_page_list) && !isolation_error_count)
-               return nr_pages;
+       return nr_pages;
 
+unpin_pages:
        if (gup_flags & FOLL_PIN) {
                unpin_user_pages(pages, nr_pages);
        } else {
                for (i = 0; i < nr_pages; i++)
                        put_page(pages[i]);
        }
+
        if (!list_empty(&movable_page_list)) {
+               struct migration_target_control mtc = {
+                       .nid = NUMA_NO_NODE,
+                       .gfp_mask = GFP_USER | __GFP_NOWARN,
+               };
+
                ret = migrate_pages(&movable_page_list, alloc_migration_target,
                                    NULL, (unsigned long)&mtc, MIGRATE_SYNC,
                                    MR_LONGTERM_PIN, NULL);
-               if (ret && !list_empty(&movable_page_list))
-                       putback_movable_pages(&movable_page_list);
+               if (ret > 0) /* number of pages not migrated */
+                       ret = -ENOMEM;
        }
 
-       return ret > 0 ? -ENOMEM : ret;
+       if (ret && !list_empty(&movable_page_list))
+               putback_movable_pages(&movable_page_list);
+       return ret;
 }
 #else
 static long check_and_migrate_movable_pages(unsigned long nr_pages,
@@ -2117,65 +2055,6 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 }
 EXPORT_SYMBOL(get_user_pages);
 
-/**
- * get_user_pages_locked() - variant of get_user_pages()
- *
- * @start:      starting user address
- * @nr_pages:   number of pages from start to pin
- * @gup_flags:  flags modifying lookup behaviour
- * @pages:      array that receives pointers to the pages pinned.
- *              Should be at least nr_pages long. Or NULL, if caller
- *              only intends to ensure the pages are faulted in.
- * @locked:     pointer to lock flag indicating whether lock is held and
- *              subsequently whether VM_FAULT_RETRY functionality can be
- *              utilised. Lock must initially be held.
- *
- * It is suitable to replace the form:
- *
- *      mmap_read_lock(mm);
- *      do_something()
- *      get_user_pages(mm, ..., pages, NULL);
- *      mmap_read_unlock(mm);
- *
- *  to:
- *
- *      int locked = 1;
- *      mmap_read_lock(mm);
- *      do_something()
- *      get_user_pages_locked(mm, ..., pages, &locked);
- *      if (locked)
- *          mmap_read_unlock(mm);
- *
- * We can leverage the VM_FAULT_RETRY functionality in the page fault
- * paths better by using either get_user_pages_locked() or
- * get_user_pages_unlocked().
- *
- */
-long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
-                          unsigned int gup_flags, struct page **pages,
-                          int *locked)
-{
-       /*
-        * FIXME: Current FOLL_LONGTERM behavior is incompatible with
-        * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-        * vmas.  As there are no users of this flag in this call we simply
-        * disallow this option for now.
-        */
-       if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
-               return -EINVAL;
-       /*
-        * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
-        * never directly by the caller, so enforce that:
-        */
-       if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
-               return -EINVAL;
-
-       return __get_user_pages_locked(current->mm, start, nr_pages,
-                                      pages, NULL, locked,
-                                      gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(get_user_pages_locked);
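With get_user_pages_locked() removed, callers either hold mmap_read_lock() themselves around get_user_pages(), or use get_user_pages_unlocked(), described in the comment that follows, which takes and drops the lock internally (including across VM_FAULT_RETRY). A minimal sketch of the unlocked form; demo_gup_unlocked() is a made-up wrapper:

#include <linux/mm.h>

/* Sketch: fault in and take a reference on @nr user pages starting at
 * @start; get_user_pages_unlocked() takes and drops mmap_lock itself.
 * The caller must put_page() each returned page when done. */
static long demo_gup_unlocked(unsigned long start, unsigned long nr,
			      struct page **pages)
{
	return get_user_pages_unlocked(start, nr, pages, FOLL_WRITE);
}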
-
 /*
  * get_user_pages_unlocked() is suitable to replace the form:
  *
@@ -2277,7 +2156,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
        ptem = ptep = pte_offset_map(&pmd, addr);
        do {
                pte_t pte = ptep_get_lockless(ptep);
-               struct page *head, *page;
+               struct page *page;
+               struct folio *folio;
 
                /*
                 * Similar to the PMD case below, NUMA hinting must take slow
@@ -2304,22 +2184,20 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
                page = pte_page(pte);
 
-               head = try_grab_compound_head(page, 1, flags);
-               if (!head)
+               folio = try_grab_folio(page, 1, flags);
+               if (!folio)
                        goto pte_unmap;
 
                if (unlikely(page_is_secretmem(page))) {
-                       put_compound_head(head, 1, flags);
+                       gup_put_folio(folio, 1, flags);
                        goto pte_unmap;
                }
 
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-                       put_compound_head(head, 1, flags);
+                       gup_put_folio(folio, 1, flags);
                        goto pte_unmap;
                }
 
-               VM_BUG_ON_PAGE(compound_head(page) != head, page);
-
                /*
                 * We need to make the page accessible if and only if we are
                 * going to access its content (the FOLL_PIN case).  Please
@@ -2329,14 +2207,13 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
                if (flags & FOLL_PIN) {
                        ret = arch_make_page_accessible(page);
                        if (ret) {
-                               unpin_user_page(page);
+                               gup_put_folio(folio, 1, flags);
                                goto pte_unmap;
                        }
                }
-               SetPageReferenced(page);
+               folio_set_referenced(folio);
                pages[*nr] = page;
                (*nr)++;
-
        } while (ptep++, addr += PAGE_SIZE, addr != end);
 
        ret = 1;
@@ -2453,8 +2330,8 @@ static int record_subpages(struct page *page, unsigned long addr,
 {
        int nr;
 
-       for (nr = 0; addr != end; addr += PAGE_SIZE)
-               pages[nr++] = page++;
+       for (nr = 0; addr != end; nr++, addr += PAGE_SIZE)
+               pages[nr] = nth_page(page, nr);
 
        return nr;
 }
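record_subpages() now steps with nth_page() rather than bare pointer arithmetic: on configurations where the memory map is not virtually contiguous (SPARSEMEM without VMEMMAP), page + n can walk past the end of one section's struct page array instead of reaching the page at pfn + n, while nth_page() goes back through the pfn. A user-space caricature of the difference, with the memory map chopped into separately allocated chunks (all names here are toys):

#include <stdio.h>
#include <stdlib.h>

#define PAGES_PER_SECTION 4

struct toy_page { unsigned long pfn; };

/* Memory map split into separately allocated sections, so pointer
 * arithmetic across a section boundary is meaningless. */
static struct toy_page *sections[2];

static struct toy_page *toy_pfn_to_page(unsigned long pfn)
{
	return &sections[pfn / PAGES_PER_SECTION][pfn % PAGES_PER_SECTION];
}

/* nth_page()-style: recompute through the pfn instead of adding to
 * the pointer, so crossing a section boundary is safe. */
static struct toy_page *toy_nth_page(struct toy_page *page, unsigned long n)
{
	return toy_pfn_to_page(page->pfn + n);
}

int main(void)
{
	unsigned long pfn;

	for (int s = 0; s < 2; s++)
		sections[s] = calloc(PAGES_PER_SECTION, sizeof(struct toy_page));
	for (pfn = 0; pfn < 2 * PAGES_PER_SECTION; pfn++)
		toy_pfn_to_page(pfn)->pfn = pfn;

	struct toy_page *start = toy_pfn_to_page(2);
	/* Crosses from section 0 into section 1: */
	printf("nth_page-style lookup: pfn %lu\n", toy_nth_page(start, 3)->pfn);
	/* start + 3 would read past the end of sections[0] here. */
	return 0;
}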
@@ -2472,7 +2349,8 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
                       struct page **pages, int *nr)
 {
        unsigned long pte_end;
-       struct page *head, *page;
+       struct page *page;
+       struct folio *folio;
        pte_t pte;
        int refs;
 
@@ -2488,21 +2366,20 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
        /* hugepages are never "special" */
        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 
-       head = pte_page(pte);
-       page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+       page = nth_page(pte_page(pte), (addr & (sz - 1)) >> PAGE_SHIFT);
        refs = record_subpages(page, addr, end, pages + *nr);
 
-       head = try_grab_compound_head(head, refs, flags);
-       if (!head)
+       folio = try_grab_folio(page, refs, flags);
+       if (!folio)
                return 0;
 
        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-               put_compound_head(head, refs, flags);
+               gup_put_folio(folio, refs, flags);
                return 0;
        }
 
        *nr += refs;
-       SetPageReferenced(head);
+       folio_set_referenced(folio);
        return 1;
 }
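gup_hugepte() and the gup_huge_pmd()/pud()/pgd() variants below all follow one recipe: locate the first subpage from the low bits of the address, record one page per PAGE_SIZE step up to end, grab the folio references in a single call, then re-read the entry and back everything out if it changed under us. The index arithmetic is the only subtle part; a quick stand-alone check of it (x86-64-style constants, purely illustrative):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PMD_SHIFT	21			/* 2 MiB huge PMD */
#define PMD_MASK	(~((1UL << PMD_SHIFT) - 1))

int main(void)
{
	unsigned long addr = 0x40325000UL;	/* start of the GUP range */
	unsigned long end  = 0x40328000UL;	/* three pages later */

	/* First subpage of the huge page that covers @addr ... */
	unsigned long first = (addr & ~PMD_MASK) >> PAGE_SHIFT;
	/* ... and how many subpages the [addr, end) range spans. */
	unsigned long refs  = (end - addr) >> PAGE_SHIFT;

	printf("subpage index %lu, refs %lu\n", first, refs);	/* 293, 3 */
	return 0;
}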
 
@@ -2536,7 +2413,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                        unsigned long end, unsigned int flags,
                        struct page **pages, int *nr)
 {
-       struct page *head, *page;
+       struct page *page;
+       struct folio *folio;
        int refs;
 
        if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
@@ -2549,20 +2427,20 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
                                             pages, nr);
        }
 
-       page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+       page = nth_page(pmd_page(orig), (addr & ~PMD_MASK) >> PAGE_SHIFT);
        refs = record_subpages(page, addr, end, pages + *nr);
 
-       head = try_grab_compound_head(pmd_page(orig), refs, flags);
-       if (!head)
+       folio = try_grab_folio(page, refs, flags);
+       if (!folio)
                return 0;
 
        if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
-               put_compound_head(head, refs, flags);
+               gup_put_folio(folio, refs, flags);
                return 0;
        }
 
        *nr += refs;
-       SetPageReferenced(head);
+       folio_set_referenced(folio);
        return 1;
 }
 
@@ -2570,7 +2448,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                        unsigned long end, unsigned int flags,
                        struct page **pages, int *nr)
 {
-       struct page *head, *page;
+       struct page *page;
+       struct folio *folio;
        int refs;
 
        if (!pud_access_permitted(orig, flags & FOLL_WRITE))
@@ -2583,20 +2462,20 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
                                             pages, nr);
        }
 
-       page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+       page = nth_page(pud_page(orig), (addr & ~PUD_MASK) >> PAGE_SHIFT);
        refs = record_subpages(page, addr, end, pages + *nr);
 
-       head = try_grab_compound_head(pud_page(orig), refs, flags);
-       if (!head)
+       folio = try_grab_folio(page, refs, flags);
+       if (!folio)
                return 0;
 
        if (unlikely(pud_val(orig) != pud_val(*pudp))) {
-               put_compound_head(head, refs, flags);
+               gup_put_folio(folio, refs, flags);
                return 0;
        }
 
        *nr += refs;
-       SetPageReferenced(head);
+       folio_set_referenced(folio);
        return 1;
 }
 
@@ -2605,27 +2484,28 @@ static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
                        struct page **pages, int *nr)
 {
        int refs;
-       struct page *head, *page;
+       struct page *page;
+       struct folio *folio;
 
        if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
                return 0;
 
        BUILD_BUG_ON(pgd_devmap(orig));
 
-       page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
+       page = nth_page(pgd_page(orig), (addr & ~PGDIR_MASK) >> PAGE_SHIFT);
        refs = record_subpages(page, addr, end, pages + *nr);
 
-       head = try_grab_compound_head(pgd_page(orig), refs, flags);
-       if (!head)
+       folio = try_grab_folio(page, refs, flags);
+       if (!folio)
                return 0;
 
        if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
-               put_compound_head(head, refs, flags);
+               gup_put_folio(folio, refs, flags);
                return 0;
        }
 
        *nr += refs;
-       SetPageReferenced(head);
+       folio_set_referenced(folio);
        return 1;
 }
 
@@ -3118,32 +2998,3 @@ long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
        return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages_unlocked);
-
-/*
- * pin_user_pages_locked() is the FOLL_PIN variant of get_user_pages_locked().
- * Behavior is the same, except that this one sets FOLL_PIN and rejects
- * FOLL_GET.
- */
-long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
-                          unsigned int gup_flags, struct page **pages,
-                          int *locked)
-{
-       /*
-        * FIXME: Current FOLL_LONGTERM behavior is incompatible with
-        * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-        * vmas.  As there are no users of this flag in this call we simply
-        * disallow this option for now.
-        */
-       if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
-               return -EINVAL;
-
-       /* FOLL_GET and FOLL_PIN are mutually exclusive. */
-       if (WARN_ON_ONCE(gup_flags & FOLL_GET))
-               return -EINVAL;
-
-       gup_flags |= FOLL_PIN;
-       return __get_user_pages_locked(current->mm, start, nr_pages,
-                                      pages, NULL, locked,
-                                      gup_flags | FOLL_TOUCH);
-}
-EXPORT_SYMBOL(pin_user_pages_locked);