Merge tag 'kbuild-v5.13-2' of git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy...
index ef7d2da..0697134 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -87,11 +87,12 @@ __maybe_unused struct page *try_grab_compound_head(struct page *page,
                int orig_refs = refs;
 
                /*
-                * Can't do FOLL_LONGTERM + FOLL_PIN with CMA in the gup fast
-                * path, so fail and let the caller fall back to the slow path.
+                * Can't do FOLL_LONGTERM + FOLL_PIN in the gup fast path if the
+                * page is not pinnable (e.g. it is in CMA or ZONE_MOVABLE), so
+                * fail and let the caller fall back to the slow path.
                 */
-               if (unlikely(flags & FOLL_LONGTERM) &&
-                               is_migrate_cma_page(page))
+               if (unlikely((flags & FOLL_LONGTERM) &&
+                            !is_pinnable_page(page)))
                        return NULL;
 
                /*
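
The CMA-only test above is generalized by is_pinnable_page(): a page is
long-term pinnable only when the kernel does not need to keep it migratable,
which excludes pages on CMA pageblocks and pages in ZONE_MOVABLE. A simplified
sketch of that predicate (illustrative only; the real helper lives in
<linux/mm.h>, and gup_demo_page_is_pinnable() is a made-up name):

/* Illustrative stand-in for is_pinnable_page(), ignoring config corner cases. */
static inline bool gup_demo_page_is_pinnable(struct page *page)
{
	/* Pages in ZONE_MOVABLE or on CMA pageblocks must stay migratable. */
	return zone_idx(page_zone(page)) != ZONE_MOVABLE &&
	       !is_migrate_cma_page(page);
}
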
@@ -213,6 +214,58 @@ void unpin_user_page(struct page *page)
 }
 EXPORT_SYMBOL(unpin_user_page);
 
+static inline void compound_range_next(unsigned long i, unsigned long npages,
+                                      struct page **list, struct page **head,
+                                      unsigned int *ntails)
+{
+       struct page *next, *page;
+       unsigned int nr = 1;
+
+       if (i >= npages)
+               return;
+
+       next = *list + i;
+       page = compound_head(next);
+       if (PageCompound(page) && compound_order(page) >= 1)
+               nr = min_t(unsigned int,
+                          page + compound_nr(page) - next, npages - i);
+
+       *head = page;
+       *ntails = nr;
+}
+
+#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
+       for (__i = 0, \
+            compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
+            __i < __npages; __i += __ntails, \
+            compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
+
+static inline void compound_next(unsigned long i, unsigned long npages,
+                                struct page **list, struct page **head,
+                                unsigned int *ntails)
+{
+       struct page *page;
+       unsigned int nr;
+
+       if (i >= npages)
+               return;
+
+       page = compound_head(list[i]);
+       for (nr = i + 1; nr < npages; nr++) {
+               if (compound_head(list[nr]) != page)
+                       break;
+       }
+
+       *head = page;
+       *ntails = nr - i;
+}
+
+#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
+       for (__i = 0, \
+            compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
+            __i < __npages; __i += __ntails, \
+            compound_next(__i, __npages, __list, &(__head), &(__ntails)))
+
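
Both iterators follow the same contract: each step yields one head page plus
the number of consecutive entries (consecutive pages, for the range variant)
that share that head, and the loop index then advances by that count. A
minimal hypothetical caller, purely for illustration (count_head_runs() is not
part of this patch):

/* Count how many runs of consecutive same-head entries an array contains. */
static unsigned long count_head_runs(struct page **pages, unsigned long npages)
{
	unsigned long i, runs = 0;
	struct page *head;
	unsigned int ntails;

	for_each_compound_head(i, pages, npages, head, ntails)
		runs++;		/* one iteration per consecutive same-head run */

	return runs;
}

The real callers converted below use the same pattern, doing one
put_compound_head() per run instead of counting.
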
 /**
  * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
  * @pages:  array of pages to be maybe marked dirty, and definitely released.
@@ -239,20 +292,15 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
                                 bool make_dirty)
 {
        unsigned long index;
-
-       /*
-        * TODO: this can be optimized for huge pages: if a series of pages is
-        * physically contiguous and part of the same compound page, then a
-        * single operation to the head page should suffice.
-        */
+       struct page *head;
+       unsigned int ntails;
 
        if (!make_dirty) {
                unpin_user_pages(pages, npages);
                return;
        }
 
-       for (index = 0; index < npages; index++) {
-               struct page *page = compound_head(pages[index]);
+       for_each_compound_head(index, pages, npages, head, ntails) {
                /*
                 * Checking PageDirty at this point may race with
                 * clear_page_dirty_for_io(), but that's OK. Two key
@@ -273,13 +321,49 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
                 * written back, so it gets written back again in the
                 * next writeback cycle. This is harmless.
                 */
-               if (!PageDirty(page))
-                       set_page_dirty_lock(page);
-               unpin_user_page(page);
+               if (!PageDirty(head))
+                       set_page_dirty_lock(head);
+               put_compound_head(head, ntails, FOLL_PIN);
        }
 }
 EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 
+/**
+ * unpin_user_page_range_dirty_lock() - release and optionally dirty
+ * gup-pinned page range
+ *
+ * @page:  the starting page of a range maybe marked dirty, and definitely released.
+ * @npages: number of consecutive pages to release.
+ * @make_dirty: whether to mark the pages dirty
+ *
+ * "gup-pinned page range" refers to a range of pages that has had one of the
+ * pin_user_pages() variants called on that page.
+ *
+ * For the page range defined by [page .. page+npages-1], make that range (or
+ * its head pages, if a compound page) dirty, if @make_dirty is true, and if the
+ * page range was previously listed as clean.
+ *
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty(), unpin_user_page().
+ *
+ */
+void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
+                                     bool make_dirty)
+{
+       unsigned long index;
+       struct page *head;
+       unsigned int ntails;
+
+       for_each_compound_range(index, &page, npages, head, ntails) {
+               if (make_dirty && !PageDirty(head))
+                       set_page_dirty_lock(head);
+               put_compound_head(head, ntails, FOLL_PIN);
+       }
+}
+EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
+
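
A hedged usage sketch for the new range variant: a caller that pinned one
physically contiguous region and kept only the first struct page plus a page
count can now release it without rebuilding a pages[] array. The surrounding
function is hypothetical driver code, not part of this patch:

/* Hypothetical teardown path for a pinned, physically contiguous buffer. */
static void demo_release_buffer(struct page *first_page, unsigned long npages,
				bool dirtied)
{
	/*
	 * Drops the FOLL_PIN references on first_page .. first_page+npages-1,
	 * marking head pages dirty first when @dirtied is true.
	 */
	unpin_user_page_range_dirty_lock(first_page, npages, dirtied);
}
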
 /**
  * unpin_user_pages() - release an array of gup-pinned pages.
  * @pages:  array of pages to be marked dirty and released.
@@ -292,6 +376,8 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 void unpin_user_pages(struct page **pages, unsigned long npages)
 {
        unsigned long index;
+       struct page *head;
+       unsigned int ntails;
 
        /*
         * If this WARN_ON() fires, then the system *might* be leaking pages (by
@@ -300,13 +386,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
         */
        if (WARN_ON(IS_ERR_VALUE(npages)))
                return;
-       /*
-        * TODO: this can be optimized for huge pages: if a series of pages is
-        * physically contiguous and part of the same compound page, then a
-        * single operation to the head page should suffice.
-        */
-       for (index = 0; index < npages; index++)
-               unpin_user_page(pages[index]);
+
+       for_each_compound_head(index, pages, npages, head, ntails)
+               put_compound_head(head, ntails, FOLL_PIN);
 }
 EXPORT_SYMBOL(unpin_user_pages);
 
@@ -435,18 +517,6 @@ retry:
                }
        }
 
-       if (flags & FOLL_SPLIT && PageTransCompound(page)) {
-               get_page(page);
-               pte_unmap_unlock(ptep, ptl);
-               lock_page(page);
-               ret = split_huge_page(page);
-               unlock_page(page);
-               put_page(page);
-               if (ret)
-                       return ERR_PTR(ret);
-               goto retry;
-       }
-
        /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
        if (unlikely(!try_grab_page(page, flags))) {
                page = ERR_PTR(-ENOMEM);
@@ -591,7 +661,7 @@ retry_locked:
                spin_unlock(ptl);
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
        }
-       if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
+       if (flags & FOLL_SPLIT_PMD) {
                int ret;
                page = pmd_page(*pmd);
                if (is_huge_zero_page(page)) {
@@ -600,19 +670,7 @@ retry_locked:
                        split_huge_pmd(vma, pmd, address);
                        if (pmd_trans_unstable(pmd))
                                ret = -EBUSY;
-               } else if (flags & FOLL_SPLIT) {
-                       if (unlikely(!try_get_page(page))) {
-                               spin_unlock(ptl);
-                               return ERR_PTR(-ENOMEM);
-                       }
-                       spin_unlock(ptl);
-                       lock_page(page);
-                       ret = split_huge_page(page);
-                       unlock_page(page);
-                       put_page(page);
-                       if (pmd_none(*pmd))
-                               return no_page_table(vma, flags);
-               } else {  /* flags & FOLL_SPLIT_PMD */
+               } else {
                        spin_unlock(ptl);
                        split_huge_pmd(vma, pmd, address);
                        ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
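
For context on this hunk: FOLL_SPLIT, which split the compound page itself, is
being removed, while FOLL_SPLIT_PMD, which only splits the PMD mapping into
PTEs and leaves the compound page intact, is kept. A hedged sketch of the
surviving usage (demo_get_pte_mapped_page() is hypothetical; the caller is
assumed to hold mmap_lock):

/* Hypothetical caller that wants @addr mapped by PTEs rather than a huge PMD. */
static struct page *demo_get_pte_mapped_page(struct vm_area_struct *vma,
					     unsigned long addr)
{
	/*
	 * FOLL_SPLIT_PMD splits a huge PMD mapping in place; unlike the
	 * removed FOLL_SPLIT, the underlying compound page is not split.
	 */
	return follow_page(vma, addr, FOLL_GET | FOLL_SPLIT_PMD);
}
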
@@ -1470,7 +1528,7 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
 {
        struct vm_area_struct *vma;
        unsigned long vm_flags;
-       int i;
+       long i;
 
        /* calculate required read or write permissions.
         * If FOLL_FORCE is set, we only require the "MAY" flags.
@@ -1517,7 +1575,7 @@ finish_or_fault:
  * Returns NULL on any kind of failure - a hole must then be inserted into
  * the corefile, to preserve alignment with its headers; and also returns
  * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
+ * allowing a hole to be left in the corefile to save disk space.
  *
  * Called without mmap_lock (takes and releases the mmap_lock by itself).
  */
@@ -1543,112 +1601,92 @@ struct page *get_dump_page(unsigned long addr)
 }
 #endif /* CONFIG_ELF_CORE */
 
-#ifdef CONFIG_CMA
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
-                                       unsigned long start,
-                                       unsigned long nr_pages,
-                                       struct page **pages,
-                                       struct vm_area_struct **vmas,
-                                       unsigned int gup_flags)
+#ifdef CONFIG_MIGRATION
+/*
+ * Check whether all pages are pinnable; if so, return the number of pages. If
+ * some pages are not pinnable, migrate them and unpin all of the pages. Return
+ * zero if pages were migrated or if some pages could not be isolated (the
+ * caller is expected to retry). Return a negative error code if migration fails.
+ */
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+                                           struct page **pages,
+                                           unsigned int gup_flags)
 {
        unsigned long i;
-       unsigned long step;
+       unsigned long isolation_error_count = 0;
        bool drain_allow = true;
-       bool migrate_allow = true;
-       LIST_HEAD(cma_page_list);
-       long ret = nr_pages;
+       LIST_HEAD(movable_page_list);
+       long ret = 0;
+       struct page *prev_head = NULL;
+       struct page *head;
        struct migration_target_control mtc = {
                .nid = NUMA_NO_NODE,
-               .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
+               .gfp_mask = GFP_USER | __GFP_NOWARN,
        };
 
-check_again:
-       for (i = 0; i < nr_pages;) {
-
-               struct page *head = compound_head(pages[i]);
-
-               /*
-                * gup may start from a tail page. Advance step by the left
-                * part.
-                */
-               step = compound_nr(head) - (pages[i] - head);
+       for (i = 0; i < nr_pages; i++) {
+               head = compound_head(pages[i]);
+               if (head == prev_head)
+                       continue;
+               prev_head = head;
                /*
-                * If we get a page from the CMA zone, since we are going to
-                * be pinning these entries, we might as well move them out
-                * of the CMA zone if possible.
+                * If we get a movable page, since we are going to be pinning
+                * these entries, try to move them out if possible.
                 */
-               if (is_migrate_cma_page(head)) {
-                       if (PageHuge(head))
-                               isolate_huge_page(head, &cma_page_list);
-                       else {
+               if (!is_pinnable_page(head)) {
+                       if (PageHuge(head)) {
+                               if (!isolate_huge_page(head, &movable_page_list))
+                                       isolation_error_count++;
+                       } else {
                                if (!PageLRU(head) && drain_allow) {
                                        lru_add_drain_all();
                                        drain_allow = false;
                                }
 
-                               if (!isolate_lru_page(head)) {
-                                       list_add_tail(&head->lru, &cma_page_list);
-                                       mod_node_page_state(page_pgdat(head),
-                                                           NR_ISOLATED_ANON +
-                                                           page_is_file_lru(head),
-                                                           thp_nr_pages(head));
+                               if (isolate_lru_page(head)) {
+                                       isolation_error_count++;
+                                       continue;
                                }
+                               list_add_tail(&head->lru, &movable_page_list);
+                               mod_node_page_state(page_pgdat(head),
+                                                   NR_ISOLATED_ANON +
+                                                   page_is_file_lru(head),
+                                                   thp_nr_pages(head));
                        }
                }
-
-               i += step;
        }
 
-       if (!list_empty(&cma_page_list)) {
-               /*
-                * drop the above get_user_pages reference.
-                */
-               if (gup_flags & FOLL_PIN)
-                       unpin_user_pages(pages, nr_pages);
-               else
-                       for (i = 0; i < nr_pages; i++)
-                               put_page(pages[i]);
-
-               if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
-                       (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
-                       /*
-                        * some of the pages failed migration. Do get_user_pages
-                        * without migration.
-                        */
-                       migrate_allow = false;
+       /*
+        * If the list is empty and there were no isolation errors, then all
+        * pages are already in the correct zone.
+        */
+       if (list_empty(&movable_page_list) && !isolation_error_count)
+               return nr_pages;
 
-                       if (!list_empty(&cma_page_list))
-                               putback_movable_pages(&cma_page_list);
-               }
-               /*
-                * We did migrate all the pages, Try to get the page references
-                * again migrating any new CMA pages which we failed to isolate
-                * earlier.
-                */
-               ret = __get_user_pages_locked(mm, start, nr_pages,
-                                                  pages, vmas, NULL,
-                                                  gup_flags);
-
-               if ((ret > 0) && migrate_allow) {
-                       nr_pages = ret;
-                       drain_allow = true;
-                       goto check_again;
-               }
+       if (gup_flags & FOLL_PIN) {
+               unpin_user_pages(pages, nr_pages);
+       } else {
+               for (i = 0; i < nr_pages; i++)
+                       put_page(pages[i]);
+       }
+       if (!list_empty(&movable_page_list)) {
+               ret = migrate_pages(&movable_page_list, alloc_migration_target,
+                                   NULL, (unsigned long)&mtc, MIGRATE_SYNC,
+                                   MR_LONGTERM_PIN);
+               if (ret && !list_empty(&movable_page_list))
+                       putback_movable_pages(&movable_page_list);
        }
 
-       return ret;
+       return ret > 0 ? -ENOMEM : ret;
 }
 #else
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
-                                       unsigned long start,
-                                       unsigned long nr_pages,
-                                       struct page **pages,
-                                       struct vm_area_struct **vmas,
-                                       unsigned int gup_flags)
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+                                           struct page **pages,
+                                           unsigned int gup_flags)
 {
        return nr_pages;
 }
-#endif /* CONFIG_CMA */
+#endif /* CONFIG_MIGRATION */
 
 /*
  * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
@@ -1661,21 +1699,22 @@ static long __gup_longterm_locked(struct mm_struct *mm,
                                  struct vm_area_struct **vmas,
                                  unsigned int gup_flags)
 {
-       unsigned long flags = 0;
+       unsigned int flags;
        long rc;
 
-       if (gup_flags & FOLL_LONGTERM)
-               flags = memalloc_nocma_save();
-
-       rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL,
-                                    gup_flags);
+       if (!(gup_flags & FOLL_LONGTERM))
+               return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+                                              NULL, gup_flags);
+       flags = memalloc_pin_save();
+       do {
+               rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+                                            NULL, gup_flags);
+               if (rc <= 0)
+                       break;
+               rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
+       } while (!rc);
+       memalloc_pin_restore(flags);
 
-       if (gup_flags & FOLL_LONGTERM) {
-               if (rc > 0)
-                       rc = check_and_migrate_cma_pages(mm, start, rc, pages,
-                                                        vmas, gup_flags);
-               memalloc_nocma_restore(flags);
-       }
        return rc;
 }