index ef7d2da..71e546e 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -213,6 +213,58 @@ void unpin_user_page(struct page *page)
 }
 EXPORT_SYMBOL(unpin_user_page);
 
+static inline void compound_range_next(unsigned long i, unsigned long npages,
+                                      struct page **list, struct page **head,
+                                      unsigned int *ntails)
+{
+       struct page *next, *page;
+       unsigned int nr = 1;
+
+       if (i >= npages)
+               return;
+
+       next = *list + i;
+       page = compound_head(next);
+       if (PageCompound(page) && compound_order(page) >= 1)
+               nr = min_t(unsigned int,
+                          page + compound_nr(page) - next, npages - i);
+
+       *head = page;
+       *ntails = nr;
+}
+
+#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \
+       for (__i = 0, \
+            compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \
+            __i < __npages; __i += __ntails, \
+            compound_range_next(__i, __npages, __list, &(__head), &(__ntails)))
+
+static inline void compound_next(unsigned long i, unsigned long npages,
+                                struct page **list, struct page **head,
+                                unsigned int *ntails)
+{
+       struct page *page;
+       unsigned int nr;
+
+       if (i >= npages)
+               return;
+
+       page = compound_head(list[i]);
+       for (nr = i + 1; nr < npages; nr++) {
+               if (compound_head(list[nr]) != page)
+                       break;
+       }
+
+       *head = page;
+       *ntails = nr - i;
+}
+
+#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \
+       for (__i = 0, \
+            compound_next(__i, __npages, __list, &(__head), &(__ntails)); \
+            __i < __npages; __i += __ntails, \
+            compound_next(__i, __npages, __list, &(__head), &(__ntails)))
+
 /**
  * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
  * @pages:  array of pages to be maybe marked dirty, and definitely released.
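A minimal sketch of the batching these iterators perform, as a self-contained userspace model (the heads[] table and the model_* names are hypothetical stand-ins, not kernel code; heads[i] plays the role of compound_head(list[i])): consecutive entries that resolve to the same head are coalesced into one (head, ntails) pair, so callers visit each compound page once instead of once per subpage.

#include <stdio.h>

/* Userspace model of compound_next(): group runs of equal heads. */
static void model_compound_next(unsigned long i, unsigned long npages,
				const int *heads, int *head,
				unsigned int *ntails)
{
	unsigned long nr;

	if (i >= npages)
		return;

	*head = heads[i];
	for (nr = i + 1; nr < npages; nr++)
		if (heads[nr] != heads[i])
			break;
	*ntails = nr - i;
}

int main(void)
{
	/* entries 0-3 share one hypothetical head, 4-5 another, 6 is a lone page */
	const int heads[] = { 10, 10, 10, 10, 42, 42, 7 };
	unsigned long i, npages = sizeof(heads) / sizeof(heads[0]);
	int head;
	unsigned int ntails;

	for (i = 0, model_compound_next(i, npages, heads, &head, &ntails);
	     i < npages;
	     i += ntails, model_compound_next(i, npages, heads, &head, &ntails))
		printf("head=%d ntails=%u\n", head, ntails);	/* three iterations, not seven */

	return 0;
}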
@@ -239,20 +291,15 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
                                 bool make_dirty)
 {
        unsigned long index;
-
-       /*
-        * TODO: this can be optimized for huge pages: if a series of pages is
-        * physically contiguous and part of the same compound page, then a
-        * single operation to the head page should suffice.
-        */
+       struct page *head;
+       unsigned int ntails;
 
        if (!make_dirty) {
                unpin_user_pages(pages, npages);
                return;
        }
 
-       for (index = 0; index < npages; index++) {
-               struct page *page = compound_head(pages[index]);
+       for_each_compound_head(index, pages, npages, head, ntails) {
                /*
                 * Checking PageDirty at this point may race with
                 * clear_page_dirty_for_io(), but that's OK. Two key
@@ -273,13 +320,49 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
                 * written back, so it gets written back again in the
                 * next writeback cycle. This is harmless.
                 */
-               if (!PageDirty(page))
-                       set_page_dirty_lock(page);
-               unpin_user_page(page);
+               if (!PageDirty(head))
+                       set_page_dirty_lock(head);
+               put_compound_head(head, ntails, FOLL_PIN);
        }
 }
 EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 
+/**
+ * unpin_user_page_range_dirty_lock() - release and optionally dirty
+ * gup-pinned page range
+ *
+ * @page:  the starting page of a range maybe marked dirty, and definitely released.
+ * @npages: number of consecutive pages to release.
+ * @make_dirty: whether to mark the pages dirty
+ *
+ * "gup-pinned page range" refers to a range of pages that has had one of the
+ * pin_user_pages() variants called on that page.
+ *
+ * For the page ranges defined by [page .. page+npages], make that range (or
+ * its head pages, if a compound page) dirty, if @make_dirty is true, and if the
+ * page range was previously listed as clean.
+ *
+ * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
+ * required, then the caller should a) verify that this is really correct,
+ * because _lock() is usually required, and b) hand code it:
+ * set_page_dirty_lock(), unpin_user_page().
+ *
+ */
+void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
+                                     bool make_dirty)
+{
+       unsigned long index;
+       struct page *head;
+       unsigned int ntails;
+
+       for_each_compound_range(index, &page, npages, head, ntails) {
+               if (make_dirty && !PageDirty(head))
+                       set_page_dirty_lock(head);
+               put_compound_head(head, ntails, FOLL_PIN);
+       }
+}
+EXPORT_SYMBOL(unpin_user_page_range_dirty_lock);
+
 /**
  * unpin_user_pages() - release an array of gup-pinned pages.
  * @pages:  array of pages to be marked dirty and released.
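unpin_user_page_range_dirty_lock() relies on compound_range_next() above; its ntails arithmetic can be modelled in userspace as below. This is a sketch only, under the illustration-only assumption that every page sits inside an aligned, fixed-order compound page (order 9, 512 subpages): ntails is the smaller of "pages left in the current compound page" and "pages left in the requested range".

#include <stdio.h>

#define MODEL_ORDER	9			/* hypothetical: every page is part of a 2MB compound page */
#define MODEL_NR	(1UL << MODEL_ORDER)	/* 512 subpages per compound page */

/* Userspace model of the (head, ntails) step computed by compound_range_next(). */
static void model_range_next(unsigned long i, unsigned long npages,
			     unsigned long start, unsigned long *head,
			     unsigned int *ntails)
{
	unsigned long next, head_idx, left;

	if (i >= npages)
		return;

	next = start + i;			/* models *list + i */
	head_idx = next & ~(MODEL_NR - 1);	/* models compound_head() */
	left = head_idx + MODEL_NR - next;	/* models page + compound_nr(page) - next */

	*head = head_idx;
	*ntails = left < npages - i ? left : npages - i;
}

int main(void)
{
	unsigned long start = 500, npages = 1100;	/* range straddles three compound pages */
	unsigned long i, head;
	unsigned int ntails;

	for (i = 0, model_range_next(i, npages, start, &head, &ntails);
	     i < npages;
	     i += ntails, model_range_next(i, npages, start, &head, &ntails))
		printf("head=%lu ntails=%u\n", head, ntails);
	/* prints: head=0 ntails=12, head=512 ntails=512, head=1024 ntails=512, head=1536 ntails=64 */

	return 0;
}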
@@ -292,6 +375,8 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
 void unpin_user_pages(struct page **pages, unsigned long npages)
 {
        unsigned long index;
+       struct page *head;
+       unsigned int ntails;
 
        /*
         * If this WARN_ON() fires, then the system *might* be leaking pages (by
@@ -300,13 +385,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages)
         */
        if (WARN_ON(IS_ERR_VALUE(npages)))
                return;
-       /*
-        * TODO: this can be optimized for huge pages: if a series of pages is
-        * physically contiguous and part of the same compound page, then a
-        * single operation to the head page should suffice.
-        */
-       for (index = 0; index < npages; index++)
-               unpin_user_page(pages[index]);
+
+       for_each_compound_head(index, pages, npages, head, ntails)
+               put_compound_head(head, ntails, FOLL_PIN);
 }
 EXPORT_SYMBOL(unpin_user_pages);
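The payoff in unpin_user_pages() and unpin_user_pages_dirty_lock() is that each (head, ntails) pair is released with one put_compound_head() call instead of ntails unpin_user_page() calls. A rough stdatomic model of why that is equivalent but cheaper (a sketch only: the real release path also updates pin counters and node statistics, none of which appears in this diff):

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>

/* Model: a single atomic counter standing in for a head page's pin count. */
static void model_unpin_per_page(atomic_int *pins, unsigned int ntails)
{
	unsigned int i;

	/* old behaviour: one atomic operation per pages[] entry */
	for (i = 0; i < ntails; i++)
		atomic_fetch_sub(pins, 1);
}

static void model_unpin_batched(atomic_int *pins, unsigned int ntails)
{
	/* new behaviour: one atomic operation per compound page */
	atomic_fetch_sub(pins, ntails);
}

int main(void)
{
	atomic_int a = 512, b = 512;	/* e.g. a fully pinned 512-subpage compound page */

	model_unpin_per_page(&a, 512);
	model_unpin_batched(&b, 512);
	assert(atomic_load(&a) == atomic_load(&b));	/* same final state, far fewer atomics */
	printf("a=%d b=%d\n", atomic_load(&a), atomic_load(&b));
	return 0;
}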
 
@@ -435,18 +516,6 @@ retry:
                }
        }
 
-       if (flags & FOLL_SPLIT && PageTransCompound(page)) {
-               get_page(page);
-               pte_unmap_unlock(ptep, ptl);
-               lock_page(page);
-               ret = split_huge_page(page);
-               unlock_page(page);
-               put_page(page);
-               if (ret)
-                       return ERR_PTR(ret);
-               goto retry;
-       }
-
        /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
        if (unlikely(!try_grab_page(page, flags))) {
                page = ERR_PTR(-ENOMEM);
@@ -591,7 +660,7 @@ retry_locked:
                spin_unlock(ptl);
                return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
        }
-       if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
+       if (flags & FOLL_SPLIT_PMD) {
                int ret;
                page = pmd_page(*pmd);
                if (is_huge_zero_page(page)) {
@@ -600,19 +669,7 @@ retry_locked:
                        split_huge_pmd(vma, pmd, address);
                        if (pmd_trans_unstable(pmd))
                                ret = -EBUSY;
-               } else if (flags & FOLL_SPLIT) {
-                       if (unlikely(!try_get_page(page))) {
-                               spin_unlock(ptl);
-                               return ERR_PTR(-ENOMEM);
-                       }
-                       spin_unlock(ptl);
-                       lock_page(page);
-                       ret = split_huge_page(page);
-                       unlock_page(page);
-                       put_page(page);
-                       if (pmd_none(*pmd))
-                               return no_page_table(vma, flags);
-               } else {  /* flags & FOLL_SPLIT_PMD */
+               } else {
                        spin_unlock(ptl);
                        split_huge_pmd(vma, pmd, address);
                        ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;