Merge branch 'for-5.16' of https://git.kernel.org/pub/scm/linux/kernel/git/broonie...

[linux-2.6-microblaze.git] / mm / filemap.c
diff --git a/mm/filemap.c b/mm/filemap.c

index b6140de..daa0e23 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,7 +30,6 @@
  #include <linux/writeback.h>
  #include <linux/backing-dev.h>
  #include <linux/pagevec.h>
-#include <linux/blkdev.h>
  #include <linux/security.h>
  #include <linux/cpuset.h>
  #include <linux/hugetlb.h>
@@ -90,7 +89,7 @@
   *      ->lock_page            (filemap_fault, access_process_vm)
   *
   *  ->i_rwsem                  (generic_perform_write)
- *    ->mmap_lock              (fault_in_pages_readable->do_page_fault)
+ *    ->mmap_lock              (fault_in_readable->do_page_fault)
   *
   *  bdi->wb.list_lock
   *    sb_lock                  (fs/fs-writeback.c)
@@ -262,9 +261,13 @@ void delete_from_page_cache(struct page *page)
         struct address_space *mapping = page_mapping(page);
  
         BUG_ON(!PageLocked(page));
+       spin_lock(&mapping->host->i_lock);
         xa_lock_irq(&mapping->i_pages);
         __delete_from_page_cache(page, NULL);
         xa_unlock_irq(&mapping->i_pages);
+       if (mapping_shrinkable(mapping))
+               inode_add_lru(mapping->host);
+       spin_unlock(&mapping->host->i_lock);
  
         page_cache_free_page(mapping, page);
  }
@@ -340,6 +343,7 @@ void delete_from_page_cache_batch(struct address_space *mapping,
         if (!pagevec_count(pvec))
                 return;
  
+       spin_lock(&mapping->host->i_lock);
         xa_lock_irq(&mapping->i_pages);
         for (i = 0; i < pagevec_count(pvec); i++) {
                 trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
@@ -348,6 +352,9 @@ void delete_from_page_cache_batch(struct address_space *mapping,
         }
         page_cache_delete_batch(mapping, pvec);
         xa_unlock_irq(&mapping->i_pages);
+       if (mapping_shrinkable(mapping))
+               inode_add_lru(mapping->host);
+       spin_unlock(&mapping->host->i_lock);
  
         for (i = 0; i < pagevec_count(pvec); i++)
                 page_cache_free_page(mapping, pvec->pages[i]);
@@ -842,6 +849,8 @@ EXPORT_SYMBOL(file_write_and_wait_range);
   */
  void replace_page_cache_page(struct page *old, struct page *new)
  {
+       struct folio *fold = page_folio(old);
+       struct folio *fnew = page_folio(new);
         struct address_space *mapping = old->mapping;
         void (*freepage)(struct page *) = mapping->a_ops->freepage;
         pgoff_t offset = old->index;
@@ -855,7 +864,7 @@ void replace_page_cache_page(struct page *old, struct page *new)
         new->mapping = mapping;
         new->index = offset;
  
-       mem_cgroup_migrate(old, new);
+       mem_cgroup_migrate(fold, fnew);
  
         xas_lock_irq(&xas);
         xas_store(&xas, new);
@@ -877,26 +886,25 @@ void replace_page_cache_page(struct page *old, struct page *new)
  }
  EXPORT_SYMBOL_GPL(replace_page_cache_page);
  
-noinline int __add_to_page_cache_locked(struct page *page,
-                                       struct address_space *mapping,
-                                       pgoff_t offset, gfp_t gfp,
-                                       void **shadowp)
+noinline int __filemap_add_folio(struct address_space *mapping,
+               struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
  {
-       XA_STATE(xas, &mapping->i_pages, offset);
-       int huge = PageHuge(page);
+       XA_STATE(xas, &mapping->i_pages, index);
+       int huge = folio_test_hugetlb(folio);
         int error;
         bool charged = false;
  
-       VM_BUG_ON_PAGE(!PageLocked(page), page);
-       VM_BUG_ON_PAGE(PageSwapBacked(page), page);
+       VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+       VM_BUG_ON_FOLIO(folio_test_swapbacked(folio), folio);
         mapping_set_update(&xas, mapping);
  
-       get_page(page);
-       page->mapping = mapping;
-       page->index = offset;
+       folio_get(folio);
+       folio->mapping = mapping;
+       folio->index = index;
  
         if (!huge) {
-               error = mem_cgroup_charge(page, NULL, gfp);
+               error = mem_cgroup_charge(folio, NULL, gfp);
+               VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
                 if (error)
                         goto error;
                 charged = true;
@@ -908,7 +916,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
                 unsigned int order = xa_get_order(xas.xa, xas.xa_index);
                 void *entry, *old = NULL;
  
-               if (order > thp_order(page))
+               if (order > folio_order(folio))
                         xas_split_alloc(&xas, xa_load(xas.xa, xas.xa_index),
                                         order, gfp);
                 xas_lock_irq(&xas);
@@ -925,13 +933,13 @@ noinline int __add_to_page_cache_locked(struct page *page,
                                 *shadowp = old;
                         /* entry may have been split before we acquired lock */
                         order = xa_get_order(xas.xa, xas.xa_index);
-                       if (order > thp_order(page)) {
+                       if (order > folio_order(folio)) {
                                 xas_split(&xas, old, order);
                                 xas_reset(&xas);
                         }
                 }
  
-               xas_store(&xas, page);
+               xas_store(&xas, folio);
                 if (xas_error(&xas))
                         goto unlock;
  
@@ -939,7 +947,7 @@ noinline int __add_to_page_cache_locked(struct page *page,
  
                 /* hugetlb pages do not participate in page cache accounting */
                 if (!huge)
-                       __inc_lruvec_page_state(page, NR_FILE_PAGES);
+                       __lruvec_stat_add_folio(folio, NR_FILE_PAGES);
  unlock:
                 xas_unlock_irq(&xas);
         } while (xas_nomem(&xas, gfp));
@@ -947,19 +955,19 @@ unlock:
         if (xas_error(&xas)) {
                 error = xas_error(&xas);
                 if (charged)
-                       mem_cgroup_uncharge(page);
+                       mem_cgroup_uncharge(folio);
                 goto error;
         }
  
-       trace_mm_filemap_add_to_page_cache(page);
+       trace_mm_filemap_add_to_page_cache(&folio->page);
         return 0;
  error:
-       page->mapping = NULL;
+       folio->mapping = NULL;
         /* Leave page->index set: truncation relies upon it */
-       put_page(page);
+       folio_put(folio);
         return error;
  }
-ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
+ALLOW_ERROR_INJECTION(__filemap_add_folio, ERRNO);
  
  /**
   * add_to_page_cache_locked - add a locked page to the pagecache
@@ -976,59 +984,58 @@ ALLOW_ERROR_INJECTION(__add_to_page_cache_locked, ERRNO);
  int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
                 pgoff_t offset, gfp_t gfp_mask)
  {
-       return __add_to_page_cache_locked(page, mapping, offset,
+       return __filemap_add_folio(mapping, page_folio(page), offset,
                                           gfp_mask, NULL);
  }
  EXPORT_SYMBOL(add_to_page_cache_locked);
  
-int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
-                               pgoff_t offset, gfp_t gfp_mask)
+int filemap_add_folio(struct address_space *mapping, struct folio *folio,
+                               pgoff_t index, gfp_t gfp)
  {
         void *shadow = NULL;
         int ret;
  
-       __SetPageLocked(page);
-       ret = __add_to_page_cache_locked(page, mapping, offset,
-                                        gfp_mask, &shadow);
+       __folio_set_locked(folio);
+       ret = __filemap_add_folio(mapping, folio, index, gfp, &shadow);
         if (unlikely(ret))
-               __ClearPageLocked(page);
+               __folio_clear_locked(folio);
         else {
                 /*
-                * The page might have been evicted from cache only
+                * The folio might have been evicted from cache only
                  * recently, in which case it should be activated like
-                * any other repeatedly accessed page.
-                * The exception is pages getting rewritten; evicting other
+                * any other repeatedly accessed folio.
+                * The exception is folios getting rewritten; evicting other
                  * data from the working set, only to cache data that will
                  * get overwritten with something else, is a waste of memory.
                  */
-               WARN_ON_ONCE(PageActive(page));
-               if (!(gfp_mask & __GFP_WRITE) && shadow)
-                       workingset_refault(page, shadow);
-               lru_cache_add(page);
+               WARN_ON_ONCE(folio_test_active(folio));
+               if (!(gfp & __GFP_WRITE) && shadow)
+                       workingset_refault(folio, shadow);
+               folio_add_lru(folio);
         }
         return ret;
  }
-EXPORT_SYMBOL_GPL(add_to_page_cache_lru);
+EXPORT_SYMBOL_GPL(filemap_add_folio);
  
  #ifdef CONFIG_NUMA
-struct page *__page_cache_alloc(gfp_t gfp)
+struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
  {
         int n;
-       struct page *page;
+       struct folio *folio;
  
         if (cpuset_do_page_mem_spread()) {
                 unsigned int cpuset_mems_cookie;
                 do {
                         cpuset_mems_cookie = read_mems_allowed_begin();
                         n = cpuset_mem_spread_node();
-                       page = __alloc_pages_node(n, gfp, 0);
-               } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
+                       folio = __folio_alloc_node(gfp, order, n);
+               } while (!folio && read_mems_allowed_retry(cpuset_mems_cookie));
  
-               return page;
+               return folio;
         }
-       return alloc_pages(gfp, 0);
+       return folio_alloc(gfp, order);
  }
-EXPORT_SYMBOL(__page_cache_alloc);
+EXPORT_SYMBOL(filemap_alloc_folio);
  #endif
  
  /*
@@ -1081,11 +1088,11 @@ EXPORT_SYMBOL(filemap_invalidate_unlock_two);
   */
  #define PAGE_WAIT_TABLE_BITS 8
  #define PAGE_WAIT_TABLE_SIZE (1 << PAGE_WAIT_TABLE_BITS)
-static wait_queue_head_t page_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
+static wait_queue_head_t folio_wait_table[PAGE_WAIT_TABLE_SIZE] __cacheline_aligned;
  
-static wait_queue_head_t *page_waitqueue(struct page *page)
+static wait_queue_head_t *folio_waitqueue(struct folio *folio)
  {
-       return &page_wait_table[hash_ptr(page, PAGE_WAIT_TABLE_BITS)];
+       return &folio_wait_table[hash_ptr(folio, PAGE_WAIT_TABLE_BITS)];
  }
  
  void __init pagecache_init(void)
@@ -1093,7 +1100,7 @@ void __init pagecache_init(void)
         int i;
  
         for (i = 0; i < PAGE_WAIT_TABLE_SIZE; i++)
-               init_waitqueue_head(&page_wait_table[i]);
+               init_waitqueue_head(&folio_wait_table[i]);
  
         page_writeback_init();
  }
@@ -1148,10 +1155,10 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
          */
         flags = wait->flags;
         if (flags & WQ_FLAG_EXCLUSIVE) {
-               if (test_bit(key->bit_nr, &key->page->flags))
+               if (test_bit(key->bit_nr, &key->folio->flags))
                         return -1;
                 if (flags & WQ_FLAG_CUSTOM) {
-                       if (test_and_set_bit(key->bit_nr, &key->page->flags))
+                       if (test_and_set_bit(key->bit_nr, &key->folio->flags))
                                 return -1;
                         flags |= WQ_FLAG_DONE;
                 }
@@ -1164,7 +1171,7 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
          *
          * So update the flags atomically, and wake up the waiter
          * afterwards to avoid any races. This store-release pairs
-        * with the load-acquire in wait_on_page_bit_common().
+        * with the load-acquire in folio_wait_bit_common().
          */
         smp_store_release(&wait->flags, flags | WQ_FLAG_WOKEN);
         wake_up_state(wait->private, mode);
@@ -1183,14 +1190,14 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
         return (flags & WQ_FLAG_EXCLUSIVE) != 0;
  }
  
-static void wake_up_page_bit(struct page *page, int bit_nr)
+static void folio_wake_bit(struct folio *folio, int bit_nr)
  {
-       wait_queue_head_t *q = page_waitqueue(page);
+       wait_queue_head_t *q = folio_waitqueue(folio);
         struct wait_page_key key;
         unsigned long flags;
         wait_queue_entry_t bookmark;
  
-       key.page = page;
+       key.folio = folio;
         key.bit_nr = bit_nr;
         key.page_match = 0;
  
@@ -1225,7 +1232,7 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
          * page waiters.
          */
         if (!waitqueue_active(q) || !key.page_match) {
-               ClearPageWaiters(page);
+               folio_clear_waiters(folio);
                 /*
                  * It's possible to miss clearing Waiters here, when we woke
                  * our page waiters, but the hashed waitqueue has waiters for
@@ -1237,19 +1244,19 @@ static void wake_up_page_bit(struct page *page, int bit_nr)
         spin_unlock_irqrestore(&q->lock, flags);
  }
  
-static void wake_up_page(struct page *page, int bit)
+static void folio_wake(struct folio *folio, int bit)
  {
-       if (!PageWaiters(page))
+       if (!folio_test_waiters(folio))
                 return;
-       wake_up_page_bit(page, bit);
+       folio_wake_bit(folio, bit);
  }
  
  /*
- * A choice of three behaviors for wait_on_page_bit_common():
+ * A choice of three behaviors for folio_wait_bit_common():
   */
  enum behavior {
         EXCLUSIVE,      /* Hold ref to page and take the bit when woken, like
-                        * __lock_page() waiting on then setting PG_locked.
+                        * __folio_lock() waiting on then setting PG_locked.
                          */
         SHARED,         /* Hold ref to page and check the bit when woken, like
                          * wait_on_page_writeback() waiting on PG_writeback.
@@ -1260,16 +1267,16 @@ enum behavior {
  };
  
  /*
- * Attempt to check (or get) the page bit, and mark us done
+ * Attempt to check (or get) the folio flag, and mark us done
   * if successful.
   */
-static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
+static inline bool folio_trylock_flag(struct folio *folio, int bit_nr,
                                         struct wait_queue_entry *wait)
  {
         if (wait->flags & WQ_FLAG_EXCLUSIVE) {
-               if (test_and_set_bit(bit_nr, &page->flags))
+               if (test_and_set_bit(bit_nr, &folio->flags))
                         return false;
-       } else if (test_bit(bit_nr, &page->flags))
+       } else if (test_bit(bit_nr, &folio->flags))
                 return false;
  
         wait->flags |= WQ_FLAG_WOKEN | WQ_FLAG_DONE;
@@ -1279,9 +1286,10 @@ static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
  /* How many times do we accept lock stealing from under a waiter? */
  int sysctl_page_lock_unfairness = 5;
  
-static inline int wait_on_page_bit_common(wait_queue_head_t *q,
-       struct page *page, int bit_nr, int state, enum behavior behavior)
+static inline int folio_wait_bit_common(struct folio *folio, int bit_nr,
+               int state, enum behavior behavior)
  {
+       wait_queue_head_t *q = folio_waitqueue(folio);
         int unfairness = sysctl_page_lock_unfairness;
         struct wait_page_queue wait_page;
         wait_queue_entry_t *wait = &wait_page.wait;
@@ -1290,8 +1298,8 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
         unsigned long pflags;
  
         if (bit_nr == PG_locked &&
-           !PageUptodate(page) && PageWorkingset(page)) {
-               if (!PageSwapBacked(page)) {
+           !folio_test_uptodate(folio) && folio_test_workingset(folio)) {
+               if (!folio_test_swapbacked(folio)) {
                         delayacct_thrashing_start();
                         delayacct = true;
                 }
@@ -1301,7 +1309,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
  
         init_wait(wait);
         wait->func = wake_page_function;
-       wait_page.page = page;
+       wait_page.folio = folio;
         wait_page.bit_nr = bit_nr;
  
  repeat:
@@ -1316,7 +1324,7 @@ repeat:
          * Do one last check whether we can get the
          * page bit synchronously.
          *
-        * Do the SetPageWaiters() marking before that
+        * Do the folio_set_waiters() marking before that
          * to let any waker we _just_ missed know they
          * need to wake us up (otherwise they'll never
          * even go to the slow case that looks at the
@@ -1327,8 +1335,8 @@ repeat:
          * lock to avoid races.
          */
         spin_lock_irq(&q->lock);
-       SetPageWaiters(page);
-       if (!trylock_page_bit_common(page, bit_nr, wait))
+       folio_set_waiters(folio);
+       if (!folio_trylock_flag(folio, bit_nr, wait))
                 __add_wait_queue_entry_tail(q, wait);
         spin_unlock_irq(&q->lock);
  
@@ -1338,10 +1346,10 @@ repeat:
          * see whether the page bit testing has already
          * been done by the wake function.
          *
-        * We can drop our reference to the page.
+        * We can drop our reference to the folio.
          */
         if (behavior == DROP)
-               put_page(page);
+               folio_put(folio);
  
         /*
          * Note that until the "finish_wait()", or until
@@ -1378,7 +1386,7 @@ repeat:
                  *
                  * And if that fails, we'll have to retry this all.
                  */
-               if (unlikely(test_and_set_bit(bit_nr, &page->flags)))
+               if (unlikely(test_and_set_bit(bit_nr, folio_flags(folio, 0))))
                         goto repeat;
  
                 wait->flags |= WQ_FLAG_DONE;
@@ -1387,7 +1395,7 @@ repeat:
  
         /*
          * If a signal happened, this 'finish_wait()' may remove the last
-        * waiter from the wait-queues, but the PageWaiters bit will remain
+        * waiter from the wait-queues, but the folio waiters bit will remain
          * set. That's ok. The next wakeup will take care of it, and trying
          * to do it here would be difficult and prone to races.
          */
@@ -1418,19 +1426,17 @@ repeat:
         return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
  }
  
-void wait_on_page_bit(struct page *page, int bit_nr)
+void folio_wait_bit(struct folio *folio, int bit_nr)
  {
-       wait_queue_head_t *q = page_waitqueue(page);
-       wait_on_page_bit_common(q, page, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
+       folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED);
  }
-EXPORT_SYMBOL(wait_on_page_bit);
+EXPORT_SYMBOL(folio_wait_bit);
  
-int wait_on_page_bit_killable(struct page *page, int bit_nr)
+int folio_wait_bit_killable(struct folio *folio, int bit_nr)
  {
-       wait_queue_head_t *q = page_waitqueue(page);
-       return wait_on_page_bit_common(q, page, bit_nr, TASK_KILLABLE, SHARED);
+       return folio_wait_bit_common(folio, bit_nr, TASK_KILLABLE, SHARED);
  }
-EXPORT_SYMBOL(wait_on_page_bit_killable);
+EXPORT_SYMBOL(folio_wait_bit_killable);
  
  /**
   * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
@@ -1447,31 +1453,28 @@ EXPORT_SYMBOL(wait_on_page_bit_killable);
   */
  int put_and_wait_on_page_locked(struct page *page, int state)
  {
-       wait_queue_head_t *q;
-
-       page = compound_head(page);
-       q = page_waitqueue(page);
-       return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
+       return folio_wait_bit_common(page_folio(page), PG_locked, state,
+                       DROP);
  }
  
  /**
- * add_page_wait_queue - Add an arbitrary waiter to a page's wait queue
- * @page: Page defining the wait queue of interest
+ * folio_add_wait_queue - Add an arbitrary waiter to a folio's wait queue
+ * @folio: Folio defining the wait queue of interest
   * @waiter: Waiter to add to the queue
   *
- * Add an arbitrary @waiter to the wait queue for the nominated @page.
+ * Add an arbitrary @waiter to the wait queue for the nominated @folio.
   */
-void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter)
+void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
  {
-       wait_queue_head_t *q = page_waitqueue(page);
+       wait_queue_head_t *q = folio_waitqueue(folio);
         unsigned long flags;
  
         spin_lock_irqsave(&q->lock, flags);
         __add_wait_queue_entry_tail(q, waiter);
-       SetPageWaiters(page);
+       folio_set_waiters(folio);
         spin_unlock_irqrestore(&q->lock, flags);
  }
-EXPORT_SYMBOL_GPL(add_page_wait_queue);
+EXPORT_SYMBOL_GPL(folio_add_wait_queue);
  
  #ifndef clear_bit_unlock_is_negative_byte
  
@@ -1497,125 +1500,117 @@ static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem
  #endif
  
  /**
- * unlock_page - unlock a locked page
- * @page: the page
+ * folio_unlock - Unlock a locked folio.
+ * @folio: The folio.
   *
- * Unlocks the page and wakes up sleepers in wait_on_page_locked().
- * Also wakes sleepers in wait_on_page_writeback() because the wakeup
- * mechanism between PageLocked pages and PageWriteback pages is shared.
- * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep.
+ * Unlocks the folio and wakes up any thread sleeping on the page lock.
   *
- * Note that this depends on PG_waiters being the sign bit in the byte
- * that contains PG_locked - thus the BUILD_BUG_ON(). That allows us to
- * clear the PG_locked bit and test PG_waiters at the same time fairly
- * portably (architectures that do LL/SC can test any bit, while x86 can
- * test the sign bit).
+ * Context: May be called from interrupt or process context.  May not be
+ * called from NMI context.
   */
-void unlock_page(struct page *page)
+void folio_unlock(struct folio *folio)
  {
+       /* Bit 7 allows x86 to check the byte's sign bit */
         BUILD_BUG_ON(PG_waiters != 7);
-       page = compound_head(page);
-       VM_BUG_ON_PAGE(!PageLocked(page), page);
-       if (clear_bit_unlock_is_negative_byte(PG_locked, &page->flags))
-               wake_up_page_bit(page, PG_locked);
+       BUILD_BUG_ON(PG_locked > 7);
+       VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+       if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
+               folio_wake_bit(folio, PG_locked);
  }
-EXPORT_SYMBOL(unlock_page);
+EXPORT_SYMBOL(folio_unlock);
  
  /**
- * end_page_private_2 - Clear PG_private_2 and release any waiters
- * @page: The page
+ * folio_end_private_2 - Clear PG_private_2 and wake any waiters.
+ * @folio: The folio.
   *
- * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for
- * this.  The page ref held for PG_private_2 being set is released.
+ * Clear the PG_private_2 bit on a folio and wake up any sleepers waiting for
+ * it.  The folio reference held for PG_private_2 being set is released.
   *
- * This is, for example, used when a netfs page is being written to a local
- * disk cache, thereby allowing writes to the cache for the same page to be
+ * This is, for example, used when a netfs folio is being written to a local
+ * disk cache, thereby allowing writes to the cache for the same folio to be
   * serialised.
   */
-void end_page_private_2(struct page *page)
+void folio_end_private_2(struct folio *folio)
  {
-       page = compound_head(page);
-       VM_BUG_ON_PAGE(!PagePrivate2(page), page);
-       clear_bit_unlock(PG_private_2, &page->flags);
-       wake_up_page_bit(page, PG_private_2);
-       put_page(page);
+       VM_BUG_ON_FOLIO(!folio_test_private_2(folio), folio);
+       clear_bit_unlock(PG_private_2, folio_flags(folio, 0));
+       folio_wake_bit(folio, PG_private_2);
+       folio_put(folio);
  }
-EXPORT_SYMBOL(end_page_private_2);
+EXPORT_SYMBOL(folio_end_private_2);
  
  /**
- * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio.
+ * @folio: The folio to wait on.
   *
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page.
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio.
   */
-void wait_on_page_private_2(struct page *page)
+void folio_wait_private_2(struct folio *folio)
  {
-       page = compound_head(page);
-       while (PagePrivate2(page))
-               wait_on_page_bit(page, PG_private_2);
+       while (folio_test_private_2(folio))
+               folio_wait_bit(folio, PG_private_2);
  }
-EXPORT_SYMBOL(wait_on_page_private_2);
+EXPORT_SYMBOL(folio_wait_private_2);
  
  /**
- * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page
- * @page: The page to wait on
+ * folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio.
+ * @folio: The folio to wait on.
   *
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a
+ * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a
   * fatal signal is received by the calling task.
   *
   * Return:
   * - 0 if successful.
   * - -EINTR if a fatal signal was encountered.
   */
-int wait_on_page_private_2_killable(struct page *page)
+int folio_wait_private_2_killable(struct folio *folio)
  {
         int ret = 0;
  
-       page = compound_head(page);
-       while (PagePrivate2(page)) {
-               ret = wait_on_page_bit_killable(page, PG_private_2);
+       while (folio_test_private_2(folio)) {
+               ret = folio_wait_bit_killable(folio, PG_private_2);
                 if (ret < 0)
                         break;
         }
  
         return ret;
  }
-EXPORT_SYMBOL(wait_on_page_private_2_killable);
+EXPORT_SYMBOL(folio_wait_private_2_killable);
  
  /**
- * end_page_writeback - end writeback against a page
- * @page: the page
+ * folio_end_writeback - End writeback against a folio.
+ * @folio: The folio.
   */
-void end_page_writeback(struct page *page)
+void folio_end_writeback(struct folio *folio)
  {
         /*
-        * TestClearPageReclaim could be used here but it is an atomic
-        * operation and overkill in this particular case. Failing to
-        * shuffle a page marked for immediate reclaim is too mild to
-        * justify taking an atomic operation penalty at the end of
-        * ever page writeback.
+        * folio_test_clear_reclaim() could be used here but it is an
+        * atomic operation and overkill in this particular case. The
+        * cost of failing to shuffle a folio marked for immediate
+        * reclaim is too small to justify taking an atomic operation
+        * penalty at the end of every folio writeback.
          */
-       if (PageReclaim(page)) {
-               ClearPageReclaim(page);
-               rotate_reclaimable_page(page);
+       if (folio_test_reclaim(folio)) {
+               folio_clear_reclaim(folio);
+               folio_rotate_reclaimable(folio);
         }
  
         /*
-        * Writeback does not hold a page reference of its own, relying
+        * Writeback does not hold a folio reference of its own, relying
          * on truncation to wait for the clearing of PG_writeback.
-        * But here we must make sure that the page is not freed and
-        * reused before the wake_up_page().
+        * But here we must make sure that the folio is not freed and
+        * reused before the folio_wake().
          */
-       get_page(page);
-       if (!test_clear_page_writeback(page))
+       folio_get(folio);
+       if (!__folio_end_writeback(folio))
                 BUG();
  
         smp_mb__after_atomic();
-       wake_up_page(page, PG_writeback);
-       acct_reclaim_writeback(page);
-       put_page(page);
+       folio_wake(folio, PG_writeback);
+       acct_reclaim_writeback(folio);
+       folio_put(folio);
  }
-EXPORT_SYMBOL(end_page_writeback);
+EXPORT_SYMBOL(folio_end_writeback);
  
  /*
   * After completing I/O on a page, call this routine to update the page
@@ -1646,39 +1641,35 @@ void page_endio(struct page *page, bool is_write, int err)
  EXPORT_SYMBOL_GPL(page_endio);
  
  /**
- * __lock_page - get a lock on the page, assuming we need to sleep to get it
- * @__page: the page to lock
+ * __folio_lock - Get a lock on the folio, assuming we need to sleep to get it.
+ * @folio: The folio to lock
   */
-void __lock_page(struct page *__page)
+void __folio_lock(struct folio *folio)
  {
-       struct page *page = compound_head(__page);
-       wait_queue_head_t *q = page_waitqueue(page);
-       wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE,
+       folio_wait_bit_common(folio, PG_locked, TASK_UNINTERRUPTIBLE,
                                 EXCLUSIVE);
  }
-EXPORT_SYMBOL(__lock_page);
+EXPORT_SYMBOL(__folio_lock);
  
-int __lock_page_killable(struct page *__page)
+int __folio_lock_killable(struct folio *folio)
  {
-       struct page *page = compound_head(__page);
-       wait_queue_head_t *q = page_waitqueue(page);
-       return wait_on_page_bit_common(q, page, PG_locked, TASK_KILLABLE,
+       return folio_wait_bit_common(folio, PG_locked, TASK_KILLABLE,
                                         EXCLUSIVE);
  }
-EXPORT_SYMBOL_GPL(__lock_page_killable);
+EXPORT_SYMBOL_GPL(__folio_lock_killable);
  
-int __lock_page_async(struct page *page, struct wait_page_queue *wait)
+static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait)
  {
-       struct wait_queue_head *q = page_waitqueue(page);
+       struct wait_queue_head *q = folio_waitqueue(folio);
         int ret = 0;
  
-       wait->page = page;
+       wait->folio = folio;
         wait->bit_nr = PG_locked;
  
         spin_lock_irq(&q->lock);
         __add_wait_queue_entry_tail(q, &wait->wait);
-       SetPageWaiters(page);
-       ret = !trylock_page(page);
+       folio_set_waiters(folio);
+       ret = !folio_trylock(folio);
         /*
          * If we were successful now, we know we're still on the
          * waitqueue as we're still under the lock. This means it's
@@ -1695,16 +1686,16 @@ int __lock_page_async(struct page *page, struct wait_page_queue *wait)
  
  /*
   * Return values:
- * 1 - page is locked; mmap_lock is still held.
- * 0 - page is not locked.
+ * true - folio is locked; mmap_lock is still held.
+ * false - folio is not locked.
   *     mmap_lock has been released (mmap_read_unlock(), unless flags had both
   *     FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in
   *     which case mmap_lock is still held.
   *
- * If neither ALLOW_RETRY nor KILLABLE are set, will always return 1
- * with the page locked and the mmap_lock unperturbed.
+ * If neither ALLOW_RETRY nor KILLABLE are set, will always return true
+ * with the folio locked and the mmap_lock unperturbed.
   */
-int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
+bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm,
                          unsigned int flags)
  {
         if (fault_flag_allow_retry_first(flags)) {
@@ -1713,28 +1704,28 @@ int __lock_page_or_retry(struct page *page, struct mm_struct *mm,
                  * even though return 0.
                  */
                 if (flags & FAULT_FLAG_RETRY_NOWAIT)
-                       return 0;
+                       return false;
  
                 mmap_read_unlock(mm);
                 if (flags & FAULT_FLAG_KILLABLE)
-                       wait_on_page_locked_killable(page);
+                       folio_wait_locked_killable(folio);
                 else
-                       wait_on_page_locked(page);
-               return 0;
+                       folio_wait_locked(folio);
+               return false;
         }
         if (flags & FAULT_FLAG_KILLABLE) {
-               int ret;
+               bool ret;
  
-               ret = __lock_page_killable(page);
+               ret = __folio_lock_killable(folio);
                 if (ret) {
                         mmap_read_unlock(mm);
-                       return 0;
+                       return false;
                 }
         } else {
-               __lock_page(page);
+               __folio_lock(folio);
         }
-       return 1;
  
+       return true;
  }
  
  /**
@@ -1809,144 +1800,156 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
  }
  EXPORT_SYMBOL(page_cache_prev_miss);
  
+/*
+ * Lockless page cache protocol:
+ * On the lookup side:
+ * 1. Load the folio from i_pages
+ * 2. Increment the refcount if it's not zero
+ * 3. If the folio is not found by xas_reload(), put the refcount and retry
+ *
+ * On the removal side:
+ * A. Freeze the page (by zeroing the refcount if nobody else has a reference)
+ * B. Remove the page from i_pages
+ * C. Return the page to the page allocator
+ *
+ * This means that any page may have its reference count temporarily
+ * increased by a speculative page cache (or fast GUP) lookup as it can
+ * be allocated by another user before the RCU grace period expires.
+ * Because the refcount temporarily acquired here may end up being the
+ * last refcount on the page, any page allocation must be freeable by
+ * folio_put().
+ */
+
  /*
   * mapping_get_entry - Get a page cache entry.
   * @mapping: the address_space to search
   * @index: The page cache index.
   *
- * Looks up the page cache slot at @mapping & @index.  If there is a
- * page cache page, the head page is returned with an increased refcount.
+ * Looks up the page cache entry at @mapping & @index.  If it is a folio,
+ * it is returned with an increased refcount.  If it is a shadow entry
+ * of a previously evicted folio, or a swap entry from shmem/tmpfs,
+ * it is returned without further action.
   *
- * If the slot holds a shadow entry of a previously evicted page, or a
- * swap entry from shmem/tmpfs, it is returned.
- *
- * Return: The head page or shadow entry, %NULL if nothing is found.
+ * Return: The folio, swap or shadow entry, %NULL if nothing is found.
   */
-static struct page *mapping_get_entry(struct address_space *mapping,
-               pgoff_t index)
+static void *mapping_get_entry(struct address_space *mapping, pgoff_t index)
  {
         XA_STATE(xas, &mapping->i_pages, index);
-       struct page *page;
+       struct folio *folio;
  
         rcu_read_lock();
  repeat:
         xas_reset(&xas);
-       page = xas_load(&xas);
-       if (xas_retry(&xas, page))
+       folio = xas_load(&xas);
+       if (xas_retry(&xas, folio))
                 goto repeat;
         /*
          * A shadow entry of a recently evicted page, or a swap entry from
          * shmem/tmpfs.  Return it without attempting to raise page count.
          */
-       if (!page || xa_is_value(page))
+       if (!folio || xa_is_value(folio))
                 goto out;
  
-       if (!page_cache_get_speculative(page))
+       if (!folio_try_get_rcu(folio))
                 goto repeat;
  
-       /*
-        * Has the page moved or been split?
-        * This is part of the lockless pagecache protocol. See
-        * include/linux/pagemap.h for details.
-        */
-       if (unlikely(page != xas_reload(&xas))) {
-               put_page(page);
+       if (unlikely(folio != xas_reload(&xas))) {
+               folio_put(folio);
                 goto repeat;
         }
  out:
         rcu_read_unlock();
  
-       return page;
+       return folio;
  }
  
  /**
- * pagecache_get_page - Find and get a reference to a page.
+ * __filemap_get_folio - Find and get a reference to a folio.
   * @mapping: The address_space to search.
   * @index: The page index.
- * @fgp_flags: %FGP flags modify how the page is returned.
- * @gfp_mask: Memory allocation flags to use if %FGP_CREAT is specified.
+ * @fgp_flags: %FGP flags modify how the folio is returned.
+ * @gfp: Memory allocation flags to use if %FGP_CREAT is specified.
   *
   * Looks up the page cache entry at @mapping & @index.
   *
   * @fgp_flags can be zero or more of these flags:
   *
- * * %FGP_ACCESSED - The page will be marked accessed.
- * * %FGP_LOCK - The page is returned locked.
- * * %FGP_HEAD - If the page is present and a THP, return the head page
- *   rather than the exact page specified by the index.
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
   * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it
- *   instead of allocating a new page to replace it.
+ *   instead of allocating a new folio to replace it.
   * * %FGP_CREAT - If no page is present then a new page is allocated using
- *   @gfp_mask and added to the page cache and the VM's LRU list.
+ *   @gfp and added to the page cache and the VM's LRU list.
   *   The page is returned locked and with an increased refcount.
   * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
   *   page is already in cache.  If the page was allocated, unlock it before
   *   returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp mask
- * * %FGP_NOWAIT - Don't get blocked by page lock
+ * * %FGP_WRITE - The page will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't get blocked by page lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback).
   *
   * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
   * if the %GFP flags specified for %FGP_CREAT are atomic.
   *
   * If there is a page cache page, it is returned with an increased refcount.
   *
- * Return: The found page or %NULL otherwise.
+ * Return: The found folio or %NULL otherwise.
   */
-struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-               int fgp_flags, gfp_t gfp_mask)
+struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
+               int fgp_flags, gfp_t gfp)
  {
-       struct page *page;
+       struct folio *folio;
  
  repeat:
-       page = mapping_get_entry(mapping, index);
-       if (xa_is_value(page)) {
+       folio = mapping_get_entry(mapping, index);
+       if (xa_is_value(folio)) {
                 if (fgp_flags & FGP_ENTRY)
-                       return page;
-               page = NULL;
+                       return folio;
+               folio = NULL;
         }
-       if (!page)
+       if (!folio)
                 goto no_page;
  
         if (fgp_flags & FGP_LOCK) {
                 if (fgp_flags & FGP_NOWAIT) {
-                       if (!trylock_page(page)) {
-                               put_page(page);
+                       if (!folio_trylock(folio)) {
+                               folio_put(folio);
                                 return NULL;
                         }
                 } else {
-                       lock_page(page);
+                       folio_lock(folio);
                 }
  
                 /* Has the page been truncated? */
-               if (unlikely(page->mapping != mapping)) {
-                       unlock_page(page);
-                       put_page(page);
+               if (unlikely(folio->mapping != mapping)) {
+                       folio_unlock(folio);
+                       folio_put(folio);
                         goto repeat;
                 }
-               VM_BUG_ON_PAGE(!thp_contains(page, index), page);
+               VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
         }
  
         if (fgp_flags & FGP_ACCESSED)
-               mark_page_accessed(page);
+               folio_mark_accessed(folio);
         else if (fgp_flags & FGP_WRITE) {
                 /* Clear idle flag for buffer write */
-               if (page_is_idle(page))
-                       clear_page_idle(page);
+               if (folio_test_idle(folio))
+                       folio_clear_idle(folio);
         }
-       if (!(fgp_flags & FGP_HEAD))
-               page = find_subpage(page, index);
  
+       if (fgp_flags & FGP_STABLE)
+               folio_wait_stable(folio);
  no_page:
-       if (!page && (fgp_flags & FGP_CREAT)) {
+       if (!folio && (fgp_flags & FGP_CREAT)) {
                 int err;
                 if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
-                       gfp_mask |= __GFP_WRITE;
+                       gfp |= __GFP_WRITE;
                 if (fgp_flags & FGP_NOFS)
-                       gfp_mask &= ~__GFP_FS;
+                       gfp &= ~__GFP_FS;
  
-               page = __page_cache_alloc(gfp_mask);
-               if (!page)
+               folio = filemap_alloc_folio(gfp, 0);
+               if (!folio)
                         return NULL;
  
                 if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
@@ -1954,27 +1957,27 @@ no_page:
  
                 /* Init accessed so avoid atomic mark_page_accessed later */
                 if (fgp_flags & FGP_ACCESSED)
-                       __SetPageReferenced(page);
+                       __folio_set_referenced(folio);
  
-               err = add_to_page_cache_lru(page, mapping, index, gfp_mask);
+               err = filemap_add_folio(mapping, folio, index, gfp);
                 if (unlikely(err)) {
-                       put_page(page);
-                       page = NULL;
+                       folio_put(folio);
+                       folio = NULL;
                         if (err == -EEXIST)
                                 goto repeat;
                 }
  
                 /*
-                * add_to_page_cache_lru locks the page, and for mmap we expect
-                * an unlocked page.
+                * filemap_add_folio() locks the folio, and for mmap
+                * we expect an unlocked folio.
                  */
-               if (page && (fgp_flags & FGP_FOR_MMAP))
-                       unlock_page(page);
+               if (folio && (fgp_flags & FGP_FOR_MMAP))
+                       folio_unlock(folio);
         }
  
-       return page;
+       return folio;
  }
-EXPORT_SYMBOL(pagecache_get_page);
+EXPORT_SYMBOL(__filemap_get_folio);
  
  static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
                 xa_mark_t mark)
@@ -2428,6 +2431,7 @@ static int filemap_update_page(struct kiocb *iocb,
                 struct address_space *mapping, struct iov_iter *iter,
                 struct page *page)
  {
+       struct folio *folio = page_folio(page);
         int error;
  
         if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -2437,40 +2441,40 @@ static int filemap_update_page(struct kiocb *iocb,
                 filemap_invalidate_lock_shared(mapping);
         }
  
-       if (!trylock_page(page)) {
+       if (!folio_trylock(folio)) {
                 error = -EAGAIN;
                 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
                         goto unlock_mapping;
                 if (!(iocb->ki_flags & IOCB_WAITQ)) {
                         filemap_invalidate_unlock_shared(mapping);
-                       put_and_wait_on_page_locked(page, TASK_KILLABLE);
+                       put_and_wait_on_page_locked(&folio->page, TASK_KILLABLE);
                         return AOP_TRUNCATED_PAGE;
                 }
-               error = __lock_page_async(page, iocb->ki_waitq);
+               error = __folio_lock_async(folio, iocb->ki_waitq);
                 if (error)
                         goto unlock_mapping;
         }
  
         error = AOP_TRUNCATED_PAGE;
-       if (!page->mapping)
+       if (!folio->mapping)
                 goto unlock;
  
         error = 0;
-       if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
+       if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, &folio->page))
                 goto unlock;
  
         error = -EAGAIN;
         if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
                 goto unlock;
  
-       error = filemap_read_page(iocb->ki_filp, mapping, page);
+       error = filemap_read_page(iocb->ki_filp, mapping, &folio->page);
         goto unlock_mapping;
  unlock:
-       unlock_page(page);
+       folio_unlock(folio);
  unlock_mapping:
         filemap_invalidate_unlock_shared(mapping);
         if (error == AOP_TRUNCATED_PAGE)
-               put_page(page);
+               folio_put(folio);
         return error;
  }
  
@@ -2909,7 +2913,9 @@ unlock:
  static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
                                      struct file **fpin)
  {
-       if (trylock_page(page))
+       struct folio *folio = page_folio(page);
+
+       if (folio_trylock(folio))
                 return 1;
  
         /*
@@ -2922,7 +2928,7 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
  
         *fpin = maybe_unlock_mmap_for_io(vmf, *fpin);
         if (vmf->flags & FAULT_FLAG_KILLABLE) {
-               if (__lock_page_killable(page)) {
+               if (__folio_lock_killable(folio)) {
                         /*
                          * We didn't have the right flags to drop the mmap_lock,
                          * but all fault_handlers only check for fatal signals
@@ -2934,11 +2940,11 @@ static int lock_page_maybe_drop_mmap(struct vm_fault *vmf, struct page *page,
                         return 0;
                 }
         } else
-               __lock_page(page);
+               __folio_lock(folio);
+
         return 1;
  }
  
-
  /*
   * Synchronous readahead happens when we don't even find a page in the page
   * cache at all.  We don't want to perform IO under the mmap sem, so if we have
@@ -3710,28 +3716,6 @@ out:
  }
  EXPORT_SYMBOL(generic_file_direct_write);
  
-/*
- * Find or create a page at the given pagecache position. Return the locked
- * page. This function is specifically for buffered writes.
- */
-struct page *grab_cache_page_write_begin(struct address_space *mapping,
-                                       pgoff_t index, unsigned flags)
-{
-       struct page *page;
-       int fgp_flags = FGP_LOCK|FGP_WRITE|FGP_CREAT;
-
-       if (flags & AOP_FLAG_NOFS)
-               fgp_flags |= FGP_NOFS;
-
-       page = pagecache_get_page(mapping, index, fgp_flags,
-                       mapping_gfp_mask(mapping));
-       if (page)
-               wait_for_stable_page(page);
-
-       return page;
-}
-EXPORT_SYMBOL(grab_cache_page_write_begin);
-
  ssize_t generic_perform_write(struct file *file,
                                 struct iov_iter *i, loff_t pos)
  {
@@ -3759,7 +3743,7 @@ again:
                  * same page as we're writing to, without it being marked
                  * up-to-date.
                  */
-               if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
+               if (unlikely(fault_in_iov_iter_readable(i, bytes))) {
                         status = -EFAULT;
                         break;
                 }