Merge tag 'tty-5.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 59b14e0..1678802 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -56,6 +56,7 @@
 
 #include <linux/swapops.h>
 #include <linux/balloon_compaction.h>
+#include <linux/sched/sysctl.h>
 
 #include "internal.h"
 
@@ -978,47 +979,36 @@ void drop_slab(void)
                drop_slab_node(nid);
 }
 
-static inline int is_page_cache_freeable(struct page *page)
+static inline int is_page_cache_freeable(struct folio *folio)
 {
        /*
         * A freeable page cache page is referenced only by the caller
         * that isolated the page, the page cache and optional buffer
         * heads at page->private.
         */
-       int page_cache_pins = thp_nr_pages(page);
-       return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
-}
-
-static int may_write_to_inode(struct inode *inode)
-{
-       if (current->flags & PF_SWAPWRITE)
-               return 1;
-       if (!inode_write_congested(inode))
-               return 1;
-       if (inode_to_bdi(inode) == current->backing_dev_info)
-               return 1;
-       return 0;
+       return folio_ref_count(folio) - folio_test_private(folio) ==
+               1 + folio_nr_pages(folio);
 }
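
A worked example may help here. The sketch below is purely illustrative (example_freeable() is a hypothetical name, not part of this patch): it restates the check with the reference sources spelled out, e.g. for an order-2 folio the page cache holds folio_nr_pages() == 4 references, the isolating caller holds one, and attached private data accounts for one more.

/*
 * Illustrative restatement of is_page_cache_freeable(), not part of
 * this patch.  For an order-2 folio in the page cache: 4 cache
 * references + 1 caller reference (+ 1 if private data is attached).
 */
static inline bool example_freeable(struct folio *folio)
{
        long expected = 1 + folio_nr_pages(folio) + folio_test_private(folio);

        return folio_ref_count(folio) == expected;
}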
 
 /*
- * We detected a synchronous write error writing a page out.  Probably
+ * We detected a synchronous write error writing a folio out.  Probably
  * -ENOSPC.  We need to propagate that into the address_space for a subsequent
  * fsync(), msync() or close().
  *
  * The tricky part is that after writepage we cannot touch the mapping: nothing
- * prevents it from being freed up.  But we have a ref on the page and once
- * that page is locked, the mapping is pinned.
+ * prevents it from being freed up.  But we have a ref on the folio and once
+ * that folio is locked, the mapping is pinned.
  *
- * We're allowed to run sleeping lock_page() here because we know the caller has
+ * We're allowed to run sleeping folio_lock() here because we know the caller has
  * __GFP_FS.
  */
 static void handle_write_error(struct address_space *mapping,
-                               struct page *page, int error)
+                               struct folio *folio, int error)
 {
-       lock_page(page);
-       if (page_mapping(page) == mapping)
+       folio_lock(folio);
+       if (folio_mapping(folio) == mapping)
                mapping_set_error(mapping, error);
-       unlock_page(page);
+       folio_unlock(folio);
 }
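
As a hedged aside on where the recorded error ends up: mapping_set_error() latches it in the address_space, and a later fsync()/msync()/close() path picks it up. The helper below is hypothetical (example_report_write_error() is not from this patch) and only illustrates that flow using the existing filemap_check_errors() API.

/*
 * Hypothetical caller, for illustration only: filemap_check_errors()
 * clears and returns the AS_EIO/AS_ENOSPC error that
 * mapping_set_error() recorded above, which is how a subsequent
 * fsync() learns about the failed writepage.
 */
static int example_report_write_error(struct address_space *mapping)
{
        return filemap_check_errors(mapping);
}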
 
 static bool skip_throttle_noprogress(pg_data_t *pgdat)
@@ -1165,35 +1155,35 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct folio *folio, struct address_space *mapping)
 {
        /*
-        * If the page is dirty, only perform writeback if that write
+        * If the folio is dirty, only perform writeback if that write
         * will be non-blocking.  To prevent this allocation from being
         * stalled by pagecache activity.  But note that there may be
         * stalls if we need to run get_block().  We could test
         * PagePrivate for that.
         *
         * If this process is currently in __generic_file_write_iter() against
-        * this page's queue, we can perform writeback even if that
+        * this folio's queue, we can perform writeback even if that
         * will block.
         *
-        * If the page is swapcache, write it back even if that would
+        * If the folio is swapcache, write it back even if that would
         * block, for some throttling. This happens by accident, because
         * swap_backing_dev_info is bust: it doesn't reflect the
         * congestion state of the swapdevs.  Easy to fix, if needed.
         */
-       if (!is_page_cache_freeable(page))
+       if (!is_page_cache_freeable(folio))
                return PAGE_KEEP;
        if (!mapping) {
                /*
-                * Some data journaling orphaned pages can have
-                * page->mapping == NULL while being dirty with clean buffers.
+                * Orphaned folios from data-journaling filesystems can be
+                * dirty with clean buffers and folio->mapping == NULL.
                 */
-               if (page_has_private(page)) {
-                       if (try_to_free_buffers(page)) {
-                               ClearPageDirty(page);
-                               pr_info("%s: orphaned page\n", __func__);
+               if (folio_test_private(folio)) {
+                       if (try_to_free_buffers(&folio->page)) {
+                               folio_clear_dirty(folio);
+                               pr_info("%s: orphaned folio\n", __func__);
                                return PAGE_CLEAN;
                        }
                }
@@ -1201,10 +1191,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
        }
        if (mapping->a_ops->writepage == NULL)
                return PAGE_ACTIVATE;
-       if (!may_write_to_inode(mapping->host))
-               return PAGE_KEEP;
 
-       if (clear_page_dirty_for_io(page)) {
+       if (folio_clear_dirty_for_io(folio)) {
                int res;
                struct writeback_control wbc = {
                        .sync_mode = WB_SYNC_NONE,
@@ -1214,21 +1202,21 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
                        .for_reclaim = 1,
                };
 
-               SetPageReclaim(page);
-               res = mapping->a_ops->writepage(page, &wbc);
+               folio_set_reclaim(folio);
+               res = mapping->a_ops->writepage(&folio->page, &wbc);
                if (res < 0)
-                       handle_write_error(mapping, page, res);
+                       handle_write_error(mapping, folio, res);
                if (res == AOP_WRITEPAGE_ACTIVATE) {
-                       ClearPageReclaim(page);
+                       folio_clear_reclaim(folio);
                        return PAGE_ACTIVATE;
                }
 
-               if (!PageWriteback(page)) {
+               if (!folio_test_writeback(folio)) {
                        /* synchronous write or broken a_ops? */
-                       ClearPageReclaim(page);
+                       folio_clear_reclaim(folio);
                }
-               trace_mm_vmscan_writepage(page);
-               inc_node_page_state(page, NR_VMSCAN_WRITE);
+               trace_mm_vmscan_write_folio(folio);
+               node_stat_add_folio(folio, NR_VMSCAN_WRITE);
                return PAGE_SUCCESS;
        }
 
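One aside on the AOP_WRITEPAGE_ACTIVATE branch above: a filesystem's ->writepage may decline to do I/O from reclaim context. The sketch below is a hedged illustration of that convention (loosely modeled on shmem's fallback path), not code from this patch.

/*
 * Illustration only: when ->writepage redirties the page and returns
 * AOP_WRITEPAGE_ACTIVATE (leaving the page locked), pageout() above
 * clears the reclaim flag and returns PAGE_ACTIVATE, so the page is
 * moved back to the active list instead of being written out.
 */
static int example_writepage(struct page *page, struct writeback_control *wbc)
{
        if (wbc->for_reclaim) {
                redirty_page_for_writepage(wbc, page);
                return AOP_WRITEPAGE_ACTIVATE;
        }

        /* A real implementation would start I/O here. */
        unlock_page(page);
        return 0;
}
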
@@ -1239,16 +1227,16 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
  * Same as remove_mapping, but if the page is removed from the mapping, it
  * gets returned with a refcount of 0.
  */
-static int __remove_mapping(struct address_space *mapping, struct page *page,
+static int __remove_mapping(struct address_space *mapping, struct folio *folio,
                            bool reclaimed, struct mem_cgroup *target_memcg)
 {
        int refcount;
        void *shadow = NULL;
 
-       BUG_ON(!PageLocked(page));
-       BUG_ON(mapping != page_mapping(page));
+       BUG_ON(!folio_test_locked(folio));
+       BUG_ON(mapping != folio_mapping(folio));
 
-       if (!PageSwapCache(page))
+       if (!folio_test_swapcache(folio))
                spin_lock(&mapping->host->i_lock);
        xa_lock_irq(&mapping->i_pages);
        /*
@@ -1276,23 +1264,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         * Note that if SetPageDirty is always performed via set_page_dirty,
         * and thus under the i_pages lock, then this ordering is not required.
         */
-       refcount = 1 + compound_nr(page);
-       if (!page_ref_freeze(page, refcount))
+       refcount = 1 + folio_nr_pages(folio);
+       if (!folio_ref_freeze(folio, refcount))
                goto cannot_free;
        /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
-       if (unlikely(PageDirty(page))) {
-               page_ref_unfreeze(page, refcount);
+       if (unlikely(folio_test_dirty(folio))) {
+               folio_ref_unfreeze(folio, refcount);
                goto cannot_free;
        }
 
-       if (PageSwapCache(page)) {
-               swp_entry_t swap = { .val = page_private(page) };
-               mem_cgroup_swapout(page, swap);
+       if (folio_test_swapcache(folio)) {
+               swp_entry_t swap = folio_swap_entry(folio);
+               mem_cgroup_swapout(folio, swap);
                if (reclaimed && !mapping_exiting(mapping))
-                       shadow = workingset_eviction(page, target_memcg);
-               __delete_from_swap_cache(page, swap, shadow);
+                       shadow = workingset_eviction(folio, target_memcg);
+               __delete_from_swap_cache(&folio->page, swap, shadow);
                xa_unlock_irq(&mapping->i_pages);
-               put_swap_page(page, swap);
+               put_swap_page(&folio->page, swap);
        } else {
                void (*freepage)(struct page *);
 
@@ -1313,61 +1301,67 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * exceptional entries and shadow exceptional entries in the
                 * same address_space.
                 */
-               if (reclaimed && page_is_file_lru(page) &&
+               if (reclaimed && folio_is_file_lru(folio) &&
                    !mapping_exiting(mapping) && !dax_mapping(mapping))
-                       shadow = workingset_eviction(page, target_memcg);
-               __delete_from_page_cache(page, shadow);
+                       shadow = workingset_eviction(folio, target_memcg);
+               __filemap_remove_folio(folio, shadow);
                xa_unlock_irq(&mapping->i_pages);
                if (mapping_shrinkable(mapping))
                        inode_add_lru(mapping->host);
                spin_unlock(&mapping->host->i_lock);
 
                if (freepage != NULL)
-                       freepage(page);
+                       freepage(&folio->page);
        }
 
        return 1;
 
 cannot_free:
        xa_unlock_irq(&mapping->i_pages);
-       if (!PageSwapCache(page))
+       if (!folio_test_swapcache(folio))
                spin_unlock(&mapping->host->i_lock);
        return 0;
 }
 
-/*
- * Attempt to detach a locked page from its ->mapping.  If it is dirty or if
- * someone else has a ref on the page, abort and return 0.  If it was
- * successfully detached, return 1.  Assumes the caller has a single ref on
- * this page.
+/**
+ * remove_mapping() - Attempt to remove a folio from its mapping.
+ * @mapping: The address space.
+ * @folio: The folio to remove.
+ *
+ * If the folio is dirty, under writeback or if someone else has a ref
+ * on it, removal will fail.
+ * Return: The number of pages removed from the mapping.  0 if the folio
+ * could not be removed.
+ * Context: The caller should have a single refcount on the folio and
+ * hold its lock.
  */
-int remove_mapping(struct address_space *mapping, struct page *page)
+long remove_mapping(struct address_space *mapping, struct folio *folio)
 {
-       if (__remove_mapping(mapping, page, false, NULL)) {
+       if (__remove_mapping(mapping, folio, false, NULL)) {
                /*
-                * Unfreezing the refcount with 1 rather than 2 effectively
+                * Unfreezing the refcount with 1 effectively
                 * drops the pagecache ref for us without requiring another
                 * atomic operation.
                 */
-               page_ref_unfreeze(page, 1);
-               return 1;
+               folio_ref_unfreeze(folio, 1);
+               return folio_nr_pages(folio);
        }
        return 0;
 }
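
A hedged usage sketch for the contract documented above; example_drop_clean_folio() is a hypothetical caller, shown only to make the lock and refcount expectations concrete.

/*
 * Hypothetical caller, for illustration only: the caller holds the
 * folio lock and the single extra reference required by the kernel-doc
 * above.  On success the return value is the number of base pages
 * removed from the mapping; 0 means the folio was busy.
 */
static long example_drop_clean_folio(struct address_space *mapping,
                                     struct folio *folio)
{
        long removed = 0;

        if (!folio_test_dirty(folio) && !folio_test_writeback(folio))
                removed = remove_mapping(mapping, folio);
        folio_unlock(folio);
        return removed;
}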
 
 /**
- * putback_lru_page - put previously isolated page onto appropriate LRU list
- * @page: page to be put back to appropriate lru list
+ * folio_putback_lru - Put previously isolated folio onto appropriate LRU list.
+ * @folio: Folio to be returned to an LRU list.
  *
- * Add previously isolated @page to appropriate LRU list.
- * Page may still be unevictable for other reasons.
+ * Add previously isolated @folio to appropriate LRU list.
+ * The folio may still be unevictable for other reasons.
  *
- * lru_lock must not be held, interrupts must be enabled.
+ * Context: lru_lock must not be held, interrupts must be enabled.
  */
-void putback_lru_page(struct page *page)
+void folio_putback_lru(struct folio *folio)
 {
-       lru_cache_add(page);
-       put_page(page);         /* drop ref from isolate */
+       folio_add_lru(folio);
+       folio_put(folio);               /* drop ref from isolate */
 }
 
 enum page_references {
@@ -1377,61 +1371,61 @@ enum page_references {
        PAGEREF_ACTIVATE,
 };
 
-static enum page_references page_check_references(struct page *page,
+static enum page_references folio_check_references(struct folio *folio,
                                                  struct scan_control *sc)
 {
-       int referenced_ptes, referenced_page;
+       int referenced_ptes, referenced_folio;
        unsigned long vm_flags;
 
-       referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
-                                         &vm_flags);
-       referenced_page = TestClearPageReferenced(page);
+       referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
+                                          &vm_flags);
+       referenced_folio = folio_test_clear_referenced(folio);
 
        /*
-        * Mlock lost the isolation race with us.  Let try_to_unmap()
-        * move the page to the unevictable list.
+        * The supposedly reclaimable folio was found to be in a VM_LOCKED vma.
+        * Let the folio, now marked Mlocked, be moved to the unevictable list.
         */
        if (vm_flags & VM_LOCKED)
-               return PAGEREF_RECLAIM;
+               return PAGEREF_ACTIVATE;
 
        if (referenced_ptes) {
                /*
-                * All mapped pages start out with page table
+                * All mapped folios start out with page table
                 * references from the instantiating fault, so we need
-                * to look twice if a mapped file page is used more
+                * to look twice if a mapped file/anon folio is used more
                 * than once.
                 *
                 * Mark it and spare it for another trip around the
                 * inactive list.  Another page table reference will
                 * lead to its activation.
                 *
-                * Note: the mark is set for activated pages as well
-                * so that recently deactivated but used pages are
+                * Note: the mark is set for activated folios as well
+                * so that recently deactivated but used folios are
                 * quickly recovered.
                 */
-               SetPageReferenced(page);
+               folio_set_referenced(folio);
 
-               if (referenced_page || referenced_ptes > 1)
+               if (referenced_folio || referenced_ptes > 1)
                        return PAGEREF_ACTIVATE;
 
                /*
-                * Activate file-backed executable pages after first usage.
+                * Activate file-backed executable folios after first usage.
                 */
-               if ((vm_flags & VM_EXEC) && !PageSwapBacked(page))
+               if ((vm_flags & VM_EXEC) && !folio_test_swapbacked(folio))
                        return PAGEREF_ACTIVATE;
 
                return PAGEREF_KEEP;
        }
 
-       /* Reclaim if clean, defer dirty pages to writeback */
-       if (referenced_page && !PageSwapBacked(page))
+       /* Reclaim if clean, defer dirty folios to writeback */
+       if (referenced_folio && !folio_test_swapbacked(folio))
                return PAGEREF_RECLAIM_CLEAN;
 
        return PAGEREF_RECLAIM;
 }
 
 /* Check if a page is dirty or under writeback */
-static void page_check_dirty_writeback(struct page *page,
+static void folio_check_dirty_writeback(struct folio *folio,
                                       bool *dirty, bool *writeback)
 {
        struct address_space *mapping;
@@ -1440,24 +1434,24 @@ static void page_check_dirty_writeback(struct page *page,
         * Anonymous pages are not handled by flushers and must be written
         * from reclaim context. Do not stall reclaim based on them
         */
-       if (!page_is_file_lru(page) ||
-           (PageAnon(page) && !PageSwapBacked(page))) {
+       if (!folio_is_file_lru(folio) ||
+           (folio_test_anon(folio) && !folio_test_swapbacked(folio))) {
                *dirty = false;
                *writeback = false;
                return;
        }
 
-       /* By default assume that the page flags are accurate */
-       *dirty = PageDirty(page);
-       *writeback = PageWriteback(page);
+       /* By default assume that the folio flags are accurate */
+       *dirty = folio_test_dirty(folio);
+       *writeback = folio_test_writeback(folio);
 
        /* Verify dirty/writeback state if the filesystem supports it */
-       if (!page_has_private(page))
+       if (!folio_test_private(folio))
                return;
 
-       mapping = page_mapping(page);
+       mapping = folio_mapping(folio);
        if (mapping && mapping->a_ops->is_dirty_writeback)
-               mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
+               mapping->a_ops->is_dirty_writeback(&folio->page, dirty, writeback);
 }
 
 static struct page *alloc_demote_page(struct page *page, unsigned long node)
@@ -1531,14 +1525,16 @@ retry:
        while (!list_empty(page_list)) {
                struct address_space *mapping;
                struct page *page;
+               struct folio *folio;
                enum page_references references = PAGEREF_RECLAIM;
                bool dirty, writeback, may_enter_fs;
                unsigned int nr_pages;
 
                cond_resched();
 
-               page = lru_to_page(page_list);
-               list_del(&page->lru);
+               folio = lru_to_folio(page_list);
+               list_del(&folio->lru);
+               page = &folio->page;
 
                if (!trylock_page(page))
                        goto keep;
@@ -1564,12 +1560,12 @@ retry:
                 * reclaim_congested. kswapd will stall and start writing
                 * pages if the tail of the LRU is all dirty unqueued pages.
                 */
-               page_check_dirty_writeback(page, &dirty, &writeback);
+               folio_check_dirty_writeback(folio, &dirty, &writeback);
                if (dirty || writeback)
-                       stat->nr_dirty++;
+                       stat->nr_dirty += nr_pages;
 
                if (dirty && !writeback)
-                       stat->nr_unqueued_dirty++;
+                       stat->nr_unqueued_dirty += nr_pages;
 
                /*
                 * Treat this page as congested if the underlying BDI is or if
@@ -1578,10 +1574,8 @@ retry:
                 * end of the LRU a second time.
                 */
                mapping = page_mapping(page);
-               if (((dirty || writeback) && mapping &&
-                    inode_write_congested(mapping->host)) ||
-                   (writeback && PageReclaim(page)))
-                       stat->nr_congested++;
+               if (writeback && PageReclaim(page))
+                       stat->nr_congested += nr_pages;
 
                /*
                 * If a page at the tail of the LRU is under writeback, there
@@ -1630,7 +1624,7 @@ retry:
                        if (current_is_kswapd() &&
                            PageReclaim(page) &&
                            test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
-                               stat->nr_immediate++;
+                               stat->nr_immediate += nr_pages;
                                goto activate_locked;
 
                        /* Case 2 above */
@@ -1648,7 +1642,7 @@ retry:
                                 * and it's also appropriate in global reclaim.
                                 */
                                SetPageReclaim(page);
-                               stat->nr_writeback++;
+                               stat->nr_writeback += nr_pages;
                                goto activate_locked;
 
                        /* Case 3 above */
@@ -1662,7 +1656,7 @@ retry:
                }
 
                if (!ignore_references)
-                       references = page_check_references(page, sc);
+                       references = folio_check_references(folio, sc);
 
                switch (references) {
                case PAGEREF_ACTIVATE:
@@ -1695,28 +1689,28 @@ retry:
                        if (!PageSwapCache(page)) {
                                if (!(sc->gfp_mask & __GFP_IO))
                                        goto keep_locked;
-                               if (page_maybe_dma_pinned(page))
+                               if (folio_maybe_dma_pinned(folio))
                                        goto keep_locked;
                                if (PageTransHuge(page)) {
                                        /* cannot split THP, skip it */
-                                       if (!can_split_huge_page(page, NULL))
+                                       if (!can_split_folio(folio, NULL))
                                                goto activate_locked;
                                        /*
                                         * Split pages without a PMD map right
                                         * away. Chances are some or all of the
                                         * tail pages can be freed without IO.
                                         */
-                                       if (!compound_mapcount(page) &&
-                                           split_huge_page_to_list(page,
-                                                                   page_list))
+                                       if (!folio_entire_mapcount(folio) &&
+                                           split_folio_to_list(folio,
+                                                               page_list))
                                                goto activate_locked;
                                }
                                if (!add_to_swap(page)) {
                                        if (!PageTransHuge(page))
                                                goto activate_locked_split;
                                        /* Fallback to swap normal pages */
-                                       if (split_huge_page_to_list(page,
-                                                                   page_list))
+                                       if (split_folio_to_list(folio,
+                                                               page_list))
                                                goto activate_locked;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                                        count_vm_event(THP_SWPOUT_FALLBACK);
@@ -1730,9 +1724,9 @@ retry:
                                /* Adding to swap updated mapping */
                                mapping = page_mapping(page);
                        }
-               } else if (unlikely(PageTransHuge(page))) {
-                       /* Split file THP */
-                       if (split_huge_page_to_list(page, page_list))
+               } else if (PageSwapBacked(page) && PageTransHuge(page)) {
+                       /* Split shmem THP */
+                       if (split_folio_to_list(folio, page_list))
                                goto keep_locked;
                }
 
@@ -1756,10 +1750,11 @@ retry:
                        enum ttu_flags flags = TTU_BATCH_FLUSH;
                        bool was_swapbacked = PageSwapBacked(page);
 
-                       if (unlikely(PageTransHuge(page)))
+                       if (PageTransHuge(page) &&
+                                       thp_order(page) >= HPAGE_PMD_ORDER)
                                flags |= TTU_SPLIT_HUGE_PMD;
 
-                       try_to_unmap(page, flags);
+                       try_to_unmap(folio, flags);
                        if (page_mapped(page)) {
                                stat->nr_unmap_fail += nr_pages;
                                if (!was_swapbacked && PageSwapBacked(page))
@@ -1807,13 +1802,13 @@ retry:
                         * starts and then write it out here.
                         */
                        try_to_unmap_flush_dirty();
-                       switch (pageout(page, mapping)) {
+                       switch (pageout(folio, mapping)) {
                        case PAGE_KEEP:
                                goto keep_locked;
                        case PAGE_ACTIVATE:
                                goto activate_locked;
                        case PAGE_SUCCESS:
-                               stat->nr_pageout += thp_nr_pages(page);
+                               stat->nr_pageout += nr_pages;
 
                                if (PageWriteback(page))
                                        goto keep;
@@ -1891,7 +1886,7 @@ retry:
                         */
                        count_vm_event(PGLAZYFREED);
                        count_memcg_page_event(page, PGLAZYFREED);
-               } else if (!mapping || !__remove_mapping(mapping, page, true,
+               } else if (!mapping || !__remove_mapping(mapping, folio, true,
                                                         sc->target_mem_cgroup))
                        goto keep_locked;
 
@@ -2013,69 +2008,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
        return nr_reclaimed;
 }
 
-/*
- * Attempt to remove the specified page from its LRU.  Only take this page
- * if it is of the appropriate PageActive status.  Pages which are being
- * freed elsewhere are also ignored.
- *
- * page:       page to consider
- * mode:       one of the LRU isolation modes defined above
- *
- * returns true on success, false on failure.
- */
-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
-{
-       /* Only take pages on the LRU. */
-       if (!PageLRU(page))
-               return false;
-
-       /* Compaction should not handle unevictable pages but CMA can do so */
-       if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
-               return false;
-
-       /*
-        * To minimise LRU disruption, the caller can indicate that it only
-        * wants to isolate pages it will be able to operate on without
-        * blocking - clean pages for the most part.
-        *
-        * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
-        * that it is possible to migrate without blocking
-        */
-       if (mode & ISOLATE_ASYNC_MIGRATE) {
-               /* All the caller can do on PageWriteback is block */
-               if (PageWriteback(page))
-                       return false;
-
-               if (PageDirty(page)) {
-                       struct address_space *mapping;
-                       bool migrate_dirty;
-
-                       /*
-                        * Only pages without mappings or that have a
-                        * ->migratepage callback are possible to migrate
-                        * without blocking. However, we can be racing with
-                        * truncation so it's necessary to lock the page
-                        * to stabilise the mapping as truncation holds
-                        * the page lock until after the page is removed
-                        * from the page cache.
-                        */
-                       if (!trylock_page(page))
-                               return false;
-
-                       mapping = page_mapping(page);
-                       migrate_dirty = !mapping || mapping->a_ops->migratepage;
-                       unlock_page(page);
-                       if (!migrate_dirty)
-                               return false;
-               }
-       }
-
-       if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
-               return false;
-
-       return true;
-}
-
 /*
  * Update LRU sizes after isolating pages. The LRU size updates must
  * be complete before mem_cgroup_update_lru_size due to a sanity check.
@@ -2127,11 +2059,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        unsigned long skipped = 0;
        unsigned long scan, total_scan, nr_pages;
        LIST_HEAD(pages_skipped);
-       isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
 
        total_scan = 0;
        scan = 0;
        while (scan < nr_to_scan && !list_empty(src)) {
+               struct list_head *move_to = src;
                struct page *page;
 
                page = lru_to_page(src);
@@ -2141,9 +2073,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                total_scan += nr_pages;
 
                if (page_zonenum(page) > sc->reclaim_idx) {
-                       list_move(&page->lru, &pages_skipped);
                        nr_skipped[page_zonenum(page)] += nr_pages;
-                       continue;
+                       move_to = &pages_skipped;
+                       goto move;
                }
 
                /*
@@ -2151,37 +2083,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 * return with no isolated pages if the LRU mostly contains
                 * ineligible pages.  This causes the VM to not reclaim any
                 * pages, triggering a premature OOM.
-                *
-                * Account all tail pages of THP.  This would not cause
-                * premature OOM since __isolate_lru_page() returns -EBUSY
-                * only when the page is being freed somewhere else.
+                * Account all tail pages of THP.
                 */
                scan += nr_pages;
-               if (!__isolate_lru_page_prepare(page, mode)) {
-                       /* It is being freed elsewhere */
-                       list_move(&page->lru, src);
-                       continue;
-               }
+
+               if (!PageLRU(page))
+                       goto move;
+               if (!sc->may_unmap && page_mapped(page))
+                       goto move;
+
                /*
                 * Be careful not to clear PageLRU until after we're
                 * sure the page is not being freed elsewhere -- the
                 * page release code relies on it.
                 */
-               if (unlikely(!get_page_unless_zero(page))) {
-                       list_move(&page->lru, src);
-                       continue;
-               }
+               if (unlikely(!get_page_unless_zero(page)))
+                       goto move;
 
                if (!TestClearPageLRU(page)) {
                        /* Another thread is already isolating this page */
                        put_page(page);
-                       list_move(&page->lru, src);
-                       continue;
+                       goto move;
                }
 
                nr_taken += nr_pages;
                nr_zone_taken[page_zonenum(page)] += nr_pages;
-               list_move(&page->lru, dst);
+               move_to = dst;
+move:
+               list_move(&page->lru, move_to);
        }
 
        /*
@@ -2205,51 +2134,47 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        }
        *nr_scanned = total_scan;
        trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-                                   total_scan, skipped, nr_taken, mode, lru);
+                                   total_scan, skipped, nr_taken,
+                                   sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
        update_lru_sizes(lruvec, lru, nr_zone_taken);
        return nr_taken;
 }
 
 /**
- * isolate_lru_page - tries to isolate a page from its LRU list
- * @page: page to isolate from its LRU list
- *
- * Isolates a @page from an LRU list, clears PageLRU and adjusts the
- * vmstat statistic corresponding to whatever LRU list the page was on.
+ * folio_isolate_lru() - Try to isolate a folio from its LRU list.
+ * @folio: Folio to isolate from its LRU list.
  *
- * Returns 0 if the page was removed from an LRU list.
- * Returns -EBUSY if the page was not on an LRU list.
+ * Isolate a @folio from an LRU list and adjust the vmstat statistic
+ * corresponding to whatever LRU list the folio was on.
  *
- * The returned page will have PageLRU() cleared.  If it was found on
- * the active list, it will have PageActive set.  If it was found on
- * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * The folio will have its LRU flag cleared.  If it was found on the
+ * active list, it will have the Active flag set.  If it was found on the
+ * unevictable list, it will have the Unevictable flag set.  These flags
  * may need to be cleared by the caller before letting the page go.
  *
- * The vmstat statistic corresponding to the list on which the page was
- * found will be decremented.
- *
- * Restrictions:
+ * Context:
  *
  * (1) Must be called with an elevated refcount on the page. This is a
- *     fundamental difference from isolate_lru_pages (which is called
+ *     fundamental difference from isolate_lru_pages() (which is called
  *     without a stable reference).
- * (2) the lru_lock must not be held.
- * (3) interrupts must be enabled.
+ * (2) The lru_lock must not be held.
+ * (3) Interrupts must be enabled.
+ *
+ * Return: 0 if the folio was removed from an LRU list.
+ * -EBUSY if the folio was not on an LRU list.
  */
-int isolate_lru_page(struct page *page)
+int folio_isolate_lru(struct folio *folio)
 {
-       struct folio *folio = page_folio(page);
        int ret = -EBUSY;
 
-       VM_BUG_ON_PAGE(!page_count(page), page);
-       WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
+       VM_BUG_ON_FOLIO(!folio_ref_count(folio), folio);
 
-       if (TestClearPageLRU(page)) {
+       if (folio_test_clear_lru(folio)) {
                struct lruvec *lruvec;
 
-               get_page(page);
+               folio_get(folio);
                lruvec = folio_lruvec_lock_irq(folio);
-               del_page_from_lru_list(page, lruvec);
+               lruvec_del_folio(lruvec, folio);
                unlock_page_lruvec_irq(lruvec);
                ret = 0;
        }
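
To show how folio_isolate_lru() pairs with folio_putback_lru() introduced earlier, here is a hedged sketch (a hypothetical helper, not part of this patch) that respects the documented context: the caller already holds a reference, the lru_lock is not held and interrupts are enabled.

/*
 * Hypothetical helper, for illustration only: take a folio off its LRU
 * list, operate on it, then return it.  folio_isolate_lru() takes its
 * own reference on success; folio_putback_lru() drops that isolation
 * reference when the folio is re-added to an LRU list.
 */
static void example_with_folio_off_lru(struct folio *folio)
{
        if (folio_isolate_lru(folio) == 0) {
                /* ... work on the folio while it is off the LRU ... */
                folio_putback_lru(folio);
        }
}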
@@ -2379,9 +2304,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec,
  */
 static int current_may_throttle(void)
 {
-       return !(current->flags & PF_LOCAL_THROTTLE) ||
-               current->backing_dev_info == NULL ||
-               bdi_write_congested(current->backing_dev_info);
+       return !(current->flags & PF_LOCAL_THROTTLE);
 }
 
 /*
@@ -2487,7 +2410,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
  *
  * If the pages are mostly unmapped, the processing is fast and it is
  * appropriate to hold lru_lock across the whole operation.  But if
- * the pages are mapped, the processing is slow (page_referenced()), so
+ * the pages are mapped, the processing is slow (folio_referenced()), so
  * we should drop lru_lock around each page.  It's impossible to balance
  * this, so instead we remove the pages from the LRU while processing them.
  * It is safe to rely on PG_active against the non-LRU pages in here because
@@ -2507,7 +2430,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
        LIST_HEAD(l_hold);      /* The pages which were snipped off */
        LIST_HEAD(l_active);
        LIST_HEAD(l_inactive);
-       struct page *page;
        unsigned nr_deactivate, nr_activate;
        unsigned nr_rotated = 0;
        int file = is_file_lru(lru);
@@ -2529,9 +2451,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
        spin_unlock_irq(&lruvec->lru_lock);
 
        while (!list_empty(&l_hold)) {
+               struct folio *folio;
+               struct page *page;
+
                cond_resched();
-               page = lru_to_page(&l_hold);
-               list_del(&page->lru);
+               folio = lru_to_folio(&l_hold);
+               list_del(&folio->lru);
+               page = &folio->page;
 
                if (unlikely(!page_evictable(page))) {
                        putback_lru_page(page);
@@ -2546,8 +2472,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
                        }
                }
 
-               if (page_referenced(page, 0, sc->target_mem_cgroup,
-                                   &vm_flags)) {
+               if (folio_referenced(folio, 0, sc->target_mem_cgroup,
+                                    &vm_flags)) {
                        /*
                         * Identify referenced, file-backed active pages and
                         * give them one more trip around the active list. So
@@ -3977,7 +3903,10 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
                if (!managed_zone(zone))
                        continue;
 
-               mark = high_wmark_pages(zone);
+               if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+                       mark = wmark_pages(zone, WMARK_PROMO);
+               else
+                       mark = high_wmark_pages(zone);
                if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
                        return true;
        }
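
The new watermark choice can be read as a small helper; the sketch below only restates it for clarity, and the rationale given in the comment (leaving headroom for NUMA promotion on tiered-memory systems) is the editor's reading rather than wording from this patch.

/*
 * Restated for illustration only: with NUMA_BALANCING_MEMORY_TIERING
 * enabled, kswapd balances against the higher promotion watermark so
 * the node keeps free pages available for promoted ones; otherwise the
 * usual high watermark applies.
 */
static unsigned long example_balance_mark(struct zone *zone)
{
        if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
                return wmark_pages(zone, WMARK_PROMO);

        return high_wmark_pages(zone);
}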
@@ -4474,7 +4403,7 @@ static int kswapd(void *p)
         * us from recursively trying to free more memory as we're
         * trying to free the first piece of memory in the first place).
         */
-       tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+       tsk->flags |= PF_MEMALLOC | PF_KSWAPD;
        set_freezable();
 
        WRITE_ONCE(pgdat->kswapd_order, 0);
@@ -4525,7 +4454,7 @@ kswapd_try_sleep:
                        goto kswapd_try_sleep;
        }
 
-       tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+       tsk->flags &= ~(PF_MEMALLOC | PF_KSWAPD);
 
        return 0;
 }
@@ -4766,11 +4695,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        fs_reclaim_acquire(sc.gfp_mask);
        /*
         * We need to be able to allocate from the reserves for RECLAIM_UNMAP
-        * and we also need to be able to write out pages for RECLAIM_WRITE
-        * and RECLAIM_UNMAP.
         */
        noreclaim_flag = memalloc_noreclaim_save();
-       p->flags |= PF_SWAPWRITE;
        set_task_reclaim_state(p, &sc.reclaim_state);
 
        if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
@@ -4784,7 +4710,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        }
 
        set_task_reclaim_state(p, NULL);
-       current->flags &= ~PF_SWAPWRITE;
        memalloc_noreclaim_restore(noreclaim_flag);
        fs_reclaim_release(sc.gfp_mask);
        psi_memstall_leave(&pflags);