Merge tag 'tty-5.18-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 59b14e0..1678802 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -56,6 +56,7 @@
 
 #include <linux/swapops.h>
 #include <linux/balloon_compaction.h>
+#include <linux/sched/sysctl.h>
 
 #include "internal.h"
 
@@ -978,47 +979,36 @@ void drop_slab(void)
                drop_slab_node(nid);
 }
 
-static inline int is_page_cache_freeable(struct page *page)
+static inline int is_page_cache_freeable(struct folio *folio)
 {
        /*
         * A freeable page cache page is referenced only by the caller
         * that isolated the page, the page cache and optional buffer
         * heads at page->private.
         */
-       int page_cache_pins = thp_nr_pages(page);
-       return page_count(page) - page_has_private(page) == 1 + page_cache_pins;
-}
-
-static int may_write_to_inode(struct inode *inode)
-{
-       if (current->flags & PF_SWAPWRITE)
-               return 1;
-       if (!inode_write_congested(inode))
-               return 1;
-       if (inode_to_bdi(inode) == current->backing_dev_info)
-               return 1;
-       return 0;
+       return folio_ref_count(folio) - folio_test_private(folio) ==
+               1 + folio_nr_pages(folio);
 }
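
A worked example may help here. The sketch below is purely illustrative (example_freeable() is a hypothetical name, not part of this patch): it restates the check with the reference sources spelled out, e.g. for an order-2 folio the page cache holds folio_nr_pages() == 4 references, the isolating caller holds one, and attached private data accounts for one more.

/*
 * Illustrative restatement of is_page_cache_freeable(), not part of
 * this patch.  For an order-2 folio in the page cache: 4 cache
 * references + 1 caller reference (+ 1 if private data is attached).
 */
static inline bool example_freeable(struct folio *folio)
{
        long expected = 1 + folio_nr_pages(folio) + folio_test_private(folio);

        return folio_ref_count(folio) == expected;
}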
 
 /*
- * We detected a synchronous write error writing a page out.  Probably
+ * We detected a synchronous write error writing a folio out.  Probably
  * -ENOSPC.  We need to propagate that into the address_space for a subsequent
  * fsync(), msync() or close().
  *
  * The tricky part is that after writepage we cannot touch the mapping: nothing
- * prevents it from being freed up.  But we have a ref on the page and once
- * that page is locked, the mapping is pinned.
+ * prevents it from being freed up.  But we have a ref on the folio and once
+ * that folio is locked, the mapping is pinned.
  *
- * We're allowed to run sleeping lock_page() here because we know the caller has
+ * We're allowed to run sleeping folio_lock() here because we know the caller has
  * __GFP_FS.
  */
 static void handle_write_error(struct address_space *mapping,
-                               struct page *page, int error)
+                               struct folio *folio, int error)
 {
-       lock_page(page);
-       if (page_mapping(page) == mapping)
+       folio_lock(folio);
+       if (folio_mapping(folio) == mapping)
                mapping_set_error(mapping, error);
-       unlock_page(page);
+       folio_unlock(folio);
 }
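
As a hedged aside on where the recorded error ends up: mapping_set_error() latches it in the address_space, and a later fsync()/msync()/close() path picks it up. The helper below is hypothetical (example_report_write_error() is not from this patch) and only illustrates that flow using the existing filemap_check_errors() API.

/*
 * Hypothetical caller, for illustration only: filemap_check_errors()
 * clears and returns the AS_EIO/AS_ENOSPC error that
 * mapping_set_error() recorded above, which is how a subsequent
 * fsync() learns about the failed writepage.
 */
static int example_report_write_error(struct address_space *mapping)
{
        return filemap_check_errors(mapping);
}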
 
 static bool skip_throttle_noprogress(pg_data_t *pgdat)
@@ -1165,35 +1155,35 @@ typedef enum {
  * pageout is called by shrink_page_list() for each dirty page.
  * Calls ->writepage().
  */
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+static pageout_t pageout(struct folio *folio, struct address_space *mapping)
 {
        /*
-        * If the page is dirty, only perform writeback if that write
+        * If the folio is dirty, only perform writeback if that write
         * will be non-blocking.  To prevent this allocation from being
         * stalled by pagecache activity.  But note that there may be
         * stalls if we need to run get_block().  We could test
         * PagePrivate for that.
         *
         * If this process is currently in __generic_file_write_iter() against
-        * this page's queue, we can perform writeback even if that
+        * this folio's queue, we can perform writeback even if that
         * will block.
         *
-        * If the page is swapcache, write it back even if that would
+        * If the folio is swapcache, write it back even if that would
         * block, for some throttling. This happens by accident, because
         * swap_backing_dev_info is bust: it doesn't reflect the
         * congestion state of the swapdevs.  Easy to fix, if needed.
         */
-       if (!is_page_cache_freeable(page))
+       if (!is_page_cache_freeable(folio))
                return PAGE_KEEP;
        if (!mapping) {
                /*
-                * Some data journaling orphaned pages can have
-                * page->mapping == NULL while being dirty with clean buffers.
+                * Orphaned folios from data-journaling filesystems can be
+                * dirty with clean buffers and folio->mapping == NULL.
                 */
-               if (page_has_private(page)) {
-                       if (try_to_free_buffers(page)) {
-                               ClearPageDirty(page);
-                               pr_info("%s: orphaned page\n", __func__);
+               if (folio_test_private(folio)) {
+                       if (try_to_free_buffers(&folio->page)) {
+                               folio_clear_dirty(folio);
+                               pr_info("%s: orphaned folio\n", __func__);
                                return PAGE_CLEAN;
                        }
                }
@@ -1201,10 +1191,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
        }
        if (mapping->a_ops->writepage == NULL)
                return PAGE_ACTIVATE;
-       if (!may_write_to_inode(mapping->host))
-               return PAGE_KEEP;
 
-       if (clear_page_dirty_for_io(page)) {
+       if (folio_clear_dirty_for_io(folio)) {
                int res;
                struct writeback_control wbc = {
                        .sync_mode = WB_SYNC_NONE,
@@ -1214,21 +1202,21 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
                        .for_reclaim = 1,
                };
 
-               SetPageReclaim(page);
-               res = mapping->a_ops->writepage(page, &wbc);
+               folio_set_reclaim(folio);
+               res = mapping->a_ops->writepage(&folio->page, &wbc);
                if (res < 0)
-                       handle_write_error(mapping, page, res);
+                       handle_write_error(mapping, folio, res);
                if (res == AOP_WRITEPAGE_ACTIVATE) {
-                       ClearPageReclaim(page);
+                       folio_clear_reclaim(folio);
                        return PAGE_ACTIVATE;
                }
 
-               if (!PageWriteback(page)) {
+               if (!folio_test_writeback(folio)) {
                        /* synchronous write or broken a_ops? */
-                       ClearPageReclaim(page);
+                       folio_clear_reclaim(folio);
                }
-               trace_mm_vmscan_writepage(page);
-               inc_node_page_state(page, NR_VMSCAN_WRITE);
+               trace_mm_vmscan_write_folio(folio);
+               node_stat_add_folio(folio, NR_VMSCAN_WRITE);
                return PAGE_SUCCESS;
        }
 
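One aside on the AOP_WRITEPAGE_ACTIVATE branch above: a filesystem's ->writepage may decline to do I/O from reclaim context. The sketch below is a hedged illustration of that convention (loosely modeled on shmem's fallback path), not code from this patch.

/*
 * Illustration only: when ->writepage redirties the page and returns
 * AOP_WRITEPAGE_ACTIVATE (leaving the page locked), pageout() above
 * clears the reclaim flag and returns PAGE_ACTIVATE, so the page is
 * moved back to the active list instead of being written out.
 */
static int example_writepage(struct page *page, struct writeback_control *wbc)
{
        if (wbc->for_reclaim) {
                redirty_page_for_writepage(wbc, page);
                return AOP_WRITEPAGE_ACTIVATE;
        }

        /* A real implementation would start I/O here. */
        unlock_page(page);
        return 0;
}
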
@@ -1239,16 +1227,16 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
  * Same as remove_mapping, but if the page is removed from the mapping, it
  * gets returned with a refcount of 0.
  */
-static int __remove_mapping(struct address_space *mapping, struct page *page,
+static int __remove_mapping(struct address_space *mapping, struct folio *folio,
                            bool reclaimed, struct mem_cgroup *target_memcg)
 {
        int refcount;
        void *shadow = NULL;
 
-       BUG_ON(!PageLocked(page));
-       BUG_ON(mapping != page_mapping(page));
+       BUG_ON(!folio_test_locked(folio));
+       BUG_ON(mapping != folio_mapping(folio));
 
-       if (!PageSwapCache(page))
+       if (!folio_test_swapcache(folio))
                spin_lock(&mapping->host->i_lock);
        xa_lock_irq(&mapping->i_pages);
        /*
@@ -1276,23 +1264,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
         * Note that if SetPageDirty is always performed via set_page_dirty,
         * and thus under the i_pages lock, then this ordering is not required.
         */
-       refcount = 1 + compound_nr(page);
-       if (!page_ref_freeze(page, refcount))
+       refcount = 1 + folio_nr_pages(folio);
+       if (!folio_ref_freeze(folio, refcount))
                goto cannot_free;
        /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */
-       if (unlikely(PageDirty(page))) {
-               page_ref_unfreeze(page, refcount);
+       if (unlikely(folio_test_dirty(folio))) {
+               folio_ref_unfreeze(folio, refcount);
                goto cannot_free;
        }
 
-       if (PageSwapCache(page)) {
-               swp_entry_t swap = { .val = page_private(page) };
-               mem_cgroup_swapout(page, swap);
+       if (folio_test_swapcache(folio)) {
+               swp_entry_t swap = folio_swap_entry(folio);
+               mem_cgroup_swapout(folio, swap);
                if (reclaimed && !mapping_exiting(mapping))
-                       shadow = workingset_eviction(page, target_memcg);
-               __delete_from_swap_cache(page, swap, shadow);
+                       shadow = workingset_eviction(folio, target_memcg);
+               __delete_from_swap_cache(&folio->page, swap, shadow);
                xa_unlock_irq(&mapping->i_pages);
-               put_swap_page(page, swap);
+               put_swap_page(&folio->page, swap);
        } else {
                void (*freepage)(struct page *);
 
@@ -1313,61 +1301,67 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
                 * exceptional entries and shadow exceptional entries in the
                 * same address_space.
                 */
-               if (reclaimed && page_is_file_lru(page) &&
+               if (reclaimed && folio_is_file_lru(folio) &&
                    !mapping_exiting(mapping) && !dax_mapping(mapping))
-                       shadow = workingset_eviction(page, target_memcg);
-               __delete_from_page_cache(page, shadow);
+                       shadow = workingset_eviction(folio, target_memcg);
+               __filemap_remove_folio(folio, shadow);
                xa_unlock_irq(&mapping->i_pages);
                if (mapping_shrinkable(mapping))
                        inode_add_lru(mapping->host);
                spin_unlock(&mapping->host->i_lock);
 
                if (freepage != NULL)
-                       freepage(page);
+                       freepage(&folio->page);
        }
 
        return 1;
 
 cannot_free:
        xa_unlock_irq(&mapping->i_pages);
-       if (!PageSwapCache(page))
+       if (!folio_test_swapcache(folio))
                spin_unlock(&mapping->host->i_lock);
        return 0;
 }
 
-/*
- * Attempt to detach a locked page from its ->mapping.  If it is dirty or if
- * someone else has a ref on the page, abort and return 0.  If it was
- * successfully detached, return 1.  Assumes the caller has a single ref on
- * this page.
+/**
+ * remove_mapping() - Attempt to remove a folio from its mapping.
+ * @mapping: The address space.
+ * @folio: The folio to remove.
+ *
+ * If the folio is dirty, under writeback or if someone else has a ref
+ * on it, removal will fail.
+ * Return: The number of pages removed from the mapping.  0 if the folio
+ * could not be removed.
+ * Context: The caller should have a single refcount on the folio and
+ * hold its lock.
  */
-int remove_mapping(struct address_space *mapping, struct page *page)
+long remove_mapping(struct address_space *mapping, struct folio *folio)
 {
-       if (__remove_mapping(mapping, page, false, NULL)) {
+       if (__remove_mapping(mapping, folio, false, NULL)) {
                /*
-                * Unfreezing the refcount with 1 rather than 2 effectively
+                * Unfreezing the refcount with 1 effectively
                 * drops the pagecache ref for us without requiring another
                 * atomic operation.
                 */
-               page_ref_unfreeze(page, 1);
-               return 1;
+               folio_ref_unfreeze(folio, 1);
+               return folio_nr_pages(folio);
        }
        return 0;
 }
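
A hedged usage sketch for the contract documented above; example_drop_clean_folio() is a hypothetical caller, shown only to make the lock and refcount expectations concrete.

/*
 * Hypothetical caller, for illustration only: the caller holds the
 * folio lock and the single extra reference required by the kernel-doc
 * above.  On success the return value is the number of base pages
 * removed from the mapping; 0 means the folio was busy.
 */
static long example_drop_clean_folio(struct address_space *mapping,
                                     struct folio *folio)
{
        long removed = 0;

        if (!folio_test_dirty(folio) && !folio_test_writeback(folio))
                removed = remove_mapping(mapping, folio);
        folio_unlock(folio);
        return removed;
}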
 
 /**
- * putback_lru_page - put previously isolated page onto appropriate LRU list
- * @page: page to be put back to appropriate lru list
+ * folio_putback_lru - Put previously isolated folio onto appropriate LRU list.
+ * @folio: Folio to be returned to an LRU list.
  *
- * Add previously isolated @page to appropriate LRU list.
- * Page may still be unevictable for other reasons.
+ * Add previously isolated @folio to appropriate LRU list.
+ * The folio may still be unevictable for other reasons.
  *
- * lru_lock must not be held, interrupts must be enabled.
+ * Context: lru_lock must not be held, interrupts must be enabled.
  */
-void putback_lru_page(struct page *page)
+void folio_putback_lru(struct folio *folio)
 {
-       lru_cache_add(page);
-       put_page(page);         /* drop ref from isolate */
+       folio_add_lru(folio);
+       folio_put(folio);               /* drop ref from isolate */
 }
 
 enum page_references {
@@ -1377,61 +1371,61 @@ enum page_references {
        PAGEREF_ACTIVATE,
 };
 
-static enum page_references page_check_references(struct page *page,
+static enum page_references folio_check_references(struct folio *folio,
                                                  struct scan_control *sc)
 {
-       int referenced_ptes, referenced_page;
+       int referenced_ptes, referenced_folio;
        unsigned long vm_flags;
 
-       referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup,
-                                         &vm_flags);
-       referenced_page = TestClearPageReferenced(page);
+       referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup,
+                                          &vm_flags);
+       referenced_folio = folio_test_clear_referenced(folio);
 
        /*
-        * Mlock lost the isolation race with us.  Let try_to_unmap()
-        * move the page to the unevictable list.
+        * The supposedly reclaimable folio was found to be in a VM_LOCKED vma.
+        * Let the folio, now marked Mlocked, be moved to the unevictable list.
         */
        if (vm_flags & VM_LOCKED)
-               return PAGEREF_RECLAIM;
+               return PAGEREF_ACTIVATE;
 
        if (referenced_ptes) {
                /*
-                * All mapped pages start out with page table
+                * All mapped folios start out with page table
                 * references from the instantiating fault, so we need
-                * to look twice if a mapped file page is used more
+                * to look twice if a mapped file/anon folio is used more
                 * than once.
                 *
                 * Mark it and spare it for another trip around the
                 * inactive list.  Another page table reference will
                 * lead to its activation.
                 *
-                * Note: the mark is set for activated pages as well
-                * so that recently deactivated but used pages are
+                * Note: the mark is set for activated folios as well
+                * so that recently deactivated but used folios are
                 * quickly recovered.
                 */
-               SetPageReferenced(page);
+               folio_set_referenced(folio);
 
-               if (referenced_page || referenced_ptes > 1)
+               if (referenced_folio || referenced_ptes > 1)
                        return PAGEREF_ACTIVATE;
 
                /*
-                * Activate file-backed executable pages after first usage.
+                * Activate file-backed executable folios after first usage.
                 */
-               if ((vm_flags & VM_EXEC) && !PageSwapBacked(page))
+               if ((vm_flags & VM_EXEC) && !folio_test_swapbacked(folio))
                        return PAGEREF_ACTIVATE;
 
                return PAGEREF_KEEP;
        }
 
-       /* Reclaim if clean, defer dirty pages to writeback */
-       if (referenced_page && !PageSwapBacked(page))
+       /* Reclaim if clean, defer dirty folios to writeback */
+       if (referenced_folio && !folio_test_swapbacked(folio))
                return PAGEREF_RECLAIM_CLEAN;
 
        return PAGEREF_RECLAIM;
 }
 
 /* Check if a page is dirty or under writeback */
-static void page_check_dirty_writeback(struct page *page,
+static void folio_check_dirty_writeback(struct folio *folio,
                                       bool *dirty, bool *writeback)
 {
        struct address_space *mapping;
@@ -1440,24 +1434,24 @@ static void page_check_dirty_writeback(struct page *page,
         * Anonymous pages are not handled by flushers and must be written
         * from reclaim context. Do not stall reclaim based on them
         */
-       if (!page_is_file_lru(page) ||
-           (PageAnon(page) && !PageSwapBacked(page))) {
+       if (!folio_is_file_lru(folio) ||
+           (folio_test_anon(folio) && !folio_test_swapbacked(folio))) {
                *dirty = false;
                *writeback = false;
                return;
        }
 
-       /* By default assume that the page flags are accurate */
-       *dirty = PageDirty(page);
-       *writeback = PageWriteback(page);
+       /* By default assume that the folio flags are accurate */
+       *dirty = folio_test_dirty(folio);
+       *writeback = folio_test_writeback(folio);
 
        /* Verify dirty/writeback state if the filesystem supports it */
-       if (!page_has_private(page))
+       if (!folio_test_private(folio))
                return;
 
-       mapping = page_mapping(page);
+       mapping = folio_mapping(folio);
        if (mapping && mapping->a_ops->is_dirty_writeback)
-               mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
+               mapping->a_ops->is_dirty_writeback(&folio->page, dirty, writeback);
 }
 
 static struct page *alloc_demote_page(struct page *page, unsigned long node)
@@ -1531,14 +1525,16 @@ retry:
        while (!list_empty(page_list)) {
                struct address_space *mapping;
                struct page *page;
+               struct folio *folio;
                enum page_references references = PAGEREF_RECLAIM;
                bool dirty, writeback, may_enter_fs;
                unsigned int nr_pages;
 
                cond_resched();
 
-               page = lru_to_page(page_list);
-               list_del(&page->lru);
+               folio = lru_to_folio(page_list);
+               list_del(&folio->lru);
+               page = &folio->page;
 
                if (!trylock_page(page))
                        goto keep;
@@ -1564,12 +1560,12 @@ retry:
                 * reclaim_congested. kswapd will stall and start writing
                 * pages if the tail of the LRU is all dirty unqueued pages.
                 */
-               page_check_dirty_writeback(page, &dirty, &writeback);
+               folio_check_dirty_writeback(folio, &dirty, &writeback);
                if (dirty || writeback)
-                       stat->nr_dirty++;
+                       stat->nr_dirty += nr_pages;
 
                if (dirty && !writeback)
-                       stat->nr_unqueued_dirty++;
+                       stat->nr_unqueued_dirty += nr_pages;
 
                /*
                 * Treat this page as congested if the underlying BDI is or if
@@ -1578,10 +1574,8 @@ retry:
                 * end of the LRU a second time.
                 */
                mapping = page_mapping(page);
-               if (((dirty || writeback) && mapping &&
-                    inode_write_congested(mapping->host)) ||
-                   (writeback && PageReclaim(page)))
-                       stat->nr_congested++;
+               if (writeback && PageReclaim(page))
+                       stat->nr_congested += nr_pages;
 
                /*
                 * If a page at the tail of the LRU is under writeback, there
@@ -1630,7 +1624,7 @@ retry:
                        if (current_is_kswapd() &&
                            PageReclaim(page) &&
                            test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
-                               stat->nr_immediate++;
+                               stat->nr_immediate += nr_pages;
                                goto activate_locked;
 
                        /* Case 2 above */
@@ -1648,7 +1642,7 @@ retry:
                                 * and it's also appropriate in global reclaim.
                                 */
                                SetPageReclaim(page);
-                               stat->nr_writeback++;
+                               stat->nr_writeback += nr_pages;
                                goto activate_locked;
 
                        /* Case 3 above */
@@ -1662,7 +1656,7 @@ retry:
                }
 
                if (!ignore_references)
-                       references = page_check_references(page, sc);
+                       references = folio_check_references(folio, sc);
 
                switch (references) {
                case PAGEREF_ACTIVATE:
@@ -1695,28 +1689,28 @@ retry:
                        if (!PageSwapCache(page)) {
                                if (!(sc->gfp_mask & __GFP_IO))
                                        goto keep_locked;
-                               if (page_maybe_dma_pinned(page))
+                               if (folio_maybe_dma_pinned(folio))
                                        goto keep_locked;
                                if (PageTransHuge(page)) {
                                        /* cannot split THP, skip it */
-                                       if (!can_split_huge_page(page, NULL))
+                                       if (!can_split_folio(folio, NULL))
                                                goto activate_locked;
                                        /*
                                         * Split pages without a PMD map right
                                         * away. Chances are some or all of the
                                         * tail pages can be freed without IO.
                                         */
-                                       if (!compound_mapcount(page) &&
-                                           split_huge_page_to_list(page,
-                                                                   page_list))
+                                       if (!folio_entire_mapcount(folio) &&
+                                           split_folio_to_list(folio,
+                                                               page_list))
                                                goto activate_locked;
                                }
                                if (!add_to_swap(page)) {
                                        if (!PageTransHuge(page))
                                                goto activate_locked_split;
                                        /* Fallback to swap normal pages */
-                                       if (split_huge_page_to_list(page,
-                                                                   page_list))
+                                       if (split_folio_to_list(folio,
+                                                               page_list))
                                                goto activate_locked;
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                                        count_vm_event(THP_SWPOUT_FALLBACK);
@@ -1730,9 +1724,9 @@ retry:
                                /* Adding to swap updated mapping */
                                mapping = page_mapping(page);
                        }
-               } else if (unlikely(PageTransHuge(page))) {
-                       /* Split file THP */
-                       if (split_huge_page_to_list(page, page_list))
+               } else if (PageSwapBacked(page) && PageTransHuge(page)) {
+                       /* Split shmem THP */
+                       if (split_folio_to_list(folio, page_list))
                                goto keep_locked;
                }
 
@@ -1756,10 +1750,11 @@ retry:
                        enum ttu_flags flags = TTU_BATCH_FLUSH;
                        bool was_swapbacked = PageSwapBacked(page);
 
-                       if (unlikely(PageTransHuge(page)))
+                       if (PageTransHuge(page) &&
+                                       thp_order(page) >= HPAGE_PMD_ORDER)
                                flags |= TTU_SPLIT_HUGE_PMD;
 
-                       try_to_unmap(page, flags);
+                       try_to_unmap(folio, flags);
                        if (page_mapped(page)) {
                                stat->nr_unmap_fail += nr_pages;
                                if (!was_swapbacked && PageSwapBacked(page))
@@ -1807,13 +1802,13 @@ retry:
                         * starts and then write it out here.
                         */
                        try_to_unmap_flush_dirty();
-                       switch (pageout(page, mapping)) {
+                       switch (pageout(folio, mapping)) {
                        case PAGE_KEEP:
                                goto keep_locked;
                        case PAGE_ACTIVATE:
                                goto activate_locked;
                        case PAGE_SUCCESS:
-                               stat->nr_pageout += thp_nr_pages(page);
+                               stat->nr_pageout += nr_pages;
 
                                if (PageWriteback(page))
                                        goto keep;
@@ -1891,7 +1886,7 @@ retry:
                         */
                        count_vm_event(PGLAZYFREED);
                        count_memcg_page_event(page, PGLAZYFREED);
-               } else if (!mapping || !__remove_mapping(mapping, page, true,
+               } else if (!mapping || !__remove_mapping(mapping, folio, true,
                                                         sc->target_mem_cgroup))
                        goto keep_locked;
 
@@ -2013,69 +2008,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
        return nr_reclaimed;
 }
 
-/*
- * Attempt to remove the specified page from its LRU.  Only take this page
- * if it is of the appropriate PageActive status.  Pages which are being
- * freed elsewhere are also ignored.
- *
- * page:       page to consider
- * mode:       one of the LRU isolation modes defined above
- *
- * returns true on success, false on failure.
- */
-bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
-{
-       /* Only take pages on the LRU. */
-       if (!PageLRU(page))
-               return false;
-
-       /* Compaction should not handle unevictable pages but CMA can do so */
-       if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
-               return false;
-
-       /*
-        * To minimise LRU disruption, the caller can indicate that it only
-        * wants to isolate pages it will be able to operate on without
-        * blocking - clean pages for the most part.
-        *
-        * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages
-        * that it is possible to migrate without blocking
-        */
-       if (mode & ISOLATE_ASYNC_MIGRATE) {
-               /* All the caller can do on PageWriteback is block */
-               if (PageWriteback(page))
-                       return false;
-
-               if (PageDirty(page)) {
-                       struct address_space *mapping;
-                       bool migrate_dirty;
-
-                       /*
-                        * Only pages without mappings or that have a
-                        * ->migratepage callback are possible to migrate
-                        * without blocking. However, we can be racing with
-                        * truncation so it's necessary to lock the page
-                        * to stabilise the mapping as truncation holds
-                        * the page lock until after the page is removed
-                        * from the page cache.
-                        */
-                       if (!trylock_page(page))
-                               return false;
-
-                       mapping = page_mapping(page);
-                       migrate_dirty = !mapping || mapping->a_ops->migratepage;
-                       unlock_page(page);
-                       if (!migrate_dirty)
-                               return false;
-               }
-       }
-
-       if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
-               return false;
-
-       return true;
-}
-
 /*
  * Update LRU sizes after isolating pages. The LRU size updates must
  * be complete before mem_cgroup_update_lru_size due to a sanity check.
@@ -2127,11 +2059,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        unsigned long skipped = 0;
        unsigned long scan, total_scan, nr_pages;
        LIST_HEAD(pages_skipped);
-       isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED);
 
        total_scan = 0;
        scan = 0;
        while (scan < nr_to_scan && !list_empty(src)) {
+               struct list_head *move_to = src;
                struct page *page;
 
                page = lru_to_page(src);
@@ -2141,9 +2073,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                total_scan += nr_pages;
 
                if (page_zonenum(page) > sc->reclaim_idx) {
-                       list_move(&page->lru, &pages_skipped);
                        nr_skipped[page_zonenum(page)] += nr_pages;
-                       continue;
+                       move_to = &pages_skipped;
+                       goto move;
                }
 
                /*
@@ -2151,37 +2083,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 * return with no isolated pages if the LRU mostly contains
                 * ineligible pages.  This causes the VM to not reclaim any
                 * pages, triggering a premature OOM.
-                *
-                * Account all tail pages of THP.  This would not cause
-                * premature OOM since __isolate_lru_page() returns -EBUSY
-                * only when the page is being freed somewhere else.
+                * Account all tail pages of THP.
                 */
                scan += nr_pages;
-               if (!__isolate_lru_page_prepare(page, mode)) {
-                       /* It is being freed elsewhere */
-                       list_move(&page->lru, src);
-                       continue;
-               }
+
+               if (!PageLRU(page))
+                       goto move;
+               if (!sc->may_unmap && page_mapped(page))
+                       goto move;
+
                /*
                 * Be careful not to clear PageLRU until after we're
                 * sure the page is not being freed elsewhere -- the
                 * page release code relies on it.
                 */
-               if (unlikely(!get_page_unless_zero(page))) {
-                       list_move(&page->lru, src);
-                       continue;
-               }
+               if (unlikely(!get_page_unless_zero(page)))
+                       goto move;
 
                if (!TestClearPageLRU(page)) {
                        /* Another thread is already isolating this page */
                        put_page(page);
-                       list_move(&page->lru, src);
-                       continue;
+                       goto move;
                }
 
                nr_taken += nr_pages;
                nr_zone_taken[page_zonenum(page)] += nr_pages;
-               list_move(&page->lru, dst);
+               move_to = dst;
+move:
+               list_move(&page->lru, move_to);
        }
 
        /*
@@ -2205,51 +2134,47 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        }
        *nr_scanned = total_scan;
        trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-                                   total_scan, skipped, nr_taken, mode, lru);
+                                   total_scan, skipped, nr_taken,
+                                   sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru);
        update_lru_sizes(lruvec, lru, nr_zone_taken);
        return nr_taken;
 }
 
 /**
- * isolate_lru_page - tries to isolate a page from its LRU list
- * @page: page to isolate from its LRU list
- *
- * Isolates a @page from an LRU list, clears PageLRU and adjusts the
- * vmstat statistic corresponding to whatever LRU list the page was on.
+ * folio_isolate_lru() - Try to isolate a folio from its LRU list.
+ * @folio: Folio to isolate from its LRU list.
  *
- * Returns 0 if the page was removed from an LRU list.
- * Returns -EBUSY if the page was not on an LRU list.
+ * Isolate a @folio from an LRU list and adjust the vmstat statistic
+ * corresponding to whatever LRU list the folio was on.
  *
- * The returned page will have PageLRU() cleared.  If it was found on
- * the active list, it will have PageActive set.  If it was found on
- * the unevictable list, it will have the PageUnevictable bit set. That flag
+ * The folio will have its LRU flag cleared.  If it was found on the
+ * active list, it will have the Active flag set.  If it was found on the
+ * unevictable list, it will have the Unevictable flag set.  These flags
  * may need to be cleared by the caller before letting the page go.
  *
- * The vmstat statistic corresponding to the list on which the page was
- * found will be decremented.
- *
- * Restrictions:
+ * Context:
  *
  * (1) Must be called with an elevated refcount on the page. This is a
- *     fundamental difference from isolate_lru_pages (which is called
+ *     fundamental difference from isolate_lru_pages() (which is called
  *     without a stable reference).
- * (2) the lru_lock must not be held.
- * (3) interrupts must be enabled.
+ * (2) The lru_lock must not be held.
+ * (3) Interrupts must be enabled.
+ *
+ * Return: 0 if the folio was removed from an LRU list.
+ * -EBUSY if the folio was not on an LRU list.
  */
-int isolate_lru_page(struct page *page)
+int folio_isolate_lru(struct folio *folio)
 {
-       struct folio *folio = page_folio(page);
        int ret = -EBUSY;
 
-       VM_BUG_ON_PAGE(!page_count(page), page);
-       WARN_RATELIMIT(PageTail(page), "trying to isolate tail page");
+       VM_BUG_ON_FOLIO(!folio_ref_count(folio), folio);
 
-       if (TestClearPageLRU(page)) {
+       if (folio_test_clear_lru(folio)) {
                struct lruvec *lruvec;
 
-               get_page(page);
+               folio_get(folio);
                lruvec = folio_lruvec_lock_irq(folio);
-               del_page_from_lru_list(page, lruvec);
+               lruvec_del_folio(lruvec, folio);
                unlock_page_lruvec_irq(lruvec);
                ret = 0;
        }
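
To show how folio_isolate_lru() pairs with folio_putback_lru() introduced earlier, here is a hedged sketch (a hypothetical helper, not part of this patch) that respects the documented context: the caller already holds a reference, the lru_lock is not held and interrupts are enabled.

/*
 * Hypothetical helper, for illustration only: take a folio off its LRU
 * list, operate on it, then return it.  folio_isolate_lru() takes its
 * own reference on success; folio_putback_lru() drops that isolation
 * reference when the folio is re-added to an LRU list.
 */
static void example_with_folio_off_lru(struct folio *folio)
{
        if (folio_isolate_lru(folio) == 0) {
                /* ... work on the folio while it is off the LRU ... */
                folio_putback_lru(folio);
        }
}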
@@ -2379,9 +2304,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec,
  */
 static int current_may_throttle(void)
 {
-       return !(current->flags & PF_LOCAL_THROTTLE) ||
-               current->backing_dev_info == NULL ||
-               bdi_write_congested(current->backing_dev_info);
+       return !(current->flags & PF_LOCAL_THROTTLE);
 }
 
 /*
@@ -2487,7 +2410,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
  *
  * If the pages are mostly unmapped, the processing is fast and it is
  * appropriate to hold lru_lock across the whole operation.  But if
- * the pages are mapped, the processing is slow (page_referenced()), so
+ * the pages are mapped, the processing is slow (folio_referenced()), so
  * we should drop lru_lock around each page.  It's impossible to balance
  * this, so instead we remove the pages from the LRU while processing them.
  * It is safe to rely on PG_active against the non-LRU pages in here because
@@ -2507,7 +2430,6 @@ static void shrink_active_list(unsigned long nr_to_scan,
        LIST_HEAD(l_hold);      /* The pages which were snipped off */
        LIST_HEAD(l_active);
        LIST_HEAD(l_inactive);
-       struct page *page;
        unsigned nr_deactivate, nr_activate;
        unsigned nr_rotated = 0;
        int file = is_file_lru(lru);
@@ -2529,9 +2451,13 @@ static void shrink_active_list(unsigned long nr_to_scan,
        spin_unlock_irq(&lruvec->lru_lock);
 
        while (!list_empty(&l_hold)) {
+               struct folio *folio;
+               struct page *page;
+
                cond_resched();
-               page = lru_to_page(&l_hold);
-               list_del(&page->lru);
+               folio = lru_to_folio(&l_hold);
+               list_del(&folio->lru);
+               page = &folio->page;
 
                if (unlikely(!page_evictable(page))) {
                        putback_lru_page(page);
@@ -2546,8 +2472,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
                        }
                }
 
-               if (page_referenced(page, 0, sc->target_mem_cgroup,
-                                   &vm_flags)) {
+               if (folio_referenced(folio, 0, sc->target_mem_cgroup,
+                                    &vm_flags)) {
                        /*
                         * Identify referenced, file-backed active pages and
                         * give them one more trip around the active list. So
@@ -3977,7 +3903,10 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx)
                if (!managed_zone(zone))
                        continue;
 
-               mark = high_wmark_pages(zone);
+               if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
+                       mark = wmark_pages(zone, WMARK_PROMO);
+               else
+                       mark = high_wmark_pages(zone);
                if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx))
                        return true;
        }
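
The new watermark choice can be read as a small helper; the sketch below only restates it for clarity, and the rationale given in the comment (leaving headroom for NUMA promotion on tiered-memory systems) is the editor's reading rather than wording from this patch.

/*
 * Restated for illustration only: with NUMA_BALANCING_MEMORY_TIERING
 * enabled, kswapd balances against the higher promotion watermark so
 * the node keeps free pages available for promoted ones; otherwise the
 * usual high watermark applies.
 */
static unsigned long example_balance_mark(struct zone *zone)
{
        if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING)
                return wmark_pages(zone, WMARK_PROMO);

        return high_wmark_pages(zone);
}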
@@ -4474,7 +4403,7 @@ static int kswapd(void *p)
         * us from recursively trying to free more memory as we're
         * trying to free the first piece of memory in the first place).
         */
-       tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+       tsk->flags |= PF_MEMALLOC | PF_KSWAPD;
        set_freezable();
 
        WRITE_ONCE(pgdat->kswapd_order, 0);
@@ -4525,7 +4454,7 @@ kswapd_try_sleep:
                        goto kswapd_try_sleep;
        }
 
-       tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
+       tsk->flags &= ~(PF_MEMALLOC | PF_KSWAPD);
 
        return 0;
 }
@@ -4766,11 +4695,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        fs_reclaim_acquire(sc.gfp_mask);
        /*
         * We need to be able to allocate from the reserves for RECLAIM_UNMAP
-        * and we also need to be able to write out pages for RECLAIM_WRITE
-        * and RECLAIM_UNMAP.
         */
        noreclaim_flag = memalloc_noreclaim_save();
-       p->flags |= PF_SWAPWRITE;
        set_task_reclaim_state(p, &sc.reclaim_state);
 
        if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) {
@@ -4784,7 +4710,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
        }
 
        set_task_reclaim_state(p, NULL);
-       current->flags &= ~PF_SWAPWRITE;
        memalloc_noreclaim_restore(noreclaim_flag);
        fs_reclaim_release(sc.gfp_mask);
        psi_memstall_leave(&pflags);