mm/page_alloc: limit the number of pages on PCP lists when reclaim is active
Author:     Mel Gorman <mgorman@techsingularity.net>
AuthorDate: Tue, 29 Jun 2021 02:42:21 +0000 (19:42 -0700)
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Tue, 29 Jun 2021 17:53:54 +0000 (10:53 -0700)
When kswapd is active then direct reclaim is potentially active.  In
either case, it is possible that a zone would be balanced if pages were
not trapped on PCP lists.  Instead of draining remote pages, simply limit
the size of the PCP lists while kswapd is active.

Link: https://lkml.kernel.org/r/20210525080119.5455-6-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Hillf Danton <hdanton@sina.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/mmzone.h
mm/page_alloc.c
mm/vmscan.c

index 0a86b28..b2f40d6 100644 (file)
@@ -647,6 +647,7 @@ enum zone_flags {
        ZONE_BOOSTED_WATERMARK,         /* zone recently boosted watermarks.
                                         * Cleared when kswapd is woken.
                                         */
+       ZONE_RECLAIM_ACTIVE,            /* kswapd may be scanning the zone. */
 };
 
 static inline unsigned long zone_managed_pages(struct zone *zone)
index e1d1825..adf35cc 100644 (file)
@@ -3302,6 +3302,23 @@ static int nr_pcp_free(struct per_cpu_pages *pcp, int high, int batch)
        return batch;
 }
 
+/*
+ * Return the effective "high" watermark for a per-cpu page list.
+ *
+ * Returns 0 when pcp->high is 0, the configured pcp->high when the zone
+ * is not flagged ZONE_RECLAIM_ACTIVE, and otherwise the smaller of four
+ * batches (batch << 2) and pcp->high, so that pages freed while reclaim
+ * is running are returned to the buddy allocator sooner instead of
+ * being trapped on PCP lists.
+ *
+ * NOTE(review): READ_ONCE suggests pcp->high/pcp->batch can be updated
+ * concurrently by another context — confirm against the pageset update
+ * paths.
+ */
+static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
+{
+       int high = READ_ONCE(pcp->high);
+
+       /* high == 0 means the PCP list should hold no pages at all. */
+       if (unlikely(!high))
+               return 0;
+
+       /* No reclaim on this zone: use the full configured watermark. */
+       if (!test_bit(ZONE_RECLAIM_ACTIVE, &zone->flags))
+               return high;
+
+       /*
+        * If reclaim is active, limit the number of pages that can be
+        * stored on pcp lists
+        */
+       return min(READ_ONCE(pcp->batch) << 2, high);
+}
+
 static void free_unref_page_commit(struct page *page, unsigned long pfn,
                                   int migratetype)
 {
@@ -3313,7 +3330,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
        pcp = this_cpu_ptr(zone->per_cpu_pageset);
        list_add(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
-       high = READ_ONCE(pcp->high);
+       high = nr_pcp_high(pcp, zone);
        if (pcp->count >= high) {
                int batch = READ_ONCE(pcp->batch);
 
index f96d621..d7c3cb8 100644 (file)
@@ -3722,6 +3722,38 @@ static bool kswapd_shrink_node(pg_data_t *pgdat,
        return sc->nr_scanned >= sc->nr_to_reclaim;
 }
 
+/* Page allocator PCP high watermark is lowered if reclaim is active. */
+/*
+ * Set or clear ZONE_RECLAIM_ACTIVE on every zone of @pgdat up to and
+ * including @highest_zoneidx.  Zones for which managed_zone() is false
+ * are skipped.  The flag is read by the page allocator (nr_pcp_high)
+ * to shrink the PCP high watermark while reclaim runs.
+ */
+static inline void
+update_reclaim_active(pg_data_t *pgdat, int highest_zoneidx, bool active)
+{
+       int i;
+       struct zone *zone;
+
+       for (i = 0; i <= highest_zoneidx; i++) {
+               zone = pgdat->node_zones + i;
+
+               /* Only flag zones that have managed pages. */
+               if (!managed_zone(zone))
+                       continue;
+
+               if (active)
+                       set_bit(ZONE_RECLAIM_ACTIVE, &zone->flags);
+               else
+                       clear_bit(ZONE_RECLAIM_ACTIVE, &zone->flags);
+       }
+}
+
+/* Flag zones up to @highest_zoneidx as under reclaim (kswapd starting). */
+static inline void
+set_reclaim_active(pg_data_t *pgdat, int highest_zoneidx)
+{
+       update_reclaim_active(pgdat, highest_zoneidx, true);
+}
+
+/* Clear the reclaim-active flag on zones up to @highest_zoneidx. */
+static inline void
+clear_reclaim_active(pg_data_t *pgdat, int highest_zoneidx)
+{
+       update_reclaim_active(pgdat, highest_zoneidx, false);
+}
+
 /*
  * For kswapd, balance_pgdat() will reclaim pages across a node from zones
  * that are eligible for use by the caller until at least one zone is
@@ -3774,6 +3806,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int highest_zoneidx)
        boosted = nr_boost_reclaim;
 
 restart:
+       set_reclaim_active(pgdat, highest_zoneidx);
        sc.priority = DEF_PRIORITY;
        do {
                unsigned long nr_reclaimed = sc.nr_reclaimed;
@@ -3907,6 +3940,8 @@ restart:
                pgdat->kswapd_failures++;
 
 out:
+       clear_reclaim_active(pgdat, highest_zoneidx);
+
        /* If reclaim was boosted, account for the reclaim done in this pass */
        if (boosted) {
                unsigned long flags;