[linux-2.6-microblaze.git] / mm / vmscan.c
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 196709f..88c5fed 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -128,7 +128,7 @@ struct scan_control {
  * From 0 .. 100.  Higher means more swappy.
  */
 int vm_swappiness = 60;
-long vm_total_pages;   /* The total number of pages which the VM controls */
+unsigned long vm_total_pages;  /* The total number of pages which the VM controls */
 
 static LIST_HEAD(shrinker_list);
 static DECLARE_RWSEM(shrinker_rwsem);
@@ -1579,16 +1579,6 @@ static inline int inactive_anon_is_low(struct lruvec *lruvec)
 }
 #endif
 
-static int inactive_file_is_low_global(struct zone *zone)
-{
-       unsigned long active, inactive;
-
-       active = zone_page_state(zone, NR_ACTIVE_FILE);
-       inactive = zone_page_state(zone, NR_INACTIVE_FILE);
-
-       return (active > inactive);
-}
-
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
  * @lruvec: LRU vector to check
@@ -1605,10 +1595,13 @@ static int inactive_file_is_low_global(struct zone *zone)
  */
 static int inactive_file_is_low(struct lruvec *lruvec)
 {
-       if (!mem_cgroup_disabled())
-               return mem_cgroup_inactive_file_is_low(lruvec);
+       unsigned long inactive;
+       unsigned long active;
+
+       inactive = get_lru_size(lruvec, LRU_INACTIVE_FILE);
+       active = get_lru_size(lruvec, LRU_ACTIVE_FILE);
 
-       return inactive_file_is_low_global(lruvec_zone(lruvec));
+       return active > inactive;
 }
 
 static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
@@ -1638,6 +1631,13 @@ static int vmscan_swappiness(struct scan_control *sc)
        return mem_cgroup_swappiness(sc->target_mem_cgroup);
 }
 
+enum scan_balance {
+       SCAN_EQUAL,
+       SCAN_FRACT,
+       SCAN_ANON,
+       SCAN_FILE,
+};
+
 /*
  * Determine how aggressively the anon and file LRU lists should be
  * scanned.  The relative value of each set of LRU lists is determined
@@ -1650,15 +1650,16 @@ static int vmscan_swappiness(struct scan_control *sc)
 static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
                           unsigned long *nr)
 {
-       unsigned long anon, file, free;
+       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+       u64 fraction[2];
+       u64 denominator = 0;    /* gcc */
+       struct zone *zone = lruvec_zone(lruvec);
        unsigned long anon_prio, file_prio;
+       enum scan_balance scan_balance;
+       unsigned long anon, file, free;
+       bool force_scan = false;
        unsigned long ap, fp;
-       struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
-       u64 fraction[2], denominator;
        enum lru_list lru;
-       int noswap = 0;
-       bool force_scan = false;
-       struct zone *zone = lruvec_zone(lruvec);
 
        /*
         * If the zone or memcg is small, nr[l] can be 0.  This
@@ -1676,11 +1677,30 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
                force_scan = true;
 
        /* If we have no swap space, do not bother scanning anon pages. */
-       if (!sc->may_swap || (nr_swap_pages <= 0)) {
-               noswap = 1;
-               fraction[0] = 0;
-               fraction[1] = 1;
-               denominator = 1;
+       if (!sc->may_swap || (get_nr_swap_pages() <= 0)) {
+               scan_balance = SCAN_FILE;
+               goto out;
+       }
+
+       /*
+        * Global reclaim will swap to prevent OOM even with no
+        * swappiness, but memcg users want to use this knob to
+        * disable swapping for individual groups completely when
+        * using the memory controller's swap limit feature would be
+        * too expensive.
+        */
+       if (!global_reclaim(sc) && !vmscan_swappiness(sc)) {
+               scan_balance = SCAN_FILE;
+               goto out;
+       }
+
+       /*
+        * Do not apply any pressure balancing cleverness when the
+        * system is close to OOM, scan both anon and file equally
+        * (unless the swappiness setting disagrees with swapping).
+        */
+       if (!sc->priority && vmscan_swappiness(sc)) {
+               scan_balance = SCAN_EQUAL;
                goto out;
        }
 
@@ -1689,29 +1709,31 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
        file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
                get_lru_size(lruvec, LRU_INACTIVE_FILE);
 
+       /*
+        * If it's foreseeable that reclaiming the file cache won't be
+        * enough to get the zone back into a desirable shape, we have
+        * to swap.  Better start now and leave the - probably heavily
+        * thrashing - remaining file pages alone.
+        */
        if (global_reclaim(sc)) {
-               free  = zone_page_state(zone, NR_FREE_PAGES);
+               free = zone_page_state(zone, NR_FREE_PAGES);
                if (unlikely(file + free <= high_wmark_pages(zone))) {
-                       /*
-                        * If we have very few page cache pages, force-scan
-                        * anon pages.
-                        */
-                       fraction[0] = 1;
-                       fraction[1] = 0;
-                       denominator = 1;
-                       goto out;
-               } else if (!inactive_file_is_low_global(zone)) {
-                       /*
-                        * There is enough inactive page cache, do not
-                        * reclaim anything from the working set right now.
-                        */
-                       fraction[0] = 0;
-                       fraction[1] = 1;
-                       denominator = 1;
+                       scan_balance = SCAN_ANON;
                        goto out;
                }
        }
 
+       /*
+        * There is enough inactive page cache, do not reclaim
+        * anything from the anonymous working set right now.
+        */
+       if (!inactive_file_is_low(lruvec)) {
+               scan_balance = SCAN_FILE;
+               goto out;
+       }
+
+       scan_balance = SCAN_FRACT;
+
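The ladder of early exits above replaces the old fraction[]/noswap bookkeeping with a single scan_balance verdict. As a reading aid, here is a minimal userspace sketch of the same decision order; the helper name, its flattened parameters and the numbers in main() are invented for illustration and are not the kernel interfaces.

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the enum introduced by this patch. */
enum scan_balance { SCAN_EQUAL, SCAN_FRACT, SCAN_ANON, SCAN_FILE };

/*
 * Illustrative model of the decision ladder in get_scan_count().  Every
 * argument is a plain value here; in the kernel they come from the
 * scan_control, the swap state and the zone/lruvec counters.
 */
static enum scan_balance pick_balance(bool may_swap, long nr_swap_pages,
                                      bool global_reclaim, int swappiness,
                                      int priority, unsigned long file,
                                      unsigned long free,
                                      unsigned long high_wmark,
                                      bool inactive_file_low)
{
        if (!may_swap || nr_swap_pages <= 0)
                return SCAN_FILE;       /* nothing to swap to */
        if (!global_reclaim && !swappiness)
                return SCAN_FILE;       /* memcg asked for no swapping at all */
        if (!priority && swappiness)
                return SCAN_EQUAL;      /* near OOM: skip the balancing cleverness */
        if (global_reclaim && file + free <= high_wmark)
                return SCAN_ANON;       /* reclaiming file cache alone cannot help */
        if (!inactive_file_low)
                return SCAN_FILE;       /* plenty of inactive cache to take from */
        return SCAN_FRACT;              /* proportional anon/file split below */
}

int main(void)
{
        /* global reclaim, hardly any file cache or free memory left: 2 (SCAN_ANON) */
        printf("%d\n", pick_balance(true, 4096, true, 60, 12, 100, 50, 1000, true));
        /* memcg reclaim with swappiness 0: 3 (SCAN_FILE) */
        printf("%d\n", pick_balance(true, 4096, false, 0, 12, 5000, 50, 1000, true));
        return 0;
}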
        /*
         * With swappiness at 100, anonymous and file have the same priority.
         * This scanning priority is essentially the inverse of IO cost.
@@ -1759,19 +1781,92 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
 out:
        for_each_evictable_lru(lru) {
                int file = is_file_lru(lru);
+               unsigned long size;
                unsigned long scan;
 
-               scan = get_lru_size(lruvec, lru);
-               if (sc->priority || noswap || !vmscan_swappiness(sc)) {
-                       scan >>= sc->priority;
-                       if (!scan && force_scan)
-                               scan = SWAP_CLUSTER_MAX;
+               size = get_lru_size(lruvec, lru);
+               scan = size >> sc->priority;
+
+               if (!scan && force_scan)
+                       scan = min(size, SWAP_CLUSTER_MAX);
+
+               switch (scan_balance) {
+               case SCAN_EQUAL:
+                       /* Scan lists relative to size */
+                       break;
+               case SCAN_FRACT:
+                       /*
+                        * Scan types proportional to swappiness and
+                        * their relative recent reclaim efficiency.
+                        */
                        scan = div64_u64(scan * fraction[file], denominator);
+                       break;
+               case SCAN_FILE:
+               case SCAN_ANON:
+                       /* Scan one type exclusively */
+                       if ((scan_balance == SCAN_FILE) != file)
+                               scan = 0;
+                       break;
+               default:
+                       /* Look ma, no brain */
+                       BUG();
                }
                nr[lru] = scan;
        }
 }
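The per-LRU targets computed in the out: loop reduce to plain integer arithmetic. A small standalone sketch of the SCAN_FRACT case follows, assuming SWAP_CLUSTER_MAX is 32 (its value in that era's include/linux/swap.h) and modelling div64_u64() with ordinary 64-bit division; the function name and the sample numbers are illustrative.

#include <stdint.h>
#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL   /* value from that era's include/linux/swap.h */

/*
 * Model of one pass of the out: loop for the SCAN_FRACT case: scale the
 * list size by the reclaim priority, keep a minimum batch when force_scan
 * is set, then apply the proportional split.
 */
static unsigned long scan_target(unsigned long size, int priority,
                                 int force_scan, uint64_t fraction,
                                 uint64_t denominator)
{
        unsigned long scan = size >> priority;

        if (!scan && force_scan)
                scan = size < SWAP_CLUSTER_MAX ? size : SWAP_CLUSTER_MAX;

        /* stands in for div64_u64(scan * fraction[file], denominator) */
        return (unsigned long)(((uint64_t)scan * fraction) / denominator);
}

int main(void)
{
        /* 1M file pages at priority 12, file gets 3/4 of the pressure: 192 */
        printf("%lu\n", scan_target(1UL << 20, 12, 0, 3, 4));
        /* tiny memcg list at the same priority: force_scan keeps a batch: 15 */
        printf("%lu\n", scan_target(20, 12, 1, 3, 4));
        return 0;
}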
 
+/*
+ * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
+ */
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+{
+       unsigned long nr[NR_LRU_LISTS];
+       unsigned long nr_to_scan;
+       enum lru_list lru;
+       unsigned long nr_reclaimed = 0;
+       unsigned long nr_to_reclaim = sc->nr_to_reclaim;
+       struct blk_plug plug;
+
+       get_scan_count(lruvec, sc, nr);
+
+       blk_start_plug(&plug);
+       while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
+                                       nr[LRU_INACTIVE_FILE]) {
+               for_each_evictable_lru(lru) {
+                       if (nr[lru]) {
+                               nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX);
+                               nr[lru] -= nr_to_scan;
+
+                               nr_reclaimed += shrink_list(lru, nr_to_scan,
+                                                           lruvec, sc);
+                       }
+               }
+               /*
+                * On large memory systems, scan >> priority can become
+                * really large. This is fine for the starting priority;
+                * we want to put equal scanning pressure on each zone.
+                * However, if the VM has a harder time of freeing pages,
+                * with multiple processes reclaiming pages, the total
+                * freeing target can get unreasonably large.
+                */
+               if (nr_reclaimed >= nr_to_reclaim &&
+                   sc->priority < DEF_PRIORITY)
+                       break;
+       }
+       blk_finish_plug(&plug);
+       sc->nr_reclaimed += nr_reclaimed;
+
+       /*
+        * Even if we did not try to evict anon pages at all, we want to
+        * rebalance the anon lru active/inactive ratio.
+        */
+       if (inactive_anon_is_low(lruvec))
+               shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                  sc, LRU_ACTIVE_ANON);
+
+       throttle_vm_writeout(sc->gfp_mask);
+}
+
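The relocated shrink_lruvec() drains the nr[] targets in SWAP_CLUSTER_MAX batches and applies the early break described in the comment above. A toy userspace model of that loop, with made-up list sizes and a stand-in for shrink_list(), looks like this:

#include <stdio.h>

#define NR_LISTS 4              /* inactive/active x anon/file, simplified */
#define SWAP_CLUSTER_MAX 32UL
#define DEF_PRIORITY 12

/* Stand-in for shrink_list(): pretend every scanned page gets reclaimed. */
static unsigned long fake_shrink(unsigned long nr_to_scan)
{
        return nr_to_scan;
}

static unsigned long drain(unsigned long nr[NR_LISTS],
                           unsigned long nr_to_reclaim, int priority)
{
        unsigned long reclaimed = 0;
        int remaining;

        do {
                remaining = 0;
                for (int lru = 0; lru < NR_LISTS; lru++) {
                        if (!nr[lru])
                                continue;
                        unsigned long batch = nr[lru] < SWAP_CLUSTER_MAX ?
                                              nr[lru] : SWAP_CLUSTER_MAX;
                        nr[lru] -= batch;
                        reclaimed += fake_shrink(batch);
                        if (nr[lru])
                                remaining = 1;
                }
                /* the early break: target met, and not the first pass */
                if (reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
                        break;
        } while (remaining);

        return reclaimed;
}

int main(void)
{
        unsigned long nr[NR_LISTS] = { 400, 0, 1000, 1000 };

        /* stops after one round: 96 pages instead of draining all 2400 */
        printf("%lu\n", drain(nr, 32, DEF_PRIORITY - 3));
        return 0;
}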
 /* Use reclaim/compaction for costly allocs or under memory pressure */
 static bool in_reclaim_compaction(struct scan_control *sc)
 {
@@ -1790,7 +1885,7 @@ static bool in_reclaim_compaction(struct scan_control *sc)
  * calls try_to_compact_zone() that it will have enough free pages to succeed.
  * It will give up earlier than that if there is difficulty reclaiming pages.
  */
-static inline bool should_continue_reclaim(struct lruvec *lruvec,
+static inline bool should_continue_reclaim(struct zone *zone,
                                        unsigned long nr_reclaimed,
                                        unsigned long nr_scanned,
                                        struct scan_control *sc)
@@ -1830,15 +1925,15 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
         * inactive lists are large enough, continue reclaiming
         */
        pages_for_compaction = (2UL << sc->order);
-       inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
-       if (nr_swap_pages > 0)
-               inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
+       inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE);
+       if (get_nr_swap_pages() > 0)
+               inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON);
        if (sc->nr_reclaimed < pages_for_compaction &&
                        inactive_lru_pages > pages_for_compaction)
                return true;
 
        /* If compaction would go ahead or the allocation would succeed, stop */
-       switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
+       switch (compaction_suitable(zone, sc->order)) {
        case COMPACT_PARTIAL:
        case COMPACT_CONTINUE:
                return false;
@@ -1847,98 +1942,48 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
        }
 }
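The size check in should_continue_reclaim() above is two comparisons against 2 << order. A worked sketch, with invented page counts and assuming 4 KiB pages for the byte figures in the comments:

#include <stdbool.h>
#include <stdio.h>

/*
 * Model of the "inactive lists are large enough" test in
 * should_continue_reclaim(): keep going while compaction would still
 * lack free pages and there is enough inactive memory left to feed it.
 */
static bool keep_reclaiming(int order, unsigned long nr_reclaimed,
                            unsigned long inactive_file,
                            unsigned long inactive_anon, bool have_swap)
{
        unsigned long pages_for_compaction = 2UL << order;
        unsigned long inactive_lru_pages = inactive_file;

        if (have_swap)
                inactive_lru_pages += inactive_anon;

        return nr_reclaimed < pages_for_compaction &&
               inactive_lru_pages > pages_for_compaction;
}

int main(void)
{
        /* order-9 (a 2 MiB huge page): the target is 1024 pages, i.e. 4 MiB */
        printf("%d\n", keep_reclaiming(9, 300, 50000, 0, false));       /* 1 */
        printf("%d\n", keep_reclaiming(9, 2048, 50000, 0, false));      /* 0 */
        return 0;
}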
 
-/*
- * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
- */
-static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
-       unsigned long nr[NR_LRU_LISTS];
-       unsigned long nr_to_scan;
-       enum lru_list lru;
        unsigned long nr_reclaimed, nr_scanned;
-       unsigned long nr_to_reclaim = sc->nr_to_reclaim;
-       struct blk_plug plug;
-
-restart:
-       nr_reclaimed = 0;
-       nr_scanned = sc->nr_scanned;
-       get_scan_count(lruvec, sc, nr);
-
-       blk_start_plug(&plug);
-       while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
-                                       nr[LRU_INACTIVE_FILE]) {
-               for_each_evictable_lru(lru) {
-                       if (nr[lru]) {
-                               nr_to_scan = min_t(unsigned long,
-                                                  nr[lru], SWAP_CLUSTER_MAX);
-                               nr[lru] -= nr_to_scan;
-
-                               nr_reclaimed += shrink_list(lru, nr_to_scan,
-                                                           lruvec, sc);
-                       }
-               }
-               /*
-                * On large memory systems, scan >> priority can become
-                * really large. This is fine for the starting priority;
-                * we want to put equal scanning pressure on each zone.
-                * However, if the VM has a harder time of freeing pages,
-                * with multiple processes reclaiming pages, the total
-                * freeing target can get unreasonably large.
-                */
-               if (nr_reclaimed >= nr_to_reclaim &&
-                   sc->priority < DEF_PRIORITY)
-                       break;
-       }
-       blk_finish_plug(&plug);
-       sc->nr_reclaimed += nr_reclaimed;
 
-       /*
-        * Even if we did not try to evict anon pages at all, we want to
-        * rebalance the anon lru active/inactive ratio.
-        */
-       if (inactive_anon_is_low(lruvec))
-               shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
-                                  sc, LRU_ACTIVE_ANON);
-
-       /* reclaim/compaction might need reclaim to continue */
-       if (should_continue_reclaim(lruvec, nr_reclaimed,
-                                   sc->nr_scanned - nr_scanned, sc))
-               goto restart;
+       do {
+               struct mem_cgroup *root = sc->target_mem_cgroup;
+               struct mem_cgroup_reclaim_cookie reclaim = {
+                       .zone = zone,
+                       .priority = sc->priority,
+               };
+               struct mem_cgroup *memcg;
 
-       throttle_vm_writeout(sc->gfp_mask);
-}
+               nr_reclaimed = sc->nr_reclaimed;
+               nr_scanned = sc->nr_scanned;
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
-{
-       struct mem_cgroup *root = sc->target_mem_cgroup;
-       struct mem_cgroup_reclaim_cookie reclaim = {
-               .zone = zone,
-               .priority = sc->priority,
-       };
-       struct mem_cgroup *memcg;
+               memcg = mem_cgroup_iter(root, NULL, &reclaim);
+               do {
+                       struct lruvec *lruvec;
 
-       memcg = mem_cgroup_iter(root, NULL, &reclaim);
-       do {
-               struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+                       lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
-               shrink_lruvec(lruvec, sc);
+                       shrink_lruvec(lruvec, sc);
 
-               /*
-                * Limit reclaim has historically picked one memcg and
-                * scanned it with decreasing priority levels until
-                * nr_to_reclaim had been reclaimed.  This priority
-                * cycle is thus over after a single memcg.
-                *
-                * Direct reclaim and kswapd, on the other hand, have
-                * to scan all memory cgroups to fulfill the overall
-                * scan target for the zone.
-                */
-               if (!global_reclaim(sc)) {
-                       mem_cgroup_iter_break(root, memcg);
-                       break;
-               }
-               memcg = mem_cgroup_iter(root, memcg, &reclaim);
-       } while (memcg);
+                       /*
+                        * Direct reclaim and kswapd have to scan all memory
+                        * cgroups to fulfill the overall scan target for the
+                        * zone.
+                        *
+                        * Limit reclaim, on the other hand, only cares about
+                        * nr_to_reclaim pages to be reclaimed and it will
+                        * retry with decreasing priority if one round over the
+                        * whole hierarchy is not sufficient.
+                        */
+                       if (!global_reclaim(sc) &&
+                                       sc->nr_reclaimed >= sc->nr_to_reclaim) {
+                               mem_cgroup_iter_break(root, memcg);
+                               break;
+                       }
+                       memcg = mem_cgroup_iter(root, memcg, &reclaim);
+               } while (memcg);
+       } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
+                                        sc->nr_scanned - nr_scanned, sc));
 }
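The reworked shrink_zone() now nests the memcg hierarchy walk inside the should_continue_reclaim() loop, and limit reclaim bails out of the walk once its target is met while global reclaim always finishes it. A toy model of just that walk (the group structure and numbers are invented; mem_cgroup_iter() is reduced to an array loop):

#include <stdbool.h>
#include <stdio.h>

struct group {
        const char *name;
        unsigned long reclaimable;      /* what shrink_lruvec() would free */
};

/*
 * Toy model of the hierarchy walk: global reclaim visits every group,
 * limit (memcg) reclaim breaks out as soon as the target is met.
 */
static unsigned long walk(const struct group *groups, int n, bool global,
                          unsigned long nr_to_reclaim)
{
        unsigned long reclaimed = 0;

        for (int i = 0; i < n; i++) {
                reclaimed += groups[i].reclaimable;     /* shrink_lruvec() */
                printf("scanned %s\n", groups[i].name);
                if (!global && reclaimed >= nr_to_reclaim)
                        break;                          /* mem_cgroup_iter_break() */
        }
        return reclaimed;
}

int main(void)
{
        const struct group h[] = { { "A", 64 }, { "A/1", 16 }, { "A/2", 16 } };

        walk(h, 3, false, 32);  /* limit reclaim: stops after "A" */
        walk(h, 3, true, 32);   /* global reclaim: walks all three */
        return 0;
}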
 
 /* Returns true if compaction should go ahead for a high-order request */
@@ -1958,7 +2003,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
         * a reasonable chance of completing and allocating the page
         */
        balance_gap = min(low_wmark_pages(zone),
-               (zone->present_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
+               (zone->managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
                        KSWAPD_ZONE_BALANCE_GAP_RATIO);
        watermark = high_wmark_pages(zone) + balance_gap + (2UL << sc->order);
        watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, 0, 0);
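The balance_gap above is a ceiling division against KSWAPD_ZONE_BALANCE_GAP_RATIO capped by the low watermark. A numeric sketch, assuming the ratio value of 100 from that era's include/linux/swap.h and an invented 1 GiB zone:

#include <stdio.h>

#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100       /* value from that era's swap.h */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
        return a < b ? a : b;
}

int main(void)
{
        /* invented zone: 1 GiB of managed memory in 4 KiB pages */
        unsigned long managed_pages = 262144;
        unsigned long low_wmark = 4096;
        unsigned long high_wmark = 5120;
        int order = 9;

        /* ceiling division: roughly 1% of the zone, capped at the low watermark */
        unsigned long balance_gap = min_ul(low_wmark,
                        (managed_pages + KSWAPD_ZONE_BALANCE_GAP_RATIO - 1) /
                                KSWAPD_ZONE_BALANCE_GAP_RATIO);
        unsigned long watermark = high_wmark + balance_gap + (2UL << order);

        /* prints balance_gap=2622 watermark=8766 */
        printf("balance_gap=%lu watermark=%lu\n", balance_gap, watermark);
        return 0;
}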
@@ -2149,6 +2194,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                if (sc->nr_reclaimed >= sc->nr_to_reclaim)
                        goto out;
 
+               /*
+                * If we're having trouble reclaiming, start doing
+                * writepage even in laptop mode.
+                */
+               if (sc->priority < DEF_PRIORITY - 2)
+                       sc->may_writepage = 1;
+
                /*
                 * Try to write back as many pages as we just scanned.  This
                 * tends to cause slow streaming writers to write data to the
@@ -2300,7 +2352,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 {
        unsigned long nr_reclaimed;
        struct scan_control sc = {
-               .gfp_mask = gfp_mask,
+               .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
                .may_writepage = !laptop_mode,
                .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .may_unmap = 1,
@@ -2473,7 +2525,7 @@ static bool zone_balanced(struct zone *zone, int order,
  */
 static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
 {
-       unsigned long present_pages = 0;
+       unsigned long managed_pages = 0;
        unsigned long balanced_pages = 0;
        int i;
 
@@ -2484,7 +2536,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
                if (!populated_zone(zone))
                        continue;
 
-               present_pages += zone->present_pages;
+               managed_pages += zone->managed_pages;
 
                /*
                 * A special case here:
@@ -2494,18 +2546,18 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
                 * they must be considered balanced here as well!
                 */
                if (zone->all_unreclaimable) {
-                       balanced_pages += zone->present_pages;
+                       balanced_pages += zone->managed_pages;
                        continue;
                }
 
                if (zone_balanced(zone, order, 0, i))
-                       balanced_pages += zone->present_pages;
+                       balanced_pages += zone->managed_pages;
                else if (!order)
                        return false;
        }
 
        if (order)
-               return balanced_pages >= (present_pages >> 2);
+               return balanced_pages >= (managed_pages >> 2);
        else
                return true;
 }
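For order-0 requests pgdat_balanced() demands that every populated zone pass zone_balanced(); for higher orders it accepts the node once a quarter of its managed pages (managed_pages >> 2) sit in balanced zones. A simplified sketch with made-up zone sizes (the populated_zone() and all_unreclaimable special cases are omitted):

#include <stdbool.h>
#include <stdio.h>

struct zone_state {
        unsigned long managed_pages;
        bool balanced;          /* result of zone_balanced() */
};

/* Simplified pgdat_balanced(): the >> 2 is the 25% threshold for order > 0. */
static bool node_balanced(const struct zone_state *zones, int nr, int order)
{
        unsigned long managed = 0, balanced = 0;

        for (int i = 0; i < nr; i++) {
                managed += zones[i].managed_pages;
                if (zones[i].balanced)
                        balanced += zones[i].managed_pages;
                else if (!order)
                        return false;   /* order-0: every zone must pass */
        }
        return order ? balanced >= (managed >> 2) : true;
}

int main(void)
{
        const struct zone_state z[] = {
                { 4096,   true  },      /* small DMA zone */
                { 229376, false },      /* Normal, still unbalanced */
                { 786432, true  },      /* big zone that carries the node */
        };

        printf("%d\n", node_balanced(z, 3, 0)); /* 0: Normal fails order-0 */
        printf("%d\n", node_balanced(z, 3, 3)); /* 1: 790528 >= 1019904 / 4 */
        return 0;
}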
@@ -2564,7 +2616,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                                                        int *classzone_idx)
 {
-       struct zone *unbalanced_zone;
+       bool pgdat_is_balanced = false;
        int i;
        int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
        unsigned long total_scanned;
@@ -2595,9 +2647,6 @@ loop_again:
 
        do {
                unsigned long lru_pages = 0;
-               int has_under_min_watermark_zone = 0;
-
-               unbalanced_zone = NULL;
 
                /*
                 * Scan in the highmem->dma direction for the highest
@@ -2638,8 +2687,11 @@ loop_again:
                                zone_clear_flag(zone, ZONE_CONGESTED);
                        }
                }
-               if (i < 0)
+
+               if (i < 0) {
+                       pgdat_is_balanced = true;
                        goto out;
+               }
 
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
@@ -2689,7 +2741,7 @@ loop_again:
                         * of the zone, whichever is smaller.
                         */
                        balance_gap = min(low_wmark_pages(zone),
-                               (zone->present_pages +
+                               (zone->managed_pages +
                                        KSWAPD_ZONE_BALANCE_GAP_RATIO-1) /
                                KSWAPD_ZONE_BALANCE_GAP_RATIO);
                        /*
@@ -2720,12 +2772,10 @@ loop_again:
                        }
 
                        /*
-                        * If we've done a decent amount of scanning and
-                        * the reclaim ratio is low, start doing writepage
-                        * even in laptop mode
+                        * If we're having trouble reclaiming, start doing
+                        * writepage even in laptop mode.
                         */
-                       if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
-                           total_scanned > sc.nr_reclaimed + sc.nr_reclaimed / 2)
+                       if (sc.priority < DEF_PRIORITY - 2)
                                sc.may_writepage = 1;
 
                        if (zone->all_unreclaimable) {
@@ -2734,17 +2784,7 @@ loop_again:
                                continue;
                        }
 
-                       if (!zone_balanced(zone, testorder, 0, end_zone)) {
-                               unbalanced_zone = zone;
-                               /*
-                                * We are still under min water mark.  This
-                                * means that we have a GFP_ATOMIC allocation
-                                * failure risk. Hurry up!
-                                */
-                               if (!zone_watermark_ok_safe(zone, order,
-                                           min_wmark_pages(zone), end_zone, 0))
-                                       has_under_min_watermark_zone = 1;
-                       } else {
+                       if (zone_balanced(zone, testorder, 0, end_zone))
                                /*
                                 * If a zone reaches its high watermark,
                                 * consider it to be no longer congested. It's
@@ -2753,8 +2793,6 @@ loop_again:
                                 * speculatively avoid congestion waits
                                 */
                                zone_clear_flag(zone, ZONE_CONGESTED);
-                       }
-
                }
 
                /*
@@ -2766,17 +2804,9 @@ loop_again:
                                pfmemalloc_watermark_ok(pgdat))
                        wake_up(&pgdat->pfmemalloc_wait);
 
-               if (pgdat_balanced(pgdat, order, *classzone_idx))
+               if (pgdat_balanced(pgdat, order, *classzone_idx)) {
+                       pgdat_is_balanced = true;
                        break;          /* kswapd: all done */
-               /*
-                * OK, kswapd is getting into trouble.  Take a nap, then take
-                * another pass across the zones.
-                */
-               if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) {
-                       if (has_under_min_watermark_zone)
-                               count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT);
-                       else if (unbalanced_zone)
-                               wait_iff_congested(unbalanced_zone, BLK_RW_ASYNC, HZ/10);
                }
 
                /*
@@ -2788,9 +2818,9 @@ loop_again:
                if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX)
                        break;
        } while (--sc.priority >= 0);
-out:
 
-       if (!pgdat_balanced(pgdat, order, *classzone_idx)) {
+out:
+       if (!pgdat_is_balanced) {
                cond_resched();
 
                try_to_freeze();
@@ -3053,7 +3083,7 @@ unsigned long global_reclaimable_pages(void)
        nr = global_page_state(NR_ACTIVE_FILE) +
             global_page_state(NR_INACTIVE_FILE);
 
-       if (nr_swap_pages > 0)
+       if (get_nr_swap_pages() > 0)
                nr += global_page_state(NR_ACTIVE_ANON) +
                      global_page_state(NR_INACTIVE_ANON);
 
@@ -3067,7 +3097,7 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
        nr = zone_page_state(zone, NR_ACTIVE_FILE) +
             zone_page_state(zone, NR_INACTIVE_FILE);
 
-       if (nr_swap_pages > 0)
+       if (get_nr_swap_pages() > 0)
                nr += zone_page_state(zone, NR_ACTIVE_ANON) +
                      zone_page_state(zone, NR_INACTIVE_ANON);
 
@@ -3280,9 +3310,8 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
                .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
                .may_swap = 1,
-               .nr_to_reclaim = max_t(unsigned long, nr_pages,
-                                      SWAP_CLUSTER_MAX),
-               .gfp_mask = gfp_mask,
+               .nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
+               .gfp_mask = (gfp_mask = memalloc_noio_flags(gfp_mask)),
                .order = order,
                .priority = ZONE_RECLAIM_PRIORITY,
        };