mm: memcg: move soft limit reclaim code to memcontrol-v1.c
author Roman Gushchin <roman.gushchin@linux.dev>
Tue, 25 Jun 2024 00:58:54 +0000 (17:58 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Fri, 5 Jul 2024 01:05:51 +0000 (18:05 -0700)
Soft limits are cgroup v1-specific and are not supported by cgroup v2, so
let's move the corresponding code into memcontrol-v1.c.

Aside from simply moving the code, this commit introduces a trivial
memcg1_soft_limit_reset() function to reset soft limits, and it also moves
the global soft limit tree initialization code into a new memcg1_init()
function.

It also moves corresponding declarations shared between memcontrol.c and
memcontrol-v1.c into mm/memcontrol-v1.h.
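
For reference, the new helper is a one-line wrapper, as added to
mm/memcontrol-v1.h by this patch:

	static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg)
	{
		WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
	}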

Link: https://lkml.kernel.org/r/20240625005906.106920-3-roman.gushchin@linux.dev
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
mm/memcontrol-v1.c
mm/memcontrol-v1.h
mm/memcontrol.c

diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index a941446..2ccb840 100644
@@ -1,3 +1,345 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <linux/memcontrol.h>
+#include <linux/swap.h>
+#include <linux/mm_inline.h>
+
 #include "memcontrol-v1.h"
+
+/*
+ * Cgroups above their limits are maintained in a RB-Tree, independent of
+ * their hierarchy representation
+ */
+
+struct mem_cgroup_tree_per_node {
+       struct rb_root rb_root;
+       struct rb_node *rb_rightmost;
+       spinlock_t lock;
+};
+
+struct mem_cgroup_tree {
+       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
+};
+
+static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+
+/*
+ * Maximum loops in mem_cgroup_soft_reclaim(), used for soft
+ * limit reclaim to prevent infinite loops, if they ever occur.
+ */
+#define        MEM_CGROUP_MAX_RECLAIM_LOOPS            100
+#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
+
+static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
+                                        struct mem_cgroup_tree_per_node *mctz,
+                                        unsigned long new_usage_in_excess)
+{
+       struct rb_node **p = &mctz->rb_root.rb_node;
+       struct rb_node *parent = NULL;
+       struct mem_cgroup_per_node *mz_node;
+       bool rightmost = true;
+
+       if (mz->on_tree)
+               return;
+
+       mz->usage_in_excess = new_usage_in_excess;
+       if (!mz->usage_in_excess)
+               return;
+       while (*p) {
+               parent = *p;
+               mz_node = rb_entry(parent, struct mem_cgroup_per_node,
+                                       tree_node);
+               if (mz->usage_in_excess < mz_node->usage_in_excess) {
+                       p = &(*p)->rb_left;
+                       rightmost = false;
+               } else {
+                       p = &(*p)->rb_right;
+               }
+       }
+
+       if (rightmost)
+               mctz->rb_rightmost = &mz->tree_node;
+
+       rb_link_node(&mz->tree_node, parent, p);
+       rb_insert_color(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = true;
+}
+
+static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+                                        struct mem_cgroup_tree_per_node *mctz)
+{
+       if (!mz->on_tree)
+               return;
+
+       if (&mz->tree_node == mctz->rb_rightmost)
+               mctz->rb_rightmost = rb_prev(&mz->tree_node);
+
+       rb_erase(&mz->tree_node, &mctz->rb_root);
+       mz->on_tree = false;
+}
+
+static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
+                                      struct mem_cgroup_tree_per_node *mctz)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&mctz->lock, flags);
+       __mem_cgroup_remove_exceeded(mz, mctz);
+       spin_unlock_irqrestore(&mctz->lock, flags);
+}
+
+static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
+{
+       unsigned long nr_pages = page_counter_read(&memcg->memory);
+       unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
+       unsigned long excess = 0;
+
+       if (nr_pages > soft_limit)
+               excess = nr_pages - soft_limit;
+
+       return excess;
+}
+
+void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
+{
+       unsigned long excess;
+       struct mem_cgroup_per_node *mz;
+       struct mem_cgroup_tree_per_node *mctz;
+
+       if (lru_gen_enabled()) {
+               if (soft_limit_excess(memcg))
+                       lru_gen_soft_reclaim(memcg, nid);
+               return;
+       }
+
+       mctz = soft_limit_tree.rb_tree_per_node[nid];
+       if (!mctz)
+               return;
+       /*
+        * Necessary to update all ancestors when hierarchy is used.
+        * because their event counter is not touched.
+        */
+       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+               mz = memcg->nodeinfo[nid];
+               excess = soft_limit_excess(memcg);
+               /*
+                * We have to update the tree if mz is on RB-tree or
+                * mem is over its softlimit.
+                */
+               if (excess || mz->on_tree) {
+                       unsigned long flags;
+
+                       spin_lock_irqsave(&mctz->lock, flags);
+                       /* if on-tree, remove it */
+                       if (mz->on_tree)
+                               __mem_cgroup_remove_exceeded(mz, mctz);
+                       /*
+                        * Insert again. mz->usage_in_excess will be updated.
+                        * If excess is 0, no tree ops.
+                        */
+                       __mem_cgroup_insert_exceeded(mz, mctz, excess);
+                       spin_unlock_irqrestore(&mctz->lock, flags);
+               }
+       }
+}
+
+void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
+{
+       struct mem_cgroup_tree_per_node *mctz;
+       struct mem_cgroup_per_node *mz;
+       int nid;
+
+       for_each_node(nid) {
+               mz = memcg->nodeinfo[nid];
+               mctz = soft_limit_tree.rb_tree_per_node[nid];
+               if (mctz)
+                       mem_cgroup_remove_exceeded(mz, mctz);
+       }
+}
+
+static struct mem_cgroup_per_node *
+__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
+{
+       struct mem_cgroup_per_node *mz;
+
+retry:
+       mz = NULL;
+       if (!mctz->rb_rightmost)
+               goto done;              /* Nothing to reclaim from */
+
+       mz = rb_entry(mctz->rb_rightmost,
+                     struct mem_cgroup_per_node, tree_node);
+       /*
+        * Remove the node now but someone else can add it back,
+        * we will to add it back at the end of reclaim to its correct
+        * position in the tree.
+        */
+       __mem_cgroup_remove_exceeded(mz, mctz);
+       if (!soft_limit_excess(mz->memcg) ||
+           !css_tryget(&mz->memcg->css))
+               goto retry;
+done:
+       return mz;
+}
+
+static struct mem_cgroup_per_node *
+mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
+{
+       struct mem_cgroup_per_node *mz;
+
+       spin_lock_irq(&mctz->lock);
+       mz = __mem_cgroup_largest_soft_limit_node(mctz);
+       spin_unlock_irq(&mctz->lock);
+       return mz;
+}
+
+static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
+                                  pg_data_t *pgdat,
+                                  gfp_t gfp_mask,
+                                  unsigned long *total_scanned)
+{
+       struct mem_cgroup *victim = NULL;
+       int total = 0;
+       int loop = 0;
+       unsigned long excess;
+       unsigned long nr_scanned;
+       struct mem_cgroup_reclaim_cookie reclaim = {
+               .pgdat = pgdat,
+       };
+
+       excess = soft_limit_excess(root_memcg);
+
+       while (1) {
+               victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
+               if (!victim) {
+                       loop++;
+                       if (loop >= 2) {
+                               /*
+                                * If we have not been able to reclaim
+                                * anything, it might because there are
+                                * no reclaimable pages under this hierarchy
+                                */
+                               if (!total)
+                                       break;
+                               /*
+                                * We want to do more targeted reclaim.
+                                * excess >> 2 is not to excessive so as to
+                                * reclaim too much, nor too less that we keep
+                                * coming back to reclaim from this cgroup
+                                */
+                               if (total >= (excess >> 2) ||
+                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
+                                       break;
+                       }
+                       continue;
+               }
+               total += mem_cgroup_shrink_node(victim, gfp_mask, false,
+                                       pgdat, &nr_scanned);
+               *total_scanned += nr_scanned;
+               if (!soft_limit_excess(root_memcg))
+                       break;
+       }
+       mem_cgroup_iter_break(root_memcg, victim);
+       return total;
+}
+
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
+                                           gfp_t gfp_mask,
+                                           unsigned long *total_scanned)
+{
+       unsigned long nr_reclaimed = 0;
+       struct mem_cgroup_per_node *mz, *next_mz = NULL;
+       unsigned long reclaimed;
+       int loop = 0;
+       struct mem_cgroup_tree_per_node *mctz;
+       unsigned long excess;
+
+       if (lru_gen_enabled())
+               return 0;
+
+       if (order > 0)
+               return 0;
+
+       mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id];
+
+       /*
+        * Do not even bother to check the largest node if the root
+        * is empty. Do it lockless to prevent lock bouncing. Races
+        * are acceptable as soft limit is best effort anyway.
+        */
+       if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
+               return 0;
+
+       /*
+        * This loop can run a while, specially if mem_cgroup's continuously
+        * keep exceeding their soft limit and putting the system under
+        * pressure
+        */
+       do {
+               if (next_mz)
+                       mz = next_mz;
+               else
+                       mz = mem_cgroup_largest_soft_limit_node(mctz);
+               if (!mz)
+                       break;
+
+               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
+                                                   gfp_mask, total_scanned);
+               nr_reclaimed += reclaimed;
+               spin_lock_irq(&mctz->lock);
+
+               /*
+                * If we failed to reclaim anything from this memory cgroup
+                * it is time to move on to the next cgroup
+                */
+               next_mz = NULL;
+               if (!reclaimed)
+                       next_mz = __mem_cgroup_largest_soft_limit_node(mctz);
+
+               excess = soft_limit_excess(mz->memcg);
+               /*
+                * One school of thought says that we should not add
+                * back the node to the tree if reclaim returns 0.
+                * But our reclaim could return 0, simply because due
+                * to priority we are exposing a smaller subset of
+                * memory to reclaim from. Consider this as a longer
+                * term TODO.
+                */
+               /* If excess == 0, no tree ops */
+               __mem_cgroup_insert_exceeded(mz, mctz, excess);
+               spin_unlock_irq(&mctz->lock);
+               css_put(&mz->memcg->css);
+               loop++;
+               /*
+                * Could not reclaim anything and there are no more
+                * mem cgroups to try or we seem to be looping without
+                * reclaiming anything.
+                */
+               if (!nr_reclaimed &&
+                       (next_mz == NULL ||
+                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
+                       break;
+       } while (!nr_reclaimed);
+       if (next_mz)
+               css_put(&next_mz->memcg->css);
+       return nr_reclaimed;
+}
+
+static int __init memcg1_init(void)
+{
+       int node;
+
+       for_each_node(node) {
+               struct mem_cgroup_tree_per_node *rtpn;
+
+               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, node);
+
+               rtpn->rb_root = RB_ROOT;
+               rtpn->rb_rightmost = NULL;
+               spin_lock_init(&rtpn->lock);
+               soft_limit_tree.rb_tree_per_node[node] = rtpn;
+       }
+
+       return 0;
+}
+subsys_initcall(memcg1_init);
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 7c5f094..4da6fa5 100644
@@ -3,5 +3,12 @@
 #ifndef __MM_MEMCONTROL_V1_H
 #define __MM_MEMCONTROL_V1_H
 
+void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid);
+void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg);
+
+static inline void memcg1_soft_limit_reset(struct mem_cgroup *memcg)
+{
+       WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
+}
 
 #endif /* __MM_MEMCONTROL_V1_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6244752..643c29e 100644
@@ -71,6 +71,7 @@
 #include <net/ip.h>
 #include "slab.h"
 #include "swap.h"
+#include "memcontrol-v1.h"
 
 #include <linux/uaccess.h>
 
@@ -107,23 +108,6 @@ static bool do_memsw_account(void)
 #define THRESHOLDS_EVENTS_TARGET 128
 #define SOFTLIMIT_EVENTS_TARGET 1024
 
-/*
- * Cgroups above their limits are maintained in a RB-Tree, independent of
- * their hierarchy representation
- */
-
-struct mem_cgroup_tree_per_node {
-       struct rb_root rb_root;
-       struct rb_node *rb_rightmost;
-       spinlock_t lock;
-};
-
-struct mem_cgroup_tree {
-       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
-};
-
-static struct mem_cgroup_tree soft_limit_tree __read_mostly;
-
 /* for OOM */
 struct mem_cgroup_eventfd_list {
        struct list_head list;
@@ -198,13 +182,6 @@ static struct move_charge_struct {
        .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
 };
 
-/*
- * Maximum loops in mem_cgroup_soft_reclaim(), used for soft
- * limit reclaim to prevent infinite loops, if they ever occur.
- */
-#define        MEM_CGROUP_MAX_RECLAIM_LOOPS            100
-#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
-
 /* for encoding cft->private value on file */
 enum res_type {
        _MEM,
@@ -412,169 +389,6 @@ ino_t page_cgroup_ino(struct page *page)
        return ino;
 }
 
-static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
-                                        struct mem_cgroup_tree_per_node *mctz,
-                                        unsigned long new_usage_in_excess)
-{
-       struct rb_node **p = &mctz->rb_root.rb_node;
-       struct rb_node *parent = NULL;
-       struct mem_cgroup_per_node *mz_node;
-       bool rightmost = true;
-
-       if (mz->on_tree)
-               return;
-
-       mz->usage_in_excess = new_usage_in_excess;
-       if (!mz->usage_in_excess)
-               return;
-       while (*p) {
-               parent = *p;
-               mz_node = rb_entry(parent, struct mem_cgroup_per_node,
-                                       tree_node);
-               if (mz->usage_in_excess < mz_node->usage_in_excess) {
-                       p = &(*p)->rb_left;
-                       rightmost = false;
-               } else {
-                       p = &(*p)->rb_right;
-               }
-       }
-
-       if (rightmost)
-               mctz->rb_rightmost = &mz->tree_node;
-
-       rb_link_node(&mz->tree_node, parent, p);
-       rb_insert_color(&mz->tree_node, &mctz->rb_root);
-       mz->on_tree = true;
-}
-
-static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
-                                        struct mem_cgroup_tree_per_node *mctz)
-{
-       if (!mz->on_tree)
-               return;
-
-       if (&mz->tree_node == mctz->rb_rightmost)
-               mctz->rb_rightmost = rb_prev(&mz->tree_node);
-
-       rb_erase(&mz->tree_node, &mctz->rb_root);
-       mz->on_tree = false;
-}
-
-static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
-                                      struct mem_cgroup_tree_per_node *mctz)
-{
-       unsigned long flags;
-
-       spin_lock_irqsave(&mctz->lock, flags);
-       __mem_cgroup_remove_exceeded(mz, mctz);
-       spin_unlock_irqrestore(&mctz->lock, flags);
-}
-
-static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
-{
-       unsigned long nr_pages = page_counter_read(&memcg->memory);
-       unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
-       unsigned long excess = 0;
-
-       if (nr_pages > soft_limit)
-               excess = nr_pages - soft_limit;
-
-       return excess;
-}
-
-static void mem_cgroup_update_tree(struct mem_cgroup *memcg, int nid)
-{
-       unsigned long excess;
-       struct mem_cgroup_per_node *mz;
-       struct mem_cgroup_tree_per_node *mctz;
-
-       if (lru_gen_enabled()) {
-               if (soft_limit_excess(memcg))
-                       lru_gen_soft_reclaim(memcg, nid);
-               return;
-       }
-
-       mctz = soft_limit_tree.rb_tree_per_node[nid];
-       if (!mctz)
-               return;
-       /*
-        * Necessary to update all ancestors when hierarchy is used.
-        * because their event counter is not touched.
-        */
-       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-               mz = memcg->nodeinfo[nid];
-               excess = soft_limit_excess(memcg);
-               /*
-                * We have to update the tree if mz is on RB-tree or
-                * mem is over its softlimit.
-                */
-               if (excess || mz->on_tree) {
-                       unsigned long flags;
-
-                       spin_lock_irqsave(&mctz->lock, flags);
-                       /* if on-tree, remove it */
-                       if (mz->on_tree)
-                               __mem_cgroup_remove_exceeded(mz, mctz);
-                       /*
-                        * Insert again. mz->usage_in_excess will be updated.
-                        * If excess is 0, no tree ops.
-                        */
-                       __mem_cgroup_insert_exceeded(mz, mctz, excess);
-                       spin_unlock_irqrestore(&mctz->lock, flags);
-               }
-       }
-}
-
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
-{
-       struct mem_cgroup_tree_per_node *mctz;
-       struct mem_cgroup_per_node *mz;
-       int nid;
-
-       for_each_node(nid) {
-               mz = memcg->nodeinfo[nid];
-               mctz = soft_limit_tree.rb_tree_per_node[nid];
-               if (mctz)
-                       mem_cgroup_remove_exceeded(mz, mctz);
-       }
-}
-
-static struct mem_cgroup_per_node *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
-{
-       struct mem_cgroup_per_node *mz;
-
-retry:
-       mz = NULL;
-       if (!mctz->rb_rightmost)
-               goto done;              /* Nothing to reclaim from */
-
-       mz = rb_entry(mctz->rb_rightmost,
-                     struct mem_cgroup_per_node, tree_node);
-       /*
-        * Remove the node now but someone else can add it back,
-        * we will to add it back at the end of reclaim to its correct
-        * position in the tree.
-        */
-       __mem_cgroup_remove_exceeded(mz, mctz);
-       if (!soft_limit_excess(mz->memcg) ||
-           !css_tryget(&mz->memcg->css))
-               goto retry;
-done:
-       return mz;
-}
-
-static struct mem_cgroup_per_node *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
-{
-       struct mem_cgroup_per_node *mz;
-
-       spin_lock_irq(&mctz->lock);
-       mz = __mem_cgroup_largest_soft_limit_node(mctz);
-       spin_unlock_irq(&mctz->lock);
-       return mz;
-}
-
 /* Subset of node_stat_item for memcg stats */
 static const unsigned int memcg_node_stat_items[] = {
        NR_INACTIVE_ANON,
@@ -1979,56 +1793,6 @@ unlock:
        return ret;
 }
 
-static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-                                  pg_data_t *pgdat,
-                                  gfp_t gfp_mask,
-                                  unsigned long *total_scanned)
-{
-       struct mem_cgroup *victim = NULL;
-       int total = 0;
-       int loop = 0;
-       unsigned long excess;
-       unsigned long nr_scanned;
-       struct mem_cgroup_reclaim_cookie reclaim = {
-               .pgdat = pgdat,
-       };
-
-       excess = soft_limit_excess(root_memcg);
-
-       while (1) {
-               victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
-               if (!victim) {
-                       loop++;
-                       if (loop >= 2) {
-                               /*
-                                * If we have not been able to reclaim
-                                * anything, it might because there are
-                                * no reclaimable pages under this hierarchy
-                                */
-                               if (!total)
-                                       break;
-                               /*
-                                * We want to do more targeted reclaim.
-                                * excess >> 2 is not to excessive so as to
-                                * reclaim too much, nor too less that we keep
-                                * coming back to reclaim from this cgroup
-                                */
-                               if (total >= (excess >> 2) ||
-                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
-                                       break;
-                       }
-                       continue;
-               }
-               total += mem_cgroup_shrink_node(victim, gfp_mask, false,
-                                       pgdat, &nr_scanned);
-               *total_scanned += nr_scanned;
-               if (!soft_limit_excess(root_memcg))
-                       break;
-       }
-       mem_cgroup_iter_break(root_memcg, victim);
-       return total;
-}
-
 #ifdef CONFIG_LOCKDEP
 static struct lockdep_map memcg_oom_lock_dep_map = {
        .name = "memcg_oom_lock",
@@ -3923,88 +3687,6 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
        return ret;
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
-                                           gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
-{
-       unsigned long nr_reclaimed = 0;
-       struct mem_cgroup_per_node *mz, *next_mz = NULL;
-       unsigned long reclaimed;
-       int loop = 0;
-       struct mem_cgroup_tree_per_node *mctz;
-       unsigned long excess;
-
-       if (lru_gen_enabled())
-               return 0;
-
-       if (order > 0)
-               return 0;
-
-       mctz = soft_limit_tree.rb_tree_per_node[pgdat->node_id];
-
-       /*
-        * Do not even bother to check the largest node if the root
-        * is empty. Do it lockless to prevent lock bouncing. Races
-        * are acceptable as soft limit is best effort anyway.
-        */
-       if (!mctz || RB_EMPTY_ROOT(&mctz->rb_root))
-               return 0;
-
-       /*
-        * This loop can run a while, specially if mem_cgroup's continuously
-        * keep exceeding their soft limit and putting the system under
-        * pressure
-        */
-       do {
-               if (next_mz)
-                       mz = next_mz;
-               else
-                       mz = mem_cgroup_largest_soft_limit_node(mctz);
-               if (!mz)
-                       break;
-
-               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, pgdat,
-                                                   gfp_mask, total_scanned);
-               nr_reclaimed += reclaimed;
-               spin_lock_irq(&mctz->lock);
-
-               /*
-                * If we failed to reclaim anything from this memory cgroup
-                * it is time to move on to the next cgroup
-                */
-               next_mz = NULL;
-               if (!reclaimed)
-                       next_mz = __mem_cgroup_largest_soft_limit_node(mctz);
-
-               excess = soft_limit_excess(mz->memcg);
-               /*
-                * One school of thought says that we should not add
-                * back the node to the tree if reclaim returns 0.
-                * But our reclaim could return 0, simply because due
-                * to priority we are exposing a smaller subset of
-                * memory to reclaim from. Consider this as a longer
-                * term TODO.
-                */
-               /* If excess == 0, no tree ops */
-               __mem_cgroup_insert_exceeded(mz, mctz, excess);
-               spin_unlock_irq(&mctz->lock);
-               css_put(&mz->memcg->css);
-               loop++;
-               /*
-                * Could not reclaim anything and there are no more
-                * mem cgroups to try or we seem to be looping without
-                * reclaiming anything.
-                */
-               if (!nr_reclaimed &&
-                       (next_mz == NULL ||
-                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
-                       break;
-       } while (!nr_reclaimed);
-       if (next_mz)
-               css_put(&next_mz->memcg->css);
-       return nr_reclaimed;
-}
-
 /*
  * Reclaims as many pages from the given memcg as possible.
  *
@@ -5782,7 +5464,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
                return ERR_CAST(memcg);
 
        page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
-       WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
+       memcg1_soft_limit_reset(memcg);
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
        memcg->zswap_max = PAGE_COUNTER_MAX;
        WRITE_ONCE(memcg->zswap_writeback,
@@ -5955,7 +5637,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
        page_counter_set_min(&memcg->memory, 0);
        page_counter_set_low(&memcg->memory, 0);
        page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
-       WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX);
+       memcg1_soft_limit_reset(memcg);
        page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
        memcg_wb_domain_size_changed(memcg);
 }
@@ -7950,7 +7632,7 @@ __setup("cgroup.memory=", cgroup_memory);
  */
 static int __init mem_cgroup_init(void)
 {
-       int cpu, node;
+       int cpu;
 
        /*
         * Currently s32 type (can refer to struct batched_lruvec_stat) is
@@ -7967,17 +7649,6 @@ static int __init mem_cgroup_init(void)
                INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
                          drain_local_stock);
 
-       for_each_node(node) {
-               struct mem_cgroup_tree_per_node *rtpn;
-
-               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, node);
-
-               rtpn->rb_root = RB_ROOT;
-               rtpn->rb_rightmost = NULL;
-               spin_lock_init(&rtpn->lock);
-               soft_limit_tree.rb_tree_per_node[node] = rtpn;
-       }
-
        return 0;
 }
 subsys_initcall(mem_cgroup_init);