mm/memcg: move penalty delay clamping out of calculate_high_delay()
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 5beea03..6a857b8 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1314,7 +1314,7 @@ static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
        if (do_memsw_account()) {
                count = page_counter_read(&memcg->memsw);
                limit = READ_ONCE(memcg->memsw.max);
-               if (count <= limit)
+               if (count < limit)
                        margin = min(margin, limit - count);
                else
                        margin = 0;
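
Note: the "<=" to "<" change above is behaviour-neutral. When count == limit, the old branch computed min(margin, limit - count) == min(margin, 0) == 0, which is exactly what the else branch now returns; the stricter comparison simply avoids the degenerate min() against zero.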
@@ -1451,6 +1451,8 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
                       memcg_page_state(memcg, WORKINGSET_REFAULT));
        seq_buf_printf(&s, "workingset_activate %lu\n",
                       memcg_page_state(memcg, WORKINGSET_ACTIVATE));
+       seq_buf_printf(&s, "workingset_restore %lu\n",
+                      memcg_page_state(memcg, WORKINGSET_RESTORE));
        seq_buf_printf(&s, "workingset_nodereclaim %lu\n",
                       memcg_page_state(memcg, WORKINGSET_NODERECLAIM));
 
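With the new seq_buf_printf() line, memory.stat gains a workingset_restore counter (roughly, refaulted pages that had been detected as part of the active workingset before they were evicted). Given the format strings above, userspace would see output like the following; the values here are made up:

	workingset_refault 10329
	workingset_activate 1137
	workingset_restore 294
	workingset_nodereclaim 0
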
@@ -2319,41 +2321,48 @@ static void high_work_func(struct work_struct *work)
  #define MEMCG_DELAY_PRECISION_SHIFT 20
  #define MEMCG_DELAY_SCALING_SHIFT 14
 
-/*
- * Get the number of jiffies that we should penalise a mischievous cgroup which
- * is exceeding its memory.high by checking both it and its ancestors.
- */
-static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
-                                         unsigned int nr_pages)
+static u64 calculate_overage(unsigned long usage, unsigned long high)
 {
-       unsigned long penalty_jiffies;
-       u64 max_overage = 0;
+       u64 overage;
 
-       do {
-               unsigned long usage, high;
-               u64 overage;
-
-               usage = page_counter_read(&memcg->memory);
-               high = READ_ONCE(memcg->high);
+       if (usage <= high)
+               return 0;
 
-               if (usage <= high)
-                       continue;
+       /*
+        * Prevent division by 0 in overage calculation by acting as if
+        * it was a threshold of 1 page
+        */
+       high = max(high, 1UL);
 
-               /*
-                * Prevent division by 0 in overage calculation by acting as if
-                * it was a threshold of 1 page
-                */
-               high = max(high, 1UL);
+       overage = usage - high;
+       overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+       return div64_u64(overage, high);
+}
 
-               overage = usage - high;
-               overage <<= MEMCG_DELAY_PRECISION_SHIFT;
-               overage = div64_u64(overage, high);
+static u64 mem_find_max_overage(struct mem_cgroup *memcg)
+{
+       u64 overage, max_overage = 0;
 
-               if (overage > max_overage)
-                       max_overage = overage;
+       do {
+               overage = calculate_overage(page_counter_read(&memcg->memory),
+                                           READ_ONCE(memcg->high));
+               max_overage = max(overage, max_overage);
        } while ((memcg = parent_mem_cgroup(memcg)) &&
                 !mem_cgroup_is_root(memcg));
 
+       return max_overage;
+}
+
+/*
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
+ */
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+                                         unsigned int nr_pages,
+                                         u64 max_overage)
+{
+       unsigned long penalty_jiffies;
+
        if (!max_overage)
                return 0;
 
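To get a feel for the fixed-point arithmetic in calculate_overage() above (mem_find_max_overage() then simply takes the maximum of this value over the cgroup and its ancestors), here is a worked example as standalone C; uint64_t and plain division stand in for the kernel's u64 and div64_u64():

	#include <inttypes.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t usage = 1280;	/* pages currently charged */
		uint64_t high  = 1024;	/* memory.high, in pages */
		uint64_t overage;

		overage = usage - high;	/* 256 pages over the limit */
		overage <<= 20;		/* MEMCG_DELAY_PRECISION_SHIFT */
		overage /= high;	/* 262144, i.e. 0.25 in 20-bit fixed point */

		/* prints: overage = 262144 (0.25 of high) */
		printf("overage = %" PRIu64 " (%.2f of high)\n",
		       overage, overage / (double)(1 << 20));
		return 0;
	}
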
@@ -2377,14 +2386,7 @@ static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
         * MEMCG_CHARGE_BATCH pages is nominal, so work out how much smaller or
         * larger the current charge batch is than that.
         */
-       penalty_jiffies = penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
-
-       /*
-        * Clamp the max delay per usermode return so as to still keep the
-        * application moving forwards and also permit diagnostics, albeit
-        * extremely slowly.
-        */
-       return min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
+       return penalty_jiffies * nr_pages / MEMCG_CHARGE_BATCH;
 }
 
 /*
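
The nr_pages scaling that remains at the end of calculate_high_delay() is easy to sanity-check with small numbers. MEMCG_CHARGE_BATCH is 32 pages at the time of writing; the 100-jiffy base penalty below is made up:

	#include <stdio.h>

	int main(void)
	{
		unsigned long base = 100;	/* hypothetical penalty for one nominal batch */
		unsigned long batch = 32;	/* MEMCG_CHARGE_BATCH, in pages */

		printf("64 pages -> %lu jiffies\n", base * 64 / batch);	/* 200 */
		printf(" 8 pages -> %lu jiffies\n", base *  8 / batch);	/*  25 */
		/* four 8-page charges cost about as much as one 32-page charge */
		return 0;
	}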
@@ -2409,7 +2411,15 @@ void mem_cgroup_handle_over_high(void)
         * memory.high is breached and reclaim is unable to keep up. Throttle
         * allocators proactively to slow down excessive growth.
         */
-       penalty_jiffies = calculate_high_delay(memcg, nr_pages);
+       penalty_jiffies = calculate_high_delay(memcg, nr_pages,
+                                              mem_find_max_overage(memcg));
+
+       /*
+        * Clamp the max delay per usermode return so as to still keep the
+        * application moving forwards and also permit diagnostics, albeit
+        * extremely slowly.
+        */
+       penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);
 
        /*
         * Don't sleep if the amount of jiffies this memcg owes us is so low
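
Moving the min() clamp out of calculate_high_delay() and into mem_cgroup_handle_over_high() is what makes the helper composable: the caller can accumulate several penalties and clamp the total once per return to userspace. A sketch of that shape, where swap_find_max_overage() is a hypothetical second source of overage (MEMCG_MAX_HIGH_DELAY_JIFFIES is 2*HZ at the time of writing):

	/* sketch only: sum per-resource penalties, then clamp the total */
	penalty_jiffies  = calculate_high_delay(memcg, nr_pages,
						mem_find_max_overage(memcg));
	penalty_jiffies += calculate_high_delay(memcg, nr_pages,
						swap_find_max_overage(memcg));
	penalty_jiffies = min(penalty_jiffies, MEMCG_MAX_HIGH_DELAY_JIFFIES);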
@@ -4330,7 +4340,6 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 
        *pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
 
-       /* this should eventually include NR_UNSTABLE_NFS */
        *pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
        *pfilepages = memcg_exact_page_state(memcg, NR_INACTIVE_FILE) +
                        memcg_exact_page_state(memcg, NR_ACTIVE_FILE);
@@ -4990,19 +4999,22 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
        unsigned int size;
        int node;
        int __maybe_unused i;
+       long error = -ENOMEM;
 
        size = sizeof(struct mem_cgroup);
        size += nr_node_ids * sizeof(struct mem_cgroup_per_node *);
 
        memcg = kzalloc(size, GFP_KERNEL);
        if (!memcg)
-               return NULL;
+               return ERR_PTR(error);
 
        memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
                                 1, MEM_CGROUP_ID_MAX,
                                 GFP_KERNEL);
-       if (memcg->id.id < 0)
+       if (memcg->id.id < 0) {
+               error = memcg->id.id;
                goto fail;
+       }
 
        memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu);
        if (!memcg->vmstats_local)
@@ -5046,7 +5058,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 fail:
        mem_cgroup_id_remove(memcg);
        __mem_cgroup_free(memcg);
-       return NULL;
+       return ERR_PTR(error);
 }
 
 static struct cgroup_subsys_state * __ref
@@ -5057,8 +5069,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        long error = -ENOMEM;
 
        memcg = mem_cgroup_alloc();
-       if (!memcg)
-               return ERR_PTR(error);
+       if (IS_ERR(memcg))
+               return ERR_CAST(memcg);
 
        WRITE_ONCE(memcg->high, PAGE_COUNTER_MAX);
        memcg->soft_limit = PAGE_COUNTER_MAX;
@@ -5108,7 +5120,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 fail:
        mem_cgroup_id_remove(memcg);
        mem_cgroup_free(memcg);
-       return ERR_PTR(-ENOMEM);
+       return ERR_PTR(error);
 }
 
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
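
The mem_cgroup_alloc() hunks above stop flattening every failure to a bare NULL and instead thread the real errno (for example, whatever negative value idr_alloc() returned) through the pointer itself, so mem_cgroup_css_alloc() no longer has to guess -ENOMEM. For readers less familiar with the <linux/err.h> idiom, a minimal illustration with a hypothetical struct foo:

	/* uses <linux/err.h> and <linux/slab.h> */
	struct foo { int dummy; };

	static struct foo *foo_alloc(void)
	{
		struct foo *p = kzalloc(sizeof(*p), GFP_KERNEL);

		if (!p)
			return ERR_PTR(-ENOMEM);	/* encode the errno in the pointer */
		return p;
	}

	static int foo_create(void)
	{
		struct foo *p = foo_alloc();

		if (IS_ERR(p))
			return PTR_ERR(p);	/* recover the errno, whatever it was */
		/* ... use p ... */
		return 0;
	}

ERR_CAST(), as now used in mem_cgroup_css_alloc(), does the same forwarding when the source and destination pointer types differ.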
@@ -6224,7 +6236,6 @@ static struct cftype memory_files[] = {
        },
        {
                .name = "stat",
-               .flags = CFTYPE_NOT_ON_ROOT,
                .seq_show = memory_stat_show,
        },
        {