Merge tag 'vfs-5.8-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
[linux-2.6-microblaze.git] / mm / memcontrol.c
index 08cf17b..f973a02 100644 (file)
@@ -2354,6 +2354,22 @@ static u64 mem_find_max_overage(struct mem_cgroup *memcg)
        return max_overage;
 }
 
+static u64 swap_find_max_overage(struct mem_cgroup *memcg)
+{
+       u64 overage, max_overage = 0; /* max_overage: largest overage seen so far */
+
+       do { /* visit memcg itself, then each ancestor below the root */
+               overage = calculate_overage(page_counter_read(&memcg->swap),
+                                           READ_ONCE(memcg->swap.high));
+               if (overage) /* raise MEMCG_SWAP_HIGH on every level with non-zero overage */
+                       memcg_memory_event(memcg, MEMCG_SWAP_HIGH);
+               max_overage = max(overage, max_overage);
+       } while ((memcg = parent_mem_cgroup(memcg)) &&
+                !mem_cgroup_is_root(memcg)); /* stop at a NULL parent or at the root */
+
+       return max_overage;
+}
+
 /*
  * Get the number of jiffies that we should penalise a mischievous cgroup which
  * is exceeding its memory.high by checking both it and its ancestors.
@@ -2415,6 +2431,9 @@ void mem_cgroup_handle_over_high(void)
        penalty_jiffies = calculate_high_delay(memcg, nr_pages,
                                               mem_find_max_overage(memcg));
 
+       penalty_jiffies += calculate_high_delay(memcg, nr_pages,
+                                               swap_find_max_overage(memcg));
+
        /*
         * Clamp the max delay per usermode return so as to still keep the
         * application moving forwards and also permit diagnostics, albeit
@@ -2605,13 +2624,32 @@ done_restock:
         * reclaim, the cost of mismatch is negligible.
         */
        do {
-               if (page_counter_read(&memcg->memory) >
-                   READ_ONCE(memcg->memory.high)) {
-                       /* Don't bother a random interrupted task */
-                       if (in_interrupt()) {
+               bool mem_high, swap_high;
+
+               mem_high = page_counter_read(&memcg->memory) >
+                       READ_ONCE(memcg->memory.high);
+               swap_high = page_counter_read(&memcg->swap) >
+                       READ_ONCE(memcg->swap.high);
+
+               /* Don't bother a random interrupted task */
+               if (in_interrupt()) {
+                       if (mem_high) {
                                schedule_work(&memcg->high_work);
                                break;
                        }
+                       continue;
+               }
+
+               if (mem_high || swap_high) {
+                       /*
+                        * The allocating tasks in this cgroup will need to do
+                        * reclaim or be throttled to prevent further growth
+                        * of the memory or swap footprints.
+                        *
+                        * Target some best-effort fairness between the tasks,
+                        * and distribute reclaim work and delay penalties
+                        * based on how much each task is actually allocating.
+                        */
                        current->memcg_nr_pages_over_high += batch;
                        set_notify_resume(current);
                        break;
@@ -2814,7 +2852,12 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
 
 static inline bool memcg_kmem_bypass(void)
 {
-       if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+       if (in_interrupt()) /* interrupt context always bypasses */
+               return true;
+
+       /* Allow remote charging: mm-less tasks/kthreads with active_memcg are not bypassed. */
+       if ((!current->mm || (current->flags & PF_KTHREAD)) &&
+            !current->active_memcg)
                 return true;
         return false;
 }
@@ -5076,6 +5119,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 
        page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
        memcg->soft_limit = PAGE_COUNTER_MAX;
+       page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
        if (parent) {
                memcg->swappiness = mem_cgroup_swappiness(parent);
                memcg->oom_kill_disable = parent->oom_kill_disable;
@@ -5229,6 +5273,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
        page_counter_set_low(&memcg->memory, 0);
        page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
        memcg->soft_limit = PAGE_COUNTER_MAX;
+       page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
        memcg_wb_domain_size_changed(memcg);
 }
 
@@ -7142,10 +7187,13 @@ bool mem_cgroup_swap_full(struct page *page)
        if (!memcg)
                return false;
 
-       for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
-               if (page_counter_read(&memcg->swap) * 2 >=
-                   READ_ONCE(memcg->swap.max))
+       for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+               unsigned long usage = page_counter_read(&memcg->swap);
+
+               if (usage * 2 >= READ_ONCE(memcg->swap.high) ||
+                   usage * 2 >= READ_ONCE(memcg->swap.max))
                        return true;
+       }
 
        return false;
 }
@@ -7175,6 +7223,29 @@ static u64 swap_current_read(struct cgroup_subsys_state *css,
        return (u64)page_counter_read(&memcg->swap) * PAGE_SIZE;
 }
 
+static int swap_high_show(struct seq_file *m, void *v) /* seq_show handler for "swap.high"; v is unused */
+{
+       return seq_puts_memcg_tunable(m, /* emit the value read below into m */
+               READ_ONCE(mem_cgroup_from_seq(m)->swap.high));
+}
+
+static ssize_t swap_high_write(struct kernfs_open_file *of,
+                              char *buf, size_t nbytes, loff_t off) /* write handler for "swap.high"; off is unused */
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+       unsigned long high; /* parsed limit handed to page_counter_set_high() */
+       int err;
+
+       buf = strstrip(buf); /* trim surrounding whitespace before parsing */
+       err = page_counter_memparse(buf, "max", &high); /* "max" keyword presumably means no limit - confirm */
+       if (err)
+               return err; /* parse failure: propagate the error, swap.high is left unchanged */
+
+       page_counter_set_high(&memcg->swap, high);
+
+       return nbytes; /* success: report the whole write as consumed */
+}
+
 static int swap_max_show(struct seq_file *m, void *v)
 {
        return seq_puts_memcg_tunable(m,
@@ -7202,6 +7273,8 @@ static int swap_events_show(struct seq_file *m, void *v)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
 
+       seq_printf(m, "high %lu\n",
+                  atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
        seq_printf(m, "max %lu\n",
                   atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
        seq_printf(m, "fail %lu\n",
@@ -7216,6 +7289,12 @@ static struct cftype swap_files[] = {
                .flags = CFTYPE_NOT_ON_ROOT,
                .read_u64 = swap_current_read,
        },
+       {
+               .name = "swap.high",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = swap_high_show,
+               .write = swap_high_write,
+       },
        {
                .name = "swap.max",
                .flags = CFTYPE_NOT_ON_ROOT,