psi: introduce state_mask to represent stalled psi states
[linux-2.6-microblaze.git] / kernel / cgroup / cgroup.c
index 3f2b4bd..327f37c 100644 (file)
@@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp)
        css_get(&cgrp->self);
 }
 
+/**
+ * __cgroup_task_count - count the number of tasks in a cgroup
+ * @cgrp: the cgroup in question
+ *
+ * The caller is responsible for taking the css_set_lock.
+ * Return: the number of tasks in the cgroup.
+ */
+int __cgroup_task_count(const struct cgroup *cgrp)
+{
+       int count = 0;
+       struct cgrp_cset_link *link;
+
+       lockdep_assert_held(&css_set_lock);
+
+       /* sum nr_tasks over every css_set linked to this cgroup */
+       list_for_each_entry(link, &cgrp->cset_links, cset_link)
+               count += link->cset->nr_tasks;
+
+       return count;
+}
+
+/**
+ * cgroup_task_count - count the number of tasks in a cgroup
+ * @cgrp: the cgroup in question
+ *
+ * Locked wrapper around __cgroup_task_count(): acquires and releases
+ * css_set_lock itself, so the caller must not hold it.
+ * Return: the number of tasks in the cgroup.
+ */
+int cgroup_task_count(const struct cgroup *cgrp)
+{
+       int count;
+
+       spin_lock_irq(&css_set_lock);
+       count = __cgroup_task_count(cgrp);
+       spin_unlock_irq(&css_set_lock);
+
+       return count;
+}
+
 struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
 {
        struct cgroup *cgrp = of->kn->parent->priv;
@@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
                        break;
 
                cgroup1_check_for_release(cgrp);
+               TRACE_CGROUP_PATH(notify_populated, cgrp,
+                                 cgroup_is_populated(cgrp));
                cgroup_file_notify(&cgrp->events_file);
 
                child = cgrp;
@@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
                        get_css_set(to_cset);
                        to_cset->nr_tasks++;
                        css_set_move_task(task, from_cset, to_cset, true);
-                       put_css_set_locked(from_cset);
                        from_cset->nr_tasks--;
+                       /*
+                        * If the source or destination cgroup is frozen,
+                        * the task might require to change its state.
+                        */
+                       cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
+                                                   to_cset->dfl_cgrp);
+                       put_css_set_locked(from_cset);
+
                }
        }
        spin_unlock_irq(&css_set_lock);
@@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
 
                dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
                if (!dst_cset)
-                       goto err;
+                       return -ENOMEM;
 
                WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
 
@@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
        }
 
        return 0;
-err:
-       cgroup_migrate_finish(mgctx);
-       return -ENOMEM;
 }
 
 /**
@@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
 
 static int cgroup_events_show(struct seq_file *seq, void *v)
 {
-       seq_printf(seq, "populated %d\n",
-                  cgroup_is_populated(seq_css(seq)->cgroup));
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
+       /* "frozen" reports the effective frozen state (CGRP_FROZEN bit) */
+       seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
+
        return 0;
 }
 
@@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 }
 #endif
 
+/* Show handler for cgroup.freeze: prints the self-freeze setting (0 or 1). */
+static int cgroup_freeze_show(struct seq_file *seq, void *v)
+{
+       struct cgroup *cgrp = seq_css(seq)->cgroup;
+
+       seq_printf(seq, "%d\n", cgrp->freezer.freeze);
+
+       return 0;
+}
+
+/*
+ * Write handler for cgroup.freeze. Accepts "0" (thaw) or "1" (freeze)
+ * only: any other integer yields -ERANGE, non-numeric input propagates
+ * the kstrtoint() error, and -ENOENT is returned if the cgroup is no
+ * longer live.
+ */
+static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
+                                  char *buf, size_t nbytes, loff_t off)
+{
+       struct cgroup *cgrp;
+       ssize_t ret;
+       int freeze;
+
+       ret = kstrtoint(strstrip(buf), 0, &freeze);
+       if (ret)
+               return ret;
+
+       if (freeze < 0 || freeze > 1)
+               return -ERANGE;
+
+       /* cgroup_kn_lock_live() returns NULL if the cgroup is being removed */
+       cgrp = cgroup_kn_lock_live(of->kn, false);
+       if (!cgrp)
+               return -ENOENT;
+
+       cgroup_freeze(cgrp, freeze);
+
+       cgroup_kn_unlock(of->kn);
+
+       return nbytes;
+}
+
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
        struct cftype *cft = of->kn->priv;
@@ -4653,6 +4729,12 @@ static struct cftype cgroup_base_files[] = {
                .name = "cgroup.stat",
                .seq_show = cgroup_stat_show,
        },
+       {
+               .name = "cgroup.freeze",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = cgroup_freeze_show,
+               .write = cgroup_freeze_write,
+       },
        {
                .name = "cpu.stat",
                .flags = CFTYPE_NOT_ON_ROOT,
@@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work)
                if (cgroup_on_dfl(cgrp))
                        cgroup_rstat_flush(cgrp);
 
+               spin_lock_irq(&css_set_lock);
                for (tcgrp = cgroup_parent(cgrp); tcgrp;
                     tcgrp = cgroup_parent(tcgrp))
                        tcgrp->nr_dying_descendants--;
+               spin_unlock_irq(&css_set_lock);
 
                cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                cgrp->id = -1;
@@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
        if (ret)
                goto out_psi_free;
 
+       /*
+        * New cgroup inherits effective freeze counter, and
+        * if the parent has to be frozen, the child has too.
+        */
+       cgrp->freezer.e_freeze = parent->freezer.e_freeze;
+       if (cgrp->freezer.e_freeze)
+               set_bit(CGRP_FROZEN, &cgrp->flags);
+
+       spin_lock_irq(&css_set_lock);
        for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
                cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
 
-               if (tcgrp != cgrp)
+               if (tcgrp != cgrp) {
                        tcgrp->nr_descendants++;
+
+                       /*
+                        * If the new cgroup is frozen, all ancestor cgroups
+                        * get a new frozen descendant, but their state can't
+                        * change because of this.
+                        */
+                       if (cgrp->freezer.e_freeze)
+                               tcgrp->freezer.nr_frozen_descendants++;
+               }
        }
+       spin_unlock_irq(&css_set_lock);
 
        if (notify_on_release(parent))
                set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
@@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
        if (parent && cgroup_is_threaded(cgrp))
                parent->nr_threaded_children--;
 
+       spin_lock_irq(&css_set_lock);
        for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
                tcgrp->nr_descendants--;
                tcgrp->nr_dying_descendants++;
+               /*
+                * If the dying cgroup is frozen, decrease frozen descendants
+                * counters of ancestor cgroups.
+                */
+               if (test_bit(CGRP_FROZEN, &cgrp->flags))
+                       tcgrp->freezer.nr_frozen_descendants--;
        }
+       spin_unlock_irq(&css_set_lock);
 
        cgroup1_check_for_release(parent);
 
@@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child)
                        cset->nr_tasks++;
                        css_set_move_task(child, NULL, cset, false);
                }
+
+               /*
+                * If the cgroup has to be frozen, the new task has too.
+                * Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
+                * the task into the frozen state.
+                */
+               if (unlikely(cgroup_task_freeze(child))) {
+                       spin_lock(&child->sighand->siglock);
+                       WARN_ON_ONCE(child->frozen);
+                       child->jobctl |= JOBCTL_TRAP_FREEZE;
+                       spin_unlock(&child->sighand->siglock);
+
+                       /*
+                        * Calling cgroup_update_frozen() isn't required here,
+                        * because it will be called anyway a bit later
+                        * from do_freezer_trap(). So we avoid cgroup's
+                        * transient switch from the frozen state and back.
+                        */
+               }
+
                spin_unlock_irq(&css_set_lock);
        }
 
@@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk)
                spin_lock_irq(&css_set_lock);
                css_set_move_task(tsk, cset, NULL, false);
                cset->nr_tasks--;
+
+               WARN_ON_ONCE(cgroup_task_frozen(tsk));
+               if (unlikely(cgroup_task_freeze(tsk)))
+                       cgroup_update_frozen(task_dfl_cgroup(tsk));
+
                spin_unlock_irq(&css_set_lock);
        } else {
                get_css_set(cset);