cgroup: Eliminate the need for cgroup_mutex in proc_cgroup_show()
author Yafang Shao <laoar.shao@gmail.com>
Sun, 29 Oct 2023 06:14:30 +0000 (06:14 +0000)
committer Tejun Heo <tj@kernel.org>
Thu, 9 Nov 2023 23:25:47 +0000 (13:25 -1000)
The cgroup root_list is already RCU-safe. Therefore, we can replace the
cgroup_mutex with the RCU read lock in some particular paths. This change
will be particularly beneficial for frequent operations, such as
`cat /proc/self/cgroup`, in a cgroup1-based container environment.

I did stress tests with this change, as outlined below
(with CONFIG_PROVE_RCU_LIST enabled):

- Continuously mounting and unmounting named cgroups in some tasks,
  for example:

  cgrp_name=$1
  while true
  do
      mount -t cgroup -o none,name=$cgrp_name none /$cgrp_name
      umount /$cgrp_name
  done

- Continuously triggering proc_cgroup_show() in some tasks concurrently,
  for example:
  while true; do cat /proc/self/cgroup > /dev/null; done

They ran successfully after implementing this change, with no RCU
warnings in dmesg.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cgroup.c

index 19784d4..9bb255e 100644 (file)
@@ -6285,7 +6285,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
        if (!buf)
                goto out;
 
-       cgroup_lock();
+       rcu_read_lock();
        spin_lock_irq(&css_set_lock);
 
        for_each_root(root) {
@@ -6296,6 +6296,11 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible))
                        continue;
 
+               cgrp = task_cgroup_from_root(tsk, root);
+               /* The root has already been unmounted. */
+               if (!cgrp)
+                       continue;
+
                seq_printf(m, "%d:", root->hierarchy_id);
                if (root != &cgrp_dfl_root)
                        for_each_subsys(ss, ssid)
@@ -6306,9 +6311,6 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                        seq_printf(m, "%sname=%s", count ? "," : "",
                                   root->name);
                seq_putc(m, ':');
-
-               cgrp = task_cgroup_from_root(tsk, root);
-
                /*
                 * On traditional hierarchies, all zombie tasks show up as
                 * belonging to the root cgroup.  On the default hierarchy,
@@ -6340,7 +6342,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
        retval = 0;
 out_unlock:
        spin_unlock_irq(&css_set_lock);
-       cgroup_unlock();
+       rcu_read_unlock();
        kfree(buf);
 out:
        return retval;