sched: Allow task CPU affinity to be restricted on asymmetric systems

author Will Deacon <will@kernel.org>

Fri, 30 Jul 2021 11:24:35 +0000 (12:24 +0100)

committer Peter Zijlstra <peterz@infradead.org>

Fri, 20 Aug 2021 10:33:00 +0000 (12:33 +0200)
author Will Deacon <will@kernel.org>
Fri, 30 Jul 2021 11:24:35 +0000 (12:24 +0100)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 20 Aug 2021 10:33:00 +0000 (12:33 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 2c5d638..ce2d5cf 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1709,6 +1709,8 @@ extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new
  extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
  extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
  extern void release_user_cpus_ptr(struct task_struct *p);
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
+extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
  #else
  static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
  {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 672d0fc..6ee1970 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2494,10 +2494,18 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
         return 0;
  }
  
+static inline struct cpumask *clear_user_cpus_ptr(struct task_struct *p)
+{
+       struct cpumask *user_mask = NULL;
+
+       swap(p->user_cpus_ptr, user_mask);
+
+       return user_mask;
+}
+
  void release_user_cpus_ptr(struct task_struct *p)
  {
-       kfree(p->user_cpus_ptr);
-       p->user_cpus_ptr = NULL;
+       kfree(clear_user_cpus_ptr(p));
  }
  
  /*
@@ -2717,27 +2725,23 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
  }
  
  /*
- * Change a given task's CPU affinity. Migrate the thread to a
- * proper CPU and schedule it away if the CPU it's executing on
- * is removed from the allowed bitmask.
- *
- * NOTE: the caller must have a valid reference to the task, the
- * task must not exit() & deallocate itself prematurely. The
- * call is not atomic; no spinlocks may be held.
+ * Called with both p->pi_lock and rq->lock held; drops both before returning.
   */
-static int __set_cpus_allowed_ptr(struct task_struct *p,
-                                 const struct cpumask *new_mask,
-                                 u32 flags)
+static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
+                                        const struct cpumask *new_mask,
+                                        u32 flags,
+                                        struct rq *rq,
+                                        struct rq_flags *rf)
+       __releases(rq->lock)
+       __releases(p->pi_lock)
  {
         const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
         const struct cpumask *cpu_valid_mask = cpu_active_mask;
         bool kthread = p->flags & PF_KTHREAD;
+       struct cpumask *user_mask = NULL;
         unsigned int dest_cpu;
-       struct rq_flags rf;
-       struct rq *rq;
         int ret = 0;
  
-       rq = task_rq_lock(p, &rf);
         update_rq_clock(rq);
  
         if (kthread || is_migration_disabled(p)) {
@@ -2793,20 +2797,178 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
  
         __do_set_cpus_allowed(p, new_mask, flags);
  
-       return affine_move_task(rq, p, &rf, dest_cpu, flags);
+       if (flags & SCA_USER)
+               user_mask = clear_user_cpus_ptr(p);
+
+       ret = affine_move_task(rq, p, rf, dest_cpu, flags);
+
+       kfree(user_mask);
+
+       return ret;
  
  out:
-       task_rq_unlock(rq, p, &rf);
+       task_rq_unlock(rq, p, rf);
  
         return ret;
  }
  
+/*
+ * Change a given task's CPU affinity. Migrate the thread to a
+ * proper CPU and schedule it away if the CPU it's executing on
+ * is removed from the allowed bitmask.
+ *
+ * NOTE: the caller must have a valid reference to the task, the
+ * task must not exit() & deallocate itself prematurely. The
+ * call is not atomic; no spinlocks may be held.
+ */
+static int __set_cpus_allowed_ptr(struct task_struct *p,
+                                 const struct cpumask *new_mask, u32 flags)
+{
+       struct rq_flags rf;
+       struct rq *rq;
+
+       rq = task_rq_lock(p, &rf);
+       return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
+}
+
  int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
  {
         return __set_cpus_allowed_ptr(p, new_mask, 0);
  }
  EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
  
+/*
+ * Change a given task's CPU affinity to the intersection of its current
+ * affinity mask and @subset_mask, writing the resulting mask to @new_mask
+ * and pointing @p->user_cpus_ptr to a copy of the old mask.
+ * If the resulting mask is empty, leave the affinity unchanged and return
+ * -EINVAL.
+ */
+static int restrict_cpus_allowed_ptr(struct task_struct *p,
+                                    struct cpumask *new_mask,
+                                    const struct cpumask *subset_mask)
+{
+       struct cpumask *user_mask = NULL;
+       struct rq_flags rf;
+       struct rq *rq;
+       int err;
+
+       if (!p->user_cpus_ptr) {
+               user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
+               if (!user_mask)
+                       return -ENOMEM;
+       }
+
+       rq = task_rq_lock(p, &rf);
+
+       /*
+        * Forcefully restricting the affinity of a deadline task is
+        * likely to cause problems, so fail here and let the caller
+        * noisily override the mask entirely.
+        */
+       if (task_has_dl_policy(p) && dl_bandwidth_enabled()) {
+               err = -EPERM;
+               goto err_unlock;
+       }
+
+       if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
+               err = -EINVAL;
+               goto err_unlock;
+       }
+
+       /*
+        * We're about to butcher the task affinity, so keep track of what
+        * the user asked for in case we're able to restore it later on.
+        */
+       if (user_mask) {
+               cpumask_copy(user_mask, p->cpus_ptr);
+               p->user_cpus_ptr = user_mask;
+       }
+
+       return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
+
+err_unlock:
+       task_rq_unlock(rq, p, &rf);
+       kfree(user_mask);
+       return err;
+}
+
+/*
+ * Restrict the CPU affinity of task @p so that it is a subset of
+ * task_cpu_possible_mask() and point @p->user_cpus_ptr to a copy of the
+ * old affinity mask. If the resulting mask is empty, we warn and walk
+ * up the cpuset hierarchy until we find a suitable mask.
+ */
+void force_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+       cpumask_var_t new_mask;
+       const struct cpumask *override_mask = task_cpu_possible_mask(p);
+
+       alloc_cpumask_var(&new_mask, GFP_KERNEL);
+
+       /*
+        * __migrate_task() can fail silently in the face of concurrent
+        * offlining of the chosen destination CPU, so take the hotplug
+        * lock to ensure that the migration succeeds.
+        */
+       cpus_read_lock();
+       if (!cpumask_available(new_mask))
+               goto out_set_mask;
+
+       if (!restrict_cpus_allowed_ptr(p, new_mask, override_mask))
+               goto out_free_mask;
+
+       /*
+        * We failed to find a valid subset of the affinity mask for the
+        * task, so override it based on its cpuset hierarchy.
+        */
+       cpuset_cpus_allowed(p, new_mask);
+       override_mask = new_mask;
+
+out_set_mask:
+       if (printk_ratelimit()) {
+               printk_deferred("Overriding affinity for process %d (%s) to CPUs %*pbl\n",
+                               task_pid_nr(p), p->comm,
+                               cpumask_pr_args(override_mask));
+       }
+
+       WARN_ON(set_cpus_allowed_ptr(p, override_mask));
+out_free_mask:
+       cpus_read_unlock();
+       free_cpumask_var(new_mask);
+}
+
+static int
+__sched_setaffinity(struct task_struct *p, const struct cpumask *mask);
+
+/*
+ * Restore the affinity of a task @p which was previously restricted by a
+ * call to force_compatible_cpus_allowed_ptr(). This will clear (and free)
+ * @p->user_cpus_ptr.
+ *
+ * It is the caller's responsibility to serialise this with any calls to
+ * force_compatible_cpus_allowed_ptr(@p).
+ */
+void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
+{
+       struct cpumask *user_mask = p->user_cpus_ptr;
+       unsigned long flags;
+
+       /*
+        * Try to restore the old affinity mask. If this fails, then
+        * we free the mask explicitly to avoid it being inherited across
+        * a subsequent fork().
+        */
+       if (!user_mask || !__sched_setaffinity(p, user_mask))
+               return;
+
+       raw_spin_lock_irqsave(&p->pi_lock, flags);
+       user_mask = clear_user_cpus_ptr(p);
+       raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+
+       kfree(user_mask);
+}
+
  void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
  {
  #ifdef CONFIG_SCHED_DEBUG
@@ -7629,7 +7791,7 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
         }
  #endif
  again:
-       retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK);
+       retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER);
         if (retval)
                 goto out_free_new_mask;
  
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index 5fa0290..e7e2bba 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2244,6 +2244,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq);
  #define SCA_CHECK              0x01
  #define SCA_MIGRATE_DISABLE    0x02
  #define SCA_MIGRATE_ENABLE     0x04
+#define SCA_USER               0x08
  
  #ifdef CONFIG_SMP
author	Will Deacon <will@kernel.org>
	Fri, 30 Jul 2021 11:24:35 +0000 (12:24 +0100)
committer	Peter Zijlstra <peterz@infradead.org>
	Fri, 20 Aug 2021 10:33:00 +0000 (12:33 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched/core.c		patch \| blob \| history
kernel/sched/sched.h		patch \| blob \| history