sched: Prevent balance_push() on remote runqueues
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2d9ff40..b21a185 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -237,9 +237,30 @@ static DEFINE_MUTEX(sched_core_mutex);
 static atomic_t sched_core_count;
 static struct cpumask sched_core_mask;
 
+static void sched_core_lock(int cpu, unsigned long *flags)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+       int t, i = 0;
+
+       local_irq_save(*flags);
+       for_each_cpu(t, smt_mask)
+               raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+}
+
+static void sched_core_unlock(int cpu, unsigned long *flags)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+       int t;
+
+       for_each_cpu(t, smt_mask)
+               raw_spin_unlock(&cpu_rq(t)->__lock);
+       local_irq_restore(*flags);
+}
+
 static void __sched_core_flip(bool enabled)
 {
-       int cpu, t, i;
+       unsigned long flags;
+       int cpu, t;
 
        cpus_read_lock();
 
@@ -250,19 +271,12 @@ static void __sched_core_flip(bool enabled)
        for_each_cpu(cpu, &sched_core_mask) {
                const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 
-               i = 0;
-               local_irq_disable();
-               for_each_cpu(t, smt_mask) {
-                       /* supports up to SMT8 */
-                       raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
-               }
+               sched_core_lock(cpu, &flags);
 
                for_each_cpu(t, smt_mask)
                        cpu_rq(t)->core_enabled = enabled;
 
-               for_each_cpu(t, smt_mask)
-                       raw_spin_unlock(&cpu_rq(t)->__lock);
-               local_irq_enable();
+               sched_core_unlock(cpu, &flags);
 
                cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
        }
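
The two new helpers take and release the rq locks of all SMT siblings in cpumask order, handing raw_spin_lock_nested() an increasing subclass so that lockdep accepts several locks of the same class being held at once; the dropped "supports up to SMT8" comment referred to lockdep's limit of eight subclasses (MAX_LOCKDEP_SUBCLASSES). Because every path acquires the sibling locks in the same ascending order, lock-order inversion between concurrent callers is impossible. The following userspace sketch of that fixed-order pattern uses pthreads and an assumed SMT4 core; it models the idea only and is not the kernel API:

    #include <pthread.h>
    #include <stdbool.h>

    #define NR_SIBLINGS 4    /* assumed SMT4 core */

    static pthread_mutex_t sibling_lock[NR_SIBLINGS] = {
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
    };
    static bool core_enabled[NR_SIBLINGS];

    /* Flip the per-sibling flag atomically with respect to anyone
     * else who takes the sibling locks in the same ascending order. */
    static void core_flip(bool enabled)
    {
            int t;

            for (t = 0; t < NR_SIBLINGS; t++)
                    pthread_mutex_lock(&sibling_lock[t]);
            for (t = 0; t < NR_SIBLINGS; t++)
                    core_enabled[t] = enabled;
            for (t = 0; t < NR_SIBLINGS; t++)
                    pthread_mutex_unlock(&sibling_lock[t]);
    }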
@@ -1981,12 +1995,18 @@ void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
        dequeue_task(rq, p, flags);
 }
 
-/*
- * __normal_prio - return the priority that is based on the static prio
- */
-static inline int __normal_prio(struct task_struct *p)
+static inline int __normal_prio(int policy, int rt_prio, int nice)
 {
-       return p->static_prio;
+       int prio;
+
+       if (dl_policy(policy))
+               prio = MAX_DL_PRIO - 1;
+       else if (rt_policy(policy))
+               prio = MAX_RT_PRIO - 1 - rt_prio;
+       else
+               prio = NICE_TO_PRIO(nice);
+
+       return prio;
 }
 
 /*
@@ -1998,15 +2018,7 @@ static inline int __normal_prio(struct task_struct *p)
  */
 static inline int normal_prio(struct task_struct *p)
 {
-       int prio;
-
-       if (task_has_dl_policy(p))
-               prio = MAX_DL_PRIO-1;
-       else if (task_has_rt_policy(p))
-               prio = MAX_RT_PRIO-1 - p->rt_priority;
-       else
-               prio = __normal_prio(p);
-       return prio;
+       return __normal_prio(p->policy, p->rt_priority, PRIO_TO_NICE(p->static_prio));
 }
 
 /*
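
__normal_prio() now works purely from a policy, an RT priority and a nice value, so callers can evaluate a priority before the values are committed to the task (used further down in __sched_setscheduler()). All three policies map onto one kernel priority scale where numerically lower means more urgent: deadline tasks get -1, RT priorities 1..99 map to 98..0, and nice -20..19 maps to 100..139. A standalone check of that mapping; the constants are mirrored from kernel headers and the function is a model, not the kernel one:

    #include <assert.h>

    #define SCHED_NORMAL    0
    #define SCHED_FIFO      1
    #define SCHED_RR        2
    #define SCHED_DEADLINE  6

    #define MAX_DL_PRIO     0
    #define MAX_RT_PRIO     100
    #define DEFAULT_PRIO    120    /* MAX_RT_PRIO + NICE_WIDTH / 2 */
    #define NICE_TO_PRIO(n) ((n) + DEFAULT_PRIO)

    static int normal_prio_model(int policy, int rt_prio, int nice)
    {
            if (policy == SCHED_DEADLINE)
                    return MAX_DL_PRIO - 1;           /* always -1 */
            if (policy == SCHED_FIFO || policy == SCHED_RR)
                    return MAX_RT_PRIO - 1 - rt_prio; /* 1..99 -> 98..0 */
            return NICE_TO_PRIO(nice);                /* -20..19 -> 100..139 */
    }

    int main(void)
    {
            assert(normal_prio_model(SCHED_DEADLINE, 0, 0) == -1);
            assert(normal_prio_model(SCHED_FIFO, 99, 0) == 0);    /* top RT */
            assert(normal_prio_model(SCHED_RR, 1, 0) == 98);      /* low RT */
            assert(normal_prio_model(SCHED_NORMAL, 0, 0) == 120); /* nice 0 */
            return 0;
    }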
@@ -4099,7 +4111,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
                } else if (PRIO_TO_NICE(p->static_prio) < 0)
                        p->static_prio = NICE_TO_PRIO(0);
 
-               p->prio = p->normal_prio = __normal_prio(p);
+               p->prio = p->normal_prio = p->static_prio;
                set_load_weight(p, false);
 
                /*
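
The sched_fork() hunk replaces __normal_prio(p), which used to return p->static_prio directly, with a plain p->static_prio assignment. The two are still equivalent: this code sits in the reset-on-fork path, which has just forced the policy back to SCHED_NORMAL, and for SCHED_NORMAL the new __normal_prio() reduces to NICE_TO_PRIO(PRIO_TO_NICE(p->static_prio)), an identity. Extending the model above (PRIO_TO_NICE() mirrored from the kernel):

    #define PRIO_TO_NICE(p) ((p) - DEFAULT_PRIO)

    /* NICE_TO_PRIO(PRIO_TO_NICE(x)) == x, so the fork-time shortcut is
     * exactly what __normal_prio() would have computed. */
    static void check_fork_shortcut(int static_prio)
    {
            assert(normal_prio_model(SCHED_NORMAL, 0,
                                     PRIO_TO_NICE(static_prio)) == static_prio);
    }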
@@ -5738,35 +5750,109 @@ void queue_core_balance(struct rq *rq)
        queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }
 
-static inline void sched_core_cpu_starting(unsigned int cpu)
+static void sched_core_cpu_starting(unsigned int cpu)
 {
        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
-       struct rq *rq, *core_rq = NULL;
-       int i;
+       struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+       unsigned long flags;
+       int t;
+
+       sched_core_lock(cpu, &flags);
 
-       core_rq = cpu_rq(cpu)->core;
+       WARN_ON_ONCE(rq->core != rq);
 
-       if (!core_rq) {
-               for_each_cpu(i, smt_mask) {
-                       rq = cpu_rq(i);
-                       if (rq->core && rq->core == rq)
-                               core_rq = rq;
+       /* if we're the first, we'll be our own leader */
+       if (cpumask_weight(smt_mask) == 1)
+               goto unlock;
+
+       /* find the leader */
+       for_each_cpu(t, smt_mask) {
+               if (t == cpu)
+                       continue;
+               rq = cpu_rq(t);
+               if (rq->core == rq) {
+                       core_rq = rq;
+                       break;
                }
+       }
 
-               if (!core_rq)
-                       core_rq = cpu_rq(cpu);
+       if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
+               goto unlock;
 
-               for_each_cpu(i, smt_mask) {
-                       rq = cpu_rq(i);
+       /* install and validate core_rq */
+       for_each_cpu(t, smt_mask) {
+               rq = cpu_rq(t);
 
-                       WARN_ON_ONCE(rq->core && rq->core != core_rq);
+               if (t == cpu)
                        rq->core = core_rq;
-               }
+
+               WARN_ON_ONCE(rq->core != core_rq);
        }
+
+unlock:
+       sched_core_unlock(cpu, &flags);
 }
+
+static void sched_core_cpu_deactivate(unsigned int cpu)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+       struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+       unsigned long flags;
+       int t;
+
+       sched_core_lock(cpu, &flags);
+
+       /* if we're the last man standing, nothing to do */
+       if (cpumask_weight(smt_mask) == 1) {
+               WARN_ON_ONCE(rq->core != rq);
+               goto unlock;
+       }
+
+       /* if we're not the leader, nothing to do */
+       if (rq->core != rq)
+               goto unlock;
+
+       /* find a new leader */
+       for_each_cpu(t, smt_mask) {
+               if (t == cpu)
+                       continue;
+               core_rq = cpu_rq(t);
+               break;
+       }
+
+       if (WARN_ON_ONCE(!core_rq)) /* impossible */
+               goto unlock;
+
+       /* copy the shared state to the new leader */
+       core_rq->core_task_seq      = rq->core_task_seq;
+       core_rq->core_pick_seq      = rq->core_pick_seq;
+       core_rq->core_cookie        = rq->core_cookie;
+       core_rq->core_forceidle     = rq->core_forceidle;
+       core_rq->core_forceidle_seq = rq->core_forceidle_seq;
+
+       /* install new leader */
+       for_each_cpu(t, smt_mask) {
+               rq = cpu_rq(t);
+               rq->core = core_rq;
+       }
+
+unlock:
+       sched_core_unlock(cpu, &flags);
+}
+
+static inline void sched_core_cpu_dying(unsigned int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+       if (rq->core != rq)
+               rq->core = rq;
+}
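+
+Taken together, the three hooks maintain one invariant across hotplug: every rq's ->core points at the single live leader rq of its SMT core. The first sibling up leads itself, later siblings adopt the existing leader, and when the leader is deactivated it hands the core-wide state to a surviving sibling before sched_core_cpu_dying() finally points the dead CPU back at itself. A compact userspace model of that lifecycle; array indices stand in for CPUs, core_task_seq stands in for all of the copied fields, and none of this is the kernel code itself:

    #include <assert.h>
    #include <stdbool.h>

    #define NR_SIBLINGS 4

    struct rq_model {
            bool online;
            int core;                     /* index of the leader rq */
            unsigned long core_task_seq;  /* stands in for the shared state */
    };
    static struct rq_model rqm[NR_SIBLINGS];

    static void cpu_starting(int cpu)
    {
            int t, leader = cpu;          /* first one up leads itself */

            for (t = 0; t < NR_SIBLINGS; t++)
                    if (t != cpu && rqm[t].online && rqm[t].core == t)
                            leader = t;   /* adopt the existing leader */
            rqm[cpu].online = true;
            rqm[cpu].core = leader;
    }

    static void cpu_deactivate(int cpu)
    {
            int t, new_leader = -1;

            if (rqm[cpu].core != cpu)
                    goto offline;         /* not the leader, nothing to do */
            for (t = 0; t < NR_SIBLINGS; t++)
                    if (t != cpu && rqm[t].online) {
                            new_leader = t;
                            break;
                    }
            if (new_leader < 0)
                    goto offline;         /* last sibling standing */
            /* copy the shared state, then repoint every sibling */
            rqm[new_leader].core_task_seq = rqm[cpu].core_task_seq;
            for (t = 0; t < NR_SIBLINGS; t++)
                    if (rqm[t].online)
                            rqm[t].core = new_leader;
    offline:
            rqm[cpu].online = false;
            rqm[cpu].core = cpu;          /* sched_core_cpu_dying() */
    }

    int main(void)
    {
            cpu_starting(0);                      /* 0 leads itself */
            cpu_starting(1);                      /* 1 adopts 0 */
            assert(rqm[0].core == 0 && rqm[1].core == 0);
            cpu_deactivate(0);                    /* leadership moves to 1 */
            assert(rqm[1].core == 1 && rqm[0].core == 0);
            return 0;
    }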
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline void sched_core_cpu_starting(unsigned int cpu) {}
+static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
+static inline void sched_core_cpu_dying(unsigned int cpu) {}
 
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
@@ -6341,6 +6427,18 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
 }
 EXPORT_SYMBOL(default_wake_function);
 
+static void __setscheduler_prio(struct task_struct *p, int prio)
+{
+       if (dl_prio(prio))
+               p->sched_class = &dl_sched_class;
+       else if (rt_prio(prio))
+               p->sched_class = &rt_sched_class;
+       else
+               p->sched_class = &fair_sched_class;
+
+       p->prio = prio;
+}
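
__setscheduler_prio() factors out the prio-to-class mapping that rt_mutex_setprio() and the old __setscheduler() each open-coded. On the scale described earlier the tests must run in this order: every deadline priority is negative and therefore also below MAX_RT_PRIO, so dl_prio() has to be checked before rt_prio(). In terms of the model constants:

    /* Model of the dispatch in __setscheduler_prio(); dl before rt,
     * because a prio < 0 would also pass the rt test. */
    static const char *sched_class_model(int prio)
    {
            if (prio < MAX_DL_PRIO)       /* dl_prio(): prio < 0 */
                    return "dl_sched_class";
            if (prio < MAX_RT_PRIO)       /* rt_prio(): 0..99 */
                    return "rt_sched_class";
            return "fair_sched_class";    /* 100..139 */
    }

The rt_mutex_setprio() hunk below can then delete its three per-branch sched_class assignments in favor of a single __setscheduler_prio(p, prio) call.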
+
 #ifdef CONFIG_RT_MUTEXES
 
 static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
@@ -6456,22 +6554,19 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
                } else {
                        p->dl.pi_se = &p->dl;
                }
-               p->sched_class = &dl_sched_class;
        } else if (rt_prio(prio)) {
                if (dl_prio(oldprio))
                        p->dl.pi_se = &p->dl;
                if (oldprio < prio)
                        queue_flag |= ENQUEUE_HEAD;
-               p->sched_class = &rt_sched_class;
        } else {
                if (dl_prio(oldprio))
                        p->dl.pi_se = &p->dl;
                if (rt_prio(oldprio))
                        p->rt.timeout = 0;
-               p->sched_class = &fair_sched_class;
        }
 
-       p->prio = prio;
+       __setscheduler_prio(p, prio);
 
        if (queued)
                enqueue_task(rq, p, queue_flag);
@@ -6824,35 +6919,6 @@ static void __setscheduler_params(struct task_struct *p,
        set_load_weight(p, true);
 }
 
-/* Actually do priority change: must hold pi & rq lock. */
-static void __setscheduler(struct rq *rq, struct task_struct *p,
-                          const struct sched_attr *attr, bool keep_boost)
-{
-       /*
-        * If params can't change scheduling class changes aren't allowed
-        * either.
-        */
-       if (attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)
-               return;
-
-       __setscheduler_params(p, attr);
-
-       /*
-        * Keep a potential priority boosting if called from
-        * sched_setscheduler().
-        */
-       p->prio = normal_prio(p);
-       if (keep_boost)
-               p->prio = rt_effective_prio(p, p->prio);
-
-       if (dl_prio(p->prio))
-               p->sched_class = &dl_sched_class;
-       else if (rt_prio(p->prio))
-               p->sched_class = &rt_sched_class;
-       else
-               p->sched_class = &fair_sched_class;
-}
-
 /*
  * Check the target process has a UID that matches the current process's:
  */
@@ -6873,10 +6939,8 @@ static int __sched_setscheduler(struct task_struct *p,
                                const struct sched_attr *attr,
                                bool user, bool pi)
 {
-       int newprio = dl_policy(attr->sched_policy) ? MAX_DL_PRIO - 1 :
-                     MAX_RT_PRIO - 1 - attr->sched_priority;
-       int retval, oldprio, oldpolicy = -1, queued, running;
-       int new_effective_prio, policy = attr->sched_policy;
+       int oldpolicy = -1, policy = attr->sched_policy;
+       int retval, oldprio, newprio, queued, running;
        const struct sched_class *prev_class;
        struct callback_head *head;
        struct rq_flags rf;
@@ -7074,6 +7138,7 @@ change:
        p->sched_reset_on_fork = reset_on_fork;
        oldprio = p->prio;
 
+       newprio = __normal_prio(policy, attr->sched_priority, attr->sched_nice);
        if (pi) {
                /*
                 * Take priority boosted tasks into account. If the new
@@ -7082,8 +7147,8 @@ change:
                 * the runqueue. This will be done when the task deboost
                 * itself.
                 */
-               new_effective_prio = rt_effective_prio(p, newprio);
-               if (new_effective_prio == oldprio)
+               newprio = rt_effective_prio(p, newprio);
+               if (newprio == oldprio)
                        queue_flags &= ~DEQUEUE_MOVE;
        }
 
@@ -7096,7 +7161,10 @@ change:
 
        prev_class = p->sched_class;
 
-       __setscheduler(rq, p, attr, pi);
+       if (!(attr->sched_flags & SCHED_FLAG_KEEP_PARAMS)) {
+               __setscheduler_params(p, attr);
+               __setscheduler_prio(p, newprio);
+       }
        __setscheduler_uclamp(p, attr);
 
        if (queued) {
@@ -8455,7 +8523,6 @@ static void balance_push(struct rq *rq)
        struct task_struct *push_task = rq->curr;
 
        lockdep_assert_rq_held(rq);
-       SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
        /*
         * Ensure the thing is persistent until balance_push_set(.on = false);
@@ -8463,9 +8530,10 @@ static void balance_push(struct rq *rq)
        rq->balance_callback = &balance_push_callback;
 
        /*
-        * Only active while going offline.
+        * Only active while going offline and when invoked on the outgoing
+        * CPU.
         */
-       if (!cpu_dying(rq->cpu))
+       if (!cpu_dying(rq->cpu) || rq != this_rq())
                return;
 
        /*
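
This hunk is the fix named in the subject line. rq->balance_callback is not only run from the dying CPU's own context: __sched_setscheduler() and rt_mutex_setprio() flush the callback from whichever CPU they run on, so when they operate on a task sitting on an outgoing CPU, balance_push() is invoked with smp_processor_id() != rq->cpu. The old SCHED_WARN_ON() then triggered, and pushing rq->curr of a remote runqueue would be wrong in any case, so the function now also bails out unless it runs on the outgoing CPU itself. The decision, as a small model with cpu_dying() and this_rq() folded into plain parameters:

    #include <assert.h>
    #include <stdbool.h>

    /* The push may only proceed on the outgoing CPU, while it is dying. */
    static bool balance_push_proceeds(int rq_cpu, int this_cpu, bool dying)
    {
            if (!dying || rq_cpu != this_cpu)
                    return false;   /* remote caller, or CPU staying up */
            return true;
    }

    int main(void)
    {
            assert(!balance_push_proceeds(3, 0, true));   /* remote flush */
            assert(!balance_push_proceeds(3, 3, false));  /* not offlining */
            assert(balance_push_proceeds(3, 3, true));    /* outgoing CPU */
            return 0;
    }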
@@ -8727,6 +8795,8 @@ int sched_cpu_deactivate(unsigned int cpu)
         */
        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
                static_branch_dec_cpuslocked(&sched_smt_present);
+
+       sched_core_cpu_deactivate(cpu);
 #endif
 
        if (!sched_smp_initialized)
@@ -8831,6 +8901,7 @@ int sched_cpu_dying(unsigned int cpu)
        calc_load_migrate(rq);
        update_max_interval();
        hrtick_clear(rq);
+       sched_core_cpu_dying(cpu);
        return 0;
 }
 #endif
@@ -9042,7 +9113,7 @@ void __init sched_init(void)
                atomic_set(&rq->nr_iowait, 0);
 
 #ifdef CONFIG_SCHED_CORE
-               rq->core = NULL;
+               rq->core = rq;
                rq->core_pick = NULL;
                rq->core_enabled = 0;
                rq->core_tree = RB_ROOT;