sched: Prevent balance_push() on remote runqueues
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 20ffcc0..b21a185 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -237,9 +237,30 @@ static DEFINE_MUTEX(sched_core_mutex);
 static atomic_t sched_core_count;
 static struct cpumask sched_core_mask;
 
+static void sched_core_lock(int cpu, unsigned long *flags)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+       int t, i = 0;
+
+       local_irq_save(*flags);
+       for_each_cpu(t, smt_mask)
+               raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
+}
+
+static void sched_core_unlock(int cpu, unsigned long *flags)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+       int t;
+
+       for_each_cpu(t, smt_mask)
+               raw_spin_unlock(&cpu_rq(t)->__lock);
+       local_irq_restore(*flags);
+}
+
 static void __sched_core_flip(bool enabled)
 {
-       int cpu, t, i;
+       unsigned long flags;
+       int cpu, t;
 
        cpus_read_lock();
 
@@ -250,19 +271,12 @@ static void __sched_core_flip(bool enabled)
        for_each_cpu(cpu, &sched_core_mask) {
                const struct cpumask *smt_mask = cpu_smt_mask(cpu);
 
-               i = 0;
-               local_irq_disable();
-               for_each_cpu(t, smt_mask) {
-                       /* supports up to SMT8 */
-                       raw_spin_lock_nested(&cpu_rq(t)->__lock, i++);
-               }
+               sched_core_lock(cpu, &flags);
 
                for_each_cpu(t, smt_mask)
                        cpu_rq(t)->core_enabled = enabled;
 
-               for_each_cpu(t, smt_mask)
-                       raw_spin_unlock(&cpu_rq(t)->__lock);
-               local_irq_enable();
+               sched_core_unlock(cpu, &flags);
 
                cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask);
        }
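Taken together, the two hunks above replace the open-coded "disable IRQs, take every SMT sibling's rq->__lock in cpumask order" sequence with a sched_core_lock()/sched_core_unlock() pair, so the hotplug paths added further down can reuse it. The single fixed acquisition order is what keeps concurrent callers on overlapping sibling sets from ABBA-deadlocking. Below is a minimal userspace sketch of that ordering discipline, with pthread mutexes standing in for rq->__lock and a fixed array standing in for cpu_smt_mask(); the local_irq_save()/restore() half has no userspace analogue, and every name (smt_lock_all() etc.) is invented for illustration.

/*
 * Userspace model of the sched_core_lock()/sched_core_unlock() pattern:
 * take one lock per SMT sibling in a single fixed (ascending) order, so
 * two contexts locking overlapping sibling sets cannot deadlock.
 * Build with: cc -pthread flip.c
 */
#include <pthread.h>
#include <stdio.h>

#define NR_SIBLINGS 4	/* models one SMT4 core */

static pthread_mutex_t sibling_lock[NR_SIBLINGS] = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
};
static int core_enabled[NR_SIBLINGS];

static void smt_lock_all(void)
{
	/* ascending index == the raw_spin_lock_nested(..., i++) order */
	for (int t = 0; t < NR_SIBLINGS; t++)
		pthread_mutex_lock(&sibling_lock[t]);
}

static void smt_unlock_all(void)
{
	for (int t = 0; t < NR_SIBLINGS; t++)
		pthread_mutex_unlock(&sibling_lock[t]);
}

/* models the body of __sched_core_flip() for a single core */
static void flip(int enabled)
{
	smt_lock_all();
	for (int t = 0; t < NR_SIBLINGS; t++)
		core_enabled[t] = enabled;	/* updated atomically w.r.t. lockers */
	smt_unlock_all();
}

int main(void)
{
	flip(1);
	printf("core_enabled[0] = %d\n", core_enabled[0]);
	flip(0);
	printf("core_enabled[0] = %d\n", core_enabled[0]);
	return 0;
}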
@@ -5736,35 +5750,109 @@ void queue_core_balance(struct rq *rq)
        queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance);
 }
 
-static inline void sched_core_cpu_starting(unsigned int cpu)
+static void sched_core_cpu_starting(unsigned int cpu)
 {
        const struct cpumask *smt_mask = cpu_smt_mask(cpu);
-       struct rq *rq, *core_rq = NULL;
-       int i;
+       struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+       unsigned long flags;
+       int t;
 
-       core_rq = cpu_rq(cpu)->core;
+       sched_core_lock(cpu, &flags);
 
-       if (!core_rq) {
-               for_each_cpu(i, smt_mask) {
-                       rq = cpu_rq(i);
-                       if (rq->core && rq->core == rq)
-                               core_rq = rq;
+       WARN_ON_ONCE(rq->core != rq);
+
+       /* if we're the first, we'll be our own leader */
+       if (cpumask_weight(smt_mask) == 1)
+               goto unlock;
+
+       /* find the leader */
+       for_each_cpu(t, smt_mask) {
+               if (t == cpu)
+                       continue;
+               rq = cpu_rq(t);
+               if (rq->core == rq) {
+                       core_rq = rq;
+                       break;
                }
+       }
 
-               if (!core_rq)
-                       core_rq = cpu_rq(cpu);
+       if (WARN_ON_ONCE(!core_rq)) /* whoopsie */
+               goto unlock;
 
-               for_each_cpu(i, smt_mask) {
-                       rq = cpu_rq(i);
+       /* install and validate core_rq */
+       for_each_cpu(t, smt_mask) {
+               rq = cpu_rq(t);
 
-                       WARN_ON_ONCE(rq->core && rq->core != core_rq);
+               if (t == cpu)
                        rq->core = core_rq;
-               }
+
+               WARN_ON_ONCE(rq->core != core_rq);
+       }
+
+unlock:
+       sched_core_unlock(cpu, &flags);
+}
+
+static void sched_core_cpu_deactivate(unsigned int cpu)
+{
+       const struct cpumask *smt_mask = cpu_smt_mask(cpu);
+       struct rq *rq = cpu_rq(cpu), *core_rq = NULL;
+       unsigned long flags;
+       int t;
+
+       sched_core_lock(cpu, &flags);
+
+       /* if we're the last man standing, nothing to do */
+       if (cpumask_weight(smt_mask) == 1) {
+               WARN_ON_ONCE(rq->core != rq);
+               goto unlock;
        }
+
+       /* if we're not the leader, nothing to do */
+       if (rq->core != rq)
+               goto unlock;
+
+       /* find a new leader */
+       for_each_cpu(t, smt_mask) {
+               if (t == cpu)
+                       continue;
+               core_rq = cpu_rq(t);
+               break;
+       }
+
+       if (WARN_ON_ONCE(!core_rq)) /* impossible */
+               goto unlock;
+
+       /* copy the shared state to the new leader */
+       core_rq->core_task_seq      = rq->core_task_seq;
+       core_rq->core_pick_seq      = rq->core_pick_seq;
+       core_rq->core_cookie        = rq->core_cookie;
+       core_rq->core_forceidle     = rq->core_forceidle;
+       core_rq->core_forceidle_seq = rq->core_forceidle_seq;
+
+       /* install new leader */
+       for_each_cpu(t, smt_mask) {
+               rq = cpu_rq(t);
+               rq->core = core_rq;
+       }
+
+unlock:
+       sched_core_unlock(cpu, &flags);
+}
+
+static inline void sched_core_cpu_dying(unsigned int cpu)
+{
+       struct rq *rq = cpu_rq(cpu);
+
+       if (rq->core != rq)
+               rq->core = rq;
 }
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline void sched_core_cpu_starting(unsigned int cpu) {}
+static inline void sched_core_cpu_deactivate(unsigned int cpu) {}
+static inline void sched_core_cpu_dying(unsigned int cpu) {}
 
 static struct task_struct *
 pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
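The new hotplug hooks maintain a single invariant: every online sibling's rq->core points at one "leader" runqueue that owns the core-wide scheduling state (core_task_seq, core_cookie, ...), a CPU coming up adopts the existing leader, and a departing leader copies that state to a survivor before everyone repoints. A toy userspace model of the handoff, under loudly invented names (toy_rq, cpu_starting(), cpu_deactivate()) and with one core_task_seq field standing in for the full set of copied fields:

/*
 * Userspace sketch of the core_rq leader handoff performed by
 * sched_core_cpu_starting()/sched_core_cpu_deactivate().  No locking
 * here; the kernel does all of this under sched_core_lock().
 */
#include <assert.h>
#include <stdio.h>

#define NR_SIBLINGS 4

struct toy_rq {
	struct toy_rq *core;		/* models rq->core */
	unsigned long core_task_seq;	/* models the core-wide state */
	int online;
};

static struct toy_rq rqs[NR_SIBLINGS];

/* models sched_init(): every rq starts out as its own leader */
static void init_rqs(void)
{
	for (int t = 0; t < NR_SIBLINGS; t++)
		rqs[t] = (struct toy_rq){ .core = &rqs[t] };
}

/* models sched_core_cpu_starting(): adopt the current leader, if any */
static void cpu_starting(int cpu)
{
	rqs[cpu].online = 1;
	for (int t = 0; t < NR_SIBLINGS; t++) {
		if (t != cpu && rqs[t].online && rqs[t].core == &rqs[t]) {
			rqs[cpu].core = &rqs[t];
			return;
		}
	}
	/* first sibling up: remain our own leader */
}

/* models sched_core_cpu_deactivate(): hand leadership to a survivor */
static void cpu_deactivate(int cpu)
{
	struct toy_rq *rq = &rqs[cpu], *new_leader = NULL;

	rq->online = 0;
	if (rq->core != rq)
		return;			/* not the leader: nothing to do */

	for (int t = 0; t < NR_SIBLINGS; t++) {
		if (rqs[t].online) {
			new_leader = &rqs[t];
			break;
		}
	}
	if (!new_leader)
		return;			/* last sibling standing */

	/* copy the shared state, then install the new leader everywhere */
	new_leader->core_task_seq = rq->core_task_seq;
	for (int t = 0; t < NR_SIBLINGS; t++)
		rqs[t].core = new_leader;
}

int main(void)
{
	init_rqs();
	for (int t = 0; t < NR_SIBLINGS; t++)
		cpu_starting(t);	/* all adopt rqs[0] as leader */
	rqs[0].core_task_seq = 42;
	cpu_deactivate(0);		/* leader leaves; rqs[1] takes over */
	assert(rqs[3].core == &rqs[1] && rqs[1].core_task_seq == 42);
	printf("new leader seq = %lu\n", rqs[1].core_task_seq);
	return 0;
}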
@@ -8435,7 +8523,6 @@ static void balance_push(struct rq *rq)
        struct task_struct *push_task = rq->curr;
 
        lockdep_assert_rq_held(rq);
-       SCHED_WARN_ON(rq->cpu != smp_processor_id());
 
        /*
         * Ensure the thing is persistent until balance_push_set(.on = false);
@@ -8443,9 +8530,10 @@ static void balance_push(struct rq *rq)
        rq->balance_callback = &balance_push_callback;
 
        /*
-        * Only active while going offline.
+        * Only active while going offline and when invoked on the outgoing
+        * CPU.
         */
-       if (!cpu_dying(rq->cpu))
+       if (!cpu_dying(rq->cpu) || rq != this_rq())
                return;
 
        /*
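These two hunks are the fix named in the subject line. balance_push() is installed as the runqueue's balance callback while a CPU goes down, but the callback chain can also be run from paths such as sched_setscheduler() and rt_mutex_setprio() that hold a *remote* runqueue's lock, so the cross-CPU SCHED_WARN_ON() could fire on a perfectly legal call. The fix drops the assertion and makes the function a no-op unless it executes on the dying runqueue's own CPU. A small pthread model of that "only act on the owning CPU" guard, with thread identity standing in for smp_processor_id() and every name invented:

/*
 * Userspace model of the balance_push() guard: a callback that can be
 * invoked against any "runqueue" must no-op unless the runqueue is
 * going offline AND the caller is the CPU that owns it.
 * Build with: cc -pthread guard.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 2

struct toy_rq {
	int cpu;
	bool dying;
};

static struct toy_rq toy_rqs[NR_CPUS] = { { .cpu = 0 }, { .cpu = 1 } };
static _Thread_local struct toy_rq *this_toy_rq;	/* this thread's own rq */

static void toy_balance_push(struct toy_rq *rq)
{
	/* only active while going offline and when invoked on the owner */
	if (!rq->dying || rq != this_toy_rq) {
		printf("cpu%d: callback for rq%d skipped\n",
		       this_toy_rq->cpu, rq->cpu);
		return;
	}
	printf("cpu%d: pushing tasks off dying cpu%d\n", rq->cpu, rq->cpu);
}

/* each "CPU" ends up running the callback against both runqueues */
static void *cpu_thread(void *arg)
{
	this_toy_rq = arg;
	toy_balance_push(&toy_rqs[0]);
	toy_balance_push(&toy_rqs[1]);
	return NULL;
}

int main(void)
{
	pthread_t th[NR_CPUS];

	toy_rqs[1].dying = true;	/* CPU 1 is going offline */
	for (int i = 0; i < NR_CPUS; i++)
		pthread_create(&th[i], NULL, cpu_thread, &toy_rqs[i]);
	for (int i = 0; i < NR_CPUS; i++)
		pthread_join(th[i], NULL);
	return 0;
}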
@@ -8707,6 +8795,8 @@ int sched_cpu_deactivate(unsigned int cpu)
         */
        if (cpumask_weight(cpu_smt_mask(cpu)) == 2)
                static_branch_dec_cpuslocked(&sched_smt_present);
+
+       sched_core_cpu_deactivate(cpu);
 #endif
 
        if (!sched_smp_initialized)
@@ -8811,6 +8901,7 @@ int sched_cpu_dying(unsigned int cpu)
        calc_load_migrate(rq);
        update_max_interval();
        hrtick_clear(rq);
+       sched_core_cpu_dying(cpu);
        return 0;
 }
 #endif
@@ -9022,7 +9113,7 @@ void __init sched_init(void)
                atomic_set(&rq->nr_iowait, 0);
 
 #ifdef CONFIG_SCHED_CORE
-               rq->core = NULL;
+               rq->core = rq;
                rq->core_pick = NULL;
                rq->core_enabled = 0;
                rq->core_tree = RB_ROOT;
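Finally, initializing rq->core to the runqueue itself instead of NULL makes every runqueue its own leader from boot, which is what lets sched_core_cpu_starting() above WARN on rq->core != rq and lets the rest of the core-scheduling code chase rq->core without NULL checks. The same self-referential sentinel idiom in isolation (illustrative names only):

/*
 * A self-pointing "leader" field is never NULL, so consumers can
 * dereference it unconditionally, exactly like rq->core after this
 * change to sched_init().
 */
#include <assert.h>

struct node {
	struct node *leader;	/* like rq->core: always valid */
};

int main(void)
{
	struct node n = { .leader = &n };	/* its own leader, as in sched_init() */
	assert(n.leader->leader == &n);		/* safe without a NULL check */
	return 0;
}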