sched/pelt: Check that *_avg are null when *_sum are

[linux-2.6-microblaze.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 20aa234..198514d 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -268,33 +268,11 @@ const struct sched_class fair_sched_class;
   */
  
  #ifdef CONFIG_FAIR_GROUP_SCHED
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-       SCHED_WARN_ON(!entity_is_task(se));
-       return container_of(se, struct task_struct, se);
-}
  
  /* Walk up scheduling entities hierarchy */
  #define for_each_sched_entity(se) \
                 for (; se; se = se->parent)
  
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-       return p->se.cfs_rq;
-}
-
-/* runqueue on which this entity is (to be) queued */
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-       return se->cfs_rq;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-       return grp->my_q;
-}
-
  static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
  {
         if (!path)
@@ -455,33 +433,9 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse)
  
  #else  /* !CONFIG_FAIR_GROUP_SCHED */
  
-static inline struct task_struct *task_of(struct sched_entity *se)
-{
-       return container_of(se, struct task_struct, se);
-}
-
  #define for_each_sched_entity(se) \
                 for (; se; se = NULL)
  
-static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
-{
-       return &task_rq(p)->cfs;
-}
-
-static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
-{
-       struct task_struct *p = task_of(se);
-       struct rq *rq = task_rq(p);
-
-       return &rq->cfs;
-}
-
-/* runqueue "owned" by this group */
-static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
-{
-       return NULL;
-}
-
  static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
  {
         if (path)
@@ -1107,7 +1061,7 @@ struct numa_group {
  static struct numa_group *deref_task_numa_group(struct task_struct *p)
  {
         return rcu_dereference_check(p->numa_group, p == current ||
-               (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+               (lockdep_is_held(__rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu)));
  }
  
  static struct numa_group *deref_curr_numa_group(struct task_struct *p)
@@ -3139,7 +3093,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->load.weight
   *   ge->load.weight = -----------------------------               (1)
- *                       \Sum grq->load.weight
+ *                       \Sum grq->load.weight
   *
   * Now, because computing that sum is prohibitively expensive to compute (been
   * there, done that) we approximate it with this average stuff. The average
@@ -3153,7 +3107,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->avg.load_avg
   *   ge->load.weight = ------------------------------              (3)
- *                             tg->load_avg
+ *                             tg->load_avg
   *
   * Where: tg->load_avg ~= \Sum grq->avg.load_avg
   *
@@ -3169,7 +3123,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->load.weight
   *   ge->load.weight = ----------------------------- = tg->weight   (4)
- *                         grp->load.weight
+ *                         grp->load.weight
   *
   * That is, the sum collapses because all other CPUs are idle; the UP scenario.
   *
@@ -3188,7 +3142,7 @@ void reweight_task(struct task_struct *p, int prio)
   *
   *                     tg->weight * grq->load.weight
   *   ge->load.weight = -----------------------------              (6)
- *                             tg_load_avg'
+ *                             tg_load_avg'
   *
   * Where:
   *
@@ -3499,10 +3453,9 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
  static inline void
  update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
  {
-       long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
+       long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
         unsigned long load_avg;
         u64 load_sum = 0;
-       s64 delta_sum;
         u32 divider;
  
         if (!runnable_sum)
@@ -3549,13 +3502,13 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
         load_sum = (s64)se_weight(se) * runnable_sum;
         load_avg = div_s64(load_sum, divider);
  
-       delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
-       delta_avg = load_avg - se->avg.load_avg;
+       delta = load_avg - se->avg.load_avg;
  
         se->avg.load_sum = runnable_sum;
         se->avg.load_avg = load_avg;
-       add_positive(&cfs_rq->avg.load_avg, delta_avg);
-       add_positive(&cfs_rq->avg.load_sum, delta_sum);
+
+       add_positive(&cfs_rq->avg.load_avg, delta);
+       cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
  }
  
  static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3766,11 +3719,17 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
   */
  static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
+       /*
+        * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
+        * See ___update_load_avg() for details.
+        */
+       u32 divider = get_pelt_divider(&cfs_rq->avg);
+
         dequeue_load_avg(cfs_rq, se);
         sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
-       sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
+       cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
         sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
-       sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
+       cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
  
         add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
  
@@ -4419,6 +4378,8 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
  static void
  set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
  {
+       clear_buddies(cfs_rq, se);
+
         /* 'current' is not kept within the tree. */
         if (se->on_rq) {
                 /*
@@ -4478,7 +4439,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
          * Avoid running the skip buddy, if running something else can
          * be done without getting too unfair.
          */
-       if (cfs_rq->skip == se) {
+       if (cfs_rq->skip && cfs_rq->skip == se) {
                 struct sched_entity *second;
  
                 if (se == curr) {
@@ -4505,8 +4466,6 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
                 se = cfs_rq->last;
         }
  
-       clear_buddies(cfs_rq, se);
-
         return se;
  }
  
@@ -5328,7 +5287,7 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq)
  {
         struct task_group *tg;
  
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
  
         rcu_read_lock();
         list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -5347,7 +5306,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
  {
         struct task_group *tg;
  
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
  
         rcu_read_lock();
         list_for_each_entry_rcu(tg, &task_groups, list) {
@@ -5935,11 +5894,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
  
         /* Traverse only the allowed CPUs */
         for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+               struct rq *rq = cpu_rq(i);
+
+               if (!sched_core_cookie_match(rq, p))
+                       continue;
+
                 if (sched_idle_cpu(i))
                         return i;
  
                 if (available_idle_cpu(i)) {
-                       struct rq *rq = cpu_rq(i);
                         struct cpuidle_state *idle = idle_get_state(rq);
                         if (idle && idle->exit_latency < min_exit_latency) {
                                 /*
@@ -6025,9 +5988,10 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
         return new_cpu;
  }
  
-static inline int __select_idle_cpu(int cpu)
+static inline int __select_idle_cpu(int cpu, struct task_struct *p)
  {
-       if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+       if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
+           sched_cpu_cookie_match(cpu_rq(cpu), p))
                 return cpu;
  
         return -1;
@@ -6097,7 +6061,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
         int cpu;
  
         if (!static_branch_likely(&sched_smt_present))
-               return __select_idle_cpu(core);
+               return __select_idle_cpu(core, p);
  
         for_each_cpu(cpu, cpu_smt_mask(core)) {
                 if (!available_idle_cpu(cpu)) {
@@ -6153,7 +6117,7 @@ static inline bool test_idle_cores(int cpu, bool def)
  
  static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
  {
-       return __select_idle_cpu(core);
+       return __select_idle_cpu(core, p);
  }
  
  static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
@@ -6210,14 +6174,14 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
                 } else {
                         if (!--nr)
                                 return -1;
-                       idle_cpu = __select_idle_cpu(cpu);
+                       idle_cpu = __select_idle_cpu(cpu, p);
                         if ((unsigned int)idle_cpu < nr_cpumask_bits)
                                 break;
                 }
         }
  
         if (has_idle_core)
-               set_idle_cores(this, false);
+               set_idle_cores(target, false);
  
         if (sched_feat(SIS_PROP) && !has_idle_core) {
                 time = cpu_clock(this) - time;
@@ -6288,6 +6252,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
                 task_util = uclamp_task_util(p);
         }
  
+       /*
+        * per-cpu select_idle_mask usage
+        */
+       lockdep_assert_irqs_disabled();
+
         if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
             asym_fits_capacity(task_util, target))
                 return target;
@@ -6661,15 +6630,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
  {
         unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
         struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+       int cpu, best_energy_cpu = prev_cpu, target = -1;
         unsigned long cpu_cap, util, base_energy = 0;
-       int cpu, best_energy_cpu = prev_cpu;
         struct sched_domain *sd;
         struct perf_domain *pd;
  
         rcu_read_lock();
         pd = rcu_dereference(rd->pd);
         if (!pd || READ_ONCE(rd->overutilized))
-               goto fail;
+               goto unlock;
  
         /*
          * Energy-aware wake-up happens on the lowest sched_domain starting
@@ -6679,7 +6648,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
         while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
                 sd = sd->parent;
         if (!sd)
-               goto fail;
+               goto unlock;
+
+       target = prev_cpu;
  
         sync_entity_load_avg(&p->se);
         if (!task_util_est(p))
@@ -6687,13 +6658,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
  
         for (; pd; pd = pd->next) {
                 unsigned long cur_delta, spare_cap, max_spare_cap = 0;
+               bool compute_prev_delta = false;
                 unsigned long base_energy_pd;
                 int max_spare_cap_cpu = -1;
  
-               /* Compute the 'base' energy of the pd, without @p */
-               base_energy_pd = compute_energy(p, -1, pd);
-               base_energy += base_energy_pd;
-
                 for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
                         if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                 continue;
@@ -6714,26 +6682,40 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                         if (!fits_capacity(util, cpu_cap))
                                 continue;
  
-                       /* Always use prev_cpu as a candidate. */
                         if (cpu == prev_cpu) {
-                               prev_delta = compute_energy(p, prev_cpu, pd);
-                               prev_delta -= base_energy_pd;
-                               best_delta = min(best_delta, prev_delta);
-                       }
-
-                       /*
-                        * Find the CPU with the maximum spare capacity in
-                        * the performance domain
-                        */
-                       if (spare_cap > max_spare_cap) {
+                               /* Always use prev_cpu as a candidate. */
+                               compute_prev_delta = true;
+                       } else if (spare_cap > max_spare_cap) {
+                               /*
+                                * Find the CPU with the maximum spare capacity
+                                * in the performance domain.
+                                */
                                 max_spare_cap = spare_cap;
                                 max_spare_cap_cpu = cpu;
                         }
                 }
  
-               /* Evaluate the energy impact of using this CPU. */
-               if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
+               if (max_spare_cap_cpu < 0 && !compute_prev_delta)
+                       continue;
+
+               /* Compute the 'base' energy of the pd, without @p */
+               base_energy_pd = compute_energy(p, -1, pd);
+               base_energy += base_energy_pd;
+
+               /* Evaluate the energy impact of using prev_cpu. */
+               if (compute_prev_delta) {
+                       prev_delta = compute_energy(p, prev_cpu, pd);
+                       if (prev_delta < base_energy_pd)
+                               goto unlock;
+                       prev_delta -= base_energy_pd;
+                       best_delta = min(best_delta, prev_delta);
+               }
+
+               /* Evaluate the energy impact of using max_spare_cap_cpu. */
+               if (max_spare_cap_cpu >= 0) {
                         cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+                       if (cur_delta < base_energy_pd)
+                               goto unlock;
                         cur_delta -= base_energy_pd;
                         if (cur_delta < best_delta) {
                                 best_delta = cur_delta;
@@ -6741,25 +6723,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                         }
                 }
         }
-unlock:
         rcu_read_unlock();
  
         /*
          * Pick the best CPU if prev_cpu cannot be used, or if it saves at
          * least 6% of the energy used by prev_cpu.
          */
-       if (prev_delta == ULONG_MAX)
-               return best_energy_cpu;
+       if ((prev_delta == ULONG_MAX) ||
+           (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
+               target = best_energy_cpu;
  
-       if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
-               return best_energy_cpu;
-
-       return prev_cpu;
+       return target;
  
-fail:
+unlock:
         rcu_read_unlock();
  
-       return -1;
+       return target;
  }
  
  /*
@@ -6771,8 +6750,6 @@ fail:
   * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
   *
   * Returns the target CPU number.
- *
- * preempt must be disabled.
   */
  static int
  select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
@@ -6785,6 +6762,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
         /* SD_flags and WF_flags share the first nibble */
         int sd_flag = wake_flags & 0xF;
  
+       /*
+        * required for stable ->cpus_allowed
+        */
+       lockdep_assert_held(&p->pi_lock);
         if (wake_flags & WF_TTWU) {
                 record_wakee(p);
  
@@ -6874,7 +6855,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
                  * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
                  * rq->lock and can modify state directly.
                  */
-               lockdep_assert_held(&task_rq(p)->lock);
+               lockdep_assert_rq_held(task_rq(p));
                 detach_entity_cfs_rq(&p->se);
  
         } else {
@@ -7078,6 +7059,39 @@ preempt:
                 set_last_buddy(se);
  }
  
+#ifdef CONFIG_SMP
+static struct task_struct *pick_task_fair(struct rq *rq) /* task picked from rq->cfs, or NULL if it has nothing running */
+{
+       struct sched_entity *se;
+       struct cfs_rq *cfs_rq;
+
+again: /* every (re)start begins at the rq's root cfs_rq */
+       cfs_rq = &rq->cfs;
+       if (!cfs_rq->nr_running) /* root cfs_rq is empty: nothing to pick */
+               return NULL;
+
+       do { /* one iteration per hierarchy level, top-down */
+               struct sched_entity *curr = cfs_rq->curr;
+
+               /* When we pick for a remote RQ, we'll not have done put_prev_entity() */
+               if (curr) {
+                       if (curr->on_rq)
+                               update_curr(cfs_rq);
+                       else
+                               curr = NULL; /* curr is not queued: do not offer it to pick_next_entity() */
+
+                       if (unlikely(check_cfs_rq_runtime(cfs_rq))) /* NOTE(review): presumably cfs_rq ran out of runtime - confirm */
+                               goto again;
+               }
+
+               se = pick_next_entity(cfs_rq, curr);
+               cfs_rq = group_cfs_rq(se); /* runqueue owned by se; the loop ends when there is none */
+       } while (cfs_rq);
+
+       return task_of(se); /* se owns no runqueue here, so it is converted to its task */
+}
+#endif
+
  struct task_struct *
  pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  {
@@ -7501,7 +7515,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
  {
         s64 delta;
  
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
  
         if (p->sched_class != &fair_sched_class)
                 return 0;
@@ -7523,6 +7537,14 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
  
         if (sysctl_sched_migration_cost == -1)
                 return 1;
+
+       /*
+        * Don't migrate task if the task's cookie does not match
+        * with the destination CPU's core cookie.
+        */
+       if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p))
+               return 1;
+
         if (sysctl_sched_migration_cost == 0)
                 return 0;
  
@@ -7599,7 +7621,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
  {
         int tsk_cache_hot;
  
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
  
         /*
          * We do not migrate tasks that are:
@@ -7688,7 +7710,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
   */
  static void detach_task(struct task_struct *p, struct lb_env *env)
  {
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
  
         deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
         set_task_cpu(p, env->dst_cpu);
@@ -7704,7 +7726,7 @@ static struct task_struct *detach_one_task(struct lb_env *env)
  {
         struct task_struct *p;
  
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
  
         list_for_each_entry_reverse(p,
                         &env->src_rq->cfs_tasks, se.group_node) {
@@ -7740,7 +7762,7 @@ static int detach_tasks(struct lb_env *env)
         struct task_struct *p;
         int detached = 0;
  
-       lockdep_assert_held(&env->src_rq->lock);
+       lockdep_assert_rq_held(env->src_rq);
  
         /*
          * Source run queue has been emptied by another CPU, clear
@@ -7870,7 +7892,7 @@ next:
   */
  static void attach_task(struct rq *rq, struct task_struct *p)
  {
-       lockdep_assert_held(&rq->lock);
+       lockdep_assert_rq_held(rq);
  
         BUG_ON(task_rq(p) != rq);
         activate_task(rq, p, ENQUEUE_NOCLOCK);
@@ -8004,6 +8026,15 @@ static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
         if (cfs_rq->avg.runnable_sum)
                 return false;
  
+       /*
+        * _avg must be zero when _sum is zero, because _avg = _sum / divider.
+        * Make sure that rounding and/or propagation of PELT values never
+        * breaks this.
+        */
+       SCHED_WARN_ON(cfs_rq->avg.load_avg ||
+                     cfs_rq->avg.util_avg ||
+                     cfs_rq->avg.runnable_avg);
+
         return true;
  }
  
@@ -8030,7 +8061,7 @@ static bool __update_blocked_fair(struct rq *rq, bool *done)
                 /* Propagate pending load changes to the parent, if any: */
                 se = cfs_rq->tg->se[cpu];
                 if (se && !skip_blocked_update(se))
-                       update_load_avg(cfs_rq_of(se), se, 0);
+                       update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
  
                 /*
                  * There can be a lot of idle CPU cgroups.  Don't let fully
@@ -8853,6 +8884,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
                                         p->cpus_ptr))
                         continue;
  
+               /* Skip over this group if no cookie matched */
+               if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group))
+                       continue;
+
                 local_group = cpumask_test_cpu(this_cpu,
                                                sched_group_span(group));
  
@@ -9781,7 +9816,7 @@ more_balance:
                 if (need_active_balance(&env)) {
                         unsigned long flags;
  
-                       raw_spin_lock_irqsave(&busiest->lock, flags);
+                       raw_spin_rq_lock_irqsave(busiest, flags);
  
                         /*
                          * Don't kick the active_load_balance_cpu_stop,
@@ -9789,8 +9824,7 @@ more_balance:
                          * moved to this_cpu:
                          */
                         if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
-                               raw_spin_unlock_irqrestore(&busiest->lock,
-                                                           flags);
+                               raw_spin_rq_unlock_irqrestore(busiest, flags);
                                 goto out_one_pinned;
                         }
  
@@ -9807,7 +9841,7 @@ more_balance:
                                 busiest->push_cpu = this_cpu;
                                 active_balance = 1;
                         }
-                       raw_spin_unlock_irqrestore(&busiest->lock, flags);
+                       raw_spin_rq_unlock_irqrestore(busiest, flags);
  
                         if (active_balance) {
                                 stop_one_cpu_nowait(cpu_of(busiest),
@@ -10592,6 +10626,14 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
         u64 curr_cost = 0;
  
         update_misfit_status(NULL, this_rq);
+
+       /*
+        * There is a task waiting to run. No need to search for one.
+        * Return 0; the task will be enqueued when switching to idle.
+        */
+       if (this_rq->ttwu_pending)
+               return 0;
+
         /*
          * We must set idle_stamp _before_ calling idle_balance(), such that we
          * measure the duration of idle_balance() as idle time.
@@ -10624,7 +10666,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                 goto out;
         }
  
-       raw_spin_unlock(&this_rq->lock);
+       raw_spin_rq_unlock(this_rq);
  
         update_blocked_averages(this_cpu);
         rcu_read_lock();
@@ -10657,12 +10699,13 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
                  * Stop searching for tasks to pull if there are
                  * now runnable tasks on this rq.
                  */
-               if (pulled_task || this_rq->nr_running > 0)
+               if (pulled_task || this_rq->nr_running > 0 ||
+                   this_rq->ttwu_pending)
                         break;
         }
         rcu_read_unlock();
  
-       raw_spin_lock(&this_rq->lock);
+       raw_spin_rq_lock(this_rq);
  
         if (curr_cost > this_rq->max_idle_balance_cost)
                 this_rq->max_idle_balance_cost = curr_cost;
@@ -10755,6 +10798,119 @@ static void rq_offline_fair(struct rq *rq)
  
  #endif /* CONFIG_SMP */
  
+#ifdef CONFIG_SCHED_CORE
+static inline bool
+__entity_slice_used(struct sched_entity *se, int min_nr_tasks) /* true if @se ran longer than slice / @min_nr_tasks */
+{
+       u64 slice = sched_slice(cfs_rq_of(se), se); /* slice of @se on the cfs_rq it is queued on */
+       u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime; /* runtime since prev_sum_exec_runtime was recorded */
+
+       return (rtime * min_nr_tasks > slice); /* multiply rather than divide the slice */
+}
+
+#define MIN_NR_TASKS_DURING_FORCEIDLE  2 /* task count assumed across both siblings during force idle */
+static inline void task_tick_core(struct rq *rq, struct task_struct *curr) /* tick hook: may reschedule @rq during force idle */
+{
+       if (!sched_core_enabled(rq)) /* nothing to do unless core scheduling is enabled for @rq */
+               return;
+
+       /*
+        * If this runqueue has only one task, that task has used up its
+        * slice, and the sibling is forced idle, trigger a reschedule to
+        * give the forced-idle task a chance.
+        *
+        * sched_slice() considers only this active rq, so the lone task
+        * gets the whole slice. But during force idle the siblings act
+        * like a single runqueue, hence we need to consider runnable
+        * tasks on this CPU and the forced idle CPU. Ideally, we should
+        * go through the forced idle rq, but that would be a perf hit.
+        * Instead, assume that the forced idle CPU has at least
+        * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check
+        * if we need to give up the CPU.
+        */
+       if (rq->core->core_forceidle && rq->cfs.nr_running == 1 &&
+           __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
+               resched_curr(rq);
+}
+
+/*
+ * se_fi_update - Snapshot min_vruntime into min_vruntime_fi up @se's hierarchy, if needed.
+ */
+static void se_fi_update(struct sched_entity *se, unsigned int fi_seq, bool forceidle)
+{
+       for_each_sched_entity(se) { /* @se, then each parent entity in turn */
+               struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+               if (forceidle) {
+                       if (cfs_rq->forceidle_seq == fi_seq) /* already stamped with this sequence: stop the walk */
+                               break;
+                       cfs_rq->forceidle_seq = fi_seq;
+               }
+
+               cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime; /* reached on every level when !forceidle */
+       }
+}
+
+void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi) /* run se_fi_update() for @p's entity */
+{
+       struct sched_entity *se = &p->se;
+
+       if (p->sched_class != &fair_sched_class) /* only tasks of the fair class are handled */
+               return;
+
+       se_fi_update(se, rq->core->core_forceidle_seq, in_fi); /* sequence number comes from @rq's core */
+}
+
+bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) /* true if @a's normalized vruntime exceeds @b's */
+{
+       struct rq *rq = task_rq(a);
+       struct sched_entity *sea = &a->se;
+       struct sched_entity *seb = &b->se;
+       struct cfs_rq *cfs_rqa;
+       struct cfs_rq *cfs_rqb;
+       s64 delta;
+
+       SCHED_WARN_ON(task_rq(b)->core != rq->core); /* both tasks are expected on runqueues of the same core */
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       /*
+        * Find an se in the hierarchy for tasks a and b, such that the se's
+        * are immediate siblings.
+        */
+       while (sea->cfs_rq->tg != seb->cfs_rq->tg) {
+               int sea_depth = sea->depth; /* sample both depths first so both tests below use the same values */
+               int seb_depth = seb->depth;
+
+               if (sea_depth >= seb_depth) /* a's entity is at least as deep: move it up */
+                       sea = parent_entity(sea);
+               if (sea_depth <= seb_depth) /* on equal depth both entities move up */
+                       seb = parent_entity(seb);
+       }
+
+       se_fi_update(sea, rq->core->core_forceidle_seq, in_fi);
+       se_fi_update(seb, rq->core->core_forceidle_seq, in_fi);
+
+       cfs_rqa = sea->cfs_rq;
+       cfs_rqb = seb->cfs_rq;
+#else /* !CONFIG_FAIR_GROUP_SCHED */
+       cfs_rqa = &task_rq(a)->cfs;
+       cfs_rqb = &task_rq(b)->cfs;
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
+       /*
+        * Find delta after normalizing se's vruntime with its cfs_rq's
+        * min_vruntime_fi, which would have been updated in prior calls
+        * to se_fi_update().
+        */
+       delta = (s64)(sea->vruntime - seb->vruntime) +
+               (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi);
+
+       return delta > 0; /* i.e. (a - min_fi_a) > (b - min_fi_b) */
+}
+#else
+static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {} /* !CONFIG_SCHED_CORE: no-op */
+#endif
+
  /*
   * scheduler tick hitting a task of our scheduling class.
   *
@@ -10778,6 +10934,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
  
         update_misfit_status(curr, rq);
         update_overutilized_status(task_rq(curr));
+
+       task_tick_core(rq, curr);
  }
  
  /*
@@ -11149,9 +11307,9 @@ void unregister_fair_sched_group(struct task_group *tg)
  
                 rq = cpu_rq(cpu);
  
-               raw_spin_lock_irqsave(&rq->lock, flags);
+               raw_spin_rq_lock_irqsave(rq, flags);
                 list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
-               raw_spin_unlock_irqrestore(&rq->lock, flags);
+               raw_spin_rq_unlock_irqrestore(rq, flags);
         }
  }
  
@@ -11273,6 +11431,7 @@ DEFINE_SCHED_CLASS(fair) = {
  
  #ifdef CONFIG_SMP
         .balance                = balance_fair,
+       .pick_task              = pick_task_fair,
         .select_task_rq         = select_task_rq_fair,
         .migrate_task_rq        = migrate_task_rq_fair,