sched/fair: Define sched_idle_cpu() only for SMP configurations
[linux-2.6-microblaze.git] kernel/sched/fair.c
index ba749f5..fe4e0d7 100644
@@ -801,7 +801,7 @@ void post_init_entity_util_avg(struct task_struct *p)
                 * For !fair tasks do:
                 *
                update_cfs_rq_load_avg(now, cfs_rq);
-               attach_entity_load_avg(cfs_rq, se, 0);
+               attach_entity_load_avg(cfs_rq, se);
                switched_from_fair(rq, p);
                 *
                 * such that the next switched_to_fair() has the
@@ -3114,7 +3114,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
 {
        struct rq *rq = rq_of(cfs_rq);
 
-       if (&rq->cfs == cfs_rq || (flags & SCHED_CPUFREQ_MIGRATION)) {
+       if (&rq->cfs == cfs_rq) {
                /*
                 * There are a few boundary cases this might miss but it should
                 * get called often enough that that should (hopefully) not be
@@ -3366,16 +3366,17 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
 
        runnable_load_sum = (s64)se_runnable(se) * runnable_sum;
        runnable_load_avg = div_s64(runnable_load_sum, LOAD_AVG_MAX);
-       delta_sum = runnable_load_sum - se_weight(se) * se->avg.runnable_load_sum;
-       delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
-
-       se->avg.runnable_load_sum = runnable_sum;
-       se->avg.runnable_load_avg = runnable_load_avg;
 
        if (se->on_rq) {
+               delta_sum = runnable_load_sum -
+                               se_weight(se) * se->avg.runnable_load_sum;
+               delta_avg = runnable_load_avg - se->avg.runnable_load_avg;
                add_positive(&cfs_rq->avg.runnable_load_avg, delta_avg);
                add_positive(&cfs_rq->avg.runnable_load_sum, delta_sum);
        }
+
+       se->avg.runnable_load_sum = runnable_sum;
+       se->avg.runnable_load_avg = runnable_load_avg;
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
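
The reordering above matters because the group cfs_rq aggregates only account for enqueued entities: the deltas have to be computed against the entity's old contribution before it is overwritten, and applied only while se->on_rq. A minimal user-space sketch of that constraint (invented struct and field names, not kernel code):

#include <stdio.h>

struct child { long contrib; int on_rq; };
struct parent { long total; };

static void update_child(struct parent *p, struct child *c, long new_contrib)
{
	if (c->on_rq) {
		long delta = new_contrib - c->contrib;	/* old value still intact */
		p->total += delta;			/* keep aggregate in sync */
	}
	c->contrib = new_contrib;			/* now safe to overwrite */
}

int main(void)
{
	struct parent p = { .total = 10 };
	struct child c = { .contrib = 10, .on_rq = 1 };

	update_child(&p, &c, 4);
	printf("parent total: %ld\n", p.total);		/* 4, not 10 */

	c.on_rq = 0;					/* dequeued child: parent untouched */
	update_child(&p, &c, 8);
	printf("parent total: %ld\n", p.total);		/* still 4 */
	return 0;
}
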
@@ -3520,7 +3521,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
  * Must call update_cfs_rq_load_avg() before this, since we rely on
  * cfs_rq->avg.last_update_time being current.
  */
-static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
        u32 divider = LOAD_AVG_MAX - 1024 + cfs_rq->avg.period_contrib;
 
@@ -3556,7 +3557,7 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 
        add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
 
-       cfs_rq_util_change(cfs_rq, flags);
+       cfs_rq_util_change(cfs_rq, 0);
 
        trace_pelt_cfs_tp(cfs_rq);
 }
@@ -3614,7 +3615,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
                 *
                 * IOW we're enqueueing a task on a new CPU.
                 */
-               attach_entity_load_avg(cfs_rq, se, SCHED_CPUFREQ_MIGRATION);
+               attach_entity_load_avg(cfs_rq, se);
                update_tg_load_avg(cfs_rq, 0);
 
        } else if (decayed) {
@@ -3711,6 +3712,20 @@ static inline unsigned long task_util_est(struct task_struct *p)
        return max(task_util(p), _task_util_est(p));
 }
 
+#ifdef CONFIG_UCLAMP_TASK
+static inline unsigned long uclamp_task_util(struct task_struct *p)
+{
+       return clamp(task_util_est(p),
+                    uclamp_eff_value(p, UCLAMP_MIN),
+                    uclamp_eff_value(p, UCLAMP_MAX));
+}
+#else
+static inline unsigned long uclamp_task_util(struct task_struct *p)
+{
+       return task_util_est(p);
+}
+#endif
+
 static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
                                    struct task_struct *p)
 {
@@ -3822,7 +3837,7 @@ done:
 
 static inline int task_fits_capacity(struct task_struct *p, long capacity)
 {
-       return fits_capacity(task_util_est(p), capacity);
+       return fits_capacity(uclamp_task_util(p), capacity);
 }
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
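
task_fits_capacity() now feeds a clamped utilization into fits_capacity(), so a task whose uclamp minimum exceeds a little CPU's capacity is flagged as misfit even when its PELT estimate is tiny. A user-space sketch of the arithmetic, assuming the ~80% headroom factor fits_capacity() uses elsewhere in fair.c; all utilization numbers below are invented:

#include <stdio.h>

#define clamp(val, lo, hi)	((val) < (lo) ? (lo) : (val) > (hi) ? (hi) : (val))
#define fits_capacity(cap, max)	((cap) * 1280 < (max) * 1024)	/* cap < ~80% of max */

int main(void)
{
	unsigned long util_est = 150;			/* raw PELT-style estimate */
	unsigned long uc_min = 400, uc_max = 1024;	/* task's uclamp range */
	unsigned long cpu_cap = 446;			/* e.g. a little CPU */

	unsigned long boosted = clamp(util_est, uc_min, uc_max);

	printf("raw estimate fits:     %d\n", fits_capacity(util_est, cpu_cap));	/* 1 */
	printf("clamped estimate fits: %d\n", fits_capacity(boosted, cpu_cap));	/* 0 */
	return 0;
}
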
@@ -3857,7 +3872,7 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
 static inline void remove_entity_load_avg(struct sched_entity *se) {}
 
 static inline void
-attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) {}
+attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 static inline void
 detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
 
@@ -5196,6 +5211,20 @@ static inline void update_overutilized_status(struct rq *rq)
 static inline void update_overutilized_status(struct rq *rq) { }
 #endif
 
+/* Runqueue only has SCHED_IDLE tasks enqueued */
+static int sched_idle_rq(struct rq *rq)
+{
+       return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
+                       rq->nr_running);
+}
+
+#ifdef CONFIG_SMP
+static int sched_idle_cpu(int cpu)
+{
+       return sched_idle_rq(cpu_rq(cpu));
+}
+#endif
+
 /*
  * The enqueue_task method is called before nr_running is
  * increased. Here we update the fair scheduling stats and
@@ -5310,6 +5339,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        struct sched_entity *se = &p->se;
        int task_sleep = flags & DEQUEUE_SLEEP;
        int idle_h_nr_running = task_has_idle_policy(p);
+       bool was_sched_idle = sched_idle_rq(rq);
 
        for_each_sched_entity(se) {
                cfs_rq = cfs_rq_of(se);
@@ -5356,6 +5386,10 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
        if (!se)
                sub_nr_running(rq, 1);
 
+       /* balance early to pull high priority tasks */
+       if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
+               rq->next_balance = jiffies;
+
        util_est_dequeue(&rq->cfs, p, task_sleep);
        hrtick_update(rq);
 }
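
The dequeue path samples sched_idle_rq() before removing the task so it can detect the moment a runqueue degrades to running only SCHED_IDLE tasks, and then pulls next_balance forward so the next tick rebalances immediately. An illustrative user-space model of that transition (fake_rq and its fields are invented):

#include <stdbool.h>
#include <stdio.h>

struct fake_rq {
	unsigned int nr_running;
	unsigned int idle_h_nr_running;
	unsigned long next_balance;
};

static bool rq_sched_idle(const struct fake_rq *rq)
{
	return rq->nr_running && rq->nr_running == rq->idle_h_nr_running;
}

static void dequeue_one(struct fake_rq *rq, bool idle_policy, unsigned long now)
{
	bool was_sched_idle = rq_sched_idle(rq);

	rq->nr_running--;
	if (idle_policy)
		rq->idle_h_nr_running--;

	/* rebalance early only on the !sched-idle -> sched-idle transition */
	if (!was_sched_idle && rq_sched_idle(rq))
		rq->next_balance = now;
}

int main(void)
{
	struct fake_rq rq = { .nr_running = 2, .idle_h_nr_running = 1,
			      .next_balance = 1000 };

	dequeue_one(&rq, false, 42);	/* the last non-idle-policy task leaves */
	printf("next_balance = %lu\n", rq.next_balance);	/* 42 */
	return 0;
}
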
@@ -5378,15 +5412,6 @@ static struct {
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
-/* CPU only has SCHED_IDLE tasks enqueued */
-static int sched_idle_cpu(int cpu)
-{
-       struct rq *rq = cpu_rq(cpu);
-
-       return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
-                       rq->nr_running);
-}
-
 static unsigned long cpu_load(struct rq *rq)
 {
        return cfs_rq_load_avg(&rq->cfs);
@@ -5588,7 +5613,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
        unsigned int min_exit_latency = UINT_MAX;
        u64 latest_idle_timestamp = 0;
        int least_loaded_cpu = this_cpu;
-       int shallowest_idle_cpu = -1, si_cpu = -1;
+       int shallowest_idle_cpu = -1;
        int i;
 
        /* Check if we have any choice: */
@@ -5597,6 +5622,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
        /* Traverse only the allowed CPUs */
        for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
+               if (sched_idle_cpu(i))
+                       return i;
+
                if (available_idle_cpu(i)) {
                        struct rq *rq = cpu_rq(i);
                        struct cpuidle_state *idle = idle_get_state(rq);
@@ -5619,12 +5647,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                                latest_idle_timestamp = rq->idle_stamp;
                                shallowest_idle_cpu = i;
                        }
-               } else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
-                       if (sched_idle_cpu(i)) {
-                               si_cpu = i;
-                               continue;
-                       }
-
+               } else if (shallowest_idle_cpu == -1) {
                        load = cpu_load(cpu_rq(i));
                        if (load < min_load) {
                                min_load = load;
@@ -5633,11 +5656,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
                }
        }
 
-       if (shallowest_idle_cpu != -1)
-               return shallowest_idle_cpu;
-       if (si_cpu != -1)
-               return si_cpu;
-       return least_loaded_cpu;
+       return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
 
 static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
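
After this change the selection order in find_idlest_group_cpu() is: take a sched-idle CPU as soon as one is seen, otherwise prefer the shallowest-idle CPU, and fall back to the least-loaded CPU only when nothing is idle. A simplified user-space sketch of that ordering (it omits the idle-timestamp tie-break; all CPU data below is invented):

#include <stdio.h>

#define NCPUS 4

struct cpu_info {
	int sched_idle;			/* runs only SCHED_IDLE tasks */
	int idle;			/* available-idle */
	unsigned int exit_latency;	/* idle-state exit cost */
	unsigned long load;
};

static int pick_cpu(const struct cpu_info *c, int n)
{
	int shallowest_idle = -1, least_loaded = 0;
	unsigned int min_exit = ~0u;
	unsigned long min_load = ~0ul;

	for (int i = 0; i < n; i++) {
		if (c[i].sched_idle)
			return i;	/* preempts only idle-policy tasks */
		if (c[i].idle) {
			if (c[i].exit_latency < min_exit) {
				min_exit = c[i].exit_latency;
				shallowest_idle = i;
			}
		} else if (shallowest_idle == -1 && c[i].load < min_load) {
			min_load = c[i].load;
			least_loaded = i;
		}
	}
	return shallowest_idle != -1 ? shallowest_idle : least_loaded;
}

int main(void)
{
	struct cpu_info cpus[NCPUS] = {
		{ 0, 0, 0, 400 },
		{ 0, 1, 50, 0 },	/* idle, deep state */
		{ 0, 1, 10, 0 },	/* idle, shallow state */
		{ 1, 0, 0, 100 },	/* sched-idle */
	};

	printf("picked CPU %d\n", pick_cpu(cpus, NCPUS));	/* 3 */
	return 0;
}
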
@@ -5790,7 +5809,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, int target)
 {
-       int cpu, si_cpu = -1;
+       int cpu;
 
        if (!static_branch_likely(&sched_smt_present))
                return -1;
@@ -5798,13 +5817,11 @@ static int select_idle_smt(struct task_struct *p, int target)
        for_each_cpu(cpu, cpu_smt_mask(target)) {
                if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                        continue;
-               if (available_idle_cpu(cpu))
+               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
                        return cpu;
-               if (si_cpu == -1 && sched_idle_cpu(cpu))
-                       si_cpu = cpu;
        }
 
-       return si_cpu;
+       return -1;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -5828,12 +5845,13 @@ static inline int select_idle_smt(struct task_struct *p, int target)
  */
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
+       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
        struct sched_domain *this_sd;
        u64 avg_cost, avg_idle;
        u64 time, cost;
        s64 delta;
        int this = smp_processor_id();
-       int cpu, nr = INT_MAX, si_cpu = -1;
+       int cpu, nr = INT_MAX;
 
        this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
        if (!this_sd)
@@ -5859,15 +5877,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
        time = cpu_clock(this);
 
-       for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+       cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+
+       for_each_cpu_wrap(cpu, cpus, target) {
                if (!--nr)
-                       return si_cpu;
-               if (!cpumask_test_cpu(cpu, p->cpus_ptr))
-                       continue;
-               if (available_idle_cpu(cpu))
+                       return -1;
+               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
                        break;
-               if (si_cpu == -1 && sched_idle_cpu(cpu))
-                       si_cpu = cpu;
        }
 
        time = cpu_clock(this) - time;
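
select_idle_cpu() now intersects the LLC span with the task's affinity mask once, so the bounded scan no longer tests p->cpus_ptr on every iteration. A rough user-space sketch with plain bitmasks standing in for struct cpumask (the helper and its arguments are made up):

#include <stdio.h>

static int scan_for_idle(unsigned long llc_span, unsigned long affinity,
			 const int *cpu_is_idle, int nr_budget)
{
	unsigned long cpus = llc_span & affinity;	/* one AND up front */

	for (int cpu = 0; cpu < (int)(8 * sizeof(cpus)); cpu++) {
		if (!(cpus & (1ul << cpu)))
			continue;
		if (!--nr_budget)
			return -1;			/* scan cost stays capped */
		if (cpu_is_idle[cpu])
			return cpu;
	}
	return -1;
}

int main(void)
{
	int idle[64] = { [5] = 1, [9] = 1 };

	/* CPUs 0-15 share the LLC, but the task may only run on CPUs 8-11 */
	printf("%d\n", scan_for_idle(0xfffful, 0xf00ul, idle, 4));	/* 9 */
	return 0;
}
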
@@ -6268,9 +6284,18 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                        if (!cpumask_test_cpu(cpu, p->cpus_ptr))
                                continue;
 
-                       /* Skip CPUs that will be overutilized. */
                        util = cpu_util_next(cpu, p, cpu);
                        cpu_cap = capacity_of(cpu);
+                       spare_cap = cpu_cap - util;
+
+                       /*
+                        * Skip CPUs that cannot satisfy the capacity request.
+                        * IOW, placing the task there would make the CPU
+                        * overutilized. Take uclamp into account to see how
+                        * much capacity we can get out of the CPU; this is
+                        * aligned with schedutil_cpu_util().
+                        */
+                       util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
                        if (!fits_capacity(util, cpu_cap))
                                continue;
 
@@ -6285,7 +6310,6 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
                         * Find the CPU with the maximum spare capacity in
                         * the performance domain
                         */
-                       spare_cap = cpu_cap - util;
                        if (spare_cap > max_spare_cap) {
                                max_spare_cap = spare_cap;
                                max_spare_cap_cpu = cpu;
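
In find_energy_efficient_cpu() the spare capacity is still derived from the raw utilization, but the fit test uses a uclamp-adjusted value, so a heavily boosted (or capped) workload is not parked on a CPU it would immediately overutilize. A worked user-space sketch of that asymmetry; it models only the min clamp and uses invented numbers:

#include <stdio.h>

#define fits_capacity(cap, max)	((cap) * 1280 < (max) * 1024)

int main(void)
{
	unsigned long cpu_cap = 446;		/* little CPU */
	unsigned long util = 120;		/* raw util with the task placed there */
	unsigned long uclamp_min = 512;		/* rq-wide clamp incl. the waking task */

	long spare_cap = cpu_cap - util;	/* still computed from raw util: 326 */
	unsigned long clamped = util < uclamp_min ? uclamp_min : util;

	printf("spare_cap=%ld fits=%d\n", spare_cap, fits_capacity(clamped, cpu_cap));
	/* spare_cap=326 fits=0 -> CPU skipped despite ample raw headroom */
	return 0;
}
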
@@ -7780,29 +7804,11 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                 */
 
                for_each_cpu(cpu, sched_group_span(sdg)) {
-                       struct sched_group_capacity *sgc;
-                       struct rq *rq = cpu_rq(cpu);
+                       unsigned long cpu_cap = capacity_of(cpu);
 
-                       /*
-                        * build_sched_domains() -> init_sched_groups_capacity()
-                        * gets here before we've attached the domains to the
-                        * runqueues.
-                        *
-                        * Use capacity_of(), which is set irrespective of domains
-                        * in update_cpu_capacity().
-                        *
-                        * This avoids capacity from being 0 and
-                        * causing divide-by-zero issues on boot.
-                        */
-                       if (unlikely(!rq->sd)) {
-                               capacity += capacity_of(cpu);
-                       } else {
-                               sgc = rq->sd->groups->sgc;
-                               capacity += sgc->capacity;
-                       }
-
-                       min_capacity = min(capacity, min_capacity);
-                       max_capacity = max(capacity, max_capacity);
+                       capacity += cpu_cap;
+                       min_capacity = min(cpu_cap, min_capacity);
+                       max_capacity = max(cpu_cap, max_capacity);
                }
        } else  {
                /*
@@ -8168,14 +8174,18 @@ static bool update_sd_pick_busiest(struct lb_env *env,
 
        case group_has_spare:
                /*
-                * Select not overloaded group with lowest number of
-                * idle cpus. We could also compare the spare capacity
-                * which is more stable but it can end up that the
-                * group has less spare capacity but finally more idle
+                * Select not overloaded group with lowest number of idle cpus
+                * and highest number of running tasks. We could also compare
+                * the spare capacity which is more stable but it can end up
+                * that the group has less spare capacity but finally more idle
                 * CPUs which means less opportunity to pull tasks.
                 */
-               if (sgs->idle_cpus >= busiest->idle_cpus)
+               if (sgs->idle_cpus > busiest->idle_cpus)
+                       return false;
+               else if ((sgs->idle_cpus == busiest->idle_cpus) &&
+                        (sgs->sum_nr_running <= busiest->sum_nr_running))
                        return false;
+
                break;
        }
 
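The group_has_spare case now breaks ties explicitly: a candidate replaces the current busiest group only if it has fewer idle CPUs, or the same number of idle CPUs but more running tasks. A small user-space sketch of that comparator (struct g_stats and the sample values are invented):

#include <stdio.h>

struct g_stats { unsigned int idle_cpus, sum_nr_running; };

/* nonzero when candidate "a" should replace the current busiest "b" */
static int pick_over(const struct g_stats *a, const struct g_stats *b)
{
	if (a->idle_cpus > b->idle_cpus)
		return 0;
	if (a->idle_cpus == b->idle_cpus &&
	    a->sum_nr_running <= b->sum_nr_running)
		return 0;
	return 1;
}

int main(void)
{
	struct g_stats busiest = { .idle_cpus = 2, .sum_nr_running = 4 };
	struct g_stats same_idle_more_tasks = { .idle_cpus = 2, .sum_nr_running = 6 };
	struct g_stats more_idle = { .idle_cpus = 3, .sum_nr_running = 9 };

	printf("%d %d\n", pick_over(&same_idle_more_tasks, &busiest),	/* 1 */
	       pick_over(&more_idle, &busiest));			/* 0 */
	return 0;
}
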
@@ -9529,6 +9539,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
 {
        int continue_balancing = 1;
        int cpu = rq->cpu;
+       int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
        unsigned long interval;
        struct sched_domain *sd;
        /* Earliest time when we have to do rebalance again */
@@ -9565,7 +9576,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                        break;
                }
 
-               interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+               interval = get_sd_balance_interval(sd, busy);
 
                need_serialize = sd->flags & SD_SERIALIZE;
                if (need_serialize) {
@@ -9581,9 +9592,10 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
                                 * state even if we migrated tasks. Update it.
                                 */
                                idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
+                               busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
                        }
                        sd->last_balance = jiffies;
-                       interval = get_sd_balance_interval(sd, idle != CPU_IDLE);
+                       interval = get_sd_balance_interval(sd, busy);
                }
                if (need_serialize)
                        spin_unlock(&balancing);
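
Treating a CPU that runs only SCHED_IDLE tasks as not busy keeps its balance interval short, so it pulls higher-priority work about as eagerly as a fully idle CPU. A rough sketch of the assumed interval scaling (the busy-factor shape is an assumption, not the exact get_sd_balance_interval() code, which also converts to jiffies and clamps):

#include <stdio.h>

/* assumed shape: busy CPUs stretch the base interval by a busy factor */
static unsigned long balance_interval(unsigned long base_ms,
				      unsigned int busy_factor, int busy)
{
	return busy ? base_ms * busy_factor : base_ms;
}

int main(void)
{
	int cpu_idle = 0, cpu_sched_idle = 1;
	int busy = !cpu_idle && !cpu_sched_idle;	/* sched-idle => not busy */

	printf("interval=%lu ms\n", balance_interval(8, 32, busy));	/* 8, not 256 */
	return 0;
}
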
@@ -10333,6 +10345,9 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
        if (!task_on_rq_queued(p))
                return;
 
+       if (rq->cfs.nr_running == 1)
+               return;
+
        /*
         * Reschedule if we are currently running on this runqueue and
         * our priority decreased, or if we are not currently running on
@@ -10423,7 +10438,7 @@ static void attach_entity_cfs_rq(struct sched_entity *se)
 
        /* Synchronize entity with its cfs_rq */
        update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
-       attach_entity_load_avg(cfs_rq, se, 0);
+       attach_entity_load_avg(cfs_rq, se);
        update_tg_load_avg(cfs_rq, false);
        propagate_entity_cfs_rq(se);
 }